codicent-app-sdk 0.3.102 → 0.3.103

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,7 @@ export interface RealtimeVoice {
31
31
  items: ItemType[];
32
32
  realtimeEvents: RealtimeEvent[];
33
33
  isConnected: boolean;
34
+ isSessionReady: boolean;
34
35
  canPushToTalk: boolean;
35
36
  isRecording: boolean;
36
37
  clientCanvasRef: React.RefObject<HTMLCanvasElement>;
@@ -1 +1 @@
1
- {"version":3,"file":"useRealtimeVoiceAI.d.ts","sourceRoot":"","sources":["../../../src/hooks/useRealtimeVoiceAI.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2CAA2C,CAAC;AAK/E,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,SAAS,CAAC,EAAE;QACV,KAAK,CAAC,EAAE,UAAU,CAAC;QACnB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,GAAG,CAAC;KACZ,CAAC;CACH;AAED;;GAEG;AACH,UAAU,aAAa;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAE,CAAC;CAC/B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,WAAW,EAAE,OAAO,CAAC;IACrB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,OAAO,CAAC;IACrB,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IACjD,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,mBAAmB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,sBAAsB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5C,sBAAsB,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,cAAc,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,aAAa,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,iBAAiB,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACpD,gBAAgB,EAAE,MAAM,MAAM,CAAC;IAC/B,cAAc,EAAE,MAAM,MAAM,CAAC;IAC7B,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,YAAY,EAAE,MAAM,KAAK,IAAI,CAAC;IACnD,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;CACzC;AAED,QAAA,MAAM,kBAAkB,oBACL,eAAe,UACxB,MAAM,SACP;IAAE,UAAU,EAAE,kBAAkB,CAAC;IAAC,OAAO,EAAE,QAAQ,CAAA;CAAE,EAAE,UACtD,MAAM,KACb,aAAa,GAAG,SA0pBlB,CAAC;AAEF,eAAe,kBAAkB,CAAC"}
1
+ {"version":3,"file":"useRealtimeVoiceAI.d.ts","sourceRoot":"","sources":["../../../src/hooks/useRealtimeVoiceAI.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2CAA2C,CAAC;AAK/E,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,SAAS,CAAC,EAAE;QACV,KAAK,CAAC,EAAE,UAAU,CAAC;QACnB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,GAAG,CAAC;KACZ,CAAC;CACH;AAED;;GAEG;AACH,UAAU,aAAa;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAE,CAAC;CAC/B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,WAAW,EAAE,OAAO,CAAC;IACrB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,OAAO,CAAC;IACrB,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IACjD,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,mBAAmB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,sBAAsB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5C,sBAAsB,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,cAAc,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,aAAa,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,iBAAiB,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACpD,gBAAgB,EAAE,MAAM,MAAM,CAAC;IAC/B,cAAc,EAAE,MAAM,MAAM,CAAC;IAC7B,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,YAAY,EAAE,MAAM,KAAK,IAAI,CAAC;IACnD,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;CACzC;AAED,QAAA,MAAM,kBAAkB,oBACL,eAAe,UACxB,MAAM,SACP;IAAE,UAAU,EAAE,kBAAkB,CAAC;IAAC,OAAO,EAAE,QAAQ,CAAA;CAAE,EAAE,UACtD,MAAM,KACb,aAAa,GAAG,SA8qBlB,CAAC;AAEF,eAAe,kBAAkB,CAAC"}
@@ -1 +1 @@
1
- "use strict";Object.defineProperty(exports,"__esModule",{value:!0});var e=require("react"),t=require("../utils/wav_renderer.js");require("../lib/wavtools/lib/wav_packer.js"),require("../lib/wavtools/lib/analysis/audio_analysis.js");var n=require("../lib/wavtools/lib/wav_stream_player.js"),r=require("../lib/wavtools/lib/wav_recorder.js"),a=require("../config/index.js");exports.default=(o,s,i,c)=>{const l=!!a.getConfigValue("APP_CONFIG"),u=!!a.getConfigValue("APP_BUTTONS");l||console.warn("APP_CONFIG is not set. Voice AI will not be available."),u||console.warn("APP_BUTTONS is not set. Voice AI will not be available.");const d=a.getConfigValue("APP_CONFIG"),p=a.getConfigValue("APP_BUTTONS");a.getConfigValue("API_BASE_URL").replace(/\/$/,""),a.getConfigValue("USE_REALTIME_SESSION_ENDPOINT"),a.getConfigValue("REALTIME_SESSION_ENDPOINT");const f=c||a.getConfigValue("REALTIME_VOICE_MODEL")||"alloy",g=["alloy","shimmer","echo"],m=g.includes(f)?f:"alloy";f!==m&&console.warn(`[codicent-app-sdk] Voice "${f}" is not supported in the current SDK version. Supported voices: ${g.join(", ")}. Falling back to "${m}".`);const y=e.useRef(new r.WavRecorder({sampleRate:24e3})),_=e.useRef(new n.WavStreamPlayer({sampleRate:24e3})),S=e.useRef(null),w=e.useRef(null),v=e.useRef(null),h=e.useRef(null),O=e.useRef(!1),C=e.useRef(null),R=e.useRef(null),I=e.useRef(null),T=e.useRef((new Date).toISOString()),[N,E]=e.useState([]),[b,A]=e.useState([]),[P,k]=e.useState(!1),[V,D]=e.useState(!1),[M,x]=e.useState(!1),F=e.useRef(0),J=e.useRef(0),[$,L]=e.useState(""),[j,q]=e.useState("en-US"),U=e.useRef(new Map),[B,W]=e.useState((()=>l&&u&&p&&d&&d.apps&&d.apps[p]?d.apps[p].voiceInstructions||d.REALTIME_VOICE_INSTRUCTIONS||"":d&&d.REALTIME_VOICE_INSTRUCTIONS||"")),G=e.useCallback((e=>{const t=T.current,n=new Date(t).valueOf(),r=new Date(e).valueOf()-n,a=Math.floor(r/10)%100,o=Math.floor(r/1e3)%60,s=e=>{let t=e+"";for(;t.length<2;)t="0"+t;return t};return`${s(Math.floor(r/6e4)%60)}:${s(o)}.${s(a)}`}),[]),H=e.useCallback((async()=>{try{T.current=(new Date).toISOString(),k(!0),A([]),E([]),U.current.clear();const e=await o.getRealtimeSessionToken(m);if(!e)throw new Error("No ephemeral key returned from session endpoint");const t=new RTCPeerConnection;S.current=t,v.current||(v.current=new Audio,v.current.autoplay=!0),t.ontrack=e=>{v.current&&e.streams[0]&&(v.current.srcObject=e.streams[0])};const n=await navigator.mediaDevices.getUserMedia({audio:!0});h.current=n;const r=n.getTracks()[0];t.addTrack(r,n);const s=t.createDataChannel("oai-events");w.current=s,s.addEventListener("message",(e=>{try{const t=JSON.parse(e.data);if("session.created"===t.type);else if("conversation.item.created"===t.type)E((e=>[...e,t.item]));else if("conversation.item.input_audio_transcription.completed"===t.type)E((e=>{const n=[...e],r=n.findIndex((e=>e.id===t.item_id));return-1!==r&&n[r].formatted&&(n[r].formatted.transcript=t.transcript),n}));else if("response.audio_transcript.delta"===t.type);else if("response.audio_transcript.done"===t.type);else if("response.output_item.added"===t.type){const e=t.item;"function_call"===e?.type&&(console.log("[Voice AI] Function call initiated:",e.name),U.current.set(e.id,{name:e.name||"",arguments:"",call_id:e.call_id||e.id}))}else if("response.function_call_arguments.delta"===t.type){const e=t.item_id,n=t.delta,r=U.current.get(e);r&&n&&(r.arguments+=n,U.current.set(e,r))}else if("response.function_call_arguments.done"===t.type){const e=t.item_id,n=U.current.get(e);if(n){console.log(`[Voice AI] Executing tool: ${n.name}`);const t=i.find((e=>e.definition.name===n.name));if(t){let e={};try{e=n.arguments?JSON.parse(n.arguments):{}}catch(t){console.error("[Voice AI] Failed to parse tool arguments:",t),e={}}Promise.resolve(t.handler(e)).then((e=>{console.log(`[Voice AI] Tool ${n.name} completed:`,e),s&&"open"===s.readyState&&(s.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify(e)}})),s.send(JSON.stringify({type:"response.create"})))})).catch((e=>{console.error(`[Voice AI] Tool ${n.name} failed:`,e),s&&"open"===s.readyState&&(s.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify({error:e.message||"Tool execution failed"})}})),s.send(JSON.stringify({type:"response.create"})))}))}else console.error(`[Voice AI] Tool not found: ${n.name}`);U.current.delete(e)}}else"error"===t.type&&console.error("Server error:",t.error)}catch(t){console.warn("Invalid message:",e.data)}})),s.onopen=()=>{console.log("Data channel opened, configuring session"),s.send(JSON.stringify({type:"session.update",session:{instructions:B.replace("{{name}}",$).replace("{{language}}",j).replace("{{time}}",(new Date).toISOString()),modalities:["text","audio"],input_audio_transcription:{model:"whisper-1"},turn_detection:{type:"server_vad",threshold:.5,prefix_padding_ms:300,silence_duration_ms:500},voice:m,temperature:.8,max_response_output_tokens:4096,input_audio_format:"pcm16",output_audio_format:"pcm16",tools:i.map((e=>({type:"function",...e.definition})))}}))},s.onclose=()=>{console.log("Data channel closed")};const c=await t.createOffer();await t.setLocalDescription(c);let l="gpt-4o-realtime-preview";try{if(a.getConfigValue("REALTIME_CONFIG_ENDPOINT")){const e=await o.getRealtimeConfig();e&&e.model&&(l=e.model)}}catch(e){console.warn("Failed to fetch realtime config, using default model:",e)}const u="https://api.openai.com/v1/realtime",d=await fetch(`${u}?model=${l}`,{method:"POST",body:c.sdp,headers:{Authorization:`Bearer ${e}`,"Content-Type":"application/sdp"}});if(!d.ok)throw new Error(`Failed to get SDP answer: ${d.statusText}`);const p={type:"answer",sdp:await d.text()};await t.setRemoteDescription(p),D(!1);const f=y.current,g=_.current;await f.begin(),await g.connect()}catch(e){throw console.error("[codicent-app-sdk] Failed to establish WebRTC connection:",e),k(!1),e}}),[o,m,B,$,j,i]),z=e.useCallback((async()=>{k(!1),A([]),E([]),U.current.clear(),w.current&&(w.current.close(),w.current=null),S.current&&(S.current.close(),S.current=null),h.current&&(h.current.getTracks().forEach((e=>e.stop())),h.current=null),v.current&&(v.current.pause(),v.current.srcObject=null);const e=y.current;await e.end();const t=_.current;await t.interrupt()}),[]),K=e.useCallback((async e=>{const t=w.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"conversation.item.delete",item_id:e}))}),[]),Q=e.useCallback((async()=>{x(!0);const e=w.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"input_audio_buffer.commit"}))}),[]),X=e.useCallback((async()=>{x(!1);const e=w.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"response.create"}))}),[]),Y=e.useCallback((async e=>{const t=w.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"session.update",session:{turn_detection:"none"===e?null:{type:"server_vad"}}})),D("none"===e)}),[]);e.useEffect((()=>{if($&&P){const e=w.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"session.update",session:{instructions:B.replace("{{name}}",$).replace("{{language}}",j).replace("{{time}}",(new Date).toISOString())}}))}}),[B,$,j,P]),e.useEffect((()=>{let e=!0;const n=y.current,r=C.current;let a=null;const o=_.current,s=R.current;let i=null;const c=()=>{if(e){if(r&&(r.width&&r.height||(r.width=r.offsetWidth,r.height=r.offsetHeight),a=a||r.getContext("2d"),a)){a.clearRect(0,0,r.width,r.height);const e=n.recording?n.getFrequencies("voice"):{values:new Float32Array([0])},o=1-Math.max(...e.values);F.current=o,t.WavRenderer.drawCircularBars(r,a,e.values,"#0099ff",20,0,8)}if(s&&(s.width&&s.height||(s.width=s.offsetWidth,s.height=s.offsetHeight),i=i||s.getContext("2d"),i)){i.clearRect(0,0,s.width,s.height);const e=o.analyser?o.getFrequencies("voice"):{values:new Float32Array([0])},n=1-Math.max(...e.values);J.current=n,t.WavRenderer.drawCircularBars(s,i,e.values,"#009900",20,0,8)}window.requestAnimationFrame(c)}};return c(),()=>{e=!1}}),[]),e.useEffect((()=>{!O.current&&i&&B&&(O.current=!0)}),[i,B]);const Z=e.useCallback((e=>{const t=e.replace("{{name}}",$).replace("{{language}}",j).replace("{{time}}",(new Date).toISOString()),n=w.current;n&&"open"===n.readyState&&n.send(JSON.stringify({type:"session.update",session:{instructions:t}})),W(e)}),[$,j]);return e.useMemo((()=>{if(l&&u)return{items:N,realtimeEvents:b,isConnected:P,canPushToTalk:V,isRecording:M,clientCanvasRef:C,serverCanvasRef:R,eventsScrollRef:I,formatTime:G,connectConversation:H,disconnectConversation:z,deleteConversationItem:K,startRecording:Q,stopRecording:X,changeTurnEndType:Y,getRecorderLevel:()=>F.current,getStreamLevel:()=>J.current,setUsername:L,updateInstructions:Z,setLanguage:q}}),[l,u,N,b,P,V,M,C,R,I])};
1
+ "use strict";Object.defineProperty(exports,"__esModule",{value:!0});var e=require("react"),t=require("../utils/wav_renderer.js");require("../lib/wavtools/lib/wav_packer.js"),require("../lib/wavtools/lib/analysis/audio_analysis.js");var n=require("../lib/wavtools/lib/wav_stream_player.js"),r=require("../lib/wavtools/lib/wav_recorder.js"),a=require("../config/index.js");exports.default=(s,o,i,c)=>{const l=!!a.getConfigValue("APP_CONFIG"),u=!!a.getConfigValue("APP_BUTTONS");l||console.warn("APP_CONFIG is not set. Voice AI will not be available."),u||console.warn("APP_BUTTONS is not set. Voice AI will not be available.");const d=a.getConfigValue("APP_CONFIG"),p=a.getConfigValue("APP_BUTTONS");a.getConfigValue("API_BASE_URL").replace(/\/$/,""),a.getConfigValue("USE_REALTIME_SESSION_ENDPOINT"),a.getConfigValue("REALTIME_SESSION_ENDPOINT");const f=c||a.getConfigValue("REALTIME_VOICE_MODEL")||"alloy",g=["alloy","shimmer","echo"],m=g.includes(f)?f:"alloy";f!==m&&console.warn(`[codicent-app-sdk] Voice "${f}" is not supported in the current SDK version. Supported voices: ${g.join(", ")}. Falling back to "${m}".`);const y=e.useRef(new r.WavRecorder({sampleRate:24e3})),_=e.useRef(new n.WavStreamPlayer({sampleRate:24e3})),S=e.useRef(null),w=e.useRef(null),v=e.useRef(null),h=e.useRef(null),O=e.useRef(!1),R=e.useRef(null),C=e.useRef(null),I=e.useRef(null),T=e.useRef((new Date).toISOString()),[N,E]=e.useState([]),[b,A]=e.useState([]),[P,k]=e.useState(!1),[V,D]=e.useState(!1),[M,x]=e.useState(!1),[F,J]=e.useState(!1),$=e.useRef(0),L=e.useRef(0),[j,q]=e.useState(""),[U,B]=e.useState("en-US"),W=e.useRef(new Map),G=e.useRef(null),[H,z]=e.useState((()=>l&&u&&p&&d&&d.apps&&d.apps[p]?d.apps[p].voiceInstructions||d.REALTIME_VOICE_INSTRUCTIONS||"":d&&d.REALTIME_VOICE_INSTRUCTIONS||"")),K=e.useCallback((e=>{const t=T.current,n=new Date(t).valueOf(),r=new Date(e).valueOf()-n,a=Math.floor(r/10)%100,s=Math.floor(r/1e3)%60,o=e=>{let t=e+"";for(;t.length<2;)t="0"+t;return t};return`${o(Math.floor(r/6e4)%60)}:${o(s)}.${o(a)}`}),[]),Q=e.useCallback((async()=>{try{T.current=(new Date).toISOString(),k(!0),A([]),E([]),W.current.clear();const e=await s.getRealtimeSessionToken(m);if(!e)throw new Error("No ephemeral key returned from session endpoint");const t=new RTCPeerConnection;S.current=t,v.current||(v.current=new Audio,v.current.autoplay=!0),t.ontrack=e=>{v.current&&e.streams[0]&&(v.current.srcObject=e.streams[0])};const n=await navigator.mediaDevices.getUserMedia({audio:!0});h.current=n;const r=n.getTracks()[0];t.addTrack(r,n);const o=t.createDataChannel("oai-events");w.current=o,o.addEventListener("message",(e=>{try{const t=JSON.parse(e.data);if("session.created"===t.type);else if("conversation.item.created"===t.type)E((e=>[...e,t.item]));else if("conversation.item.input_audio_transcription.completed"===t.type)E((e=>{const n=[...e],r=n.findIndex((e=>e.id===t.item_id));return-1!==r&&n[r].formatted&&(n[r].formatted.transcript=t.transcript),n}));else if("response.audio_transcript.delta"===t.type);else if("response.audio_transcript.done"===t.type);else if("response.output_item.added"===t.type){const e=t.item;"function_call"===e?.type&&(console.log("[Voice AI] Function call initiated:",e.name),W.current.set(e.id,{name:e.name||"",arguments:"",call_id:e.call_id||e.id}))}else if("response.function_call_arguments.delta"===t.type){const e=t.item_id,n=t.delta,r=W.current.get(e);r&&n&&(r.arguments+=n,W.current.set(e,r))}else if("response.function_call_arguments.done"===t.type){const e=t.item_id,n=W.current.get(e);if(n){console.log(`[Voice AI] Executing tool: ${n.name}`);const t=i.find((e=>e.definition.name===n.name));if(t){let e={};try{e=n.arguments?JSON.parse(n.arguments):{}}catch(t){console.error("[Voice AI] Failed to parse tool arguments:",t),e={}}Promise.resolve(t.handler(e)).then((e=>{console.log(`[Voice AI] Tool ${n.name} completed:`,e),o&&"open"===o.readyState&&(o.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify(e)}})),o.send(JSON.stringify({type:"response.create"})))})).catch((e=>{console.error(`[Voice AI] Tool ${n.name} failed:`,e),o&&"open"===o.readyState&&(o.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify({error:e.message||"Tool execution failed"})}})),o.send(JSON.stringify({type:"response.create"})))}))}else console.error(`[Voice AI] Tool not found: ${n.name}`);W.current.delete(e)}}else"error"===t.type&&console.error("Server error:",t.error)}catch(t){console.warn("Invalid message:",e.data)}})),o.onopen=()=>{console.log("Data channel opened, configuring session");const e=G.current||H;G.current=null,o.send(JSON.stringify({type:"session.update",session:{instructions:e.replace("{{name}}",j).replace("{{language}}",U).replace("{{time}}",(new Date).toISOString()),modalities:["text","audio"],input_audio_transcription:{model:"whisper-1"},turn_detection:{type:"server_vad",threshold:.5,prefix_padding_ms:300,silence_duration_ms:500},voice:m,temperature:.8,max_response_output_tokens:4096,input_audio_format:"pcm16",output_audio_format:"pcm16",tools:i.map((e=>({type:"function",...e.definition})))}})),D(!0)},o.onclose=()=>{console.log("Data channel closed"),D(!1)};const c=await t.createOffer();await t.setLocalDescription(c);let l="gpt-4o-realtime-preview";try{if(a.getConfigValue("REALTIME_CONFIG_ENDPOINT")){const e=await s.getRealtimeConfig();e&&e.model&&(l=e.model)}}catch(e){console.warn("Failed to fetch realtime config, using default model:",e)}const u="https://api.openai.com/v1/realtime",d=await fetch(`${u}?model=${l}`,{method:"POST",body:c.sdp,headers:{Authorization:`Bearer ${e}`,"Content-Type":"application/sdp"}});if(!d.ok)throw new Error(`Failed to get SDP answer: ${d.statusText}`);const p={type:"answer",sdp:await d.text()};await t.setRemoteDescription(p),x(!1);const f=y.current,g=_.current;await f.begin(),await g.connect()}catch(e){throw console.error("[codicent-app-sdk] Failed to establish WebRTC connection:",e),k(!1),e}}),[s,m,H,j,U,i]),X=e.useCallback((async()=>{k(!1),D(!1),A([]),E([]),W.current.clear(),G.current=null,w.current&&(w.current.close(),w.current=null),S.current&&(S.current.close(),S.current=null),h.current&&(h.current.getTracks().forEach((e=>e.stop())),h.current=null),v.current&&(v.current.pause(),v.current.srcObject=null);const e=y.current;await e.end();const t=_.current;await t.interrupt()}),[]),Y=e.useCallback((async e=>{const t=w.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"conversation.item.delete",item_id:e}))}),[]),Z=e.useCallback((async()=>{J(!0);const e=w.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"input_audio_buffer.commit"}))}),[]),ee=e.useCallback((async()=>{J(!1);const e=w.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"response.create"}))}),[]),te=e.useCallback((async e=>{const t=w.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"session.update",session:{turn_detection:"none"===e?null:{type:"server_vad"}}})),x("none"===e)}),[]);e.useEffect((()=>{if(j&&V){const e=w.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"session.update",session:{instructions:H.replace("{{name}}",j).replace("{{language}}",U).replace("{{time}}",(new Date).toISOString())}}))}}),[H,j,U,V]),e.useEffect((()=>{let e=!0;const n=y.current,r=R.current;let a=null;const s=_.current,o=C.current;let i=null;const c=()=>{if(e){if(r&&(r.width&&r.height||(r.width=r.offsetWidth,r.height=r.offsetHeight),a=a||r.getContext("2d"),a)){a.clearRect(0,0,r.width,r.height);const e=n.recording?n.getFrequencies("voice"):{values:new Float32Array([0])},s=1-Math.max(...e.values);$.current=s,t.WavRenderer.drawCircularBars(r,a,e.values,"#0099ff",20,0,8)}if(o&&(o.width&&o.height||(o.width=o.offsetWidth,o.height=o.offsetHeight),i=i||o.getContext("2d"),i)){i.clearRect(0,0,o.width,o.height);const e=s.analyser?s.getFrequencies("voice"):{values:new Float32Array([0])},n=1-Math.max(...e.values);L.current=n,t.WavRenderer.drawCircularBars(o,i,e.values,"#009900",20,0,8)}window.requestAnimationFrame(c)}};return c(),()=>{e=!1}}),[]),e.useEffect((()=>{!O.current&&i&&H&&(O.current=!0)}),[i,H]);const ne=e.useCallback((e=>{z(e);const t=w.current;if(t&&"open"===t.readyState){const n=e.replace("{{name}}",j).replace("{{language}}",U).replace("{{time}}",(new Date).toISOString());t.send(JSON.stringify({type:"session.update",session:{instructions:n}}))}else G.current=e}),[j,U]);return e.useMemo((()=>{if(l&&u)return{items:N,realtimeEvents:b,isConnected:P,isSessionReady:V,canPushToTalk:M,isRecording:F,clientCanvasRef:R,serverCanvasRef:C,eventsScrollRef:I,formatTime:K,connectConversation:Q,disconnectConversation:X,deleteConversationItem:Y,startRecording:Z,stopRecording:ee,changeTurnEndType:te,getRecorderLevel:()=>$.current,getStreamLevel:()=>L.current,setUsername:q,updateInstructions:ne,setLanguage:B}}),[l,u,N,b,P,V,M,F,R,C,I])};
@@ -31,6 +31,7 @@ export interface RealtimeVoice {
31
31
  items: ItemType[];
32
32
  realtimeEvents: RealtimeEvent[];
33
33
  isConnected: boolean;
34
+ isSessionReady: boolean;
34
35
  canPushToTalk: boolean;
35
36
  isRecording: boolean;
36
37
  clientCanvasRef: React.RefObject<HTMLCanvasElement>;
@@ -1 +1 @@
1
- {"version":3,"file":"useRealtimeVoiceAI.d.ts","sourceRoot":"","sources":["../../../src/hooks/useRealtimeVoiceAI.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2CAA2C,CAAC;AAK/E,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,SAAS,CAAC,EAAE;QACV,KAAK,CAAC,EAAE,UAAU,CAAC;QACnB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,GAAG,CAAC;KACZ,CAAC;CACH;AAED;;GAEG;AACH,UAAU,aAAa;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAE,CAAC;CAC/B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,WAAW,EAAE,OAAO,CAAC;IACrB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,OAAO,CAAC;IACrB,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IACjD,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,mBAAmB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,sBAAsB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5C,sBAAsB,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,cAAc,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,aAAa,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,iBAAiB,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACpD,gBAAgB,EAAE,MAAM,MAAM,CAAC;IAC/B,cAAc,EAAE,MAAM,MAAM,CAAC;IAC7B,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,YAAY,EAAE,MAAM,KAAK,IAAI,CAAC;IACnD,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;CACzC;AAED,QAAA,MAAM,kBAAkB,oBACL,eAAe,UACxB,MAAM,SACP;IAAE,UAAU,EAAE,kBAAkB,CAAC;IAAC,OAAO,EAAE,QAAQ,CAAA;CAAE,EAAE,UACtD,MAAM,KACb,aAAa,GAAG,SA0pBlB,CAAC;AAEF,eAAe,kBAAkB,CAAC"}
1
+ {"version":3,"file":"useRealtimeVoiceAI.d.ts","sourceRoot":"","sources":["../../../src/hooks/useRealtimeVoiceAI.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2CAA2C,CAAC;AAK/E,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC;IAChB,SAAS,CAAC,EAAE;QACV,KAAK,CAAC,EAAE,UAAU,CAAC;QACnB,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,IAAI,CAAC,EAAE,GAAG,CAAC;KACZ,CAAC;CACH;AAED;;GAEG;AACH,UAAU,aAAa;IACrB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,KAAK,EAAE;QAAE,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAE,CAAC;CAC/B;AAED,MAAM,WAAW,aAAa;IAC5B,KAAK,EAAE,QAAQ,EAAE,CAAC;IAClB,cAAc,EAAE,aAAa,EAAE,CAAC;IAChC,WAAW,EAAE,OAAO,CAAC;IACrB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;IACvB,WAAW,EAAE,OAAO,CAAC;IACrB,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACpD,eAAe,EAAE,KAAK,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IACjD,UAAU,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,MAAM,CAAC;IAC1C,mBAAmB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,sBAAsB,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5C,sBAAsB,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtD,cAAc,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,aAAa,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,iBAAiB,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACpD,gBAAgB,EAAE,MAAM,MAAM,CAAC;IAC/B,cAAc,EAAE,MAAM,MAAM,CAAC;IAC7B,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;IACxC,kBAAkB,EAAE,CAAC,YAAY,EAAE,MAAM,KAAK,IAAI,CAAC;IACnD,WAAW,EAAE,CAAC,QAAQ,EAAE,MAAM,KAAK,IAAI,CAAC;CACzC;AAED,QAAA,MAAM,kBAAkB,oBACL,eAAe,UACxB,MAAM,SACP;IAAE,UAAU,EAAE,kBAAkB,CAAC;IAAC,OAAO,EAAE,QAAQ,CAAA;CAAE,EAAE,UACtD,MAAM,KACb,aAAa,GAAG,SA8qBlB,CAAC;AAEF,eAAe,kBAAkB,CAAC"}
@@ -1 +1 @@
1
- import{useRef as e,useState as t,useCallback as n,useEffect as r,useMemo as o}from"react";import{WavRenderer as a}from"../utils/wav_renderer.js";import"../lib/wavtools/lib/wav_packer.js";import"../lib/wavtools/lib/analysis/audio_analysis.js";import{WavStreamPlayer as i}from"../lib/wavtools/lib/wav_stream_player.js";import{WavRecorder as s}from"../lib/wavtools/lib/wav_recorder.js";import{getConfigValue as c}from"../config/index.js";const l=(l,u,d,p)=>{const m=!!c("APP_CONFIG"),f=!!c("APP_BUTTONS");m||console.warn("APP_CONFIG is not set. Voice AI will not be available."),f||console.warn("APP_BUTTONS is not set. Voice AI will not be available.");const g=c("APP_CONFIG"),y=c("APP_BUTTONS");c("API_BASE_URL").replace(/\/$/,""),c("USE_REALTIME_SESSION_ENDPOINT"),c("REALTIME_SESSION_ENDPOINT");const _=p||c("REALTIME_VOICE_MODEL")||"alloy",w=["alloy","shimmer","echo"],h=w.includes(_)?_:"alloy";_!==h&&console.warn(`[codicent-app-sdk] Voice "${_}" is not supported in the current SDK version. Supported voices: ${w.join(", ")}. Falling back to "${h}".`);const S=e(new s({sampleRate:24e3})),v=e(new i({sampleRate:24e3})),O=e(null),I=e(null),T=e(null),N=e(null),E=e(!1),A=e(null),C=e(null),R=e(null),P=e((new Date).toISOString()),[b,D]=t([]),[F,J]=t([]),[$,x]=t(!1),[M,k]=t(!1),[L,V]=t(!1),U=e(0),j=e(0),[B,G]=t(""),[q,W]=t("en-US"),H=e(new Map),[z,K]=t((()=>m&&f&&y&&g&&g.apps&&g.apps[y]?g.apps[y].voiceInstructions||g.REALTIME_VOICE_INSTRUCTIONS||"":g&&g.REALTIME_VOICE_INSTRUCTIONS||"")),Q=n((e=>{const t=P.current,n=new Date(t).valueOf(),r=new Date(e).valueOf()-n,o=Math.floor(r/10)%100,a=Math.floor(r/1e3)%60,i=e=>{let t=e+"";for(;t.length<2;)t="0"+t;return t};return`${i(Math.floor(r/6e4)%60)}:${i(a)}.${i(o)}`}),[]),X=n((async()=>{try{P.current=(new Date).toISOString(),x(!0),J([]),D([]),H.current.clear();const e=await l.getRealtimeSessionToken(h);if(!e)throw new Error("No ephemeral key returned from session endpoint");const t=new RTCPeerConnection;O.current=t,T.current||(T.current=new Audio,T.current.autoplay=!0),t.ontrack=e=>{T.current&&e.streams[0]&&(T.current.srcObject=e.streams[0])};const n=await navigator.mediaDevices.getUserMedia({audio:!0});N.current=n;const r=n.getTracks()[0];t.addTrack(r,n);const o=t.createDataChannel("oai-events");I.current=o,o.addEventListener("message",(e=>{try{const t=JSON.parse(e.data);if("session.created"===t.type);else if("conversation.item.created"===t.type)D((e=>[...e,t.item]));else if("conversation.item.input_audio_transcription.completed"===t.type)D((e=>{const n=[...e],r=n.findIndex((e=>e.id===t.item_id));return-1!==r&&n[r].formatted&&(n[r].formatted.transcript=t.transcript),n}));else if("response.audio_transcript.delta"===t.type);else if("response.audio_transcript.done"===t.type);else if("response.output_item.added"===t.type){const e=t.item;"function_call"===e?.type&&(console.log("[Voice AI] Function call initiated:",e.name),H.current.set(e.id,{name:e.name||"",arguments:"",call_id:e.call_id||e.id}))}else if("response.function_call_arguments.delta"===t.type){const e=t.item_id,n=t.delta,r=H.current.get(e);r&&n&&(r.arguments+=n,H.current.set(e,r))}else if("response.function_call_arguments.done"===t.type){const e=t.item_id,n=H.current.get(e);if(n){console.log(`[Voice AI] Executing tool: ${n.name}`);const t=d.find((e=>e.definition.name===n.name));if(t){let e={};try{e=n.arguments?JSON.parse(n.arguments):{}}catch(t){console.error("[Voice AI] Failed to parse tool arguments:",t),e={}}Promise.resolve(t.handler(e)).then((e=>{console.log(`[Voice AI] Tool ${n.name} completed:`,e),o&&"open"===o.readyState&&(o.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify(e)}})),o.send(JSON.stringify({type:"response.create"})))})).catch((e=>{console.error(`[Voice AI] Tool ${n.name} failed:`,e),o&&"open"===o.readyState&&(o.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify({error:e.message||"Tool execution failed"})}})),o.send(JSON.stringify({type:"response.create"})))}))}else console.error(`[Voice AI] Tool not found: ${n.name}`);H.current.delete(e)}}else"error"===t.type&&console.error("Server error:",t.error)}catch(t){console.warn("Invalid message:",e.data)}})),o.onopen=()=>{console.log("Data channel opened, configuring session"),o.send(JSON.stringify({type:"session.update",session:{instructions:z.replace("{{name}}",B).replace("{{language}}",q).replace("{{time}}",(new Date).toISOString()),modalities:["text","audio"],input_audio_transcription:{model:"whisper-1"},turn_detection:{type:"server_vad",threshold:.5,prefix_padding_ms:300,silence_duration_ms:500},voice:h,temperature:.8,max_response_output_tokens:4096,input_audio_format:"pcm16",output_audio_format:"pcm16",tools:d.map((e=>({type:"function",...e.definition})))}}))},o.onclose=()=>{console.log("Data channel closed")};const a=await t.createOffer();await t.setLocalDescription(a);let i="gpt-4o-realtime-preview";try{if(c("REALTIME_CONFIG_ENDPOINT")){const e=await l.getRealtimeConfig();e&&e.model&&(i=e.model)}}catch(e){console.warn("Failed to fetch realtime config, using default model:",e)}const s="https://api.openai.com/v1/realtime",u=await fetch(`${s}?model=${i}`,{method:"POST",body:a.sdp,headers:{Authorization:`Bearer ${e}`,"Content-Type":"application/sdp"}});if(!u.ok)throw new Error(`Failed to get SDP answer: ${u.statusText}`);const p={type:"answer",sdp:await u.text()};await t.setRemoteDescription(p),k(!1);const m=S.current,f=v.current;await m.begin(),await f.connect()}catch(e){throw console.error("[codicent-app-sdk] Failed to establish WebRTC connection:",e),x(!1),e}}),[l,h,z,B,q,d]),Y=n((async()=>{x(!1),J([]),D([]),H.current.clear(),I.current&&(I.current.close(),I.current=null),O.current&&(O.current.close(),O.current=null),N.current&&(N.current.getTracks().forEach((e=>e.stop())),N.current=null),T.current&&(T.current.pause(),T.current.srcObject=null);const e=S.current;await e.end();const t=v.current;await t.interrupt()}),[]),Z=n((async e=>{const t=I.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"conversation.item.delete",item_id:e}))}),[]),ee=n((async()=>{V(!0);const e=I.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"input_audio_buffer.commit"}))}),[]),te=n((async()=>{V(!1);const e=I.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"response.create"}))}),[]),ne=n((async e=>{const t=I.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"session.update",session:{turn_detection:"none"===e?null:{type:"server_vad"}}})),k("none"===e)}),[]);r((()=>{if(B&&$){const e=I.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"session.update",session:{instructions:z.replace("{{name}}",B).replace("{{language}}",q).replace("{{time}}",(new Date).toISOString())}}))}}),[z,B,q,$]),r((()=>{let e=!0;const t=S.current,n=A.current;let r=null;const o=v.current,i=C.current;let s=null;const c=()=>{if(e){if(n&&(n.width&&n.height||(n.width=n.offsetWidth,n.height=n.offsetHeight),r=r||n.getContext("2d"),r)){r.clearRect(0,0,n.width,n.height);const e=t.recording?t.getFrequencies("voice"):{values:new Float32Array([0])},o=1-Math.max(...e.values);U.current=o,a.drawCircularBars(n,r,e.values,"#0099ff",20,0,8)}if(i&&(i.width&&i.height||(i.width=i.offsetWidth,i.height=i.offsetHeight),s=s||i.getContext("2d"),s)){s.clearRect(0,0,i.width,i.height);const e=o.analyser?o.getFrequencies("voice"):{values:new Float32Array([0])},t=1-Math.max(...e.values);j.current=t,a.drawCircularBars(i,s,e.values,"#009900",20,0,8)}window.requestAnimationFrame(c)}};return c(),()=>{e=!1}}),[]),r((()=>{!E.current&&d&&z&&(E.current=!0)}),[d,z]);const re=n((e=>{const t=e.replace("{{name}}",B).replace("{{language}}",q).replace("{{time}}",(new Date).toISOString()),n=I.current;n&&"open"===n.readyState&&n.send(JSON.stringify({type:"session.update",session:{instructions:t}})),K(e)}),[B,q]);return o((()=>{if(m&&f)return{items:b,realtimeEvents:F,isConnected:$,canPushToTalk:M,isRecording:L,clientCanvasRef:A,serverCanvasRef:C,eventsScrollRef:R,formatTime:Q,connectConversation:X,disconnectConversation:Y,deleteConversationItem:Z,startRecording:ee,stopRecording:te,changeTurnEndType:ne,getRecorderLevel:()=>U.current,getStreamLevel:()=>j.current,setUsername:G,updateInstructions:re,setLanguage:W}}),[m,f,b,F,$,M,L,A,C,R])};export{l as default};
1
+ import{useRef as e,useState as t,useCallback as n,useEffect as r,useMemo as o}from"react";import{WavRenderer as a}from"../utils/wav_renderer.js";import"../lib/wavtools/lib/wav_packer.js";import"../lib/wavtools/lib/analysis/audio_analysis.js";import{WavStreamPlayer as s}from"../lib/wavtools/lib/wav_stream_player.js";import{WavRecorder as i}from"../lib/wavtools/lib/wav_recorder.js";import{getConfigValue as c}from"../config/index.js";const l=(l,u,d,p)=>{const m=!!c("APP_CONFIG"),f=!!c("APP_BUTTONS");m||console.warn("APP_CONFIG is not set. Voice AI will not be available."),f||console.warn("APP_BUTTONS is not set. Voice AI will not be available.");const g=c("APP_CONFIG"),y=c("APP_BUTTONS");c("API_BASE_URL").replace(/\/$/,""),c("USE_REALTIME_SESSION_ENDPOINT"),c("REALTIME_SESSION_ENDPOINT");const _=p||c("REALTIME_VOICE_MODEL")||"alloy",w=["alloy","shimmer","echo"],h=w.includes(_)?_:"alloy";_!==h&&console.warn(`[codicent-app-sdk] Voice "${_}" is not supported in the current SDK version. Supported voices: ${w.join(", ")}. Falling back to "${h}".`);const S=e(new i({sampleRate:24e3})),v=e(new s({sampleRate:24e3})),O=e(null),I=e(null),T=e(null),N=e(null),E=e(!1),A=e(null),R=e(null),C=e(null),P=e((new Date).toISOString()),[b,D]=t([]),[F,J]=t([]),[$,x]=t(!1),[M,k]=t(!1),[L,V]=t(!1),[U,j]=t(!1),B=e(0),G=e(0),[q,W]=t(""),[H,z]=t("en-US"),K=e(new Map),Q=e(null),[X,Y]=t((()=>m&&f&&y&&g&&g.apps&&g.apps[y]?g.apps[y].voiceInstructions||g.REALTIME_VOICE_INSTRUCTIONS||"":g&&g.REALTIME_VOICE_INSTRUCTIONS||"")),Z=n((e=>{const t=P.current,n=new Date(t).valueOf(),r=new Date(e).valueOf()-n,o=Math.floor(r/10)%100,a=Math.floor(r/1e3)%60,s=e=>{let t=e+"";for(;t.length<2;)t="0"+t;return t};return`${s(Math.floor(r/6e4)%60)}:${s(a)}.${s(o)}`}),[]),ee=n((async()=>{try{P.current=(new Date).toISOString(),x(!0),J([]),D([]),K.current.clear();const e=await l.getRealtimeSessionToken(h);if(!e)throw new Error("No ephemeral key returned from session endpoint");const t=new RTCPeerConnection;O.current=t,T.current||(T.current=new Audio,T.current.autoplay=!0),t.ontrack=e=>{T.current&&e.streams[0]&&(T.current.srcObject=e.streams[0])};const n=await navigator.mediaDevices.getUserMedia({audio:!0});N.current=n;const r=n.getTracks()[0];t.addTrack(r,n);const o=t.createDataChannel("oai-events");I.current=o,o.addEventListener("message",(e=>{try{const t=JSON.parse(e.data);if("session.created"===t.type);else if("conversation.item.created"===t.type)D((e=>[...e,t.item]));else if("conversation.item.input_audio_transcription.completed"===t.type)D((e=>{const n=[...e],r=n.findIndex((e=>e.id===t.item_id));return-1!==r&&n[r].formatted&&(n[r].formatted.transcript=t.transcript),n}));else if("response.audio_transcript.delta"===t.type);else if("response.audio_transcript.done"===t.type);else if("response.output_item.added"===t.type){const e=t.item;"function_call"===e?.type&&(console.log("[Voice AI] Function call initiated:",e.name),K.current.set(e.id,{name:e.name||"",arguments:"",call_id:e.call_id||e.id}))}else if("response.function_call_arguments.delta"===t.type){const e=t.item_id,n=t.delta,r=K.current.get(e);r&&n&&(r.arguments+=n,K.current.set(e,r))}else if("response.function_call_arguments.done"===t.type){const e=t.item_id,n=K.current.get(e);if(n){console.log(`[Voice AI] Executing tool: ${n.name}`);const t=d.find((e=>e.definition.name===n.name));if(t){let e={};try{e=n.arguments?JSON.parse(n.arguments):{}}catch(t){console.error("[Voice AI] Failed to parse tool arguments:",t),e={}}Promise.resolve(t.handler(e)).then((e=>{console.log(`[Voice AI] Tool ${n.name} completed:`,e),o&&"open"===o.readyState&&(o.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify(e)}})),o.send(JSON.stringify({type:"response.create"})))})).catch((e=>{console.error(`[Voice AI] Tool ${n.name} failed:`,e),o&&"open"===o.readyState&&(o.send(JSON.stringify({type:"conversation.item.create",item:{type:"function_call_output",call_id:n.call_id,output:JSON.stringify({error:e.message||"Tool execution failed"})}})),o.send(JSON.stringify({type:"response.create"})))}))}else console.error(`[Voice AI] Tool not found: ${n.name}`);K.current.delete(e)}}else"error"===t.type&&console.error("Server error:",t.error)}catch(t){console.warn("Invalid message:",e.data)}})),o.onopen=()=>{console.log("Data channel opened, configuring session");const e=Q.current||X;Q.current=null,o.send(JSON.stringify({type:"session.update",session:{instructions:e.replace("{{name}}",q).replace("{{language}}",H).replace("{{time}}",(new Date).toISOString()),modalities:["text","audio"],input_audio_transcription:{model:"whisper-1"},turn_detection:{type:"server_vad",threshold:.5,prefix_padding_ms:300,silence_duration_ms:500},voice:h,temperature:.8,max_response_output_tokens:4096,input_audio_format:"pcm16",output_audio_format:"pcm16",tools:d.map((e=>({type:"function",...e.definition})))}})),k(!0)},o.onclose=()=>{console.log("Data channel closed"),k(!1)};const a=await t.createOffer();await t.setLocalDescription(a);let s="gpt-4o-realtime-preview";try{if(c("REALTIME_CONFIG_ENDPOINT")){const e=await l.getRealtimeConfig();e&&e.model&&(s=e.model)}}catch(e){console.warn("Failed to fetch realtime config, using default model:",e)}const i="https://api.openai.com/v1/realtime",u=await fetch(`${i}?model=${s}`,{method:"POST",body:a.sdp,headers:{Authorization:`Bearer ${e}`,"Content-Type":"application/sdp"}});if(!u.ok)throw new Error(`Failed to get SDP answer: ${u.statusText}`);const p={type:"answer",sdp:await u.text()};await t.setRemoteDescription(p),V(!1);const m=S.current,f=v.current;await m.begin(),await f.connect()}catch(e){throw console.error("[codicent-app-sdk] Failed to establish WebRTC connection:",e),x(!1),e}}),[l,h,X,q,H,d]),te=n((async()=>{x(!1),k(!1),J([]),D([]),K.current.clear(),Q.current=null,I.current&&(I.current.close(),I.current=null),O.current&&(O.current.close(),O.current=null),N.current&&(N.current.getTracks().forEach((e=>e.stop())),N.current=null),T.current&&(T.current.pause(),T.current.srcObject=null);const e=S.current;await e.end();const t=v.current;await t.interrupt()}),[]),ne=n((async e=>{const t=I.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"conversation.item.delete",item_id:e}))}),[]),re=n((async()=>{j(!0);const e=I.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"input_audio_buffer.commit"}))}),[]),oe=n((async()=>{j(!1);const e=I.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"response.create"}))}),[]),ae=n((async e=>{const t=I.current;t&&"open"===t.readyState&&t.send(JSON.stringify({type:"session.update",session:{turn_detection:"none"===e?null:{type:"server_vad"}}})),V("none"===e)}),[]);r((()=>{if(q&&M){const e=I.current;e&&"open"===e.readyState&&e.send(JSON.stringify({type:"session.update",session:{instructions:X.replace("{{name}}",q).replace("{{language}}",H).replace("{{time}}",(new Date).toISOString())}}))}}),[X,q,H,M]),r((()=>{let e=!0;const t=S.current,n=A.current;let r=null;const o=v.current,s=R.current;let i=null;const c=()=>{if(e){if(n&&(n.width&&n.height||(n.width=n.offsetWidth,n.height=n.offsetHeight),r=r||n.getContext("2d"),r)){r.clearRect(0,0,n.width,n.height);const e=t.recording?t.getFrequencies("voice"):{values:new Float32Array([0])},o=1-Math.max(...e.values);B.current=o,a.drawCircularBars(n,r,e.values,"#0099ff",20,0,8)}if(s&&(s.width&&s.height||(s.width=s.offsetWidth,s.height=s.offsetHeight),i=i||s.getContext("2d"),i)){i.clearRect(0,0,s.width,s.height);const e=o.analyser?o.getFrequencies("voice"):{values:new Float32Array([0])},t=1-Math.max(...e.values);G.current=t,a.drawCircularBars(s,i,e.values,"#009900",20,0,8)}window.requestAnimationFrame(c)}};return c(),()=>{e=!1}}),[]),r((()=>{!E.current&&d&&X&&(E.current=!0)}),[d,X]);const se=n((e=>{Y(e);const t=I.current;if(t&&"open"===t.readyState){const n=e.replace("{{name}}",q).replace("{{language}}",H).replace("{{time}}",(new Date).toISOString());t.send(JSON.stringify({type:"session.update",session:{instructions:n}}))}else Q.current=e}),[q,H]);return o((()=>{if(m&&f)return{items:b,realtimeEvents:F,isConnected:$,isSessionReady:M,canPushToTalk:L,isRecording:U,clientCanvasRef:A,serverCanvasRef:R,eventsScrollRef:C,formatTime:Z,connectConversation:ee,disconnectConversation:te,deleteConversationItem:ne,startRecording:re,stopRecording:oe,changeTurnEndType:ae,getRecorderLevel:()=>B.current,getStreamLevel:()=>G.current,setUsername:W,updateInstructions:se,setLanguage:z}}),[m,f,b,F,$,M,L,U,A,R,C])};export{l as default};
package/dist/index.d.ts CHANGED
@@ -662,6 +662,7 @@ interface RealtimeVoice {
662
662
  items: ItemType[];
663
663
  realtimeEvents: RealtimeEvent[];
664
664
  isConnected: boolean;
665
+ isSessionReady: boolean;
665
666
  canPushToTalk: boolean;
666
667
  isRecording: boolean;
667
668
  clientCanvasRef: React.RefObject<HTMLCanvasElement>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codicent-app-sdk",
3
- "version": "0.3.102",
3
+ "version": "0.3.103",
4
4
  "description": "SDK for building AI-powered applications with Codicent",
5
5
  "type": "module",
6
6
  "main": "dist/cjs/index.js",