@keyframelabs/elements 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/index.d.ts +2 -2
- package/dist/agents/openai-realtime.d.ts +0 -19
- package/dist/index.d.ts +1 -1
- package/dist/index.js +11 -21
- package/dist/kfl-embed.js +6 -13
- package/dist/types.d.ts +1 -4
- package/package.json +1 -1
package/dist/agents/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ElevenLabsAgent, ElevenLabsConfig } from './elevenlabs';
|
|
2
|
-
import { OpenAIRealtimeAgent, OpenAIRealtimeConfig
|
|
2
|
+
import { OpenAIRealtimeAgent, OpenAIRealtimeConfig } from './openai-realtime';
|
|
3
3
|
import { KflAgent, KflAgentConfig } from './kfl';
|
|
4
4
|
/**
|
|
5
5
|
* Agent implementations for voice AI platforms.
|
|
@@ -10,7 +10,7 @@ import { KflAgent, KflAgentConfig } from './kfl';
|
|
|
10
10
|
export { BaseAgent, DEFAULT_INPUT_SAMPLE_RATE } from './base';
|
|
11
11
|
export type { Agent, AgentConfig, AgentEventMap, AgentState, Emotion } from './types';
|
|
12
12
|
export { ElevenLabsAgent, type ElevenLabsConfig };
|
|
13
|
-
export { OpenAIRealtimeAgent, type OpenAIRealtimeConfig
|
|
13
|
+
export { OpenAIRealtimeAgent, type OpenAIRealtimeConfig };
|
|
14
14
|
export { KflAgent, type KflAgentConfig };
|
|
15
15
|
export { SAMPLE_RATE, base64ToBytes, bytesToBase64, resamplePcm, createEventEmitter, floatTo16BitPCM, createPcmWorkletNode } from './audio-utils';
|
|
16
16
|
/** Supported agent types */
|
|
@@ -1,28 +1,9 @@
|
|
|
1
1
|
import { AgentConfig } from './types';
|
|
2
2
|
import { BaseAgent } from './base';
|
|
3
|
-
/**
|
|
4
|
-
* Turn detection configuration for OpenAI Realtime.
|
|
5
|
-
* @see https://developers.openai.com/api/docs/guides/realtime-vad
|
|
6
|
-
*/
|
|
7
|
-
export type TurnDetection = {
|
|
8
|
-
type: 'server_vad';
|
|
9
|
-
/** Activation threshold 0-1. Higher = requires louder audio. */
|
|
10
|
-
threshold?: number;
|
|
11
|
-
/** Audio (ms) to include before detected speech. */
|
|
12
|
-
prefix_padding_ms?: number;
|
|
13
|
-
/** Silence duration (ms) before speech stop is detected. */
|
|
14
|
-
silence_duration_ms?: number;
|
|
15
|
-
} | {
|
|
16
|
-
type: 'semantic_vad';
|
|
17
|
-
/** How eager the model is to consider a turn finished. Default: 'auto'. */
|
|
18
|
-
eagerness?: 'low' | 'medium' | 'high' | 'auto';
|
|
19
|
-
};
|
|
20
3
|
/** OpenAI Realtime specific configuration */
|
|
21
4
|
export interface OpenAIRealtimeConfig extends AgentConfig {
|
|
22
5
|
/** Model to use (defaults to gpt-realtime) */
|
|
23
6
|
model?: string;
|
|
24
|
-
/** Turn detection / VAD settings. Defaults to semantic_vad with eagerness 'high'. */
|
|
25
|
-
turnDetection?: TurnDetection;
|
|
26
7
|
}
|
|
27
8
|
/**
|
|
28
9
|
* OpenAI Realtime agent implementation.
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,7 @@ export { PersonaView } from './PersonaView';
|
|
|
4
4
|
export type { PersonaViewOptions } from './PersonaView';
|
|
5
5
|
export type { EmbedStatus, VideoFit, VoiceAgentDetails, SessionDetails, BaseCallbacks, } from './types';
|
|
6
6
|
export { createAgent, ElevenLabsAgent, OpenAIRealtimeAgent, KflAgent, BaseAgent, AGENT_REGISTRY, getAgentInfo, } from './agents';
|
|
7
|
-
export type { AgentType, AgentConfig, AgentEventMap, Agent, AnyAgent, AgentTypeInfo, ElevenLabsConfig, OpenAIRealtimeConfig, KflAgentConfig,
|
|
7
|
+
export type { AgentType, AgentConfig, AgentEventMap, Agent, AnyAgent, AgentTypeInfo, ElevenLabsConfig, OpenAIRealtimeConfig, KflAgentConfig, } from './agents';
|
|
8
8
|
export type { AgentState } from '@keyframelabs/sdk';
|
|
9
9
|
export { floatTo16BitPCM, resamplePcm, base64ToBytes, bytesToBase64, SAMPLE_RATE, createEventEmitter, } from './agents';
|
|
10
10
|
export { KflEmbedElement } from './KflEmbedElement';
|
package/dist/index.js
CHANGED
|
@@ -344,7 +344,7 @@ class F extends v {
|
|
|
344
344
|
this.initialized = !1, this.lastInterruptId = 0, this.resetTurnState(), super.close();
|
|
345
345
|
}
|
|
346
346
|
}
|
|
347
|
-
const
|
|
347
|
+
const j = ["neutral", "angry", "sad", "happy"], U = "wss://api.openai.com/v1/realtime", D = "gpt-realtime", m = 24e3, z = {
|
|
348
348
|
type: "function",
|
|
349
349
|
name: "set_emotion",
|
|
350
350
|
description: "Set the emotional expression of the avatar. Call this on every turn to reflect the tone of your response.",
|
|
@@ -378,12 +378,12 @@ class $ extends v {
|
|
|
378
378
|
if (!e.apiKey)
|
|
379
379
|
throw new Error("OpenAI Realtime token is required");
|
|
380
380
|
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
381
|
-
const t = e.model ??
|
|
382
|
-
return this.initialSessionUpdate = this.buildSessionUpdate(
|
|
381
|
+
const t = e.model ?? D;
|
|
382
|
+
return this.initialSessionUpdate = this.buildSessionUpdate(t), new Promise((i, n) => {
|
|
383
383
|
this.connectResolve = i, this.connectReject = n, this.connectTimeout = setTimeout(() => {
|
|
384
384
|
this.rejectPendingConnect(new Error("Timed out waiting for OpenAI Realtime session setup")), this.close();
|
|
385
385
|
}, 1e4), this.ws = new WebSocket(
|
|
386
|
-
`${
|
|
386
|
+
`${U}?model=${encodeURIComponent(t)}`,
|
|
387
387
|
["realtime", `openai-insecure-api-key.${e.apiKey}`]
|
|
388
388
|
), this.ws.onopen = () => {
|
|
389
389
|
}, this.ws.onerror = () => {
|
|
@@ -469,29 +469,25 @@ class $ extends v {
|
|
|
469
469
|
close() {
|
|
470
470
|
this.rejectPendingConnect(new Error("Connection closed")), this.clearConnectTimeout(), this.resetTurnState(), this.initialSessionUpdate = null, this.handledFunctionCallIds.clear(), super.close();
|
|
471
471
|
}
|
|
472
|
-
buildSessionUpdate(e
|
|
473
|
-
const i = e.turnDetection ?? { type: "semantic_vad", eagerness: "high" };
|
|
472
|
+
buildSessionUpdate(e) {
|
|
474
473
|
return {
|
|
475
474
|
type: "session.update",
|
|
476
475
|
session: {
|
|
477
476
|
type: "realtime",
|
|
478
|
-
model:
|
|
477
|
+
model: e,
|
|
479
478
|
output_modalities: ["audio"],
|
|
480
|
-
instructions: e.systemPrompt,
|
|
481
479
|
audio: {
|
|
482
480
|
input: {
|
|
483
481
|
format: {
|
|
484
482
|
type: "audio/pcm",
|
|
485
483
|
rate: m
|
|
486
|
-
}
|
|
487
|
-
turn_detection: i
|
|
484
|
+
}
|
|
488
485
|
},
|
|
489
486
|
output: {
|
|
490
487
|
format: {
|
|
491
488
|
type: "audio/pcm",
|
|
492
489
|
rate: h
|
|
493
|
-
}
|
|
494
|
-
...e.voice ? { voice: e.voice } : {}
|
|
490
|
+
}
|
|
495
491
|
}
|
|
496
492
|
},
|
|
497
493
|
tools: [z],
|
|
@@ -540,7 +536,7 @@ class $ extends v {
|
|
|
540
536
|
return { error: `Unsupported function: ${e.name}` };
|
|
541
537
|
try {
|
|
542
538
|
const i = (e.arguments ? JSON.parse(e.arguments) : {}).emotion?.toLowerCase();
|
|
543
|
-
return i &&
|
|
539
|
+
return i && j.includes(i) ? (this.events.emit("emotion", i), { result: "ok" }) : { error: "Invalid emotion" };
|
|
544
540
|
} catch {
|
|
545
541
|
return { error: "Invalid function arguments" };
|
|
546
542
|
}
|
|
@@ -882,10 +878,7 @@ class Y {
|
|
|
882
878
|
else if (e.type === "openai")
|
|
883
879
|
await this.agent.connect({
|
|
884
880
|
...t,
|
|
885
|
-
apiKey: e.token
|
|
886
|
-
systemPrompt: e.system_prompt,
|
|
887
|
-
voice: e.voice,
|
|
888
|
-
turnDetection: e.turn_detection
|
|
881
|
+
apiKey: e.token
|
|
889
882
|
});
|
|
890
883
|
else if (e.type === "kfl") {
|
|
891
884
|
if (!e.signed_url)
|
|
@@ -1028,10 +1021,7 @@ class be {
|
|
|
1028
1021
|
else if (e.type === "openai")
|
|
1029
1022
|
await this.agent.connect({
|
|
1030
1023
|
...t,
|
|
1031
|
-
apiKey: e.token
|
|
1032
|
-
systemPrompt: e.system_prompt,
|
|
1033
|
-
voice: e.voice,
|
|
1034
|
-
turnDetection: e.turn_detection
|
|
1024
|
+
apiKey: e.token
|
|
1035
1025
|
});
|
|
1036
1026
|
else if (e.type === "kfl") {
|
|
1037
1027
|
if (!e.signed_url)
|
package/dist/kfl-embed.js
CHANGED
|
@@ -17896,7 +17896,7 @@ class Wu extends ts {
|
|
|
17896
17896
|
throw new Error("OpenAI Realtime token is required");
|
|
17897
17897
|
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
17898
17898
|
const t = e.model ?? Vu;
|
|
17899
|
-
return this.initialSessionUpdate = this.buildSessionUpdate(
|
|
17899
|
+
return this.initialSessionUpdate = this.buildSessionUpdate(t), new Promise((i, s) => {
|
|
17900
17900
|
this.connectResolve = i, this.connectReject = s, this.connectTimeout = setTimeout(() => {
|
|
17901
17901
|
this.rejectPendingConnect(new Error("Timed out waiting for OpenAI Realtime session setup")), this.close();
|
|
17902
17902
|
}, 1e4), this.ws = new WebSocket(
|
|
@@ -17986,29 +17986,25 @@ class Wu extends ts {
|
|
|
17986
17986
|
close() {
|
|
17987
17987
|
this.rejectPendingConnect(new Error("Connection closed")), this.clearConnectTimeout(), this.resetTurnState(), this.initialSessionUpdate = null, this.handledFunctionCallIds.clear(), super.close();
|
|
17988
17988
|
}
|
|
17989
|
-
buildSessionUpdate(e
|
|
17990
|
-
const i = e.turnDetection ?? { type: "semantic_vad", eagerness: "high" };
|
|
17989
|
+
buildSessionUpdate(e) {
|
|
17991
17990
|
return {
|
|
17992
17991
|
type: "session.update",
|
|
17993
17992
|
session: {
|
|
17994
17993
|
type: "realtime",
|
|
17995
|
-
model:
|
|
17994
|
+
model: e,
|
|
17996
17995
|
output_modalities: ["audio"],
|
|
17997
|
-
instructions: e.systemPrompt,
|
|
17998
17996
|
audio: {
|
|
17999
17997
|
input: {
|
|
18000
17998
|
format: {
|
|
18001
17999
|
type: "audio/pcm",
|
|
18002
18000
|
rate: Zi
|
|
18003
|
-
}
|
|
18004
|
-
turn_detection: i
|
|
18001
|
+
}
|
|
18005
18002
|
},
|
|
18006
18003
|
output: {
|
|
18007
18004
|
format: {
|
|
18008
18005
|
type: "audio/pcm",
|
|
18009
18006
|
rate: si
|
|
18010
|
-
}
|
|
18011
|
-
...e.voice ? { voice: e.voice } : {}
|
|
18007
|
+
}
|
|
18012
18008
|
}
|
|
18013
18009
|
},
|
|
18014
18010
|
tools: [qu],
|
|
@@ -18391,10 +18387,7 @@ class $u {
|
|
|
18391
18387
|
else if (e.type === "openai")
|
|
18392
18388
|
await this.agent.connect({
|
|
18393
18389
|
...t,
|
|
18394
|
-
apiKey: e.token
|
|
18395
|
-
systemPrompt: e.system_prompt,
|
|
18396
|
-
voice: e.voice,
|
|
18397
|
-
turnDetection: e.turn_detection
|
|
18390
|
+
apiKey: e.token
|
|
18398
18391
|
});
|
|
18399
18392
|
else if (e.type === "kfl") {
|
|
18400
18393
|
if (!e.signed_url)
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AgentType
|
|
1
|
+
import { AgentType } from './agents';
|
|
2
2
|
import { AgentState } from '@keyframelabs/sdk';
|
|
3
3
|
export type EmbedStatus = 'connecting' | 'connected' | 'error' | 'disconnected';
|
|
4
4
|
export type VideoFit = 'cover' | 'contain';
|
|
@@ -7,9 +7,6 @@ export type VoiceAgentDetails = {
|
|
|
7
7
|
token?: string;
|
|
8
8
|
agent_id?: string;
|
|
9
9
|
signed_url?: string;
|
|
10
|
-
system_prompt?: string;
|
|
11
|
-
voice?: string;
|
|
12
|
-
turn_detection?: TurnDetection;
|
|
13
10
|
};
|
|
14
11
|
export type SessionDetails = {
|
|
15
12
|
server_url: string;
|