@d-id/client-sdk 1.1.22-staging.103 → 1.1.22-staging.104

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,256 @@
1
+ import { c as oe, a as ae, S as se, d as ce, T as de, C as S, b as E, A as _, e as U, f as R, g as j } from "./index-sABN87aB.js";
2
/**
 * Builds a small API wrapper scoped to one agent's v2 endpoint.
 *
 * @param auth      Auth value forwarded unchanged to the HTTP client factory `oe`.
 * @param baseUrl   API base URL; `/v2/agents/<agentId>` is appended to it.
 * @param agentId   Agent identifier used in the endpoint path.
 * @param onError   Error callback forwarded unchanged to `oe`.
 * @returns An object exposing `createStream(options)`, which POSTs `options`
 *          to `/sessions` and returns the client's promise.
 */
function ue(auth, baseUrl, agentId, onError) {
  const agentClient = oe(auth, `${baseUrl}/v2/agents/${agentId}`, onError);

  const createStream = async (sessionOptions) => agentClient.post("/sessions", sessionOptions);

  return { createStream };
}
10
/**
 * Lazily loads the bundled LiveKit client module.
 *
 * @returns {Promise<object>} The module namespace of the LiveKit client chunk.
 * @throws {Error} When the dynamic import fails. The original import failure is
 *         attached as `cause` so the real reason (missing chunk, network error,
 *         evaluation error) is not lost behind the generic message.
 */
async function F() {
  try {
    return await import("./livekit-client.esm-C1GRQdZY.js");
  } catch (cause) {
    throw new Error(
      "LiveKit client is required for this streaming manager. Please install it using: npm install livekit-client",
      { cause }
    );
  }
}
19
// Lookup from a connection-quality key to the SDK connectivity state (`E`)
// that is reported through `onConnectivityStateChange`.
const he = {
  excellent: E.Strong,
  good: E.Strong,
  poor: E.Weak,
  lost: E.Unknown,
  unknown: E.Unknown
};

// Pre-serialized generic error payload; used as the message of the `Error`
// objects handed to `onError` for media, encryption and send failures.
const K = JSON.stringify({
  kind: "InternalServerError",
  description: "Stream Error"
});
29
/**
 * Reports a failed LiveKit initialisation and rethrows the original error.
 *
 * Order of effects: log the failure, run the supplied hook, notify
 * `onConnectionStateChange` with `S.Fail` (if defined), notify `onError`
 * with an empty session id (if defined), then throw.
 *
 * @param error                      The failure to report and rethrow.
 * @param log                        Logger function invoked with a message and the error.
 * @param callbacks                  Callback bag; `onConnectionStateChange` and `onError` are optional.
 * @param markInitialConnectionDone  Hook invoked once before the callbacks are notified.
 * @throws Always throws `error` itself (same reference).
 */
function V(error, log, callbacks, markInitialConnectionDone) {
  log("Failed to connect to LiveKit room:", error);
  markInitialConnectionDone();
  callbacks.onConnectionStateChange?.(S.Fail);
  callbacks.onError?.(error, { sessionId: "" });
  throw error;
}
33
/**
 * Creates a streaming manager backed by a LiveKit room for one agent session.
 *
 * Steps, in order: load the LiveKit client, create a session via the agent
 * API (`POST /sessions`), call `room.prepareConnection`, register the room
 * event handlers, report `S.New`, connect the room, and enrich analytics
 * with the stream type.
 *
 * @param agentId         Agent identifier; used in the API path and reported as `agent_id`.
 * @param sessionOptions  Only `chat_persist` is read here (defaults to `true` when nullish).
 * @param options         Reads `debug`, `callbacks`, `auth`, `baseURL` and `analytics`.
 * @returns The manager object: `speak`, `disconnect`, `sendDataChannelMessage`,
 *          `sendTextMessage`, `publishMicrophoneStream`, `unpublishMicrophoneStream`,
 *          plus `sessionId`, `streamId`, `streamType` and capability flags.
 * @throws Rethrows session-creation / connection errors after `V` has notified
 *         the callbacks; throws "Failed to initialize LiveKit stream" when the
 *         session response lacks an id, token or URL.
 *
 * NOTE(review): `options.microphoneStream` is not read by this function;
 * presumably the caller publishes it through `publishMicrophoneStream` once
 * connected - confirm against the caller.
 */
async function fe(agentId, sessionOptions, options) {
  const log = ae(options.debug || false, "LiveKitStreamingManager");
  const { Room, RoomEvent, ConnectionState } = await F();
  const { callbacks, auth, baseURL, analytics } = options;
  const streamType = se.Fluent;

  let room = null;
  let isConnected = false;
  // True until the first connect attempt has settled (success or failure).
  let isInitialConnection = true;
  // Single MediaStream that collects every subscribed remote track.
  let sharedMediaStream = null;
  // Publication of the microphone track currently published by this manager.
  let micPublication = null;

  room = new Room({
    adaptiveStream: false,
    // Must be false to use mediaStreamTrack directly
    dynacast: true
  });

  const agentsApi = ue(auth, baseURL || ce, agentId, callbacks.onError);
  let sessionId;
  let sessionToken;
  let sessionUrl;

  try {
    const session = await agentsApi.createStream({
      transport_provider: de.Livekit,
      chat_persist: sessionOptions.chat_persist ?? true
    });
    const { id, session_token: token, session_url: url } = session;
    callbacks.onStreamCreated?.({ session_id: id, stream_id: id, agent_id: agentId });
    sessionId = id;
    sessionToken = token;
    sessionUrl = url;
    await room.prepareConnection(sessionUrl, sessionToken);
  } catch (error) {
    // V notifies the callbacks and always rethrows.
    V(error, log, callbacks, markInitialConnectionDone);
  }

  if (!sessionUrl || !sessionToken || !sessionId) {
    throw new Error("Failed to initialize LiveKit stream");
  }

  room
    .on(RoomEvent.ConnectionStateChanged, handleConnectionStateChanged)
    .on(RoomEvent.ConnectionQualityChanged, handleConnectionQualityChanged)
    .on(RoomEvent.ActiveSpeakersChanged, handleActiveSpeakersChanged)
    .on(RoomEvent.ParticipantConnected, handleParticipantConnected)
    .on(RoomEvent.TrackSubscribed, handleTrackSubscribed)
    .on(RoomEvent.TrackUnsubscribed, handleTrackUnsubscribed)
    .on(RoomEvent.DataReceived, handleDataReceived)
    .on(RoomEvent.MediaDevicesError, handleMediaDevicesError)
    .on(RoomEvent.EncryptionError, handleEncryptionError)
    .on(RoomEvent.TrackSubscriptionFailed, handleTrackSubscriptionFailed);
  callbacks.onConnectionStateChange?.(S.New);

  try {
    await room.connect(sessionUrl, sessionToken);
    log("LiveKit room joined successfully");
    isInitialConnection = false;
  } catch (error) {
    V(error, log, callbacks, markInitialConnectionDone);
  }

  analytics.enrich({
    "stream-type": streamType
  });

  // Hook handed to V so a failed attempt also ends the "initial connection" phase.
  function markInitialConnectionDone() {
    isInitialConnection = false;
  }

  // Translates LiveKit connection states into SDK connection-state callbacks.
  function handleConnectionStateChanged(state) {
    log("Connection state changed:", state);
    switch (state) {
      case ConnectionState.Connecting:
        callbacks.onConnectionStateChange?.(S.Connecting);
        break;
      case ConnectionState.Connected:
        log("LiveKit room connected successfully");
        isConnected = true;
        if (isInitialConnection) {
          // During the initial connect the notification is deferred to a
          // microtask; later transitions are delivered synchronously.
          queueMicrotask(() => {
            callbacks.onConnectionStateChange?.(S.Connected);
          });
        } else {
          callbacks.onConnectionStateChange?.(S.Connected);
        }
        break;
      case ConnectionState.Disconnected:
        log("LiveKit room disconnected");
        isConnected = false;
        callbacks.onConnectionStateChange?.(S.Disconnected);
        break;
      case ConnectionState.Reconnecting:
        log("LiveKit room reconnecting...");
        callbacks.onConnectionStateChange?.(S.Connecting);
        break;
      case ConnectionState.SignalReconnecting:
        log("LiveKit room signal reconnecting...");
        callbacks.onConnectionStateChange?.(S.Connecting);
        break;
    }
  }

  // Only the local participant's quality is forwarded as connectivity state.
  function handleConnectionQualityChanged(quality, participant) {
    log("Connection quality:", quality);
    if (participant?.isLocal) {
      callbacks.onConnectivityStateChange?.(he[quality]);
    }
  }

  // Reports the agent as talking only when a remote participant is speaking.
  // LiveKit's active-speaker list can include the local participant, and this
  // manager can publish the user's microphone, so the user's own speech must
  // not be reported as agent activity.
  function handleActiveSpeakersChanged(speakers) {
    log("Active speakers changed:", speakers);
    const agentIsSpeaking = speakers.some((speaker) => !speaker.isLocal);
    callbacks.onAgentActivityStateChange?.(agentIsSpeaking ? _.Talking : _.Idle);
  }

  function handleParticipantConnected(participant) {
    log("Participant connected:", participant.identity);
  }

  // Adds the subscribed track to the shared MediaStream; a video track also
  // triggers the stream-ready, src-object-ready and video-start callbacks.
  function handleTrackSubscribed(track, _publication, participant) {
    log(`Track subscribed: ${track.kind} from ${participant.identity}`);
    const mediaTrack = track.mediaStreamTrack;
    if (!mediaTrack) {
      log(`No mediaStreamTrack available for ${track.kind}`);
      return;
    }
    if (sharedMediaStream) {
      sharedMediaStream.addTrack(mediaTrack);
      log(`Added ${track.kind} track to shared MediaStream`);
    } else {
      sharedMediaStream = new MediaStream([mediaTrack]);
      log(`Created shared MediaStream with ${track.kind} track`);
    }
    if (track.kind === "video") {
      callbacks.onStreamReady?.();
      callbacks.onSrcObjectReady?.(sharedMediaStream);
      callbacks.onVideoStateChange?.(U.Start);
    }
  }

  function handleTrackUnsubscribed(track, _publication, participant) {
    log(`Track unsubscribed: ${track.kind} from ${participant.identity}`);
    if (track.kind === "video") {
      callbacks.onVideoStateChange?.(U.Stop);
    }
  }

  // Decodes a data payload as JSON and dispatches it by subject. The topic
  // argument, when truthy, takes precedence over the payload's `subject`.
  // Errors thrown by `onMessage` are also caught and logged by this handler.
  function handleDataReceived(payload, participant, _kind, topic) {
    const text = new TextDecoder().decode(payload);
    log("Data received:", text);
    try {
      const message = JSON.parse(text);
      const subject = topic || message.subject;
      if (subject === R.ChatAnswer) {
        const event = j.Answer;
        callbacks.onMessage?.(event, {
          event,
          ...message
        });
      } else if ([R.StreamVideoCreated, R.StreamVideoDone].includes(subject)) {
        const source = message?.role || participant?.identity || "datachannel";
        callbacks.onMessage?.(subject, {
          [source]: message
        });
      } else if (subject === R.ChatAudioTranscribed) {
        const event = j.Transcribe;
        callbacks.onMessage?.(event, {
          event,
          ...message
        });
      }
    } catch (error) {
      log("Failed to parse data channel message:", error);
    }
  }

  function handleMediaDevicesError(error) {
    log("Media devices error:", error);
    callbacks.onError?.(new Error(K), { sessionId });
  }

  function handleEncryptionError(error) {
    log("Encryption error:", error);
    callbacks.onError?.(new Error(K), { sessionId });
  }

  function handleTrackSubscriptionFailed(trackSid, participant, reason) {
    log("Track subscription failed:", { trackSid, participant, reason });
  }

  // Returns the local microphone publication that carries `mediaTrack`
  // (same object or same id), or null when none is published.
  async function findPublishedMicrophone(mediaTrack) {
    if (!room) return null;
    const { Track } = await F();
    const publications = room.localParticipant.audioTrackPublications;
    if (publications) {
      for (const [, publication] of publications) {
        if (publication.source === Track.Source.Microphone && publication.track) {
          const publishedTrack = publication.track.mediaStreamTrack;
          if (publishedTrack === mediaTrack || (publishedTrack && publishedTrack.id === mediaTrack.id)) {
            return publication;
          }
        }
      }
    }
    return null;
  }

  // True when this manager has a microphone publication for a different track.
  function isDifferentMicrophonePublished(mediaTrack) {
    if (!micPublication || !micPublication.track) {
      return false;
    }
    const currentTrack = micPublication.track.mediaStreamTrack;
    return currentTrack !== mediaTrack && currentTrack?.id !== mediaTrack.id;
  }

  /**
   * Publishes the first audio track of `stream` as the local microphone.
   * Skips publishing when the same track is already published, and replaces
   * a previously published different track.
   * @throws {Error} "Room is not connected" when called while disconnected;
   *         rethrows any error raised by `publishTrack`.
   */
  async function publishMicrophoneStream(stream) {
    if (!isConnected || !room) {
      log("Room is not connected, cannot publish microphone stream");
      throw new Error("Room is not connected");
    }
    const audioTracks = stream.getAudioTracks();
    if (audioTracks.length === 0) {
      log("No audio track found in the provided MediaStream");
      return;
    }
    const micTrack = audioTracks[0];
    const { Track } = await F();
    const existingPublication = await findPublishedMicrophone(micTrack);
    if (existingPublication) {
      log("Microphone track is already published, skipping", {
        trackId: micTrack.id,
        publishedTrackId: existingPublication.track?.mediaStreamTrack?.id
      });
      micPublication = existingPublication;
      return;
    }
    if (isDifferentMicrophonePublished(micTrack)) {
      log("Unpublishing existing microphone track before publishing new one");
      await unpublishMicrophoneStream();
    }
    log("Publishing microphone track from provided MediaStream", { trackId: micTrack.id });
    try {
      micPublication = await room.localParticipant.publishTrack(micTrack, {
        source: Track.Source.Microphone
      });
      log("Microphone track published successfully", { trackSid: micPublication.trackSid });
    } catch (error) {
      log("Failed to publish microphone track:", error);
      throw error;
    }
  }

  // Best-effort unpublish: failures are logged, never thrown, and the stored
  // publication is always cleared.
  async function unpublishMicrophoneStream() {
    if (!micPublication || !micPublication.track) {
      return;
    }
    try {
      if (room) {
        await room.localParticipant.unpublishTrack(micPublication.track);
        log("Microphone track unpublished");
      }
    } catch (error) {
      log("Error unpublishing microphone track:", error);
    } finally {
      micPublication = null;
    }
  }

  // Stops every track in the shared MediaStream and drops the stream.
  function stopSharedMediaStream() {
    if (sharedMediaStream) {
      sharedMediaStream.getTracks().forEach((track) => track.stop());
      sharedMediaStream = null;
    }
  }

  // Sends text on the given topic; failures are reported through `onError`
  // with the generic stream-error payload instead of being thrown.
  async function sendMessage(text, topic = "lk.chat") {
    if (!isConnected || !room) {
      log("Room is not connected for sending messages");
      callbacks.onError?.(new Error(K), {
        sessionId
      });
      return;
    }
    try {
      await room.localParticipant.sendText(text, { topic });
      log("Message sent successfully:", text);
    } catch (error) {
      log("Failed to send message:", error);
      callbacks.onError?.(new Error(K), { sessionId });
    }
  }

  return {
    // Non-string payloads are JSON-serialized before being sent on "did.speak".
    speak(payload) {
      const text = typeof payload === "string" ? payload : JSON.stringify(payload);
      return sendMessage(
        text,
        "did.speak"
        /* Speak */
      );
    },
    async disconnect() {
      const activeRoom = room;
      try {
        if (activeRoom) {
          await unpublishMicrophoneStream();
          await activeRoom.disconnect();
        }
      } finally {
        // Local cleanup must happen even if the room fails to disconnect,
        // otherwise the remote tracks keep running and the manager stays
        // "connected". The error (if any) still propagates to the caller.
        room = null;
        stopSharedMediaStream();
        isConnected = false;
      }
      callbacks.onConnectionStateChange?.(S.Completed);
      callbacks.onAgentActivityStateChange?.(_.Idle);
    },
    sendDataChannelMessage: sendMessage,
    sendTextMessage: sendMessage,
    publishMicrophoneStream,
    unpublishMicrophoneStream,
    sessionId,
    streamId: sessionId,
    streamType,
    interruptAvailable: true,
    triggersAvailable: false
  };
}
253
+ export {
254
+ fe as createLiveKitStreamingManager,
255
+ V as handleInitError
256
+ };
@@ -26,6 +26,19 @@ export type StreamingManager<T extends CreateStreamOptions | CreateSessionV2Opti
26
26
  * supported only for livekit manager
27
27
  */
28
28
  sendTextMessage?(payload: string): Promise<void>;
29
+ /**
30
+ * Publish the microphone audio track from the given stream to the streaming session
31
+ * Can be called after connection to add microphone input
32
+ * @param stream The MediaStream containing the microphone audio track
33
+ * supported only for livekit manager
34
+ */
35
+ publishMicrophoneStream?(stream: MediaStream): Promise<void>;
36
+ /**
37
+ * Unpublish the currently published microphone stream
38
+ * Can be called after connection to remove microphone input
39
+ * supported only for livekit manager
40
+ */
41
+ unpublishMicrophoneStream?(): Promise<void>;
29
42
  /**
30
43
  * Session identifier information, should be returned in the body of all streaming requests
31
44
  */
@@ -27,6 +27,7 @@ export interface Message {
27
27
  context?: string;
28
28
  videoId?: string;
29
29
  interrupted?: boolean;
30
+ transcribed?: boolean;
30
31
  }
31
32
  export interface ChatPayload {
32
33
  messages: Message[];
@@ -26,6 +26,10 @@ export declare enum ChatProgress {
26
26
  * Server processed message and returned response
27
27
  */
28
28
  Answer = "answer",
29
+ /**
30
+ * Audio-transcribed user message
31
+ */
32
+ Transcribe = "transcribe",
29
33
  /**
30
34
  * Chat was closed
31
35
  */
@@ -138,6 +142,7 @@ export interface AgentManagerOptions {
138
142
  streamOptions?: StreamOptions;
139
143
  initialMessages?: Message[];
140
144
  persistentChat?: boolean;
145
+ microphoneStream?: MediaStream;
141
146
  }
142
147
  export interface AgentManager {
143
148
  /**
@@ -178,6 +183,19 @@ export interface AgentManager {
178
183
  * Method to close all connections with agent, stream and web socket
179
184
  */
180
185
  disconnect: () => Promise<void>;
186
+ /**
187
+ * Publish the microphone audio track from the given stream to the streaming session
188
+ * Can be called after connection to add microphone input
189
+ * @param stream The MediaStream containing the microphone audio track
190
+ * supported only for livekit manager
191
+ */
192
+ publishMicrophoneStream?: (stream: MediaStream) => Promise<void>;
193
+ /**
194
+ * Unpublish the currently published microphone stream
195
+ * Can be called after connection to remove microphone input
196
+ * supported only for livekit manager
197
+ */
198
+ unpublishMicrophoneStream?: () => Promise<void>;
181
199
  /**
182
200
  * Method to send a chat message to existing chat with the agent
183
201
  * @param messages
@@ -22,6 +22,7 @@ export declare enum AgentActivityState {
22
22
  export declare enum StreamEvents {
23
23
  ChatAnswer = "chat/answer",
24
24
  ChatPartial = "chat/partial",
25
+ ChatAudioTranscribed = "chat/audio-transcribed",
25
26
  StreamDone = "stream/done",
26
27
  StreamStarted = "stream/started",
27
28
  StreamFailed = "stream/error",
@@ -89,6 +90,12 @@ export interface StreamingManagerOptions {
89
90
  debug?: boolean;
90
91
  auth: Auth;
91
92
  analytics: Analytics;
93
+ /**
94
+ * Optional MediaStream to use for microphone input.
95
+ * If provided, the audio track from this stream will be published to the streaming session as a media track.
96
+ * Supported by LiveKit streaming managers.
97
+ */
98
+ microphoneStream?: MediaStream;
92
99
  }
93
100
  export interface SlimRTCStatsReport {
94
101
  index: number;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@d-id/client-sdk",
3
3
  "private": false,
4
- "version": "1.1.22-staging.103",
4
+ "version": "1.1.22-staging.104",
5
5
  "type": "module",
6
6
  "description": "d-id client sdk",
7
7
  "repository": {
@@ -1,193 +0,0 @@
1
- import { c as l, a as k, S as b, d as ee, T as ne, C as u, b as w, A as _, e as N, f as $, g as te } from "./index-Cx4MD7mS.js";
2
- function oe(c, y, s, o) {
3
- const v = l(c, `${y}/v2/agents/${s}`, o);
4
- return {
5
- async createStream(a) {
6
- return v.post("/sessions", a);
7
- }
8
- };
9
- }
10
- async function ie() {
11
- try {
12
- return await import("./livekit-client.esm-C1GRQdZY.js");
13
- } catch {
14
- throw new Error(
15
- "LiveKit client is required for this streaming manager. Please install it using: npm install livekit-client"
16
- );
17
- }
18
- }
19
- const re = {
20
- excellent: w.Strong,
21
- good: w.Strong,
22
- poor: w.Weak,
23
- lost: w.Unknown,
24
- unknown: w.Unknown
25
- }, K = JSON.stringify({
26
- kind: "InternalServerError",
27
- description: "Stream Error"
28
- });
29
- function U(c, y, s, o) {
30
- var v, a;
31
- throw y("Failed to connect to LiveKit room:", c), o(), (v = s.onConnectionStateChange) == null || v.call(s, u.Fail), (a = s.onError) == null || a.call(s, c, { sessionId: "" }), c;
32
- }
33
- async function se(c, y, s) {
34
- var P, I;
35
- const o = k(s.debug || !1, "LiveKitStreamingManager"), { Room: v, RoomEvent: a, ConnectionState: p } = await ie(), { callbacks: e, auth: j, baseURL: x, analytics: V } = s;
36
- let g = null, E = !1;
37
- const F = b.Fluent;
38
- let T = !0, f = null;
39
- g = new v({
40
- adaptiveStream: !1,
41
- // Must be false to use mediaStreamTrack directly
42
- dynacast: !0
43
- });
44
- const O = oe(j, x || ee, c, e.onError);
45
- let S, A, M;
46
- try {
47
- const n = await O.createStream({
48
- transport_provider: ne.Livekit,
49
- chat_persist: y.chat_persist ?? !0
50
- }), { id: t, session_token: i, session_url: r } = n;
51
- (P = e.onStreamCreated) == null || P.call(e, { session_id: t, stream_id: t, agent_id: c }), S = t, A = i, M = r, await g.prepareConnection(M, A);
52
- } catch (n) {
53
- U(n, o, e, () => {
54
- T = !1;
55
- });
56
- }
57
- if (!M || !A || !S)
58
- return Promise.reject(new Error("Failed to initialize LiveKit stream"));
59
- g.on(a.ConnectionStateChanged, J).on(a.ConnectionQualityChanged, Q).on(a.ActiveSpeakersChanged, q).on(a.ParticipantConnected, z).on(a.TrackSubscribed, W).on(a.TrackUnsubscribed, B).on(a.DataReceived, G).on(a.MediaDevicesError, H).on(a.EncryptionError, X).on(a.TrackSubscriptionFailed, Y), (I = e.onConnectionStateChange) == null || I.call(e, u.New);
60
- try {
61
- await g.connect(M, A), o("LiveKit room joined successfully"), T = !1;
62
- } catch (n) {
63
- U(n, o, e, () => {
64
- T = !1;
65
- });
66
- }
67
- V.enrich({
68
- "stream-type": F
69
- });
70
- function J(n) {
71
- var t, i, r, d, C;
72
- switch (o("Connection state changed:", n), n) {
73
- case p.Connecting:
74
- (t = e.onConnectionStateChange) == null || t.call(e, u.Connecting);
75
- break;
76
- case p.Connected:
77
- o("LiveKit room connected successfully"), E = !0, T ? queueMicrotask(() => {
78
- var m;
79
- (m = e.onConnectionStateChange) == null || m.call(e, u.Connected);
80
- }) : (i = e.onConnectionStateChange) == null || i.call(e, u.Connected);
81
- break;
82
- case p.Disconnected:
83
- o("LiveKit room disconnected"), E = !1, (r = e.onConnectionStateChange) == null || r.call(e, u.Disconnected);
84
- break;
85
- case p.Reconnecting:
86
- o("LiveKit room reconnecting..."), (d = e.onConnectionStateChange) == null || d.call(e, u.Connecting);
87
- break;
88
- case p.SignalReconnecting:
89
- o("LiveKit room signal reconnecting..."), (C = e.onConnectionStateChange) == null || C.call(e, u.Connecting);
90
- break;
91
- }
92
- }
93
- function Q(n, t) {
94
- var i;
95
- o("Connection quality:", n), t != null && t.isLocal && ((i = e.onConnectivityStateChange) == null || i.call(e, re[n]));
96
- }
97
- function q(n) {
98
- var i, r;
99
- o("Active speakers changed:", n), n[0] ? (i = e.onAgentActivityStateChange) == null || i.call(e, _.Talking) : (r = e.onAgentActivityStateChange) == null || r.call(e, _.Idle);
100
- }
101
- function z(n) {
102
- o("Participant connected:", n.identity);
103
- }
104
- function W(n, t, i) {
105
- var d, C, m;
106
- o(`Track subscribed: ${n.kind} from ${i.identity}`);
107
- const r = n.mediaStreamTrack;
108
- if (!r) {
109
- o(`No mediaStreamTrack available for ${n.kind}`);
110
- return;
111
- }
112
- f ? (f.addTrack(r), o(`Added ${n.kind} track to shared MediaStream`)) : (f = new MediaStream([r]), o(`Created shared MediaStream with ${n.kind} track`)), n.kind === "video" && ((d = e.onStreamReady) == null || d.call(e), (C = e.onSrcObjectReady) == null || C.call(e, f), (m = e.onVideoStateChange) == null || m.call(e, N.Start));
113
- }
114
- function B(n, t, i) {
115
- var r;
116
- o(`Track unsubscribed: ${n.kind} from ${i.identity}`), n.kind === "video" && ((r = e.onVideoStateChange) == null || r.call(e, N.Stop));
117
- }
118
- function G(n, t, i, r) {
119
- var C, m;
120
- const d = new TextDecoder().decode(n);
121
- o("Data received:", d);
122
- try {
123
- const h = JSON.parse(d), D = r || h.subject;
124
- if (D === $.ChatAnswer) {
125
- const L = te.Answer;
126
- (C = e.onMessage) == null || C.call(e, L, {
127
- event: L,
128
- ...h
129
- });
130
- } else if ([$.StreamVideoCreated, $.StreamVideoDone].includes(D)) {
131
- const L = (h == null ? void 0 : h.role) || (t == null ? void 0 : t.identity) || "datachannel";
132
- (m = e.onMessage) == null || m.call(e, D, {
133
- [L]: h
134
- });
135
- }
136
- } catch (h) {
137
- o("Failed to parse data channel message:", h);
138
- }
139
- }
140
- function H(n) {
141
- var t;
142
- o("Media devices error:", n), (t = e.onError) == null || t.call(e, new Error(K), { sessionId: S });
143
- }
144
- function X(n) {
145
- var t;
146
- o("Encryption error:", n), (t = e.onError) == null || t.call(e, new Error(K), { sessionId: S });
147
- }
148
- function Y(n, t, i) {
149
- o("Track subscription failed:", { trackSid: n, participant: t, reason: i });
150
- }
151
- function Z() {
152
- f && (f.getTracks().forEach((n) => n.stop()), f = null);
153
- }
154
- async function R(n, t = "lk.chat") {
155
- var i, r;
156
- if (!E || !g) {
157
- o("Room is not connected for sending messages"), (i = e.onError) == null || i.call(e, new Error(K), {
158
- sessionId: S
159
- });
160
- return;
161
- }
162
- try {
163
- await g.localParticipant.sendText(n, { topic: t }), o("Message sent successfully:", n);
164
- } catch (d) {
165
- o("Failed to send message:", d), (r = e.onError) == null || r.call(e, new Error(K), { sessionId: S });
166
- }
167
- }
168
- return {
169
- speak(n) {
170
- const t = typeof n == "string" ? n : JSON.stringify(n);
171
- return R(
172
- t,
173
- "did.speak"
174
- /* Speak */
175
- );
176
- },
177
- async disconnect() {
178
- var n, t;
179
- g && (await g.disconnect(), g = null), Z(), E = !1, (n = e.onConnectionStateChange) == null || n.call(e, u.Completed), (t = e.onAgentActivityStateChange) == null || t.call(e, _.Idle);
180
- },
181
- sendDataChannelMessage: R,
182
- sendTextMessage: R,
183
- sessionId: S,
184
- streamId: S,
185
- streamType: F,
186
- interruptAvailable: !0,
187
- triggersAvailable: !1
188
- };
189
- }
190
- export {
191
- se as createLiveKitStreamingManager,
192
- U as handleInitError
193
- };