@runwayml/avatars-react 0.9.0 → 0.10.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import * as _livekit_components_react from '@livekit/components-react';
2
- import { TrackReferenceOrPlaceholder } from '@livekit/components-react';
3
- export { RoomAudioRenderer as AudioRenderer, VideoTrack } from '@livekit/components-react';
2
+ import { TrackReference, TrackReferenceOrPlaceholder } from '@livekit/components-react';
3
+ export { RoomAudioRenderer as AudioRenderer, VideoTrack, isTrackReference } from '@livekit/components-react';
4
4
  import * as react_jsx_runtime from 'react/jsx-runtime';
5
5
  import * as livekit_client from 'livekit-client';
6
6
  import { ComponentPropsWithoutRef, ReactNode } from 'react';
@@ -30,7 +30,7 @@ interface SessionCredentials {
30
30
  /**
31
31
  * Props for the AvatarSession component
32
32
  */
33
- interface AvatarSessionProps {
33
+ interface AvatarSessionProps<E extends ClientEvent = ClientEvent> {
34
34
  /** Connection credentials from Runway API */
35
35
  credentials: SessionCredentials;
36
36
  /** Children to render inside the session */
@@ -43,6 +43,8 @@ interface AvatarSessionProps {
43
43
  onEnd?: () => void;
44
44
  /** Callback when an error occurs */
45
45
  onError?: (error: Error) => void;
46
+ /** Callback when a client event is received from the avatar */
47
+ onClientEvent?: ClientEventHandler<E>;
46
48
  /**
47
49
  * Pre-captured screen share stream (from getDisplayMedia).
48
50
  * When provided, screen sharing activates automatically once the session connects.
@@ -57,7 +59,7 @@ interface AvatarSessionProps {
57
59
  /**
58
60
  * Props for the AvatarCall component
59
61
  */
60
- interface AvatarCallProps extends Omit<React.ComponentPropsWithoutRef<'div'>, 'onError'> {
62
+ interface AvatarCallProps<E extends ClientEvent = ClientEvent> extends Omit<React.ComponentPropsWithoutRef<'div'>, 'onError'> {
61
63
  /** The avatar ID to connect to */
62
64
  avatarId: string;
63
65
  /** Session ID (use with sessionKey - package will call consumeSession) */
@@ -82,6 +84,8 @@ interface AvatarCallProps extends Omit<React.ComponentPropsWithoutRef<'div'>, 'o
82
84
  onEnd?: () => void;
83
85
  /** Callback when an error occurs */
84
86
  onError?: (error: Error) => void;
87
+ /** Callback when a client event is received from the avatar */
88
+ onClientEvent?: ClientEventHandler<E>;
85
89
  /** Custom children - defaults to AvatarVideo + ControlBar if not provided */
86
90
  children?: React.ReactNode;
87
91
  /**
@@ -129,8 +133,51 @@ interface UseLocalMediaReturn {
129
133
  /** The local video track reference */
130
134
  localVideoTrackRef: _livekit_components_react.TrackReferenceOrPlaceholder | null;
131
135
  }
136
+ /**
137
+ * Client event received from the avatar via the data channel.
138
+ * These are fire-and-forget events triggered by the avatar model.
139
+ *
140
+ * @typeParam T - The tool name (defaults to string for untyped usage)
141
+ * @typeParam A - The args type (defaults to Record<string, unknown>)
142
+ *
143
+ * @example
144
+ * ```typescript
145
+ * // Untyped usage
146
+ * const event: ClientEvent = { type: 'client_event', tool: 'show_caption', args: { text: 'Hello' } };
147
+ *
148
+ * // Typed usage with discriminated union
149
+ * type MyEvent = ClientEvent<'show_caption', { text: string }>;
150
+ * ```
151
+ */
152
+ interface ClientEvent<T extends string = string, A = Record<string, unknown>> {
153
+ type: 'client_event';
154
+ tool: T;
155
+ args: A;
156
+ }
157
+ /**
158
+ * Handler function for client events
159
+ */
160
+ type ClientEventHandler<E extends ClientEvent = ClientEvent> = (event: E) => void;
161
+ /**
162
+ * A transcription segment received from the session.
163
+ * SDK-owned type wrapping the underlying transport's transcription data.
164
+ */
165
+ interface TranscriptionEntry {
166
+ /** Unique segment identifier */
167
+ id: string;
168
+ /** Transcribed text */
169
+ text: string;
170
+ /** Whether this is a final (non-streaming) segment */
171
+ final: boolean;
172
+ /** Identity of the participant who spoke */
173
+ participantIdentity: string;
174
+ }
175
+ /**
176
+ * Handler function for transcription events
177
+ */
178
+ type TranscriptionHandler = (entry: TranscriptionEntry) => void;
132
179
 
133
- declare function AvatarCall({ avatarId, sessionId, sessionKey, credentials: directCredentials, connectUrl, connect, baseUrl, audio, video, avatarImageUrl, onEnd, onError, children, initialScreenStream, __unstable_roomOptions, ...props }: AvatarCallProps): react_jsx_runtime.JSX.Element;
180
+ declare function AvatarCall<E extends ClientEvent = ClientEvent>({ avatarId, sessionId, sessionKey, credentials: directCredentials, connectUrl, connect, baseUrl, audio, video, avatarImageUrl, onEnd, onError, onClientEvent, children, initialScreenStream, __unstable_roomOptions, ...props }: AvatarCallProps<E>): react_jsx_runtime.JSX.Element;
134
181
 
135
182
  /**
136
183
  * AvatarSession component - the main entry point for avatar sessions
@@ -138,7 +185,7 @@ declare function AvatarCall({ avatarId, sessionId, sessionKey, credentials: dire
138
185
  * Establishes a WebRTC connection and provides session state to children.
139
186
  * This is a headless component that renders minimal DOM.
140
187
  */
141
- declare function AvatarSession({ credentials, children, audio: requestAudio, video: requestVideo, onEnd, onError, initialScreenStream, __unstable_roomOptions, }: AvatarSessionProps): react_jsx_runtime.JSX.Element;
188
+ declare function AvatarSession<E extends ClientEvent = ClientEvent>({ credentials, children, audio: requestAudio, video: requestVideo, onEnd, onError, onClientEvent, initialScreenStream, __unstable_roomOptions, }: AvatarSessionProps<E>): react_jsx_runtime.JSX.Element;
142
189
 
143
190
  /**
144
191
  * useAvatarStatus Hook
@@ -176,7 +223,7 @@ type AvatarStatus = {
176
223
  status: 'waiting';
177
224
  } | {
178
225
  status: 'ready';
179
- videoTrackRef: TrackReferenceOrPlaceholder;
226
+ videoTrackRef: TrackReference;
180
227
  } | {
181
228
  status: 'ending';
182
229
  } | {
@@ -292,6 +339,70 @@ type UseAvatarSessionReturn = {
292
339
  */
293
340
  declare function useAvatarSession(): UseAvatarSessionReturn;
294
341
 
342
+ type EventArgs<E extends ClientEvent, T extends E['tool']> = Extract<E, {
343
+ tool: T;
344
+ }>['args'];
345
+ /**
346
+ * Subscribe to a single client event type by tool name.
347
+ *
348
+ * Returns the latest args as React state (`null` before the first event),
349
+ * and optionally fires a callback on each event for side effects.
350
+ *
351
+ * Must be used within an AvatarSession or AvatarCall component.
352
+ *
353
+ * @example
354
+ * ```tsx
355
+ * // State only — returns latest args
356
+ * const score = useClientEvent<TriviaEvent, 'update_score'>('update_score');
357
+ * // score: { score: number; streak: number } | null
358
+ *
359
+ * // State + side effect
360
+ * const result = useClientEvent<TriviaEvent, 'reveal_answer'>('reveal_answer', (args) => {
361
+ * if (args.correct) fireConfetti();
362
+ * });
363
+ *
364
+ * // Side effect only — ignore the return value
365
+ * useClientEvent<TriviaEvent, 'play_sound'>('play_sound', (args) => {
366
+ * new Audio(SOUNDS[args.sound]).play();
367
+ * });
368
+ * ```
369
+ */
370
+ declare function useClientEvent<E extends ClientEvent, T extends E['tool']>(toolName: T, onEvent?: (args: EventArgs<E, T>) => void): EventArgs<E, T> | null;
371
+
372
+ /**
373
+ * Hook to listen for all client events from the avatar.
374
+ *
375
+ * Use this hook in child components to handle client events without prop drilling.
376
+ * Must be used within an AvatarSession or AvatarCall component.
377
+ *
378
+ * @typeParam E - The expected event type (defaults to ClientEvent for untyped usage)
379
+ *
380
+ * @example
381
+ * ```tsx
382
+ * // Untyped usage
383
+ * useClientEvents((event) => {
384
+ * console.log('Received:', event.tool, event.args);
385
+ * });
386
+ *
387
+ * // Type-safe usage with discriminated union
388
+ * type MyEvents =
389
+ * | ClientEvent<'show_caption', { text: string }>
390
+ * | ClientEvent<'play_sound', { url: string }>;
391
+ *
392
+ * useClientEvents<MyEvents>((event) => {
393
+ * switch (event.tool) {
394
+ * case 'show_caption':
395
+ * setCaption(event.args.text); // TypeScript knows this is string
396
+ * break;
397
+ * case 'play_sound':
398
+ * new Audio(event.args.url).play();
399
+ * break;
400
+ * }
401
+ * });
402
+ * ```
403
+ */
404
+ declare function useClientEvents<E extends ClientEvent = ClientEvent>(handler: ClientEventHandler<E>): void;
405
+
295
406
  /**
296
407
  * Hook for local media controls (mic, camera, screen share).
297
408
  *
@@ -301,4 +412,81 @@ declare function useAvatarSession(): UseAvatarSessionReturn;
301
412
  */
302
413
  declare function useLocalMedia(): UseLocalMediaReturn;
303
414
 
304
- export { AvatarCall, type AvatarCallProps, AvatarSession, type AvatarStatus, AvatarVideo, type AvatarVideoStatus, ControlBar, ScreenShareVideo, type SessionCredentials, type SessionState, UserVideo, useAvatar, useAvatarSession, useAvatarStatus, useLocalMedia };
415
+ /**
416
+ * Hook to listen for transcription events from the session.
417
+ *
418
+ * Fires the handler for each transcription segment received. By default,
419
+ * only final segments are delivered. Pass `{ interim: true }` to also
420
+ * receive partial/streaming segments.
421
+ *
422
+ * Must be used within an AvatarSession or AvatarCall component.
423
+ *
424
+ * @example
425
+ * ```tsx
426
+ * useTranscription((entry) => {
427
+ * console.log(`${entry.participantIdentity}: ${entry.text}`);
428
+ * });
429
+ *
430
+ * // Include interim (non-final) segments
431
+ * useTranscription((entry) => {
432
+ * console.log(entry.final ? 'FINAL' : 'partial', entry.text);
433
+ * }, { interim: true });
434
+ * ```
435
+ */
436
+ declare function useTranscription(handler: TranscriptionHandler, options?: {
437
+ interim?: boolean;
438
+ }): void;
439
+
440
+ /**
441
+ * A standalone client tool definition. Composable — combine into arrays
442
+ * and derive event types with `ClientEventsFrom`.
443
+ *
444
+ * At runtime this is just `{ type, name, description }`. The `Args` generic
445
+ * is phantom — it only exists at the TypeScript level for type narrowing.
446
+ */
447
+ interface ClientToolDef<Name extends string = string, Args = unknown> {
448
+ readonly type: 'client_event';
449
+ readonly name: Name;
450
+ readonly description: string;
451
+ /** @internal phantom field — always `undefined` at runtime */
452
+ readonly _args?: Args;
453
+ }
454
+ /**
455
+ * Derive a discriminated union of ClientEvent types from an array of tools.
456
+ *
457
+ * @example
458
+ * ```typescript
459
+ * const tools = [showQuestion, playSound];
460
+ * type MyEvent = ClientEventsFrom<typeof tools>;
461
+ * ```
462
+ */
463
+ type ClientEventsFrom<T extends ReadonlyArray<ClientToolDef>> = T[number] extends infer U ? U extends ClientToolDef<infer Name, infer Args> ? ClientEvent<Name, Args> : never : never;
464
+ /**
465
+ * Define a single client tool.
466
+ *
467
+ * Returns a standalone object that can be composed into arrays and passed
468
+ * to `realtimeSessions.create({ tools })`.
469
+ *
470
+ * @example
471
+ * ```typescript
472
+ * const showQuestion = clientTool('show_question', {
473
+ * description: 'Display a trivia question',
474
+ * args: {} as { question: string; options: Array<string> },
475
+ * });
476
+ *
477
+ * const playSound = clientTool('play_sound', {
478
+ * description: 'Play a sound effect',
479
+ * args: {} as { sound: 'correct' | 'incorrect' },
480
+ * });
481
+ *
482
+ * // Combine and derive types
483
+ * const tools = [showQuestion, playSound];
484
+ * type MyEvent = ClientEventsFrom<typeof tools>;
485
+ * ```
486
+ */
487
+ declare function clientTool<Name extends string, Args>(name: Name, config: {
488
+ description: string;
489
+ args: Args;
490
+ }): ClientToolDef<Name, Args>;
491
+
492
+ export { AvatarCall, type AvatarCallProps, AvatarSession, type AvatarStatus, AvatarVideo, type AvatarVideoStatus, type ClientEvent, type ClientEventHandler, type ClientEventsFrom, type ClientToolDef, ControlBar, ScreenShareVideo, type SessionCredentials, type SessionState, type TranscriptionEntry, type TranscriptionHandler, UserVideo, clientTool, useAvatar, useAvatarSession, useAvatarStatus, useClientEvent, useClientEvents, useLocalMedia, useTranscription };
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { LiveKitRoom, RoomAudioRenderer, useRoomContext, useConnectionState, useRemoteParticipants, useTracks, isTrackReference, VideoTrack, useLocalParticipant, useMediaDevices, TrackToggle } from '@livekit/components-react';
2
- export { RoomAudioRenderer as AudioRenderer, VideoTrack } from '@livekit/components-react';
2
+ export { RoomAudioRenderer as AudioRenderer, VideoTrack, isTrackReference } from '@livekit/components-react';
3
3
  import { createContext, useRef, useEffect, useCallback, useState, useContext, useSyncExternalStore } from 'react';
4
- import { ConnectionState, Track } from 'livekit-client';
4
+ import { ConnectionState, Track, RoomEvent } from 'livekit-client';
5
5
  import { jsxs, jsx, Fragment } from 'react/jsx-runtime';
6
6
 
7
7
  // src/api/config.ts
@@ -153,6 +153,22 @@ function useLatest(value) {
153
153
  }, [value]);
154
154
  return ref;
155
155
  }
156
+
157
+ // src/utils/parseClientEvent.ts
158
+ function isAckMessage(args) {
159
+ return "status" in args && args.status === "event_sent";
160
+ }
161
+ function parseClientEvent(payload) {
162
+ try {
163
+ const message = JSON.parse(new TextDecoder().decode(payload));
164
+ if (message?.type === "client_event" && typeof message.tool === "string" && message.args != null && typeof message.args === "object" && !isAckMessage(message.args)) {
165
+ return message;
166
+ }
167
+ return null;
168
+ } catch {
169
+ return null;
170
+ }
171
+ }
156
172
  async function hasMediaDevice(kind, timeoutMs = 1e3) {
157
173
  try {
158
174
  const timeoutPromise = new Promise(
@@ -228,6 +244,7 @@ function AvatarSession({
228
244
  video: requestVideo = true,
229
245
  onEnd,
230
246
  onError,
247
+ onClientEvent,
231
248
  initialScreenStream,
232
249
  __unstable_roomOptions
233
250
  }) {
@@ -263,6 +280,7 @@ function AvatarSession({
263
280
  {
264
281
  sessionId: credentials.sessionId,
265
282
  onEnd,
283
+ onClientEvent,
266
284
  errorRef,
267
285
  initialScreenStream,
268
286
  children
@@ -276,6 +294,7 @@ function AvatarSession({
276
294
  function AvatarSessionContextInner({
277
295
  sessionId,
278
296
  onEnd,
297
+ onClientEvent,
279
298
  errorRef,
280
299
  initialScreenStream,
281
300
  children
@@ -284,6 +303,8 @@ function AvatarSessionContextInner({
284
303
  const connectionState = useConnectionState();
285
304
  const onEndRef = useRef(onEnd);
286
305
  onEndRef.current = onEnd;
306
+ const onClientEventRef = useRef(onClientEvent);
307
+ onClientEventRef.current = onClientEvent;
287
308
  const publishedRef = useRef(false);
288
309
  useEffect(() => {
289
310
  if (connectionState !== ConnectionState.Connected) return;
@@ -307,6 +328,18 @@ function AvatarSessionContextInner({
307
328
  });
308
329
  };
309
330
  }, [connectionState, initialScreenStream, room]);
331
+ useEffect(() => {
332
+ function handleDataReceived(payload) {
333
+ const event = parseClientEvent(payload);
334
+ if (event) {
335
+ onClientEventRef.current?.(event);
336
+ }
337
+ }
338
+ room.on(RoomEvent.DataReceived, handleDataReceived);
339
+ return () => {
340
+ room.off(RoomEvent.DataReceived, handleDataReceived);
341
+ };
342
+ }, [room]);
310
343
  const end = useCallback(async () => {
311
344
  try {
312
345
  const encoder = new TextEncoder();
@@ -369,7 +402,10 @@ function useAvatarStatus() {
369
402
  return { status: "connecting" };
370
403
  case "active":
371
404
  if (hasVideo && videoTrackRef) {
372
- return { status: "ready", videoTrackRef };
405
+ return {
406
+ status: "ready",
407
+ videoTrackRef
408
+ };
373
409
  }
374
410
  return { status: "waiting" };
375
411
  case "ending":
@@ -634,6 +670,7 @@ function AvatarCall({
634
670
  avatarImageUrl,
635
671
  onEnd,
636
672
  onError,
673
+ onClientEvent,
637
674
  children,
638
675
  initialScreenStream,
639
676
  __unstable_roomOptions,
@@ -687,6 +724,7 @@ function AvatarCall({
687
724
  video,
688
725
  onEnd,
689
726
  onError: handleSessionError,
727
+ onClientEvent,
690
728
  initialScreenStream,
691
729
  __unstable_roomOptions,
692
730
  children: children ?? defaultChildren
@@ -721,7 +759,79 @@ function ScreenShareVideo({
721
759
  }
722
760
  return /* @__PURE__ */ jsx("div", { ...props, "data-avatar-screen-share": "", "data-avatar-sharing": isSharing, children: screenShareTrackRef && isTrackReference(screenShareTrackRef) && /* @__PURE__ */ jsx(VideoTrack, { trackRef: screenShareTrackRef }) });
723
761
  }
762
+ function useClientEvent(toolName, onEvent) {
763
+ const room = useRoomContext();
764
+ const [state, setState] = useState(null);
765
+ const onEventRef = useRef(onEvent);
766
+ onEventRef.current = onEvent;
767
+ useEffect(() => {
768
+ function handleDataReceived(payload) {
769
+ const event = parseClientEvent(payload);
770
+ if (event && event.tool === toolName) {
771
+ const args = event.args;
772
+ setState(args);
773
+ onEventRef.current?.(args);
774
+ }
775
+ }
776
+ room.on(RoomEvent.DataReceived, handleDataReceived);
777
+ return () => {
778
+ room.off(RoomEvent.DataReceived, handleDataReceived);
779
+ };
780
+ }, [room, toolName]);
781
+ return state;
782
+ }
783
+ function useClientEvents(handler) {
784
+ const room = useRoomContext();
785
+ const handlerRef = useRef(handler);
786
+ handlerRef.current = handler;
787
+ useEffect(() => {
788
+ function handleDataReceived(payload) {
789
+ const event = parseClientEvent(payload);
790
+ if (event) {
791
+ handlerRef.current(event);
792
+ }
793
+ }
794
+ room.on(RoomEvent.DataReceived, handleDataReceived);
795
+ return () => {
796
+ room.off(RoomEvent.DataReceived, handleDataReceived);
797
+ };
798
+ }, [room]);
799
+ }
800
+ function useTranscription(handler, options) {
801
+ const room = useRoomContext();
802
+ const handlerRef = useRef(handler);
803
+ handlerRef.current = handler;
804
+ const interimRef = useRef(options?.interim ?? false);
805
+ interimRef.current = options?.interim ?? false;
806
+ useEffect(() => {
807
+ function handleTranscription(segments, participant) {
808
+ const identity = participant?.identity ?? "unknown";
809
+ for (const segment of segments) {
810
+ if (!interimRef.current && !segment.final) continue;
811
+ handlerRef.current({
812
+ id: segment.id,
813
+ text: segment.text,
814
+ final: segment.final,
815
+ participantIdentity: identity
816
+ });
817
+ }
818
+ }
819
+ room.on(RoomEvent.TranscriptionReceived, handleTranscription);
820
+ return () => {
821
+ room.off(RoomEvent.TranscriptionReceived, handleTranscription);
822
+ };
823
+ }, [room]);
824
+ }
825
+
826
+ // src/tools.ts
827
+ function clientTool(name, config) {
828
+ return {
829
+ type: "client_event",
830
+ name,
831
+ description: config.description
832
+ };
833
+ }
724
834
 
725
- export { AvatarCall, AvatarSession, AvatarVideo, ControlBar, ScreenShareVideo, UserVideo, useAvatar, useAvatarSession, useAvatarStatus, useLocalMedia };
835
+ export { AvatarCall, AvatarSession, AvatarVideo, ControlBar, ScreenShareVideo, UserVideo, clientTool, useAvatar, useAvatarSession, useAvatarStatus, useClientEvent, useClientEvents, useLocalMedia, useTranscription };
726
836
  //# sourceMappingURL=index.js.map
727
837
  //# sourceMappingURL=index.js.map