@vivix-ai/ivi-frontend-sdk 0.3.5 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -627,6 +627,13 @@ interface IVIVolumeControlProps {
627
627
  }
628
628
 
629
629
  type IviSubtitleRole = IviRuntimeConversationItem["role"];
630
+ type IviSubtitleSource = "conversation" | "response_audio_transcript";
631
+ type IviSubtitleCompletedReason = "conversation_done" | "response_done";
632
+ type IviCompletedSubtitleDecision = "keep" | "remove" | {
633
+ removeAfterMs: number;
634
+ };
635
+ type IviCompletedSubtitleDecisionResult = IviCompletedSubtitleDecision | void | Promise<IviCompletedSubtitleDecision | void>;
636
+ declare const DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS = 3000;
630
637
  interface IviSubtitleItem {
631
638
  id: string;
632
639
  role: IviSubtitleRole;
@@ -637,16 +644,47 @@ interface IviSubtitleItem {
637
644
  displayText: string;
638
645
  content: IviRuntimeConversationItem["content"];
639
646
  item: IviRuntimeConversationItem["item"];
647
+ /** 字幕数据来源。 */
648
+ source?: IviSubtitleSource;
649
+ /** 关联的 response_id;仅 response_audio_transcript 来源有值。 */
650
+ responseId?: string;
651
+ /** 关联的 item_id;仅 response_audio_transcript 来源有值。 */
652
+ itemId?: string;
640
653
  /** 首次进入字幕队列的时间戳。 */
641
654
  timestamp: number;
642
655
  /** 最近一次字幕内容或状态更新的时间戳。 */
643
656
  updatedAt: number;
644
657
  }
658
/** Context object handed to an IviSubtitleCompletedHandler as its second argument. */
+ interface IviSubtitleCompletedContext {
659
+ /** Snapshot of the current subtitle queue. */
660
+ entries: readonly IviSubtitleItem[];
661
+ /** Whether this completion came from a conversation finishing or a response finishing. */
662
+ reason: IviSubtitleCompletedReason;
663
+ /** Aborted when the subtitle is removed, the session ends, the configuration changes, or the component unmounts. */
664
+ signal: AbortSignal;
665
+ }
666
+ type IviSubtitleCompletedHandler = (item: IviSubtitleItem, context: IviSubtitleCompletedContext) => IviCompletedSubtitleDecisionResult;
645
667
  /** Options accepted by the useIviSubtitles hook. */
  interface IviUseSubtitlesOptions {
646
668
  /** Speaker roles to collect; defaults to ["user"]. Accepts either a single string or an array. */
647
669
  roles?: IviSubtitleRole | IviSubtitleRole[];
648
670
  /** Maximum number of subtitles to keep; the oldest entries are dropped automatically once exceeded. Defaults to 2. */
649
671
  maxItems?: number;
672
+ /**
673
+ * When roles includes "model", whether to use response.output_audio_transcript.* events as the source of model subtitles.
674
+ * When enabled, model subtitles are no longer aggregated from conversation.*; response.output_audio_transcript.done is not treated as the end,
675
+ * and only response.done marks this turn's model subtitle as done.
676
+ */
677
+ useModelStreamingTranscript?: boolean;
678
+ /**
679
+ * How long after a subtitle enters the completed state it is removed automatically; defaults to 3000ms. Pass false to keep it until it is trimmed by maxItems.
680
+ * When onSubtitleCompleted is customized, this option only serves as the default policy used when no custom policy is provided.
681
+ */
682
+ hideCompletedAfterMs?: number | false;
683
+ /**
684
+ * Called once when a subtitle enters the completed state. Return "remove" to delete it immediately, return { removeAfterMs } to delete it after a delay,
685
+ * or return "keep" or undefined to keep it. Asynchronous policies can react to cancellation via context.signal.
686
+ */
687
+ onSubtitleCompleted?: IviSubtitleCompletedHandler;
650
688
  }
651
689
  /**
652
690
  * 监听 runtime 中的 conversation/response 事件,并维护当前应展示的字幕队列。
@@ -672,6 +710,14 @@ interface IVISubtitleOverlayProps {
672
710
  maxItems?: number;
673
711
  /** @deprecated Use `maxItems` instead. */
674
712
  maxVisible?: number;
713
+ /**
714
+ * 当 roles 包含 "model" 时,是否使用 response.output_audio_transcript.* 事件作为 model 字幕来源。
715
+ */
716
+ useModelStreamingTranscript?: boolean;
717
+ /** 字幕进入完成态后多久自动移除,默认 3000ms;传 false 可关闭自动移除。 */
718
+ hideCompletedAfterMs?: IviUseSubtitlesOptions["hideCompletedAfterMs"];
719
+ /** 字幕进入完成态时的自定义保留/删除策略,优先于 hideCompletedAfterMs。 */
720
+ onSubtitleCompleted?: IviUseSubtitlesOptions["onSubtitleCompleted"];
675
721
  /** 样式配置 */
676
722
  subtitleStyle?: IVISubtitleOverlayStyle;
677
723
  /** 自定义类名 */
@@ -756,4 +802,4 @@ declare function useManagedIviRuntime(config: IviManagedRuntimeConfig): IviRunti
756
802
 
757
803
  declare function useIviStageView(): IviStageViewContextValue;
758
804
 
759
- export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleItem, type IviSubtitleRole, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
805
+ export { DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS, EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviCompletedSubtitleDecision, type IviCompletedSubtitleDecisionResult, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleCompletedContext, type IviSubtitleCompletedHandler, type IviSubtitleCompletedReason, type IviSubtitleItem, type IviSubtitleRole, type IviSubtitleSource, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
package/dist/index.d.ts CHANGED
@@ -627,6 +627,13 @@ interface IVIVolumeControlProps {
627
627
  }
628
628
 
629
629
  type IviSubtitleRole = IviRuntimeConversationItem["role"];
630
+ type IviSubtitleSource = "conversation" | "response_audio_transcript";
631
+ type IviSubtitleCompletedReason = "conversation_done" | "response_done";
632
+ type IviCompletedSubtitleDecision = "keep" | "remove" | {
633
+ removeAfterMs: number;
634
+ };
635
+ type IviCompletedSubtitleDecisionResult = IviCompletedSubtitleDecision | void | Promise<IviCompletedSubtitleDecision | void>;
636
+ declare const DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS = 3000;
630
637
  interface IviSubtitleItem {
631
638
  id: string;
632
639
  role: IviSubtitleRole;
@@ -637,16 +644,47 @@ interface IviSubtitleItem {
637
644
  displayText: string;
638
645
  content: IviRuntimeConversationItem["content"];
639
646
  item: IviRuntimeConversationItem["item"];
647
+ /** 字幕数据来源。 */
648
+ source?: IviSubtitleSource;
649
+ /** 关联的 response_id;仅 response_audio_transcript 来源有值。 */
650
+ responseId?: string;
651
+ /** 关联的 item_id;仅 response_audio_transcript 来源有值。 */
652
+ itemId?: string;
640
653
  /** 首次进入字幕队列的时间戳。 */
641
654
  timestamp: number;
642
655
  /** 最近一次字幕内容或状态更新的时间戳。 */
643
656
  updatedAt: number;
644
657
  }
658
/** Context object handed to an IviSubtitleCompletedHandler as its second argument. */
+ interface IviSubtitleCompletedContext {
659
+ /** Snapshot of the current subtitle queue. */
660
+ entries: readonly IviSubtitleItem[];
661
+ /** Whether this completion came from a conversation finishing or a response finishing. */
662
+ reason: IviSubtitleCompletedReason;
663
+ /** Aborted when the subtitle is removed, the session ends, the configuration changes, or the component unmounts. */
664
+ signal: AbortSignal;
665
+ }
666
+ type IviSubtitleCompletedHandler = (item: IviSubtitleItem, context: IviSubtitleCompletedContext) => IviCompletedSubtitleDecisionResult;
645
667
  /** Options accepted by the useIviSubtitles hook. */
  interface IviUseSubtitlesOptions {
646
668
  /** Speaker roles to collect; defaults to ["user"]. Accepts either a single string or an array. */
647
669
  roles?: IviSubtitleRole | IviSubtitleRole[];
648
670
  /** Maximum number of subtitles to keep; the oldest entries are dropped automatically once exceeded. Defaults to 2. */
649
671
  maxItems?: number;
672
+ /**
673
+ * When roles includes "model", whether to use response.output_audio_transcript.* events as the source of model subtitles.
674
+ * When enabled, model subtitles are no longer aggregated from conversation.*; response.output_audio_transcript.done is not treated as the end,
675
+ * and only response.done marks this turn's model subtitle as done.
676
+ */
677
+ useModelStreamingTranscript?: boolean;
678
+ /**
679
+ * How long after a subtitle enters the completed state it is removed automatically; defaults to 3000ms. Pass false to keep it until it is trimmed by maxItems.
680
+ * When onSubtitleCompleted is customized, this option only serves as the default policy used when no custom policy is provided.
681
+ */
682
+ hideCompletedAfterMs?: number | false;
683
+ /**
684
+ * Called once when a subtitle enters the completed state. Return "remove" to delete it immediately, return { removeAfterMs } to delete it after a delay,
685
+ * or return "keep" or undefined to keep it. Asynchronous policies can react to cancellation via context.signal.
686
+ */
687
+ onSubtitleCompleted?: IviSubtitleCompletedHandler;
650
688
  }
651
689
  /**
652
690
  * 监听 runtime 中的 conversation/response 事件,并维护当前应展示的字幕队列。
@@ -672,6 +710,14 @@ interface IVISubtitleOverlayProps {
672
710
  maxItems?: number;
673
711
  /** @deprecated Use `maxItems` instead. */
674
712
  maxVisible?: number;
713
+ /**
714
+ * 当 roles 包含 "model" 时,是否使用 response.output_audio_transcript.* 事件作为 model 字幕来源。
715
+ */
716
+ useModelStreamingTranscript?: boolean;
717
+ /** 字幕进入完成态后多久自动移除,默认 3000ms;传 false 可关闭自动移除。 */
718
+ hideCompletedAfterMs?: IviUseSubtitlesOptions["hideCompletedAfterMs"];
719
+ /** 字幕进入完成态时的自定义保留/删除策略,优先于 hideCompletedAfterMs。 */
720
+ onSubtitleCompleted?: IviUseSubtitlesOptions["onSubtitleCompleted"];
675
721
  /** 样式配置 */
676
722
  subtitleStyle?: IVISubtitleOverlayStyle;
677
723
  /** 自定义类名 */
@@ -756,4 +802,4 @@ declare function useManagedIviRuntime(config: IviManagedRuntimeConfig): IviRunti
756
802
 
757
803
  declare function useIviStageView(): IviStageViewContextValue;
758
804
 
759
- export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleItem, type IviSubtitleRole, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
805
+ export { DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS, EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviCompletedSubtitleDecision, type IviCompletedSubtitleDecisionResult, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleCompletedContext, type IviSubtitleCompletedHandler, type IviSubtitleCompletedReason, type IviSubtitleItem, type IviSubtitleRole, type IviSubtitleSource, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
package/dist/index.js CHANGED
@@ -3078,20 +3078,29 @@ function useApplyVolumeToSlot(containerRef, volume, enabled, activeSourceId) {
3078
3078
  return () => observer.disconnect();
3079
3079
  }, [containerRef, volume, enabled, activeSourceId]);
3080
3080
  }
3081
+ var DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS = 3e3;
3081
3082
  function useIviSubtitles(runtime, options = {}) {
3082
3083
  const roles = options.roles ?? "user";
3083
3084
  const maxItems = normalizeMaxItems(options.maxItems);
3085
+ const hideCompletedAfterMs = normalizeHideCompletedAfterMs(options.hideCompletedAfterMs);
3086
+ const onSubtitleCompleted = options.onSubtitleCompleted;
3084
3087
  const roleKey = Array.isArray(roles) ? roles.join("\0") : roles;
3085
3088
  const roleSet = useMemo(
3086
3089
  () => new Set(roleKey.split("\0")),
3087
3090
  [roleKey]
3088
3091
  );
3092
+ const useModelStreamingTranscript = options.useModelStreamingTranscript === true && roleSet.has("model");
3089
3093
  const [subtitles, setSubtitles] = useState([]);
3090
3094
  const seenIdsRef = useRef(/* @__PURE__ */ new Set());
3091
3095
  const initializedRef = useRef(false);
3096
+ const completedSubtitleIdsRef = useRef(/* @__PURE__ */ new Set());
3097
+ const completionControllersRef = useRef(/* @__PURE__ */ new Map());
3098
+ const completionPolicyRef = useRef(null);
3092
3099
  useEffect(() => {
3093
3100
  seenIdsRef.current = /* @__PURE__ */ new Set();
3094
3101
  initializedRef.current = false;
3102
+ completedSubtitleIdsRef.current = /* @__PURE__ */ new Set();
3103
+ abortCompletedSubtitleTasks(completionControllersRef.current);
3095
3104
  setSubtitles([]);
3096
3105
  if (!runtime) {
3097
3106
  return;
@@ -3102,7 +3111,7 @@ function useIviSubtitles(runtime, options = {}) {
3102
3111
  if (!initializedRef.current) {
3103
3112
  initializedRef.current = true;
3104
3113
  for (const item of conversations) {
3105
- if (item.lifecycle === "done" || !getDisplayText(item) || !roleSet.has(item.role)) {
3114
+ if (item.lifecycle === "done" || !getDisplayText(item) || !shouldUseConversationRole(item.role, roleSet, useModelStreamingTranscript)) {
3106
3115
  seenIds.add(item.id);
3107
3116
  }
3108
3117
  }
@@ -3111,8 +3120,12 @@ function useIviSubtitles(runtime, options = {}) {
3111
3120
  const conversationMap = new Map(conversations.map((item) => [item.id, item]));
3112
3121
  const nextById = /* @__PURE__ */ new Map();
3113
3122
  for (const previousItem of previous) {
3123
+ if (previousItem.source === "response_audio_transcript") {
3124
+ nextById.set(previousItem.id, previousItem);
3125
+ continue;
3126
+ }
3114
3127
  const conversation = conversationMap.get(previousItem.id);
3115
- if (!conversation || !roleSet.has(conversation.role) || !getDisplayText(conversation)) {
3128
+ if (!conversation || !shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
3116
3129
  continue;
3117
3130
  }
3118
3131
  nextById.set(
@@ -3125,7 +3138,7 @@ function useIviSubtitles(runtime, options = {}) {
3125
3138
  );
3126
3139
  }
3127
3140
  for (const conversation of conversations) {
3128
- if (!roleSet.has(conversation.role) || !getDisplayText(conversation)) {
3141
+ if (!shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
3129
3142
  continue;
3130
3143
  }
3131
3144
  if (seenIds.has(conversation.id)) {
@@ -3135,10 +3148,7 @@ function useIviSubtitles(runtime, options = {}) {
3135
3148
  nextById.set(conversation.id, buildSubtitleItem(conversation, now, now));
3136
3149
  }
3137
3150
  const next = Array.from(nextById.values());
3138
- if (maxItems === 0) {
3139
- return [];
3140
- }
3141
- return next.length > maxItems ? next.slice(next.length - maxItems) : next;
3151
+ return trimSubtitleItems(next, maxItems);
3142
3152
  });
3143
3153
  };
3144
3154
  syncConversations(runtime.getState().conversations);
@@ -3146,15 +3156,97 @@ function useIviSubtitles(runtime, options = {}) {
3146
3156
  if (event.type === "session.ended") {
3147
3157
  seenIdsRef.current = /* @__PURE__ */ new Set();
3148
3158
  initializedRef.current = false;
3159
+ completedSubtitleIdsRef.current = /* @__PURE__ */ new Set();
3160
+ abortCompletedSubtitleTasks(completionControllersRef.current);
3149
3161
  setSubtitles([]);
3150
3162
  return;
3151
3163
  }
3164
+ if (useModelStreamingTranscript && isModelStreamingTranscriptEvent(event)) {
3165
+ setSubtitles((previous) => trimSubtitleItems(upsertModelStreamingSubtitle(previous, event), maxItems));
3166
+ return;
3167
+ }
3168
+ if (useModelStreamingTranscript && isResponseDoneEvent(event)) {
3169
+ setSubtitles((previous) => trimSubtitleItems(markModelStreamingSubtitleDone(previous, event), maxItems));
3170
+ return;
3171
+ }
3152
3172
  if (!isSubtitleRelatedEvent(event.type)) {
3153
3173
  return;
3154
3174
  }
3155
3175
  syncConversations(state.conversations);
3156
3176
  });
3157
- }, [runtime, roleSet, maxItems]);
3177
+ }, [runtime, roleSet, maxItems, useModelStreamingTranscript]);
3178
+ useEffect(() => {
3179
+ return () => {
3180
+ completedSubtitleIdsRef.current = /* @__PURE__ */ new Set();
3181
+ abortCompletedSubtitleTasks(completionControllersRef.current);
3182
+ };
3183
+ }, []);
3184
+ useEffect(() => {
3185
+ const previousPolicy = completionPolicyRef.current;
3186
+ const policyChanged = Boolean(
3187
+ previousPolicy && (previousPolicy.hideCompletedAfterMs !== hideCompletedAfterMs || previousPolicy.onSubtitleCompleted !== onSubtitleCompleted)
3188
+ );
3189
+ completionPolicyRef.current = { hideCompletedAfterMs, onSubtitleCompleted };
3190
+ if (policyChanged) {
3191
+ completedSubtitleIdsRef.current = /* @__PURE__ */ new Set();
3192
+ abortCompletedSubtitleTasks(completionControllersRef.current);
3193
+ }
3194
+ const subtitleById = new Map(subtitles.map((item) => [item.id, item]));
3195
+ const completedIds = completedSubtitleIdsRef.current;
3196
+ const controllers = completionControllersRef.current;
3197
+ for (const [id, controller] of Array.from(controllers)) {
3198
+ const item = subtitleById.get(id);
3199
+ if (!item || !isCompletedSubtitle(item)) {
3200
+ controller.abort();
3201
+ controllers.delete(id);
3202
+ completedIds.delete(id);
3203
+ }
3204
+ }
3205
+ for (const id of Array.from(completedIds)) {
3206
+ const item = subtitleById.get(id);
3207
+ if (!item || !isCompletedSubtitle(item)) {
3208
+ completedIds.delete(id);
3209
+ }
3210
+ }
3211
+ for (const item of subtitles) {
3212
+ if (!isCompletedSubtitle(item) || completedIds.has(item.id)) {
3213
+ continue;
3214
+ }
3215
+ completedIds.add(item.id);
3216
+ const controller = new AbortController();
3217
+ controllers.set(item.id, controller);
3218
+ const context = {
3219
+ entries: subtitles,
3220
+ reason: getSubtitleCompletedReason(item),
3221
+ signal: controller.signal
3222
+ };
3223
+ let decisionResult;
3224
+ try {
3225
+ decisionResult = onSubtitleCompleted ? onSubtitleCompleted(item, context) : getDefaultCompletedSubtitleDecision(hideCompletedAfterMs);
3226
+ } catch {
3227
+ decisionResult = "keep";
3228
+ }
3229
+ void Promise.resolve(decisionResult).then((decision) => {
3230
+ applyCompletedSubtitleDecision(
3231
+ item.id,
3232
+ decision,
3233
+ controller,
3234
+ controllers,
3235
+ completedIds,
3236
+ (id) => setSubtitles((previous) => removeSubtitleById(previous, id))
3237
+ );
3238
+ }).catch(() => {
3239
+ applyCompletedSubtitleDecision(
3240
+ item.id,
3241
+ "keep",
3242
+ controller,
3243
+ controllers,
3244
+ completedIds,
3245
+ (id) => setSubtitles((previous) => removeSubtitleById(previous, id))
3246
+ );
3247
+ });
3248
+ }
3249
+ }, [subtitles, hideCompletedAfterMs, onSubtitleCompleted]);
3158
3250
  return subtitles;
3159
3251
  }
3160
3252
  function normalizeMaxItems(maxItems) {
@@ -3166,6 +3258,96 @@ function normalizeMaxItems(maxItems) {
3166
3258
  }
3167
3259
  return Math.max(0, Math.floor(maxItems));
3168
3260
  }
3261
+ function normalizeHideCompletedAfterMs(hideCompletedAfterMs) {
3262
+ if (hideCompletedAfterMs === false) {
3263
+ return false;
3264
+ }
3265
+ if (hideCompletedAfterMs === void 0 || !Number.isFinite(hideCompletedAfterMs)) {
3266
+ return DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS;
3267
+ }
3268
+ return Math.max(0, Math.floor(hideCompletedAfterMs));
3269
+ }
3270
/**
 * Builds the completion decision used when no custom `onSubtitleCompleted`
 * handler is supplied.
 *
 * @param hideCompletedAfterMs Normalized option value: `false` or a delay in ms.
 * @returns `"keep"` for `false`, `"remove"` for a zero delay, otherwise a
 *   `{ removeAfterMs }` object carrying the delay.
 */
function getDefaultCompletedSubtitleDecision(hideCompletedAfterMs) {
  switch (hideCompletedAfterMs) {
    case false:
      return "keep";
    case 0:
      return "remove";
    default:
      return { removeAfterMs: hideCompletedAfterMs };
  }
}
3276
/**
 * Coerces whatever a completion handler returned into one of the three
 * supported decisions.
 *
 * @param decision Raw handler result (may be anything, including undefined).
 * @returns `"keep"`, `"remove"`, or `{ removeAfterMs }` with a positive whole
 *   number of milliseconds. Anything unrecognized, and any non-finite delay,
 *   becomes `"keep"`; a delay that floors to 0 or less becomes `"remove"`.
 */
function normalizeCompletedSubtitleDecision(decision) {
  if (decision === "keep" || decision === "remove") {
    return decision;
  }

  const isDelayObject =
    typeof decision === "object" && decision !== null && "removeAfterMs" in decision;
  if (!isDelayObject) {
    return "keep";
  }

  const requestedMs = Number(decision.removeAfterMs);
  if (!Number.isFinite(requestedMs)) {
    return "keep";
  }

  const delayMs = Math.max(0, Math.floor(requestedMs));
  if (delayMs === 0) {
    return "remove";
  }
  return { removeAfterMs: delayMs };
}
3290
/**
 * Decides whether a conversation entry with the given role should feed the
 * subtitle queue.
 *
 * @param role Role of the conversation entry.
 * @param roleSet Set of roles the caller asked to collect.
 * @param useModelStreamingTranscript Whether "model" subtitles are sourced from
 *   streaming transcript events instead of conversation entries.
 * @returns `false` when the role was not requested, or when it is "model" and
 *   streaming transcripts are in use; `true` otherwise.
 */
function shouldUseConversationRole(role, roleSet, useModelStreamingTranscript) {
  const isStreamedModelRole = useModelStreamingTranscript && role === "model";
  return roleSet.has(role) ? !isStreamedModelRole : false;
}
3296
+ function trimSubtitleItems(items, maxItems) {
3297
+ if (maxItems === 0) {
3298
+ return [];
3299
+ }
3300
+ const sorted = [...items].sort((a, b) => a.timestamp - b.timestamp);
3301
+ return sorted.length > maxItems ? sorted.slice(sorted.length - maxItems) : sorted;
3302
+ }
3303
/**
 * Reports whether a subtitle has reached a terminal state.
 *
 * @param item Subtitle entry to inspect.
 * @returns `true` when `lifecycle` is "done" or `status` is "completed" or
 *   "incomplete"; `false` otherwise.
 */
function isCompletedSubtitle(item) {
  if (item.lifecycle === "done") {
    return true;
  }
  const { status } = item;
  return status === "completed" || status === "incomplete";
}
3306
/**
 * Derives the completion reason reported to completion handlers.
 *
 * @param item Completed subtitle entry.
 * @returns "response_done" when the entry came from the streaming transcript
 *   source or has the "model" role; "conversation_done" otherwise.
 */
function getSubtitleCompletedReason(item) {
  const fromModelResponse =
    item.source === "response_audio_transcript" || item.role === "model";
  if (fromModelResponse) {
    return "response_done";
  }
  return "conversation_done";
}
3309
/**
 * Aborts every pending completion task and empties the registry.
 *
 * @param controllers Map of subtitle id to the AbortController of its pending
 *   completion task; it is cleared in place.
 */
function abortCompletedSubtitleTasks(controllers) {
  controllers.forEach((pendingTask) => {
    pendingTask.abort();
  });
  controllers.clear();
}
3315
+ function applyCompletedSubtitleDecision(itemId, decision, controller, controllers, completedIds, removeSubtitle) {
3316
+ if (controller.signal.aborted) {
3317
+ return;
3318
+ }
3319
+ const normalizedDecision = normalizeCompletedSubtitleDecision(decision);
3320
+ if (normalizedDecision === "keep") {
3321
+ controllers.delete(itemId);
3322
+ return;
3323
+ }
3324
+ const remove = () => {
3325
+ if (controller.signal.aborted) {
3326
+ return;
3327
+ }
3328
+ controllers.delete(itemId);
3329
+ completedIds.delete(itemId);
3330
+ removeSubtitle(itemId);
3331
+ };
3332
+ if (normalizedDecision === "remove") {
3333
+ remove();
3334
+ return;
3335
+ }
3336
+ const timeout = setTimeout(remove, normalizedDecision.removeAfterMs);
3337
+ controller.signal.addEventListener(
3338
+ "abort",
3339
+ () => {
3340
+ clearTimeout(timeout);
3341
+ },
3342
+ { once: true }
3343
+ );
3344
+ }
3345
+ function removeSubtitleById(items, itemId) {
3346
+ if (!items.some((item) => item.id === itemId)) {
3347
+ return items;
3348
+ }
3349
+ return items.filter((item) => item.id !== itemId);
3350
+ }
3169
3351
  function getDisplayText(item) {
3170
3352
  return item.text || item.transcript;
3171
3353
  }
@@ -3180,16 +3362,109 @@ function buildSubtitleItem(item, timestamp, updatedAt) {
3180
3362
  displayText: getDisplayText(item),
3181
3363
  content: item.content,
3182
3364
  item: item.item,
3365
+ source: "conversation",
3183
3366
  timestamp,
3184
3367
  updatedAt
3185
3368
  };
3186
3369
  }
3370
+ function upsertModelStreamingSubtitle(previous, event, now = Date.now()) {
3371
+ const responseId = getEventString(event, "responseId", "response_id");
3372
+ if (!responseId) {
3373
+ return previous;
3374
+ }
3375
+ const displayText = event.type === "response.output_audio_transcript.done" ? event.transcript : event.delta;
3376
+ if (typeof displayText !== "string" || displayText.length === 0) {
3377
+ return previous;
3378
+ }
3379
+ const id = buildModelStreamingSubtitleId(responseId);
3380
+ const itemId = getEventString(event, "itemId", "item_id");
3381
+ const existing = previous.find((item) => item.id === id);
3382
+ const nextItem = {
3383
+ id,
3384
+ role: "model",
3385
+ lifecycle: "added",
3386
+ status: "in_progress",
3387
+ text: "",
3388
+ transcript: displayText,
3389
+ displayText,
3390
+ content: [],
3391
+ item: {
3392
+ id: itemId ?? id,
3393
+ type: "message",
3394
+ role: "model",
3395
+ status: "in_progress",
3396
+ content: [{ type: "audio", transcript: displayText }]
3397
+ },
3398
+ source: "response_audio_transcript",
3399
+ responseId,
3400
+ itemId,
3401
+ timestamp: existing?.timestamp ?? now,
3402
+ updatedAt: hasStreamingSubtitleChanged(existing, displayText) ? now : existing?.updatedAt ?? now
3403
+ };
3404
+ return replaceSubtitleItem(previous, nextItem);
3405
+ }
3406
+ function markModelStreamingSubtitleDone(previous, event, now = Date.now()) {
3407
+ const responseId = event.response?.id;
3408
+ if (!responseId) {
3409
+ return previous;
3410
+ }
3411
+ const id = buildModelStreamingSubtitleId(responseId);
3412
+ const status = mapResponseStatusToConversationStatus(event.response?.status);
3413
+ return previous.map((item) => {
3414
+ if (item.id !== id || item.source !== "response_audio_transcript") {
3415
+ return item;
3416
+ }
3417
+ return {
3418
+ ...item,
3419
+ lifecycle: "done",
3420
+ status,
3421
+ item: {
3422
+ ...item.item,
3423
+ status
3424
+ },
3425
+ updatedAt: now
3426
+ };
3427
+ });
3428
+ }
3429
/**
 * Returns a copy of the list in which entries sharing `nextItem.id` are
 * replaced by `nextItem`; if no entry has that id, `nextItem` is appended.
 *
 * @param previous Current subtitle entries (left untouched).
 * @param nextItem Entry to put into the list.
 * @returns A new array containing `nextItem`.
 */
function replaceSubtitleItem(previous, nextItem) {
  let wasReplaced = false;
  const merged = previous.map((candidate) => {
    if (candidate.id !== nextItem.id) {
      return candidate;
    }
    wasReplaced = true;
    return nextItem;
  });
  if (!wasReplaced) {
    merged.push(nextItem);
  }
  return merged;
}
3436
/**
 * Tells whether writing `nextDisplayText` as an in-progress streaming subtitle
 * would change the existing entry.
 *
 * @param previous Existing entry, or undefined when there is none yet.
 * @param nextDisplayText Text about to be displayed.
 * @returns `true` when there is no existing entry, the text differs, or the
 *   entry is not currently lifecycle "added" with status "in_progress".
 */
function hasStreamingSubtitleChanged(previous, nextDisplayText) {
  if (!previous) {
    return true;
  }
  const sameText = previous.displayText === nextDisplayText;
  const stillStreaming = previous.lifecycle === "added" && previous.status === "in_progress";
  return !(sameText && stillStreaming);
}
3439
/**
 * Builds the subtitle id used for a model response's streaming transcript.
 *
 * @param responseId Id of the response the transcript belongs to.
 * @returns The response id prefixed with "model-response:".
 */
function buildModelStreamingSubtitleId(responseId) {
  return "model-response:".concat(responseId);
}
3442
/**
 * Reads a non-empty string field from an event that may spell the field in
 * either camelCase or snake_case.
 *
 * The keys are tried in order and the first one holding a non-empty string
 * wins. An unusable camelCase value (empty string, number, object, ...) no
 * longer hides a valid snake_case value: the previous
 * `record[camelKey] ?? record[snakeKey]` lookup only fell through to the
 * snake_case key when the camelCase value was null or undefined.
 *
 * @param event Event object to read from.
 * @param camelKey camelCase spelling of the field, tried first.
 * @param snakeKey snake_case spelling of the field, tried second.
 * @returns The first non-empty string found, or undefined when neither key
 *   holds one.
 */
function getEventString(event, camelKey, snakeKey) {
  for (const key of [camelKey, snakeKey]) {
    const candidate = event[key];
    if (typeof candidate === "string" && candidate.length > 0) {
      return candidate;
    }
  }
  return undefined;
}
3447
/**
 * Maps a response status onto the status vocabulary used by subtitle entries.
 *
 * @param status Status reported on the response.
 * @returns "completed" or "in_progress" when the status is exactly that value;
 *   "incomplete" for anything else (including undefined).
 */
function mapResponseStatusToConversationStatus(status) {
  switch (status) {
    case "completed":
    case "in_progress":
      return status;
    default:
      return "incomplete";
  }
}
3187
3456
  function hasSubtitleChanged(previous, next) {
3188
3457
  return previous.text !== next.text || previous.transcript !== next.transcript || previous.lifecycle !== next.lifecycle || previous.status !== next.status || previous.role !== next.role;
3189
3458
  }
3190
3459
  function isSubtitleRelatedEvent(type) {
3191
3460
  return type.startsWith("conversation.") || type.startsWith("response.");
3192
3461
  }
3462
/**
 * Recognizes the streaming audio-transcript events used for model subtitles.
 *
 * @param event Runtime event.
 * @returns `true` when `event.type` is "response.output_audio_transcript.delta"
 *   or "response.output_audio_transcript.done".
 */
function isModelStreamingTranscriptEvent(event) {
  switch (event.type) {
    case "response.output_audio_transcript.delta":
    case "response.output_audio_transcript.done":
      return true;
    default:
      return false;
  }
}
3465
/**
 * Recognizes the event that signals a response has finished.
 *
 * @param event Runtime event.
 * @returns `true` when `event.type` is exactly "response.done".
 */
function isResponseDoneEvent(event) {
  const { type } = event;
  return type === "response.done";
}
3193
3468
  var BREATHE_KEYFRAMES = `@keyframes ivi-subtitle-breathe{0%,100%{opacity:1}50%{opacity:.55}}`;
3194
3469
  function IVISubtitleOverlay(props) {
3195
3470
  const {
@@ -3197,13 +3472,19 @@ function IVISubtitleOverlay(props) {
3197
3472
  roles = "user",
3198
3473
  maxItems,
3199
3474
  maxVisible,
3475
+ useModelStreamingTranscript,
3476
+ hideCompletedAfterMs,
3477
+ onSubtitleCompleted,
3200
3478
  subtitleStyle,
3201
3479
  className,
3202
3480
  style
3203
3481
  } = props;
3204
3482
  const entries = useIviSubtitles(runtime, {
3205
3483
  roles,
3206
- maxItems: maxItems ?? maxVisible
3484
+ maxItems: maxItems ?? maxVisible,
3485
+ useModelStreamingTranscript,
3486
+ hideCompletedAfterMs,
3487
+ onSubtitleCompleted
3207
3488
  });
3208
3489
  if (entries.length === 0) return null;
3209
3490
  const fontFamily = subtitleStyle?.fontFamily ?? "system-ui, -apple-system, sans-serif";
@@ -4294,6 +4575,6 @@ function getClientLogTag(category) {
4294
4575
  return "[IVI-CLIENT]";
4295
4576
  }
4296
4577
 
4297
- export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, IVIStageView, IVISubtitleOverlay, IVITrackSlot, IVITrtcPlayer, IviFrontendSdk, IviRuntimeCoordinator, IviRuntimeDispatcher, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
4578
+ export { DEFAULT_HIDE_COMPLETED_SUBTITLE_AFTER_MS, EMPTY_RUNTIME_STATE, IVILivekitPlayer, IVIStageView, IVISubtitleOverlay, IVITrackSlot, IVITrtcPlayer, IviFrontendSdk, IviRuntimeCoordinator, IviRuntimeDispatcher, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
4298
4579
  //# sourceMappingURL=index.js.map
4299
4580
  //# sourceMappingURL=index.js.map