@vivix-ai/ivi-frontend-sdk 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -627,6 +627,7 @@ interface IVIVolumeControlProps {
627
627
  }
628
628
 
629
629
  type IviSubtitleRole = IviRuntimeConversationItem["role"];
630
+ type IviSubtitleSource = "conversation" | "response_audio_transcript";
630
631
  interface IviSubtitleItem {
631
632
  id: string;
632
633
  role: IviSubtitleRole;
@@ -637,6 +638,12 @@ interface IviSubtitleItem {
637
638
  displayText: string;
638
639
  content: IviRuntimeConversationItem["content"];
639
640
  item: IviRuntimeConversationItem["item"];
641
+ /** 字幕数据来源。 */
642
+ source?: IviSubtitleSource;
643
+ /** 关联的 response_id;仅 response_audio_transcript 来源有值。 */
644
+ responseId?: string;
645
+ /** 关联的 item_id;仅 response_audio_transcript 来源有值。 */
646
+ itemId?: string;
640
647
  /** 首次进入字幕队列的时间戳。 */
641
648
  timestamp: number;
642
649
  /** 最近一次字幕内容或状态更新的时间戳。 */
@@ -647,6 +654,12 @@ interface IviUseSubtitlesOptions {
647
654
  roles?: IviSubtitleRole | IviSubtitleRole[];
648
655
  /** 最多保留的字幕条数,超过后自动清理最旧条目,默认 2。 */
649
656
  maxItems?: number;
657
+ /**
658
+ * 当 roles 包含 "model" 时,是否使用 response.output_audio_transcript.* 事件作为 model 字幕来源。
659
+ * 开启后 model 字幕不再从 conversation.* 聚合;response.output_audio_transcript.done 不视为结束,
660
+ * 只有 response.done 才会把这一轮 model 字幕标记为 done。
661
+ */
662
+ useModelStreamingTranscript?: boolean;
650
663
  }
651
664
  /**
652
665
  * 监听 runtime 中的 conversation/response 事件,并维护当前应展示的字幕队列。
@@ -672,6 +685,10 @@ interface IVISubtitleOverlayProps {
672
685
  maxItems?: number;
673
686
  /** @deprecated Use `maxItems` instead. */
674
687
  maxVisible?: number;
688
+ /**
689
+ * 当 roles 包含 "model" 时,是否使用 response.output_audio_transcript.* 事件作为 model 字幕来源。
690
+ */
691
+ useModelStreamingTranscript?: boolean;
675
692
  /** 样式配置 */
676
693
  subtitleStyle?: IVISubtitleOverlayStyle;
677
694
  /** 自定义类名 */
@@ -756,4 +773,4 @@ declare function useManagedIviRuntime(config: IviManagedRuntimeConfig): IviRunti
756
773
 
757
774
  declare function useIviStageView(): IviStageViewContextValue;
758
775
 
759
- export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleItem, type IviSubtitleRole, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
776
+ export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleItem, type IviSubtitleRole, type IviSubtitleSource, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
package/dist/index.d.ts CHANGED
@@ -627,6 +627,7 @@ interface IVIVolumeControlProps {
627
627
  }
628
628
 
629
629
  type IviSubtitleRole = IviRuntimeConversationItem["role"];
630
+ type IviSubtitleSource = "conversation" | "response_audio_transcript";
630
631
  interface IviSubtitleItem {
631
632
  id: string;
632
633
  role: IviSubtitleRole;
@@ -637,6 +638,12 @@ interface IviSubtitleItem {
637
638
  displayText: string;
638
639
  content: IviRuntimeConversationItem["content"];
639
640
  item: IviRuntimeConversationItem["item"];
641
+ /** 字幕数据来源。 */
642
+ source?: IviSubtitleSource;
643
+ /** 关联的 response_id;仅 response_audio_transcript 来源有值。 */
644
+ responseId?: string;
645
+ /** 关联的 item_id;仅 response_audio_transcript 来源有值。 */
646
+ itemId?: string;
640
647
  /** 首次进入字幕队列的时间戳。 */
641
648
  timestamp: number;
642
649
  /** 最近一次字幕内容或状态更新的时间戳。 */
@@ -647,6 +654,12 @@ interface IviUseSubtitlesOptions {
647
654
  roles?: IviSubtitleRole | IviSubtitleRole[];
648
655
  /** 最多保留的字幕条数,超过后自动清理最旧条目,默认 2。 */
649
656
  maxItems?: number;
657
+ /**
658
+ * 当 roles 包含 "model" 时,是否使用 response.output_audio_transcript.* 事件作为 model 字幕来源。
659
+ * 开启后 model 字幕不再从 conversation.* 聚合;response.output_audio_transcript.done 不视为结束,
660
+ * 只有 response.done 才会把这一轮 model 字幕标记为 done。
661
+ */
662
+ useModelStreamingTranscript?: boolean;
650
663
  }
651
664
  /**
652
665
  * 监听 runtime 中的 conversation/response 事件,并维护当前应展示的字幕队列。
@@ -672,6 +685,10 @@ interface IVISubtitleOverlayProps {
672
685
  maxItems?: number;
673
686
  /** @deprecated Use `maxItems` instead. */
674
687
  maxVisible?: number;
688
+ /**
689
+ * 当 roles 包含 "model" 时,是否使用 response.output_audio_transcript.* 事件作为 model 字幕来源。
690
+ */
691
+ useModelStreamingTranscript?: boolean;
675
692
  /** 样式配置 */
676
693
  subtitleStyle?: IVISubtitleOverlayStyle;
677
694
  /** 自定义类名 */
@@ -756,4 +773,4 @@ declare function useManagedIviRuntime(config: IviManagedRuntimeConfig): IviRunti
756
773
 
757
774
  declare function useIviStageView(): IviStageViewContextValue;
758
775
 
759
- export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleItem, type IviSubtitleRole, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
776
+ export { EMPTY_RUNTIME_STATE, IVILivekitPlayer, type IVILivekitPlayerProps, IVIStageView, type IVIStageViewProps, IVISubtitleOverlay, type IVISubtitleOverlayProps, type IVISubtitleOverlayStyle, IVITrackSlot, type IVITrackSlotProps, IVITrtcPlayer, type IVITrtcPlayerProps, type IviFrontendClientConfig, IviFrontendSdk, type IviManagedRuntimeConfig, type IviManagedRuntimeLogCallback, type IviManagedRuntimeLogEntry, type IviManagedRuntimeLogLevel, type IviManagedRuntimeLogSource, type IviRuntimeConversationItem, type IviRuntimeConversationLifecycle, type IviRuntimeConversationStatus, IviRuntimeCoordinator, type IviRuntimeCoordinatorConfig, IviRuntimeDispatcher, type IviRuntimeDispatcherConfig, type IviRuntimeEventListener, type IviRuntimeLogCallback, type IviRuntimeLogEntry, type IviRuntimeLogLevel, type IviRuntimeSource, type IviRuntimeSourcePreloadState, type IviRuntimeState, type IviRuntimeStatus, type IviRuntimeStream, type IviRuntimeTrtcAIDenoiserMode, type IviRuntimeTrtcAIDenoiserOptions, type IviRuntimeTrtcEvent, type IviRuntimeTrtcEventListener, type IviRuntimeTrtcEventType, type IviRuntimeUserTextToResponseCallbacks, type IviRuntimeUserTextToResponseOptions, type IviRuntimeUserTextToResponseResult, type IviSourcePlaybackLivekit, type IviSourcePlaybackLivekitDescriptor, type IviStageSlotBinding, type IviStageViewContextValue, type IviSubtitleItem, type IviSubtitleRole, type IviSubtitleSource, type IviUseSubtitlesOptions, LivekitSourceManager, TrtcSourceManager, isLivekitSourcePlayback, isReadyLivekitRuntimeSource, isSameLivekitConfig, useIviStageView, useIviSubtitles, useManagedIviRuntime, useRuntimeState };
package/dist/index.js CHANGED
@@ -3086,6 +3086,7 @@ function useIviSubtitles(runtime, options = {}) {
3086
3086
  () => new Set(roleKey.split("\0")),
3087
3087
  [roleKey]
3088
3088
  );
3089
+ const useModelStreamingTranscript = options.useModelStreamingTranscript === true && roleSet.has("model");
3089
3090
  const [subtitles, setSubtitles] = useState([]);
3090
3091
  const seenIdsRef = useRef(/* @__PURE__ */ new Set());
3091
3092
  const initializedRef = useRef(false);
@@ -3102,7 +3103,7 @@ function useIviSubtitles(runtime, options = {}) {
3102
3103
  if (!initializedRef.current) {
3103
3104
  initializedRef.current = true;
3104
3105
  for (const item of conversations) {
3105
- if (item.lifecycle === "done" || !getDisplayText(item) || !roleSet.has(item.role)) {
3106
+ if (item.lifecycle === "done" || !getDisplayText(item) || !shouldUseConversationRole(item.role, roleSet, useModelStreamingTranscript)) {
3106
3107
  seenIds.add(item.id);
3107
3108
  }
3108
3109
  }
@@ -3111,8 +3112,12 @@ function useIviSubtitles(runtime, options = {}) {
3111
3112
  const conversationMap = new Map(conversations.map((item) => [item.id, item]));
3112
3113
  const nextById = /* @__PURE__ */ new Map();
3113
3114
  for (const previousItem of previous) {
3115
+ if (previousItem.source === "response_audio_transcript") {
3116
+ nextById.set(previousItem.id, previousItem);
3117
+ continue;
3118
+ }
3114
3119
  const conversation = conversationMap.get(previousItem.id);
3115
- if (!conversation || !roleSet.has(conversation.role) || !getDisplayText(conversation)) {
3120
+ if (!conversation || !shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
3116
3121
  continue;
3117
3122
  }
3118
3123
  nextById.set(
@@ -3125,7 +3130,7 @@ function useIviSubtitles(runtime, options = {}) {
3125
3130
  );
3126
3131
  }
3127
3132
  for (const conversation of conversations) {
3128
- if (!roleSet.has(conversation.role) || !getDisplayText(conversation)) {
3133
+ if (!shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
3129
3134
  continue;
3130
3135
  }
3131
3136
  if (seenIds.has(conversation.id)) {
@@ -3135,10 +3140,7 @@ function useIviSubtitles(runtime, options = {}) {
3135
3140
  nextById.set(conversation.id, buildSubtitleItem(conversation, now, now));
3136
3141
  }
3137
3142
  const next = Array.from(nextById.values());
3138
- if (maxItems === 0) {
3139
- return [];
3140
- }
3141
- return next.length > maxItems ? next.slice(next.length - maxItems) : next;
3143
+ return trimSubtitleItems(next, maxItems);
3142
3144
  });
3143
3145
  };
3144
3146
  syncConversations(runtime.getState().conversations);
@@ -3149,12 +3151,20 @@ function useIviSubtitles(runtime, options = {}) {
3149
3151
  setSubtitles([]);
3150
3152
  return;
3151
3153
  }
3154
+ if (useModelStreamingTranscript && isModelStreamingTranscriptEvent(event)) {
3155
+ setSubtitles((previous) => trimSubtitleItems(upsertModelStreamingSubtitle(previous, event), maxItems));
3156
+ return;
3157
+ }
3158
+ if (useModelStreamingTranscript && isResponseDoneEvent(event)) {
3159
+ setSubtitles((previous) => trimSubtitleItems(markModelStreamingSubtitleDone(previous, event), maxItems));
3160
+ return;
3161
+ }
3152
3162
  if (!isSubtitleRelatedEvent(event.type)) {
3153
3163
  return;
3154
3164
  }
3155
3165
  syncConversations(state.conversations);
3156
3166
  });
3157
- }, [runtime, roleSet, maxItems]);
3167
+ }, [runtime, roleSet, maxItems, useModelStreamingTranscript]);
3158
3168
  return subtitles;
3159
3169
  }
3160
3170
  function normalizeMaxItems(maxItems) {
@@ -3166,6 +3176,19 @@ function normalizeMaxItems(maxItems) {
3166
3176
  }
3167
3177
  return Math.max(0, Math.floor(maxItems));
3168
3178
  }
3179
+ function shouldUseConversationRole(role, roleSet, useModelStreamingTranscript) {
3180
+ if (!roleSet.has(role)) {
3181
+ return false;
3182
+ }
3183
+ return !(useModelStreamingTranscript && role === "model");
3184
+ }
3185
+ function trimSubtitleItems(items, maxItems) {
3186
+ if (maxItems === 0) {
3187
+ return [];
3188
+ }
3189
+ const sorted = [...items].sort((a, b) => a.timestamp - b.timestamp);
3190
+ return sorted.length > maxItems ? sorted.slice(sorted.length - maxItems) : sorted;
3191
+ }
3169
3192
  function getDisplayText(item) {
3170
3193
  return item.text || item.transcript;
3171
3194
  }
@@ -3180,16 +3203,109 @@ function buildSubtitleItem(item, timestamp, updatedAt) {
3180
3203
  displayText: getDisplayText(item),
3181
3204
  content: item.content,
3182
3205
  item: item.item,
3206
+ source: "conversation",
3183
3207
  timestamp,
3184
3208
  updatedAt
3185
3209
  };
3186
3210
  }
3211
+ function upsertModelStreamingSubtitle(previous, event, now = Date.now()) {
3212
+ const responseId = getEventString(event, "responseId", "response_id");
3213
+ if (!responseId) {
3214
+ return previous;
3215
+ }
3216
+ const displayText = event.type === "response.output_audio_transcript.done" ? event.transcript : event.delta;
3217
+ if (typeof displayText !== "string" || displayText.length === 0) {
3218
+ return previous;
3219
+ }
3220
+ const id = buildModelStreamingSubtitleId(responseId);
3221
+ const itemId = getEventString(event, "itemId", "item_id");
3222
+ const existing = previous.find((item) => item.id === id);
3223
+ const nextItem = {
3224
+ id,
3225
+ role: "model",
3226
+ lifecycle: "added",
3227
+ status: "in_progress",
3228
+ text: "",
3229
+ transcript: displayText,
3230
+ displayText,
3231
+ content: [],
3232
+ item: {
3233
+ id: itemId ?? id,
3234
+ type: "message",
3235
+ role: "model",
3236
+ status: "in_progress",
3237
+ content: [{ type: "audio", transcript: displayText }]
3238
+ },
3239
+ source: "response_audio_transcript",
3240
+ responseId,
3241
+ itemId,
3242
+ timestamp: existing?.timestamp ?? now,
3243
+ updatedAt: hasStreamingSubtitleChanged(existing, displayText) ? now : existing?.updatedAt ?? now
3244
+ };
3245
+ return replaceSubtitleItem(previous, nextItem);
3246
+ }
3247
+ function markModelStreamingSubtitleDone(previous, event, now = Date.now()) {
3248
+ const responseId = event.response?.id;
3249
+ if (!responseId) {
3250
+ return previous;
3251
+ }
3252
+ const id = buildModelStreamingSubtitleId(responseId);
3253
+ const status = mapResponseStatusToConversationStatus(event.response?.status);
3254
+ return previous.map((item) => {
3255
+ if (item.id !== id || item.source !== "response_audio_transcript") {
3256
+ return item;
3257
+ }
3258
+ return {
3259
+ ...item,
3260
+ lifecycle: "done",
3261
+ status,
3262
+ item: {
3263
+ ...item.item,
3264
+ status
3265
+ },
3266
+ updatedAt: now
3267
+ };
3268
+ });
3269
+ }
3270
+ function replaceSubtitleItem(previous, nextItem) {
3271
+ const replaced = previous.map((item) => item.id === nextItem.id ? nextItem : item);
3272
+ if (replaced.some((item) => item.id === nextItem.id)) {
3273
+ return replaced;
3274
+ }
3275
+ return [...previous, nextItem];
3276
+ }
3277
+ function hasStreamingSubtitleChanged(previous, nextDisplayText) {
3278
+ return !previous || previous.displayText !== nextDisplayText || previous.lifecycle !== "added" || previous.status !== "in_progress";
3279
+ }
3280
+ function buildModelStreamingSubtitleId(responseId) {
3281
+ return `model-response:${responseId}`;
3282
+ }
3283
+ function getEventString(event, camelKey, snakeKey) {
3284
+ const record = event;
3285
+ const value = record[camelKey] ?? record[snakeKey];
3286
+ return typeof value === "string" && value.length > 0 ? value : void 0;
3287
+ }
3288
+ function mapResponseStatusToConversationStatus(status) {
3289
+ if (status === "completed") {
3290
+ return "completed";
3291
+ }
3292
+ if (status === "in_progress") {
3293
+ return "in_progress";
3294
+ }
3295
+ return "incomplete";
3296
+ }
3187
3297
  function hasSubtitleChanged(previous, next) {
3188
3298
  return previous.text !== next.text || previous.transcript !== next.transcript || previous.lifecycle !== next.lifecycle || previous.status !== next.status || previous.role !== next.role;
3189
3299
  }
3190
3300
  function isSubtitleRelatedEvent(type) {
3191
3301
  return type.startsWith("conversation.") || type.startsWith("response.");
3192
3302
  }
3303
+ function isModelStreamingTranscriptEvent(event) {
3304
+ return event.type === "response.output_audio_transcript.delta" || event.type === "response.output_audio_transcript.done";
3305
+ }
3306
+ function isResponseDoneEvent(event) {
3307
+ return event.type === "response.done";
3308
+ }
3193
3309
  var BREATHE_KEYFRAMES = `@keyframes ivi-subtitle-breathe{0%,100%{opacity:1}50%{opacity:.55}}`;
3194
3310
  function IVISubtitleOverlay(props) {
3195
3311
  const {
@@ -3197,13 +3313,15 @@ function IVISubtitleOverlay(props) {
3197
3313
  roles = "user",
3198
3314
  maxItems,
3199
3315
  maxVisible,
3316
+ useModelStreamingTranscript,
3200
3317
  subtitleStyle,
3201
3318
  className,
3202
3319
  style
3203
3320
  } = props;
3204
3321
  const entries = useIviSubtitles(runtime, {
3205
3322
  roles,
3206
- maxItems: maxItems ?? maxVisible
3323
+ maxItems: maxItems ?? maxVisible,
3324
+ useModelStreamingTranscript
3207
3325
  });
3208
3326
  if (entries.length === 0) return null;
3209
3327
  const fontFamily = subtitleStyle?.fontFamily ?? "system-ui, -apple-system, sans-serif";