@vivix-ai/ivi-frontend-sdk 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -380,20 +380,22 @@ function useIviSubtitles(
380
380
  ): IviSubtitleItem[];
381
381
  ```
382
382
 
383
- 基于 runtime 的 conversation / response 事件维护字幕队列,不负责渲染,也不按时间自动消失。
383
+ 基于 runtime 的 conversation / response 事件维护字幕队列,不负责渲染,也不按时间自动消失。默认从 conversation 条目聚合字幕;当 `roles` 包含 `"model"` 且开启 `useModelStreamingTranscript` 时,model 字幕改为使用 `response.output_audio_transcript.*` 事件,`response.output_audio_transcript.done` 只更新文本,只有 `response.done` 才表示这一轮 model 字幕结束。
384
384
 
385
385
  | Option | 类型 | 默认值 | 说明 |
386
386
  |--------|------|--------|------|
387
387
  | `roles` | `IviSubtitleRole \| IviSubtitleRole[]` | `"user"` | 要收集的发言人角色 |
388
388
  | `maxItems` | `number` | `2` | 最多保留的字幕条数,超过后清理最旧条目 |
389
+ | `useModelStreamingTranscript` | `boolean` | `false` | 当 `roles` 包含 `"model"` 时,model 字幕是否改用 `response.output_audio_transcript.*` 事件 |
389
390
 
390
- `IviSubtitleItem` 包含 `id`、`role`、`lifecycle`、`status`、`text`、`transcript`、`displayText`、`timestamp`、`updatedAt`、`content`、`item`。
391
+ `IviSubtitleItem` 包含 `id`、`role`、`lifecycle`、`status`、`text`、`transcript`、`displayText`、`timestamp`、`updatedAt`、`content`、`item`、`source`、`responseId`、`itemId`。
391
392
 
392
393
  ```tsx
393
394
  function CustomSubtitles({ runtime }: { runtime: IviRuntimeCoordinator | null }) {
394
395
  const subtitles = useIviSubtitles(runtime, {
395
396
  roles: ["user", "model"],
396
- maxItems: 5
397
+ maxItems: 5,
398
+ useModelStreamingTranscript: true
397
399
  });
398
400
 
399
401
  return (
@@ -464,7 +466,7 @@ function useApplyVolumeToSlot(
464
466
  | `showVolumeControl` | `boolean` | — | 是否显示音量控制浮层 |
465
467
  | `volumeControlProps` | — | — | 音量控制自定义配置 |
466
468
  | `showSubtitle` | `boolean` | — | 是否显示字幕浮层 |
467
- | `subtitleProps` | `Omit<IVISubtitleOverlayProps, "runtime">` | — | 字幕自定义配置 |
469
+ | `subtitleProps` | `Omit<IVISubtitleOverlayProps, "runtime">` | — | 字幕自定义配置,可传 `roles`、`maxItems`、`useModelStreamingTranscript` 等 |
468
470
  | `trtcPlayerProps` | — | — | TRTC 播放器自定义配置 |
469
471
  | `livekitPlayerProps` | — | — | LiveKit 播放器自定义配置 |
470
472
  | `videoProps` / `imageProps` | — | — | 透传给原生 `<video>` / `<img>` |
@@ -574,6 +576,7 @@ npm install xgplayer xgplayer-flv
574
576
  | `roles` | `IviSubtitleRole \| IviSubtitleRole[]` | `"user"` | 要展示的发言人角色 |
575
577
  | `maxItems` | `number` | `2` | 最多保留的字幕条数,超过后清理最旧条目 |
576
578
  | `maxVisible` | `number` | — | 已废弃,兼容旧字段;请使用 `maxItems` |
579
+ | `useModelStreamingTranscript` | `boolean` | `false` | 当 `roles` 包含 `"model"` 时,model 字幕是否改用 `response.output_audio_transcript.*` 事件 |
577
580
  | `subtitleStyle` | `IVISubtitleOverlayStyle` | — | 样式配置 |
578
581
  | `className` / `style` | — | — | 样式 |
579
582
 
package/dist/index.cjs CHANGED
@@ -3088,6 +3088,7 @@ function useIviSubtitles(runtime, options = {}) {
3088
3088
  () => new Set(roleKey.split("\0")),
3089
3089
  [roleKey]
3090
3090
  );
3091
+ const useModelStreamingTranscript = options.useModelStreamingTranscript === true && roleSet.has("model");
3091
3092
  const [subtitles, setSubtitles] = react.useState([]);
3092
3093
  const seenIdsRef = react.useRef(/* @__PURE__ */ new Set());
3093
3094
  const initializedRef = react.useRef(false);
@@ -3104,7 +3105,7 @@ function useIviSubtitles(runtime, options = {}) {
3104
3105
  if (!initializedRef.current) {
3105
3106
  initializedRef.current = true;
3106
3107
  for (const item of conversations) {
3107
- if (item.lifecycle === "done" || !getDisplayText(item) || !roleSet.has(item.role)) {
3108
+ if (item.lifecycle === "done" || !getDisplayText(item) || !shouldUseConversationRole(item.role, roleSet, useModelStreamingTranscript)) {
3108
3109
  seenIds.add(item.id);
3109
3110
  }
3110
3111
  }
@@ -3113,8 +3114,12 @@ function useIviSubtitles(runtime, options = {}) {
3113
3114
  const conversationMap = new Map(conversations.map((item) => [item.id, item]));
3114
3115
  const nextById = /* @__PURE__ */ new Map();
3115
3116
  for (const previousItem of previous) {
3117
+ if (previousItem.source === "response_audio_transcript") {
3118
+ nextById.set(previousItem.id, previousItem);
3119
+ continue;
3120
+ }
3116
3121
  const conversation = conversationMap.get(previousItem.id);
3117
- if (!conversation || !roleSet.has(conversation.role) || !getDisplayText(conversation)) {
3122
+ if (!conversation || !shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
3118
3123
  continue;
3119
3124
  }
3120
3125
  nextById.set(
@@ -3127,7 +3132,7 @@ function useIviSubtitles(runtime, options = {}) {
3127
3132
  );
3128
3133
  }
3129
3134
  for (const conversation of conversations) {
3130
- if (!roleSet.has(conversation.role) || !getDisplayText(conversation)) {
3135
+ if (!shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
3131
3136
  continue;
3132
3137
  }
3133
3138
  if (seenIds.has(conversation.id)) {
@@ -3137,10 +3142,7 @@ function useIviSubtitles(runtime, options = {}) {
3137
3142
  nextById.set(conversation.id, buildSubtitleItem(conversation, now, now));
3138
3143
  }
3139
3144
  const next = Array.from(nextById.values());
3140
- if (maxItems === 0) {
3141
- return [];
3142
- }
3143
- return next.length > maxItems ? next.slice(next.length - maxItems) : next;
3145
+ return trimSubtitleItems(next, maxItems);
3144
3146
  });
3145
3147
  };
3146
3148
  syncConversations(runtime.getState().conversations);
@@ -3151,12 +3153,20 @@ function useIviSubtitles(runtime, options = {}) {
3151
3153
  setSubtitles([]);
3152
3154
  return;
3153
3155
  }
3156
+ if (useModelStreamingTranscript && isModelStreamingTranscriptEvent(event)) {
3157
+ setSubtitles((previous) => trimSubtitleItems(upsertModelStreamingSubtitle(previous, event), maxItems));
3158
+ return;
3159
+ }
3160
+ if (useModelStreamingTranscript && isResponseDoneEvent(event)) {
3161
+ setSubtitles((previous) => trimSubtitleItems(markModelStreamingSubtitleDone(previous, event), maxItems));
3162
+ return;
3163
+ }
3154
3164
  if (!isSubtitleRelatedEvent(event.type)) {
3155
3165
  return;
3156
3166
  }
3157
3167
  syncConversations(state.conversations);
3158
3168
  });
3159
- }, [runtime, roleSet, maxItems]);
3169
+ }, [runtime, roleSet, maxItems, useModelStreamingTranscript]);
3160
3170
  return subtitles;
3161
3171
  }
3162
3172
  function normalizeMaxItems(maxItems) {
@@ -3168,6 +3178,19 @@ function normalizeMaxItems(maxItems) {
3168
3178
  }
3169
3179
  return Math.max(0, Math.floor(maxItems));
3170
3180
  }
3181
+ function shouldUseConversationRole(role, roleSet, useModelStreamingTranscript) {
3182
+ if (!roleSet.has(role)) {
3183
+ return false;
3184
+ }
3185
+ return !(useModelStreamingTranscript && role === "model");
3186
+ }
3187
+ function trimSubtitleItems(items, maxItems) {
3188
+ if (maxItems === 0) {
3189
+ return [];
3190
+ }
3191
+ const sorted = [...items].sort((a, b) => a.timestamp - b.timestamp);
3192
+ return sorted.length > maxItems ? sorted.slice(sorted.length - maxItems) : sorted;
3193
+ }
3171
3194
  function getDisplayText(item) {
3172
3195
  return item.text || item.transcript;
3173
3196
  }
@@ -3182,16 +3205,109 @@ function buildSubtitleItem(item, timestamp, updatedAt) {
3182
3205
  displayText: getDisplayText(item),
3183
3206
  content: item.content,
3184
3207
  item: item.item,
3208
+ source: "conversation",
3185
3209
  timestamp,
3186
3210
  updatedAt
3187
3211
  };
3188
3212
  }
3213
+ function upsertModelStreamingSubtitle(previous, event, now = Date.now()) {
3214
+ const responseId = getEventString(event, "responseId", "response_id");
3215
+ if (!responseId) {
3216
+ return previous;
3217
+ }
3218
+ const displayText = event.type === "response.output_audio_transcript.done" ? event.transcript : event.delta;
3219
+ if (typeof displayText !== "string" || displayText.length === 0) {
3220
+ return previous;
3221
+ }
3222
+ const id = buildModelStreamingSubtitleId(responseId);
3223
+ const itemId = getEventString(event, "itemId", "item_id");
3224
+ const existing = previous.find((item) => item.id === id);
3225
+ const nextItem = {
3226
+ id,
3227
+ role: "model",
3228
+ lifecycle: "added",
3229
+ status: "in_progress",
3230
+ text: "",
3231
+ transcript: displayText,
3232
+ displayText,
3233
+ content: [],
3234
+ item: {
3235
+ id: itemId ?? id,
3236
+ type: "message",
3237
+ role: "model",
3238
+ status: "in_progress",
3239
+ content: [{ type: "audio", transcript: displayText }]
3240
+ },
3241
+ source: "response_audio_transcript",
3242
+ responseId,
3243
+ itemId,
3244
+ timestamp: existing?.timestamp ?? now,
3245
+ updatedAt: hasStreamingSubtitleChanged(existing, displayText) ? now : existing?.updatedAt ?? now
3246
+ };
3247
+ return replaceSubtitleItem(previous, nextItem);
3248
+ }
3249
+ function markModelStreamingSubtitleDone(previous, event, now = Date.now()) {
3250
+ const responseId = event.response?.id;
3251
+ if (!responseId) {
3252
+ return previous;
3253
+ }
3254
+ const id = buildModelStreamingSubtitleId(responseId);
3255
+ const status = mapResponseStatusToConversationStatus(event.response?.status);
3256
+ return previous.map((item) => {
3257
+ if (item.id !== id || item.source !== "response_audio_transcript") {
3258
+ return item;
3259
+ }
3260
+ return {
3261
+ ...item,
3262
+ lifecycle: "done",
3263
+ status,
3264
+ item: {
3265
+ ...item.item,
3266
+ status
3267
+ },
3268
+ updatedAt: now
3269
+ };
3270
+ });
3271
+ }
3272
+ function replaceSubtitleItem(previous, nextItem) {
3273
+ const replaced = previous.map((item) => item.id === nextItem.id ? nextItem : item);
3274
+ if (replaced.some((item) => item.id === nextItem.id)) {
3275
+ return replaced;
3276
+ }
3277
+ return [...previous, nextItem];
3278
+ }
3279
+ function hasStreamingSubtitleChanged(previous, nextDisplayText) {
3280
+ return !previous || previous.displayText !== nextDisplayText || previous.lifecycle !== "added" || previous.status !== "in_progress";
3281
+ }
3282
+ function buildModelStreamingSubtitleId(responseId) {
3283
+ return `model-response:${responseId}`;
3284
+ }
3285
+ function getEventString(event, camelKey, snakeKey) {
3286
+ const record = event;
3287
+ const value = record[camelKey] ?? record[snakeKey];
3288
+ return typeof value === "string" && value.length > 0 ? value : void 0;
3289
+ }
3290
+ function mapResponseStatusToConversationStatus(status) {
3291
+ if (status === "completed") {
3292
+ return "completed";
3293
+ }
3294
+ if (status === "in_progress") {
3295
+ return "in_progress";
3296
+ }
3297
+ return "incomplete";
3298
+ }
3189
3299
  function hasSubtitleChanged(previous, next) {
3190
3300
  return previous.text !== next.text || previous.transcript !== next.transcript || previous.lifecycle !== next.lifecycle || previous.status !== next.status || previous.role !== next.role;
3191
3301
  }
3192
3302
  function isSubtitleRelatedEvent(type) {
3193
3303
  return type.startsWith("conversation.") || type.startsWith("response.");
3194
3304
  }
3305
+ function isModelStreamingTranscriptEvent(event) {
3306
+ return event.type === "response.output_audio_transcript.delta" || event.type === "response.output_audio_transcript.done";
3307
+ }
3308
+ function isResponseDoneEvent(event) {
3309
+ return event.type === "response.done";
3310
+ }
3195
3311
  var BREATHE_KEYFRAMES = `@keyframes ivi-subtitle-breathe{0%,100%{opacity:1}50%{opacity:.55}}`;
3196
3312
  function IVISubtitleOverlay(props) {
3197
3313
  const {
@@ -3199,13 +3315,15 @@ function IVISubtitleOverlay(props) {
3199
3315
  roles = "user",
3200
3316
  maxItems,
3201
3317
  maxVisible,
3318
+ useModelStreamingTranscript,
3202
3319
  subtitleStyle,
3203
3320
  className,
3204
3321
  style
3205
3322
  } = props;
3206
3323
  const entries = useIviSubtitles(runtime, {
3207
3324
  roles,
3208
- maxItems: maxItems ?? maxVisible
3325
+ maxItems: maxItems ?? maxVisible,
3326
+ useModelStreamingTranscript
3209
3327
  });
3210
3328
  if (entries.length === 0) return null;
3211
3329
  const fontFamily = subtitleStyle?.fontFamily ?? "system-ui, -apple-system, sans-serif";
@@ -3828,6 +3946,19 @@ function detectMediaVolumeType(source) {
3828
3946
  if (!url) return null;
3829
3947
  return isM3u8Url(url) ? "hls" : "video";
3830
3948
  }
3949
+ function getSourceRenderKey(sourceId, source) {
3950
+ const trtc = getTrtcPlayback(source.playback);
3951
+ if (!trtc) {
3952
+ return sourceId;
3953
+ }
3954
+ return [
3955
+ "trtc",
3956
+ trtc.app_id ?? "",
3957
+ trtc.room_id ?? "",
3958
+ trtc.user_id ?? "",
3959
+ trtc.user_sig ?? ""
3960
+ ].join(":");
3961
+ }
3831
3962
  function TrackSlotMediaContent(props) {
3832
3963
  const {
3833
3964
  slot,
@@ -3865,7 +3996,7 @@ function TrackSlotMediaContent(props) {
3865
3996
  IVITrtcPlayer,
3866
3997
  {
3867
3998
  trtc,
3868
- sourceId: source.source.source_id,
3999
+ sourceId: slotSourceId,
3869
4000
  runtime,
3870
4001
  ...trtcPlayerProps,
3871
4002
  muted: trtcMuted,
@@ -4020,26 +4151,35 @@ function SlotVideo(props) {
4020
4151
  // src/react/internal/use-multi-preload-sources.ts
4021
4152
  function useMultiPreloadSources(sources, activeSourceId) {
4022
4153
  return react.useMemo(() => {
4023
- const entries = [];
4024
- for (const [id, runtimeSource] of sources) {
4025
- const isActive = id === activeSourceId;
4026
- const shouldMount = Boolean(runtimeSource.preload) || isActive;
4027
- if (!shouldMount) {
4028
- continue;
4029
- }
4030
- const ready = toReadyRuntimeSource(runtimeSource);
4031
- if (!ready) {
4032
- continue;
4033
- }
4034
- entries.push({
4035
- sourceId: id,
4036
- source: ready,
4037
- isActive
4038
- });
4039
- }
4040
- return entries;
4154
+ return buildPreloadSourceEntries(sources, activeSourceId);
4041
4155
  }, [sources, activeSourceId]);
4042
4156
  }
4157
+ function buildPreloadSourceEntries(sources, activeSourceId) {
4158
+ const entriesByRenderKey = /* @__PURE__ */ new Map();
4159
+ for (const [id, runtimeSource] of sources) {
4160
+ const isActive = id === activeSourceId;
4161
+ const shouldMount = Boolean(runtimeSource.preload) || isActive;
4162
+ if (!shouldMount) {
4163
+ continue;
4164
+ }
4165
+ const ready = toReadyRuntimeSource(runtimeSource);
4166
+ if (!ready) {
4167
+ continue;
4168
+ }
4169
+ const renderKey = getSourceRenderKey(id, ready);
4170
+ const entry = {
4171
+ sourceId: id,
4172
+ renderKey,
4173
+ source: ready,
4174
+ isActive
4175
+ };
4176
+ const previous = entriesByRenderKey.get(renderKey);
4177
+ if (!previous || !previous.isActive && isActive) {
4178
+ entriesByRenderKey.set(renderKey, entry);
4179
+ }
4180
+ }
4181
+ return Array.from(entriesByRenderKey.values());
4182
+ }
4043
4183
  function IVITrackSlot(props) {
4044
4184
  const {
4045
4185
  slot,
@@ -4129,7 +4269,7 @@ function IVITrackSlot(props) {
4129
4269
  }
4130
4270
  ) })
4131
4271
  },
4132
- entry.sourceId
4272
+ entry.renderKey
4133
4273
  );
4134
4274
  }),
4135
4275
  showSubtitle && activeSource && supportsSubtitleOverlay(activeSource) && /* @__PURE__ */ jsxRuntime.jsx("div", { style: SUBTITLE_OVERLAY_STYLE, children: /* @__PURE__ */ jsxRuntime.jsx(