@vivix-ai/ivi-frontend-sdk 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -4
- package/dist/index.cjs +169 -29
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -1
- package/dist/index.d.ts +18 -1
- package/dist/index.js +169 -29
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -380,20 +380,22 @@ function useIviSubtitles(
|
|
|
380
380
|
): IviSubtitleItem[];
|
|
381
381
|
```
|
|
382
382
|
|
|
383
|
-
基于 runtime 的 conversation / response
|
|
383
|
+
基于 runtime 的 conversation / response 事件维护字幕队列,不负责渲染,也不按时间自动消失。默认从 conversation 条目聚合字幕;当 `roles` 包含 `"model"` 且开启 `useModelStreamingTranscript` 时,model 字幕改为使用 `response.output_audio_transcript.*` 事件,`response.output_audio_transcript.done` 只更新文本,只有 `response.done` 才表示这一轮 model 字幕结束。
|
|
384
384
|
|
|
385
385
|
| Option | 类型 | 默认值 | 说明 |
|
|
386
386
|
|--------|------|--------|------|
|
|
387
387
|
| `roles` | `IviSubtitleRole \| IviSubtitleRole[]` | `"user"` | 要收集的发言人角色 |
|
|
388
388
|
| `maxItems` | `number` | `2` | 最多保留的字幕条数,超过后清理最旧条目 |
|
|
389
|
+
| `useModelStreamingTranscript` | `boolean` | `false` | 当 `roles` 包含 `"model"` 时,model 字幕是否改用 `response.output_audio_transcript.*` 事件 |
|
|
389
390
|
|
|
390
|
-
`IviSubtitleItem` 包含 `id`、`role`、`lifecycle`、`status`、`text`、`transcript`、`displayText`、`timestamp`、`updatedAt`、`content`、`item`。
|
|
391
|
+
`IviSubtitleItem` 包含 `id`、`role`、`lifecycle`、`status`、`text`、`transcript`、`displayText`、`timestamp`、`updatedAt`、`content`、`item`、`source`、`responseId`、`itemId`。
|
|
391
392
|
|
|
392
393
|
```tsx
|
|
393
394
|
function CustomSubtitles({ runtime }: { runtime: IviRuntimeCoordinator | null }) {
|
|
394
395
|
const subtitles = useIviSubtitles(runtime, {
|
|
395
396
|
roles: ["user", "model"],
|
|
396
|
-
maxItems: 5
|
|
397
|
+
maxItems: 5,
|
|
398
|
+
useModelStreamingTranscript: true
|
|
397
399
|
});
|
|
398
400
|
|
|
399
401
|
return (
|
|
@@ -464,7 +466,7 @@ function useApplyVolumeToSlot(
|
|
|
464
466
|
| `showVolumeControl` | `boolean` | — | 是否显示音量控制浮层 |
|
|
465
467
|
| `volumeControlProps` | — | — | 音量控制自定义配置 |
|
|
466
468
|
| `showSubtitle` | `boolean` | — | 是否显示字幕浮层 |
|
|
467
|
-
| `subtitleProps` | `Omit<IVISubtitleOverlayProps, "runtime">` | — |
|
|
469
|
+
| `subtitleProps` | `Omit<IVISubtitleOverlayProps, "runtime">` | — | 字幕自定义配置,可传 `roles`、`maxItems`、`useModelStreamingTranscript` 等 |
|
|
468
470
|
| `trtcPlayerProps` | — | — | TRTC 播放器自定义配置 |
|
|
469
471
|
| `livekitPlayerProps` | — | — | LiveKit 播放器自定义配置 |
|
|
470
472
|
| `videoProps` / `imageProps` | — | — | 透传给原生 `<video>` / `<img>` |
|
|
@@ -574,6 +576,7 @@ npm install xgplayer xgplayer-flv
|
|
|
574
576
|
| `roles` | `IviSubtitleRole \| IviSubtitleRole[]` | `"user"` | 要展示的发言人角色 |
|
|
575
577
|
| `maxItems` | `number` | `2` | 最多保留的字幕条数,超过后清理最旧条目 |
|
|
576
578
|
| `maxVisible` | `number` | — | 已废弃,兼容旧字段;请使用 `maxItems` |
|
|
579
|
+
| `useModelStreamingTranscript` | `boolean` | `false` | 当 `roles` 包含 `"model"` 时,model 字幕是否改用 `response.output_audio_transcript.*` 事件 |
|
|
577
580
|
| `subtitleStyle` | `IVISubtitleOverlayStyle` | — | 样式配置 |
|
|
578
581
|
| `className` / `style` | — | — | 样式 |
|
|
579
582
|
|
package/dist/index.cjs
CHANGED
|
@@ -3088,6 +3088,7 @@ function useIviSubtitles(runtime, options = {}) {
|
|
|
3088
3088
|
() => new Set(roleKey.split("\0")),
|
|
3089
3089
|
[roleKey]
|
|
3090
3090
|
);
|
|
3091
|
+
const useModelStreamingTranscript = options.useModelStreamingTranscript === true && roleSet.has("model");
|
|
3091
3092
|
const [subtitles, setSubtitles] = react.useState([]);
|
|
3092
3093
|
const seenIdsRef = react.useRef(/* @__PURE__ */ new Set());
|
|
3093
3094
|
const initializedRef = react.useRef(false);
|
|
@@ -3104,7 +3105,7 @@ function useIviSubtitles(runtime, options = {}) {
|
|
|
3104
3105
|
if (!initializedRef.current) {
|
|
3105
3106
|
initializedRef.current = true;
|
|
3106
3107
|
for (const item of conversations) {
|
|
3107
|
-
if (item.lifecycle === "done" || !getDisplayText(item) || !
|
|
3108
|
+
if (item.lifecycle === "done" || !getDisplayText(item) || !shouldUseConversationRole(item.role, roleSet, useModelStreamingTranscript)) {
|
|
3108
3109
|
seenIds.add(item.id);
|
|
3109
3110
|
}
|
|
3110
3111
|
}
|
|
@@ -3113,8 +3114,12 @@ function useIviSubtitles(runtime, options = {}) {
|
|
|
3113
3114
|
const conversationMap = new Map(conversations.map((item) => [item.id, item]));
|
|
3114
3115
|
const nextById = /* @__PURE__ */ new Map();
|
|
3115
3116
|
for (const previousItem of previous) {
|
|
3117
|
+
if (previousItem.source === "response_audio_transcript") {
|
|
3118
|
+
nextById.set(previousItem.id, previousItem);
|
|
3119
|
+
continue;
|
|
3120
|
+
}
|
|
3116
3121
|
const conversation = conversationMap.get(previousItem.id);
|
|
3117
|
-
if (!conversation || !
|
|
3122
|
+
if (!conversation || !shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
|
|
3118
3123
|
continue;
|
|
3119
3124
|
}
|
|
3120
3125
|
nextById.set(
|
|
@@ -3127,7 +3132,7 @@ function useIviSubtitles(runtime, options = {}) {
|
|
|
3127
3132
|
);
|
|
3128
3133
|
}
|
|
3129
3134
|
for (const conversation of conversations) {
|
|
3130
|
-
if (!
|
|
3135
|
+
if (!shouldUseConversationRole(conversation.role, roleSet, useModelStreamingTranscript) || !getDisplayText(conversation)) {
|
|
3131
3136
|
continue;
|
|
3132
3137
|
}
|
|
3133
3138
|
if (seenIds.has(conversation.id)) {
|
|
@@ -3137,10 +3142,7 @@ function useIviSubtitles(runtime, options = {}) {
|
|
|
3137
3142
|
nextById.set(conversation.id, buildSubtitleItem(conversation, now, now));
|
|
3138
3143
|
}
|
|
3139
3144
|
const next = Array.from(nextById.values());
|
|
3140
|
-
|
|
3141
|
-
return [];
|
|
3142
|
-
}
|
|
3143
|
-
return next.length > maxItems ? next.slice(next.length - maxItems) : next;
|
|
3145
|
+
return trimSubtitleItems(next, maxItems);
|
|
3144
3146
|
});
|
|
3145
3147
|
};
|
|
3146
3148
|
syncConversations(runtime.getState().conversations);
|
|
@@ -3151,12 +3153,20 @@ function useIviSubtitles(runtime, options = {}) {
|
|
|
3151
3153
|
setSubtitles([]);
|
|
3152
3154
|
return;
|
|
3153
3155
|
}
|
|
3156
|
+
if (useModelStreamingTranscript && isModelStreamingTranscriptEvent(event)) {
|
|
3157
|
+
setSubtitles((previous) => trimSubtitleItems(upsertModelStreamingSubtitle(previous, event), maxItems));
|
|
3158
|
+
return;
|
|
3159
|
+
}
|
|
3160
|
+
if (useModelStreamingTranscript && isResponseDoneEvent(event)) {
|
|
3161
|
+
setSubtitles((previous) => trimSubtitleItems(markModelStreamingSubtitleDone(previous, event), maxItems));
|
|
3162
|
+
return;
|
|
3163
|
+
}
|
|
3154
3164
|
if (!isSubtitleRelatedEvent(event.type)) {
|
|
3155
3165
|
return;
|
|
3156
3166
|
}
|
|
3157
3167
|
syncConversations(state.conversations);
|
|
3158
3168
|
});
|
|
3159
|
-
}, [runtime, roleSet, maxItems]);
|
|
3169
|
+
}, [runtime, roleSet, maxItems, useModelStreamingTranscript]);
|
|
3160
3170
|
return subtitles;
|
|
3161
3171
|
}
|
|
3162
3172
|
function normalizeMaxItems(maxItems) {
|
|
@@ -3168,6 +3178,19 @@ function normalizeMaxItems(maxItems) {
|
|
|
3168
3178
|
}
|
|
3169
3179
|
return Math.max(0, Math.floor(maxItems));
|
|
3170
3180
|
}
|
|
3181
|
+
/**
 * Tells whether a conversation entry of the given role should be turned into a subtitle.
 *
 * @param role Role of the conversation entry.
 * @param roleSet Set of roles requested by the caller.
 * @param useModelStreamingTranscript When truthy, "model" entries are excluded here
 *   even if requested, so they are not taken from conversation entries.
 * @returns `true` only for a requested role that is not an excluded "model" role.
 */
function shouldUseConversationRole(role, roleSet, useModelStreamingTranscript) {
  const isStreamedModelRole = role === "model" && Boolean(useModelStreamingTranscript);
  return roleSet.has(role) ? !isStreamedModelRole : false;
}
|
|
3187
|
+
/**
 * Orders subtitle items by ascending `timestamp` and keeps at most `maxItems` of the newest.
 *
 * The input array is never mutated; a sorted copy is returned.
 *
 * @param items Subtitle items carrying a numeric `timestamp`.
 * @param maxItems Maximum number of items to keep; `0` yields an empty list.
 * @returns A new array, oldest first, holding the most recent items.
 */
function trimSubtitleItems(items, maxItems) {
  if (maxItems === 0) {
    return [];
  }
  const ordered = items.slice();
  ordered.sort((left, right) => left.timestamp - right.timestamp);
  // Number of oldest entries that do not fit into the budget.
  const overflow = ordered.length - maxItems;
  return overflow > 0 ? ordered.slice(overflow) : ordered;
}
|
|
3171
3194
|
/**
 * Picks the text shown for a subtitle source item.
 *
 * @param item Object exposing `text` and `transcript`.
 * @returns `item.text` when it is truthy, otherwise `item.transcript`.
 */
function getDisplayText(item) {
  const { text } = item;
  if (text) {
    return text;
  }
  return item.transcript;
}
|
|
@@ -3182,16 +3205,109 @@ function buildSubtitleItem(item, timestamp, updatedAt) {
|
|
|
3182
3205
|
displayText: getDisplayText(item),
|
|
3183
3206
|
content: item.content,
|
|
3184
3207
|
item: item.item,
|
|
3208
|
+
source: "conversation",
|
|
3185
3209
|
timestamp,
|
|
3186
3210
|
updatedAt
|
|
3187
3211
|
};
|
|
3188
3212
|
}
|
|
3213
|
+
/**
 * Inserts or refreshes the model subtitle that belongs to the response named by `event`.
 *
 * The subtitle id is derived from the response id, so every transcript event of one
 * response updates the same entry. The entry is always written back as
 * `lifecycle: "added"` / `status: "in_progress"`.
 *
 * @param previous Current subtitle list; it is not mutated.
 * @param event A `response.output_audio_transcript.delta` or `.done` event. The response
 *   and item ids are read from either camelCase or snake_case fields.
 * @param now Timestamp used for new entries and for `updatedAt` when something changed.
 * @returns `previous` itself when the event has no response id or no non-empty text,
 *   otherwise a new list containing the inserted or replaced entry.
 */
function upsertModelStreamingSubtitle(previous, event, now = Date.now()) {
  const responseId = getEventString(event, "responseId", "response_id");
  if (!responseId) {
    return previous;
  }
  // NOTE(review): for delta events the displayed text is `event.delta` as-is, it is not
  // appended to the text already shown. This presumably relies on the runtime delivering
  // display-ready deltas — confirm against the event producer.
  const displayText = event.type === "response.output_audio_transcript.done" ? event.transcript : event.delta;
  if (typeof displayText !== "string" || displayText.length === 0) {
    return previous;
  }
  const id = buildModelStreamingSubtitleId(responseId);
  const existing = previous.find((item) => item.id === id);
  // Keep the item id learned from an earlier event when this event does not carry one,
  // so `itemId` / `item.id` do not flip back to the synthetic subtitle id mid-stream.
  const itemId = getEventString(event, "itemId", "item_id") ?? existing?.itemId;
  const nextItem = {
    id,
    role: "model",
    lifecycle: "added",
    status: "in_progress",
    text: "",
    transcript: displayText,
    displayText,
    content: [],
    item: {
      id: itemId ?? id,
      type: "message",
      role: "model",
      status: "in_progress",
      content: [{ type: "audio", transcript: displayText }]
    },
    source: "response_audio_transcript",
    responseId,
    itemId,
    // The creation time of the entry is kept across updates.
    timestamp: existing?.timestamp ?? now,
    updatedAt: hasStreamingSubtitleChanged(existing, displayText) ? now : existing?.updatedAt ?? now
  };
  return replaceSubtitleItem(previous, nextItem);
}
|
|
3249
|
+
/**
 * Marks the streaming model subtitle of a finished response as done.
 *
 * @param previous Current subtitle list; it is not mutated.
 * @param event A `response.done` event; the response id and status are read from `event.response`.
 * @param now Timestamp stored as `updatedAt` on the finished entry.
 * @returns `previous` itself when the event carries no response id, otherwise a new list
 *   in which the matching streaming entry has `lifecycle: "done"` and the mapped status.
 */
function markModelStreamingSubtitleDone(previous, event, now = Date.now()) {
  const response = event.response;
  const responseId = response?.id;
  if (!responseId) {
    return previous;
  }
  const targetId = buildModelStreamingSubtitleId(responseId);
  const finalStatus = mapResponseStatusToConversationStatus(response?.status);
  return previous.map((entry) => {
    // Only entries produced from transcript events are finalized here.
    const isTarget = entry.id === targetId && entry.source === "response_audio_transcript";
    if (!isTarget) {
      return entry;
    }
    const finishedItem = { ...entry.item, status: finalStatus };
    return {
      ...entry,
      lifecycle: "done",
      status: finalStatus,
      item: finishedItem,
      updatedAt: now
    };
  });
}
|
|
3272
|
+
/**
 * Returns a copy of the list in which `nextItem` takes the place of every entry with the
 * same `id`, or is appended when no entry has that id.
 *
 * @param previous Current subtitle list; it is not mutated.
 * @param nextItem Entry to insert or substitute.
 * @returns A new array.
 */
function replaceSubtitleItem(previous, nextItem) {
  let wasReplaced = false;
  const updated = previous.map((entry) => {
    if (entry.id !== nextItem.id) {
      return entry;
    }
    wasReplaced = true;
    return nextItem;
  });
  if (wasReplaced) {
    return updated;
  }
  return previous.concat([nextItem]);
}
|
|
3279
|
+
/**
 * Reports whether writing `nextDisplayText` as an in-progress streaming subtitle would
 * differ from the entry that already exists.
 *
 * @param previous Existing entry, or a falsy value when there is none.
 * @param nextDisplayText Text about to be shown.
 * @returns `true` when there is no entry, the text differs, or the entry is not
 *   currently `lifecycle: "added"` with `status: "in_progress"`.
 */
function hasStreamingSubtitleChanged(previous, nextDisplayText) {
  if (!previous) {
    return true;
  }
  const sameText = previous.displayText === nextDisplayText;
  const stillStreaming = previous.lifecycle === "added" && previous.status === "in_progress";
  return !(sameText && stillStreaming);
}
|
|
3282
|
+
/**
 * Builds the subtitle id used for the streaming model transcript of one response.
 *
 * @param responseId Identifier of the response.
 * @returns The response id prefixed with `model-response:`.
 */
function buildModelStreamingSubtitleId(responseId) {
  return "model-response:".concat(responseId);
}
|
|
3285
|
+
/**
 * Reads a non-empty string field from an event that may use camelCase or snake_case keys.
 *
 * The camelCase key is tried first. A value that is missing, empty, or not a string does
 * not stop the lookup: the snake_case key is tried next, so an unusable camelCase value
 * cannot hide a usable snake_case one.
 *
 * @param event Event object to read from; `null` / `undefined` yields `undefined`.
 * @param camelKey Preferred property name.
 * @param snakeKey Fallback property name.
 * @returns The first non-empty string found, otherwise `undefined`.
 */
function getEventString(event, camelKey, snakeKey) {
  if (event == null) {
    return void 0;
  }
  const record = event;
  for (const key of [camelKey, snakeKey]) {
    const value = record[key];
    if (typeof value === "string" && value.length > 0) {
      return value;
    }
  }
  return void 0;
}
|
|
3290
|
+
/**
 * Translates a response status into the status stored on a subtitle entry.
 *
 * @param status Status reported on the response; may be absent.
 * @returns `"completed"` or `"in_progress"` when the input is exactly that value,
 *   and `"incomplete"` for everything else.
 */
function mapResponseStatusToConversationStatus(status) {
  switch (status) {
    case "completed":
    case "in_progress":
      return status;
    default:
      return "incomplete";
  }
}
|
|
3189
3299
|
/**
 * Compares two subtitle-like objects on the fields that matter for display.
 *
 * @param previous Earlier version of the entry.
 * @param next Newer version of the entry.
 * @returns `true` when `text`, `transcript`, `lifecycle`, `status`, or `role` differ.
 */
function hasSubtitleChanged(previous, next) {
  const comparedFields = ["text", "transcript", "lifecycle", "status", "role"];
  return comparedFields.some((field) => previous[field] !== next[field]);
}
|
|
3192
3302
|
/**
 * Checks whether an event type belongs to one of the event families that can affect subtitles.
 *
 * @param type Event type string.
 * @returns `true` when the type starts with `conversation.` or `response.`.
 */
function isSubtitleRelatedEvent(type) {
  const relevantPrefixes = ["conversation.", "response."];
  return relevantPrefixes.some((prefix) => type.startsWith(prefix));
}
|
|
3305
|
+
/**
 * Checks whether an event is one of the output-audio transcript events.
 *
 * @param event Event object with a `type` field.
 * @returns `true` for `response.output_audio_transcript.delta` and
 *   `response.output_audio_transcript.done`, `false` otherwise.
 */
function isModelStreamingTranscriptEvent(event) {
  switch (event.type) {
    case "response.output_audio_transcript.delta":
    case "response.output_audio_transcript.done":
      return true;
    default:
      return false;
  }
}
|
|
3308
|
+
/**
 * Checks whether an event is the `response.done` event.
 *
 * @param event Event object with a `type` field.
 * @returns `true` only when `event.type` is exactly `response.done`.
 */
function isResponseDoneEvent(event) {
  const { type } = event;
  return type === "response.done";
}
|
|
3195
3311
|
var BREATHE_KEYFRAMES = `@keyframes ivi-subtitle-breathe{0%,100%{opacity:1}50%{opacity:.55}}`;
|
|
3196
3312
|
function IVISubtitleOverlay(props) {
|
|
3197
3313
|
const {
|
|
@@ -3199,13 +3315,15 @@ function IVISubtitleOverlay(props) {
|
|
|
3199
3315
|
roles = "user",
|
|
3200
3316
|
maxItems,
|
|
3201
3317
|
maxVisible,
|
|
3318
|
+
useModelStreamingTranscript,
|
|
3202
3319
|
subtitleStyle,
|
|
3203
3320
|
className,
|
|
3204
3321
|
style
|
|
3205
3322
|
} = props;
|
|
3206
3323
|
const entries = useIviSubtitles(runtime, {
|
|
3207
3324
|
roles,
|
|
3208
|
-
maxItems: maxItems ?? maxVisible
|
|
3325
|
+
maxItems: maxItems ?? maxVisible,
|
|
3326
|
+
useModelStreamingTranscript
|
|
3209
3327
|
});
|
|
3210
3328
|
if (entries.length === 0) return null;
|
|
3211
3329
|
const fontFamily = subtitleStyle?.fontFamily ?? "system-ui, -apple-system, sans-serif";
|
|
@@ -3828,6 +3946,19 @@ function detectMediaVolumeType(source) {
|
|
|
3828
3946
|
if (!url) return null;
|
|
3829
3947
|
return isM3u8Url(url) ? "hls" : "video";
|
|
3830
3948
|
}
|
|
3949
|
+
/**
 * Computes the key under which a source is rendered.
 *
 * Sources without TRTC playback use their own id. Sources with TRTC playback get a key
 * built from the TRTC connection fields, so two source ids that carry the same
 * connection fields produce the same key.
 *
 * @param sourceId Identifier of the source.
 * @param source Source whose `playback` is passed to `getTrtcPlayback`.
 * @returns `sourceId`, or `trtc:<app_id>:<room_id>:<user_id>:<user_sig>` with missing
 *   fields rendered as empty strings.
 */
function getSourceRenderKey(sourceId, source) {
  const trtc = getTrtcPlayback(source.playback);
  if (!trtc) {
    return sourceId;
  }
  const connectionFields = ["app_id", "room_id", "user_id", "user_sig"];
  const parts = connectionFields.map((field) => trtc[field] ?? "");
  return ["trtc", ...parts].join(":");
}
|
|
3831
3962
|
function TrackSlotMediaContent(props) {
|
|
3832
3963
|
const {
|
|
3833
3964
|
slot,
|
|
@@ -3865,7 +3996,7 @@ function TrackSlotMediaContent(props) {
|
|
|
3865
3996
|
IVITrtcPlayer,
|
|
3866
3997
|
{
|
|
3867
3998
|
trtc,
|
|
3868
|
-
sourceId:
|
|
3999
|
+
sourceId: slotSourceId,
|
|
3869
4000
|
runtime,
|
|
3870
4001
|
...trtcPlayerProps,
|
|
3871
4002
|
muted: trtcMuted,
|
|
@@ -4020,26 +4151,35 @@ function SlotVideo(props) {
|
|
|
4020
4151
|
// src/react/internal/use-multi-preload-sources.ts
|
|
4021
4152
|
function useMultiPreloadSources(sources, activeSourceId) {
|
|
4022
4153
|
return react.useMemo(() => {
|
|
4023
|
-
|
|
4024
|
-
for (const [id, runtimeSource] of sources) {
|
|
4025
|
-
const isActive = id === activeSourceId;
|
|
4026
|
-
const shouldMount = Boolean(runtimeSource.preload) || isActive;
|
|
4027
|
-
if (!shouldMount) {
|
|
4028
|
-
continue;
|
|
4029
|
-
}
|
|
4030
|
-
const ready = toReadyRuntimeSource(runtimeSource);
|
|
4031
|
-
if (!ready) {
|
|
4032
|
-
continue;
|
|
4033
|
-
}
|
|
4034
|
-
entries.push({
|
|
4035
|
-
sourceId: id,
|
|
4036
|
-
source: ready,
|
|
4037
|
-
isActive
|
|
4038
|
-
});
|
|
4039
|
-
}
|
|
4040
|
-
return entries;
|
|
4154
|
+
return buildPreloadSourceEntries(sources, activeSourceId);
|
|
4041
4155
|
}, [sources, activeSourceId]);
|
|
4042
4156
|
}
|
|
4157
|
+
/**
 * Builds the list of sources to mount: every source that is active or flagged for
 * preload and that `toReadyRuntimeSource` accepts, de-duplicated by render key.
 *
 * When several sources share a render key, the first one seen is kept unless a later
 * one is the active source and the kept one is not.
 *
 * @param sources Iterable of `[sourceId, runtimeSource]` pairs.
 * @param activeSourceId Id of the currently active source.
 * @returns Entries of shape `{ sourceId, renderKey, source, isActive }`.
 */
function buildPreloadSourceEntries(sources, activeSourceId) {
  const dedupedEntries = /* @__PURE__ */ new Map();
  for (const [sourceId, runtimeSource] of sources) {
    const isActive = sourceId === activeSourceId;
    if (!isActive && !runtimeSource.preload) {
      continue;
    }
    const source = toReadyRuntimeSource(runtimeSource);
    if (!source) {
      continue;
    }
    const renderKey = getSourceRenderKey(sourceId, source);
    const existing = dedupedEntries.get(renderKey);
    // Keep what is already stored unless this source is active and the stored one is not.
    if (existing && (existing.isActive || !isActive)) {
      continue;
    }
    dedupedEntries.set(renderKey, { sourceId, renderKey, source, isActive });
  }
  return [...dedupedEntries.values()];
}
|
|
4043
4183
|
function IVITrackSlot(props) {
|
|
4044
4184
|
const {
|
|
4045
4185
|
slot,
|
|
@@ -4129,7 +4269,7 @@ function IVITrackSlot(props) {
|
|
|
4129
4269
|
}
|
|
4130
4270
|
) })
|
|
4131
4271
|
},
|
|
4132
|
-
entry.
|
|
4272
|
+
entry.renderKey
|
|
4133
4273
|
);
|
|
4134
4274
|
}),
|
|
4135
4275
|
showSubtitle && activeSource && supportsSubtitleOverlay(activeSource) && /* @__PURE__ */ jsxRuntime.jsx("div", { style: SUBTITLE_OVERLAY_STYLE, children: /* @__PURE__ */ jsxRuntime.jsx(
|