@siact/sime-x-vue 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- import { inject, defineComponent, shallowRef, provide, renderSlot, computed, openBlock, createBlock, Transition, withCtx, createElementBlock, normalizeClass, createElementVNode, toDisplayString, createVNode, createCommentVNode, ref, onBeforeUnmount, reactive, watch, normalizeStyle, withModifiers, nextTick } from 'vue';
1
+ import { inject, defineComponent, shallowRef, provide, renderSlot, computed, openBlock, createBlock, Transition, withCtx, createElementBlock, normalizeClass, createElementVNode, toDisplayString, createVNode, createCommentVNode, ref, onBeforeUnmount, reactive, watch, normalizeStyle, withModifiers, nextTick, unref, Fragment, renderList } from 'vue';
2
2
  import { HostBridge } from '@siact/sime-bridge';
3
- import { WakeWordDetectorStandalone } from 'web-voice-kit';
3
+ import { WakeWordDetectorStandalone, SpeechSynthesizerStandalone, SpeechTranscriberStandalone } from 'web-voice-kit';
4
4
 
5
5
  const AiChatbotXKey = Symbol("sime-x");
6
6
  function injectStrict(key, defaultValue, treatDefaultAsFactory) {
@@ -29,7 +29,7 @@ var clientCommandKey = /* @__PURE__ */ ((clientCommandKey2) => {
29
29
  return clientCommandKey2;
30
30
  })(clientCommandKey || {});
31
31
 
32
- const _sfc_main$3 = /* @__PURE__ */ defineComponent({
32
+ const _sfc_main$4 = /* @__PURE__ */ defineComponent({
33
33
  __name: "sime-provider",
34
34
  props: {
35
35
  project: {},
@@ -42,7 +42,7 @@ const _sfc_main$3 = /* @__PURE__ */ defineComponent({
42
42
  },
43
43
  setup(__props) {
44
44
  const props = __props;
45
- const hostBridge = shallowRef(new HostBridge());
45
+ const hostBridge = shallowRef(new HostBridge({ debug: false }));
46
46
  const startListeningRef = shallowRef(async () => {
47
47
  });
48
48
  const stopListeningRef = shallowRef(async () => {
@@ -57,7 +57,7 @@ const _sfc_main$3 = /* @__PURE__ */ defineComponent({
57
57
  chatbotUrl: () => props.chatbotUrl,
58
58
  appId: () => props.appId,
59
59
  appToken: () => props.appToken,
60
- voiceConfig: () => props.voiceConfig,
60
+ voiceConfig: () => props.voiceConfig || { appId: "", apiKey: "", websocketUrl: "" },
61
61
  startListening: () => startListeningRef.value(),
62
62
  stopListening: () => stopListeningRef.value(),
63
63
  toggleCollapse: () => toggleCollapseRef.value(),
@@ -106,18 +106,18 @@ const _sfc_main$3 = /* @__PURE__ */ defineComponent({
106
106
  }
107
107
  });
108
108
 
109
- const _hoisted_1$2 = { class: "content-container" };
110
- const _hoisted_2$2 = { class: "status-header" };
111
- const _hoisted_3$1 = { class: "status-text" };
112
- const _hoisted_4$1 = {
109
+ const _hoisted_1$3 = { class: "content-container" };
110
+ const _hoisted_2$3 = { class: "status-header" };
111
+ const _hoisted_3$2 = { class: "status-text" };
112
+ const _hoisted_4$2 = {
113
113
  key: 0,
114
114
  class: "transcription-content"
115
115
  };
116
- const _hoisted_5$1 = {
116
+ const _hoisted_5$2 = {
117
117
  key: 1,
118
118
  class: "placeholder-text"
119
119
  };
120
- const _sfc_main$2 = /* @__PURE__ */ defineComponent({
120
+ const _sfc_main$3 = /* @__PURE__ */ defineComponent({
121
121
  __name: "voice-status",
122
122
  props: {
123
123
  status: {},
@@ -179,9 +179,9 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
179
179
  ])
180
180
  ])
181
181
  ], -1)),
182
- createElementVNode("div", _hoisted_1$2, [
183
- createElementVNode("div", _hoisted_2$2, [
184
- createElementVNode("span", _hoisted_3$1, toDisplayString(statusLabel.value), 1)
182
+ createElementVNode("div", _hoisted_1$3, [
183
+ createElementVNode("div", _hoisted_2$3, [
184
+ createElementVNode("span", _hoisted_3$2, toDisplayString(statusLabel.value), 1)
185
185
  ]),
186
186
  createElementVNode("div", {
187
187
  class: normalizeClass(["text-window", { "has-text": !!__props.transcriptionText }])
@@ -191,7 +191,7 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
191
191
  mode: "out-in"
192
192
  }, {
193
193
  default: withCtx(() => [
194
- __props.transcriptionText ? (openBlock(), createElementBlock("p", _hoisted_4$1, toDisplayString(__props.transcriptionText), 1)) : __props.status === "wake" ? (openBlock(), createElementBlock("p", _hoisted_5$1, "Listening...")) : createCommentVNode("", true)
194
+ __props.transcriptionText ? (openBlock(), createElementBlock("p", _hoisted_4$2, toDisplayString(__props.transcriptionText), 1)) : __props.status === "wake" ? (openBlock(), createElementBlock("p", _hoisted_5$2, "Listening...")) : createCommentVNode("", true)
195
195
  ]),
196
196
  _: 1
197
197
  })
@@ -213,14 +213,14 @@ const _export_sfc = (sfc, props) => {
213
213
  return target;
214
214
  };
215
215
 
216
- const VoiceStatus = /* @__PURE__ */ _export_sfc(_sfc_main$2, [["__scopeId", "data-v-c9fa6caf"]]);
216
+ const VoiceStatus = /* @__PURE__ */ _export_sfc(_sfc_main$3, [["__scopeId", "data-v-c9fa6caf"]]);
217
217
 
218
- const _hoisted_1$1 = {
218
+ const _hoisted_1$2 = {
219
219
  key: 0,
220
220
  class: "execution-bubble"
221
221
  };
222
- const _hoisted_2$1 = { class: "exec-text" };
223
- const _sfc_main$1 = /* @__PURE__ */ defineComponent({
222
+ const _hoisted_2$2 = { class: "exec-text" };
223
+ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
224
224
  __name: "execution-status",
225
225
  props: {
226
226
  visible: { type: Boolean },
@@ -230,8 +230,8 @@ const _sfc_main$1 = /* @__PURE__ */ defineComponent({
230
230
  return (_ctx, _cache) => {
231
231
  return openBlock(), createBlock(Transition, { name: "exec-bubble" }, {
232
232
  default: withCtx(() => [
233
- __props.visible ? (openBlock(), createElementBlock("div", _hoisted_1$1, [
234
- createElementVNode("span", _hoisted_2$1, toDisplayString(__props.text || "执行中"), 1),
233
+ __props.visible ? (openBlock(), createElementBlock("div", _hoisted_1$2, [
234
+ createElementVNode("span", _hoisted_2$2, toDisplayString(__props.text || "执行中"), 1),
235
235
  _cache[0] || (_cache[0] = createElementVNode("div", { class: "loading-dots" }, [
236
236
  createElementVNode("span", { class: "dot" }),
237
237
  createElementVNode("span", { class: "dot" }),
@@ -245,7 +245,7 @@ const _sfc_main$1 = /* @__PURE__ */ defineComponent({
245
245
  }
246
246
  });
247
247
 
248
- const ExecutionStatus = /* @__PURE__ */ _export_sfc(_sfc_main$1, [["__scopeId", "data-v-8244ff0d"]]);
248
+ const ExecutionStatus = /* @__PURE__ */ _export_sfc(_sfc_main$2, [["__scopeId", "data-v-8244ff0d"]]);
249
249
 
250
250
  const ensureMicrophonePermission = async () => {
251
251
  if (typeof navigator === "undefined" || typeof window === "undefined") {
@@ -314,22 +314,22 @@ const ensureMicrophonePermission = async () => {
314
314
  }
315
315
  };
316
316
 
317
- const _hoisted_1 = ["data-theme"];
318
- const _hoisted_2 = { class: "fab-avatar-wrapper" };
319
- const _hoisted_3 = ["src"];
320
- const _hoisted_4 = { class: "header-left" };
321
- const _hoisted_5 = { class: "logo-icon" };
322
- const _hoisted_6 = ["src"];
323
- const _hoisted_7 = { class: "title" };
324
- const _hoisted_8 = { class: "actions" };
325
- const _hoisted_9 = ["title"];
326
- const _hoisted_10 = {
317
+ const _hoisted_1$1 = ["data-theme"];
318
+ const _hoisted_2$1 = { class: "fab-avatar-wrapper" };
319
+ const _hoisted_3$1 = ["src"];
320
+ const _hoisted_4$1 = { class: "header-left" };
321
+ const _hoisted_5$1 = { class: "logo-icon" };
322
+ const _hoisted_6$1 = ["src"];
323
+ const _hoisted_7$1 = { class: "title" };
324
+ const _hoisted_8$1 = { class: "actions" };
325
+ const _hoisted_9$1 = ["title"];
326
+ const _hoisted_10$1 = {
327
327
  key: 0,
328
328
  class: "voice-indicator"
329
329
  };
330
- const _hoisted_11 = ["title"];
331
- const _hoisted_12 = ["title"];
332
- const _hoisted_13 = {
330
+ const _hoisted_11$1 = ["title"];
331
+ const _hoisted_12$1 = ["title"];
332
+ const _hoisted_13$1 = {
333
333
  width: "16",
334
334
  height: "16",
335
335
  viewBox: "0 0 24 24",
@@ -338,7 +338,7 @@ const _hoisted_13 = {
338
338
  const _hoisted_14 = ["d"];
339
339
  const _hoisted_15 = ["src"];
340
340
  const FAB_SAFE_GAP = 24;
341
- const _sfc_main = /* @__PURE__ */ defineComponent({
341
+ const _sfc_main$1 = /* @__PURE__ */ defineComponent({
342
342
  __name: "sime-x",
343
343
  props: {
344
344
  xLogo: {},
@@ -704,9 +704,9 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
704
704
  emit("wakeUp", false);
705
705
  }
706
706
  };
707
- const handleIframeLoad = (event) => {
707
+ const handleIframeLoad = async (event) => {
708
708
  aiChatbotX.setIframeElement(event.target);
709
- aiChatbotX.setTheme("dark");
709
+ aiChatbotX.setTheme(currentTheme.value);
710
710
  };
711
711
  watch(
712
712
  () => [aiChatbotX.chatbotUrl()],
@@ -763,14 +763,14 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
763
763
  visible: isProcessing.value,
764
764
  text: transcriptionText.value
765
765
  }, null, 8, ["visible", "text"])),
766
- createElementVNode("div", _hoisted_2, [
766
+ createElementVNode("div", _hoisted_2$1, [
767
767
  createElementVNode("img", {
768
768
  src: __props.xLogo ? __props.xLogo : "/sime.png",
769
769
  alt: "assistant",
770
770
  style: normalizeStyle({
771
771
  width: __props.xSize?.width + "px"
772
772
  })
773
- }, null, 12, _hoisted_3),
773
+ }, null, 12, _hoisted_3$1),
774
774
  createVNode(Transition, { name: "indicator-fade" }, {
775
775
  default: withCtx(() => [
776
776
  voiceStatus.value === "listening" ? (openBlock(), createElementBlock("div", {
@@ -835,17 +835,17 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
835
835
  class: "x-dialog-header",
836
836
  onMousedown: withModifiers(startDrag, ["stop"])
837
837
  }, [
838
- createElementVNode("div", _hoisted_4, [
839
- createElementVNode("div", _hoisted_5, [
838
+ createElementVNode("div", _hoisted_4$1, [
839
+ createElementVNode("div", _hoisted_5$1, [
840
840
  createElementVNode("img", {
841
841
  src: __props.xLogo ? __props.xLogo : "/sime.png",
842
842
  alt: "assistant",
843
843
  class: "logo"
844
- }, null, 8, _hoisted_6)
844
+ }, null, 8, _hoisted_6$1)
845
845
  ]),
846
- createElementVNode("span", _hoisted_7, toDisplayString(__props.xTitle), 1)
846
+ createElementVNode("span", _hoisted_7$1, toDisplayString(__props.xTitle), 1)
847
847
  ]),
848
- createElementVNode("div", _hoisted_8, [
848
+ createElementVNode("div", _hoisted_8$1, [
849
849
  createElementVNode("button", {
850
850
  class: "action-btn theme-btn",
851
851
  title: "开启新对话",
@@ -909,8 +909,8 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
909
909
  "stroke-linejoin": "round"
910
910
  })
911
911
  ], -1)),
912
- voiceStatus.value !== "standby" ? (openBlock(), createElementBlock("span", _hoisted_10)) : createCommentVNode("", true)
913
- ], 10, _hoisted_9),
912
+ voiceStatus.value !== "standby" ? (openBlock(), createElementBlock("span", _hoisted_10$1)) : createCommentVNode("", true)
913
+ ], 10, _hoisted_9$1),
914
914
  createElementVNode("button", {
915
915
  class: "action-btn theme-btn",
916
916
  onClick: withModifiers(cycleTheme, ["stop"]),
@@ -934,13 +934,13 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
934
934
  fill: "currentColor"
935
935
  })
936
936
  ], -1)
937
- ])], 8, _hoisted_11),
937
+ ])], 8, _hoisted_11$1),
938
938
  createElementVNode("button", {
939
939
  class: "action-btn collapse-btn",
940
940
  onClick: withModifiers(toggleCollapse, ["stop"]),
941
941
  title: isCollapsed.value ? "展开" : "折叠"
942
942
  }, [
943
- (openBlock(), createElementBlock("svg", _hoisted_13, [
943
+ (openBlock(), createElementBlock("svg", _hoisted_13$1, [
944
944
  createElementVNode("path", {
945
945
  d: isCollapsed.value ? "M18 15L12 9L6 15" : "M6 9L12 15L18 9",
946
946
  stroke: "currentColor",
@@ -949,7 +949,7 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
949
949
  "stroke-linejoin": "round"
950
950
  }, null, 8, _hoisted_14)
951
951
  ]))
952
- ], 8, _hoisted_12),
952
+ ], 8, _hoisted_12$1),
953
953
  createElementVNode("button", {
954
954
  class: "action-btn minimize-btn",
955
955
  onClick: _cache[2] || (_cache[2] = withModifiers(($event) => toggleDialog(false), ["stop"])),
@@ -979,7 +979,7 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
979
979
  ref: "iframeRef",
980
980
  src: chatbotUrl.value,
981
981
  class: "x-iframe",
982
- allow: "microphone *",
982
+ allow: "microphone *; storage-access *; camera *",
983
983
  frameborder: "0",
984
984
  onLoad: handleIframeLoad
985
985
  }, null, 40, _hoisted_15)
@@ -988,12 +988,1222 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
988
988
  ]),
989
989
  _: 1
990
990
  })
991
- ], 8, _hoisted_1);
991
+ ], 8, _hoisted_1$1);
992
+ };
993
+ }
994
+ });
995
+
996
+ const simeX = /* @__PURE__ */ _export_sfc(_sfc_main$1, [["__scopeId", "data-v-91f104d1"]]);
997
+
998
/**
 * Text-to-speech composable built on `SpeechSynthesizerStandalone` (web-voice-kit,
 * backed by the iFlytek TTS WebSocket API).
 *
 * Supports both one-shot speech (`speak`) and streaming input (`feed`/`flush`),
 * where incoming text deltas are buffered and spoken sentence-by-sentence.
 *
 * @param {() => object} getVoiceConfig - Lazily returns the voice config
 *   (appId / apiKey / apiSecret plus optional tts* overrides). Called on first use
 *   so the config can arrive after component setup.
 * @returns {object} { isSpeaking, warmUpAudio, speak, feed, flush, stop, destroy, setOnQueueEmpty }
 */
function useTTS(getVoiceConfig) {
  // Reactive flag: true from synthesis start until the playback queue drains.
  const isSpeaking = ref(false);
  // Lazily-created synthesizer instance; null until first successful init.
  let instance = null;
  // In-flight init promise, used to deduplicate concurrent ensureInstance() calls.
  let initPromise = null;
  // Shared AudioContext, pre-created in warmUpAudio() from a user gesture.
  let audioCtx = null;
  // Accumulates streamed text deltas until a sentence delimiter is seen.
  let sentenceBuffer = "";
  // Sentence boundaries: fullwidth (。!?;), newline, and ASCII (.!?;) delimiters.
  const sentenceDelimiters = /[。!?;\n.!?;]/;
  // Strip markdown syntax so the synthesizer reads plain prose:
  // code fences and tables are dropped entirely; emphasis/links/headings keep
  // their text; paragraph breaks become "。" and single newlines become ",".
  const stripMarkdown = (text) => text.replace(/```[\s\S]*?```/g, "").replace(/\|[^\n]*\|/g, "").replace(/#{1,6}\s*/g, "").replace(/\*\*(.*?)\*\*/g, "$1").replace(/\*(.*?)\*/g, "$1").replace(/`([^`]*)`/g, "$1").replace(/\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/[-*+]\s+/g, "").replace(/>\s+/g, "").replace(/\n{2,}/g, "。").replace(/\n/g, ",").trim();
  // Create/resume the AudioContext. Browsers require this to happen inside a
  // user gesture, so callers invoke it from a click handler before any speech.
  const warmUpAudio = () => {
    if (!audioCtx || audioCtx.state === "closed") {
      try {
        audioCtx = new AudioContext();
      } catch {
        // AudioContext unavailable (e.g. non-browser environment) — skip warm-up.
        return;
      }
    }
    if (audioCtx.state === "suspended") {
      // NOTE(review): resume() returns a promise that is deliberately not awaited.
      audioCtx.resume();
    }
  };
  // Caller-supplied hook fired whenever the playback queue empties.
  let onQueueEmptyCb = null;
  // Lazily construct the synthesizer exactly once; returns null (after warning)
  // when the voice config is missing the required apiSecret.
  const ensureInstance = async () => {
    if (instance) return instance;
    // Another call is already initializing — share its promise.
    if (initPromise) return initPromise;
    const vc = getVoiceConfig();
    if (!vc || !vc.apiSecret) {
      console.warn("[TTS] 缺少 voiceConfig 或 apiSecret,语音播报已禁用");
      return null;
    }
    initPromise = (async () => {
      try {
        const tts = new SpeechSynthesizerStandalone({
          appId: vc.appId,
          // tts-specific key/url/voice override the shared ones when provided.
          apiKey: vc.ttsApiKey || vc.apiKey,
          apiSecret: vc.apiSecret,
          websocketUrl: vc.ttsWebsocketUrl || "wss://tts-api.xfyun.cn/v2/tts",
          vcn: vc.ttsVcn || "xiaoyan",
          speed: 60,
          volume: 50,
          pitch: 50,
          aue: "raw",
          auf: "audio/L16;rate=16000",
          tte: "UTF8",
          autoPlay: true
        });
        tts.onStart(() => {
          isSpeaking.value = true;
        });
        // onEnd fires per utterance; we only care about the whole queue draining.
        tts.onEnd(() => {
        });
        tts.onQueueEmpty(() => {
          isSpeaking.value = false;
          onQueueEmptyCb?.();
        });
        tts.onError((err) => {
          console.error("[TTS] Error:", err);
          isSpeaking.value = false;
        });
        // Reuse the pre-warmed AudioContext so playback can start without a
        // fresh user gesture. NOTE(review): assigns private-looking fields
        // (audioContext/gainNode) on the SDK instance — confirm this is a
        // supported integration point of web-voice-kit.
        if (audioCtx && audioCtx.state === "running") {
          tts.audioContext = audioCtx;
          tts.gainNode = audioCtx.createGain();
          tts.gainNode.connect(audioCtx.destination);
        }
        instance = tts;
        initPromise = null;
        return tts;
      } catch (err) {
        console.error("[TTS] 初始化失败:", err);
        // Clear the promise so a later call can retry initialization.
        initPromise = null;
        return null;
      }
    })();
    return initPromise;
  };
  // Speak one chunk of text (markdown stripped). No-ops on empty/unspeakable input.
  const speak = async (text) => {
    const clean = stripMarkdown(text);
    if (!clean.trim()) return;
    const tts = await ensureInstance();
    if (!tts) return;
    try {
      tts.speak(clean);
    } catch (err) {
      console.error("[TTS] speak 失败:", err);
    }
  };
  // Streaming entry point: append a text delta, then speak every complete
  // sentence currently in the buffer (fire-and-forget; the SDK queues audio).
  const feed = (delta) => {
    sentenceBuffer += delta;
    while (true) {
      const match = sentenceBuffer.match(sentenceDelimiters);
      if (!match || match.index === void 0) break;
      // Include the delimiter itself in the spoken sentence.
      const sentence = sentenceBuffer.slice(0, match.index + 1).trim();
      sentenceBuffer = sentenceBuffer.slice(match.index + 1);
      if (sentence.length > 0) speak(sentence);
    }
  };
  // Speak whatever partial sentence remains (call when the stream finishes).
  const flush = () => {
    const remaining = sentenceBuffer.trim();
    sentenceBuffer = "";
    if (remaining.length > 0) speak(remaining);
  };
  // Abort playback and drop any buffered text; errors from the SDK are ignored.
  const stop = () => {
    sentenceBuffer = "";
    isSpeaking.value = false;
    if (instance) {
      try {
        instance.stop();
      } catch {
      }
    }
  };
  const setOnQueueEmpty = (cb) => {
    onQueueEmptyCb = cb;
  };
  // Full teardown: stop playback, destroy the SDK instance, close the AudioContext.
  const destroy = () => {
    stop();
    if (instance) {
      try {
        instance.destroy();
      } catch {
      }
      instance = null;
    }
    if (audioCtx) {
      try {
        audioCtx.close();
      } catch {
      }
      audioCtx = null;
    }
  };
  return {
    isSpeaking,
    warmUpAudio,
    speak,
    feed,
    flush,
    stop,
    destroy,
    setOnQueueEmpty
  };
}
1139
+
1140
/**
 * Visibility lifecycle for the floating reply bubble: explicit open/hide,
 * delayed auto-dismiss with a fade-out phase, and scroll-to-bottom support.
 *
 * @param {object} [options] - { bubbleSize, dismissDelay, isSpeaking, isInvoking }
 * @returns {object} Reactive state plus the control methods listed in the return.
 */
function useBubble(options = {}) {
  const visible = ref(false);
  const fadingOut = ref(false);
  const stackRef = ref(null);
  let dismissTimer = null;

  // Clear any pending auto-dismiss timer.
  const cancelDismiss = () => {
    if (dismissTimer) {
      clearTimeout(dismissTimer);
      dismissTimer = null;
    }
  };

  // The bubble renders only while visible and not mid fade-out.
  const show = computed(() => visible.value && !fadingOut.value);

  // `||` (not `??`) is kept: empty-string sizes fall back to undefined too.
  const style = computed(() => ({
    width: options.bubbleSize?.width || void 0,
    maxHeight: options.bubbleSize?.maxHeight || void 0
  }));

  // Show the bubble immediately, aborting any dismiss in progress.
  const open = () => {
    cancelDismiss();
    fadingOut.value = false;
    visible.value = true;
  };

  // Hide the bubble immediately, skipping the fade animation.
  const hide = () => {
    cancelDismiss();
    fadingOut.value = false;
    visible.value = false;
  };

  // Arm the auto-dismiss timer: after the delay, fade out for 400ms, then unmount.
  const scheduleDismiss = () => {
    cancelDismiss();
    // Never auto-dismiss while speech is playing or a tool call is running.
    if (options.isSpeaking?.value || options.isInvoking?.value) return;
    dismissTimer = setTimeout(() => {
      fadingOut.value = true;
      // Give the CSS fade transition 400ms before removing the element.
      setTimeout(() => {
        visible.value = false;
        fadingOut.value = false;
      }, 400);
    }, options.dismissDelay ?? 4e3);
  };

  // Scroll the message stack to its bottom after the next DOM update.
  const scrollToBottom = () => {
    nextTick(() => {
      const el = stackRef.value;
      if (el) {
        el.scrollTop = el.scrollHeight;
      }
    });
  };

  // Teardown: just make sure no timer outlives the component.
  const destroy = () => {
    cancelDismiss();
  };

  return {
    visible,
    fadingOut,
    show,
    style,
    stackRef,
    open,
    hide,
    cancelDismiss,
    scheduleDismiss,
    scrollToBottom,
    destroy
  };
}
1203
+
1204
/**
 * Voice input composable combining a wake-word detector and a speech
 * transcriber (both from web-voice-kit). Flow: the detector listens for a
 * wake word; on wake, transcription starts and runs until the transcriber's
 * auto-stop fires, at which point the final text is handed to
 * `options.onTranscriptionDone`.
 *
 * @param {object} options - { getVoiceConfig, modelPath, wakeWords, onWake, onTranscriptionDone }
 * @returns {object} Reactive state plus start/stop/toggle/destroy controls.
 */
function useVoiceRecognition(options) {
  // "standby" (wake-word detector off) | "listening" (detector running).
  const voiceStatus = ref("standby");
  const isTranscribing = ref(false);
  // Guards against re-entrant detector construction.
  const isInitializing = ref(false);
  // Live partial transcript, updated on every recognizer result.
  const transcriptionText = ref("");
  // Drives the brief wake-up UI animation (cleared after 1.2s).
  const wakeAnimating = ref(false);
  let detector = null;
  let transcriber = null;
  // Lazily build the transcriber from the voice config; logs and bails when
  // any of appId/apiKey/websocketUrl is missing.
  const initTranscriber = () => {
    if (transcriber) return;
    const vc = options.getVoiceConfig();
    if (!vc || !vc.appId || !vc.apiKey || !vc.websocketUrl) {
      console.error("[VoiceRecognition] 缺少 voiceConfig,无法初始化转写器");
      return;
    }
    transcriber = new SpeechTranscriberStandalone({
      appId: vc.appId,
      apiKey: vc.apiKey,
      websocketUrl: vc.websocketUrl,
      // Stop automatically after 2s of silence, 5s without any speech,
      // or 45s total recording time.
      autoStop: {
        enabled: true,
        silenceTimeoutMs: 2e3,
        noSpeechTimeoutMs: 5e3,
        maxDurationMs: 45e3
      }
    });
    transcriber.onResult((result) => {
      transcriptionText.value = result.transcript || "";
    });
    // Auto-stop marks the end of an utterance: capture the text before
    // clearing state, then deliver it only if non-blank.
    transcriber.onAutoStop(async () => {
      const finalText = transcriptionText.value;
      await stopTranscribing();
      transcriptionText.value = "";
      if (finalText.trim()) {
        options.onTranscriptionDone?.(finalText);
      }
    });
    transcriber.onError((error) => {
      console.error("[VoiceRecognition] 转写错误:", error);
      stopTranscribing();
      transcriptionText.value = "";
    });
  };
  // Begin a transcription session (creating the transcriber on first use).
  const startTranscribing = async () => {
    if (isTranscribing.value) return;
    if (!transcriber) initTranscriber();
    if (!transcriber) return;
    try {
      await transcriber.start();
      isTranscribing.value = true;
      transcriptionText.value = "";
    } catch (error) {
      console.error("[VoiceRecognition] 启动转写失败:", error);
    }
  };
  // Stop the transcriber if it is running; always clears the transcribing flag.
  const stopTranscribing = async () => {
    if (!transcriber || !transcriber.isActive()) {
      isTranscribing.value = false;
      return;
    }
    try {
      await transcriber.stop();
    } catch (error) {
      console.error("[VoiceRecognition] 停止转写失败:", error);
    } finally {
      isTranscribing.value = false;
    }
  };
  // Build the wake-word detector; requires options.modelPath.
  const initDetector = () => {
    if (detector || isInitializing.value) return;
    if (!options.modelPath) {
      console.error("[VoiceRecognition] 未传入 modelPath,无法启用唤醒词");
      return;
    }
    isInitializing.value = true;
    try {
      detector = new WakeWordDetectorStandalone({
        modelPath: options.modelPath,
        sampleRate: 16e3,
        usePartial: true,
        // Re-arm the detector 4s after each wake so it can trigger again.
        autoReset: {
          enabled: true,
          resetDelayMs: 4e3
        }
      });
      detector.setWakeWords(options.wakeWords || ["你好", "您好"]);
      // On wake: play the wake animation, notify the caller, and start
      // transcribing; the animation flag clears itself after 1.2s.
      detector.onWake(async () => {
        wakeAnimating.value = true;
        options.onWake?.();
        await startTranscribing();
        setTimeout(() => {
          wakeAnimating.value = false;
        }, 1200);
      });
      detector.onError((error) => {
        console.error("[VoiceRecognition] 唤醒监听错误:", error);
        voiceStatus.value = "standby";
        stopTranscribing();
      });
    } finally {
      isInitializing.value = false;
    }
  };
  /**
   * Toggle (or force, via `targetState`) wake-word listening.
   * Requests mic permission first; creates the detector lazily.
   * Turning listening off also aborts any in-flight transcription.
   */
  const toggleVoiceMode = async (targetState) => {
    const permission = await ensureMicrophonePermission();
    if (!permission || isInitializing.value) return;
    if (!detector) {
      initDetector();
      if (!detector) return;
    }
    const isListening = voiceStatus.value === "listening";
    // Explicit target wins; otherwise flip the current state.
    const shouldStart = targetState !== void 0 ? targetState : !isListening;
    if (isListening === shouldStart) return;
    try {
      if (shouldStart) {
        await detector.start();
        voiceStatus.value = "listening";
      } else {
        await detector.stop();
        voiceStatus.value = "standby";
        transcriptionText.value = "";
        await stopTranscribing();
      }
    } catch (error) {
      console.error("[VoiceRecognition] 监听切换失败:", error);
      voiceStatus.value = "standby";
    }
  };
  // Discard the current partial transcript and stop transcribing.
  const abortTranscription = async () => {
    transcriptionText.value = "";
    await stopTranscribing();
  };
  // Teardown: stop and release both SDK instances, ignoring stop() failures.
  const destroy = async () => {
    if (detector) {
      try {
        if (detector.isActive()) await detector.stop();
      } catch {
      }
      detector = null;
    }
    if (transcriber) {
      try {
        if (transcriber.isActive()) await transcriber.stop();
      } catch {
      }
      transcriber = null;
    }
  };
  return {
    voiceStatus,
    isTranscribing,
    isInitializing,
    transcriptionText,
    wakeAnimating,
    startTranscribing,
    stopTranscribing,
    abortTranscription,
    toggleVoiceMode,
    destroy
  };
}
1365
+
1366
// Matches the "<hex-code>:" line prefix of the legacy AI-SDK data-stream protocol.
const DATA_STREAM_LINE_RE = /^[0-9a-f]:/;
/**
 * Sniff the wire format of a streaming response from its first chunk.
 *
 * @param {string} firstChunk - Leading bytes of the response body, decoded.
 * @returns {"ui-message-stream"|"data-stream"|"plain-text"}
 *   - "ui-message-stream": SSE whose data payload is a JSON event with a `type`
 *   - "data-stream": legacy "<code>:<json>" protocol (bare or SSE-wrapped)
 *   - "plain-text": anything else
 */
function detectFormat(firstChunk) {
  const head = firstChunk.trimStart();
  if (!head.startsWith("data:")) {
    return DATA_STREAM_LINE_RE.test(head) ? "data-stream" : "plain-text";
  }
  // SSE-framed: inspect the payload of the first "data:" line only.
  const payload = head.split("\n")[0].slice(5).trim();
  try {
    const event = JSON.parse(payload);
    if (event && typeof event.type === "string") {
      return "ui-message-stream";
    }
  } catch {
    // Not JSON — fall through to the legacy-protocol check below.
  }
  // Legacy lines wrapped in SSE; otherwise assume the modern event stream.
  return DATA_STREAM_LINE_RE.test(payload) ? "data-stream" : "ui-message-stream";
}
1389
/**
 * Dispatch one SSE data payload of the AI-SDK UI message stream to callbacks.
 *
 * @param {string} payload - The text after "data:" for a single SSE event.
 * @param {object} callbacks - Optional handlers: onTextDelta, onToolCallStart,
 *   onToolCallDelta, onToolCallComplete, onToolResult, onStepFinish, onFinish, onError.
 */
function processUIMessageStreamEvent(payload, callbacks) {
  const body = payload.trim();
  // An empty payload or the sentinel terminator both signal completion.
  if (!body || body === "[DONE]") {
    callbacks.onFinish?.({});
    return;
  }
  let event;
  try {
    event = JSON.parse(body);
  } catch {
    console.warn("[DataStreamParser] failed to parse UI message stream event:", body.slice(0, 100));
    return;
  }
  const type = event?.type;
  if (!type) return;
  if (type === "text-delta") {
    if (typeof event.delta === "string") {
      callbacks.onTextDelta?.(event.delta);
    }
  } else if (type === "tool-input-start") {
    callbacks.onToolCallStart?.(event.toolCallId, event.toolName);
  } else if (type === "tool-input-delta") {
    callbacks.onToolCallDelta?.(event.toolCallId, event.inputTextDelta);
  } else if (type === "tool-input-available") {
    callbacks.onToolCallComplete?.(event.toolCallId, event.toolName, event.input);
  } else if (type === "tool-output-available") {
    callbacks.onToolResult?.(event.toolCallId, event.output);
  } else if (type === "finish-step") {
    callbacks.onStepFinish?.(event);
  } else if (type === "finish") {
    callbacks.onFinish?.(event);
  } else if (type === "error") {
    callbacks.onError?.(event.errorText || event.error || "Unknown error");
  } else {
    // Known-but-unused event types are ignored silently, as are custom
    // "data-*" events; anything else is logged for diagnosis.
    const ignored = [
      "start",
      "text-start",
      "text-end",
      "start-step",
      "reasoning-start",
      "reasoning-delta",
      "reasoning-end",
      "source-url",
      "source-document",
      "file",
      "abort"
    ];
    if (!ignored.includes(type) && !type.startsWith("data-")) {
      console.log("[DataStreamParser] unhandled UI message stream type:", type);
    }
  }
}
1450
/**
 * Parse one line of the legacy AI-SDK data-stream protocol ("<code>:<json>")
 * and forward it to the matching callback. Lines not matching the protocol
 * prefix are ignored; payloads that fail to JSON-parse are passed through raw.
 *
 * @param {string} line - A single protocol line (no trailing newline).
 * @param {object} callbacks - Same handler set as processUIMessageStreamEvent.
 */
function parseLegacyProtocolLine(line, callbacks) {
  if (!line || !DATA_STREAM_LINE_RE.test(line)) return;
  const raw = line.slice(2);
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch {
    // Non-JSON payloads (rare) are delivered as the raw string.
    payload = raw;
  }
  const code = line[0];
  if (code === "0") {
    // Text delta.
    callbacks.onTextDelta?.(payload);
  } else if (code === "9") {
    // Tool call started.
    callbacks.onToolCallStart?.(payload.toolCallId, payload.toolName);
  } else if (code === "b") {
    // Incremental tool-call arguments.
    callbacks.onToolCallDelta?.(payload.toolCallId, payload.argsTextDelta);
  } else if (code === "c") {
    // Tool call with complete arguments.
    callbacks.onToolCallComplete?.(payload.toolCallId, payload.toolName, payload.args);
  } else if (code === "a") {
    // Tool result.
    callbacks.onToolResult?.(payload.toolCallId, payload.result);
  } else if (code === "e") {
    // Step finished.
    callbacks.onStepFinish?.(payload);
  } else if (code === "d") {
    // Stream finished.
    callbacks.onFinish?.(payload);
  } else if (code === "3") {
    // Stream error.
    callbacks.onError?.(payload);
  }
}
1487
/**
 * Read a streaming HTTP response body and dispatch parsed events to `callbacks`.
 *
 * The wire format is sniffed from the first non-empty chunk via detectFormat():
 *   - "plain-text": every chunk is forwarded verbatim as a text delta;
 *   - "ui-message-stream": SSE events ("data:" lines, blank-line separated)
 *     whose payloads are JSON, handled by processUIMessageStreamEvent();
 *   - "data-stream": legacy "<code>:<json>" lines, either bare (newline
 *     separated) or wrapped in SSE framing, handled by parseLegacyProtocolLine().
 *
 * Always ends with a final callbacks.onFinish?.({}) once the body is drained.
 *
 * @param {Response} response - A fetch Response with a readable body stream.
 * @param {object} callbacks - Handler set (onTextDelta, onToolCall*, onFinish, ...).
 */
async function readDataStream(response, callbacks) {
  if (!response.body) return;
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  // Holds undelivered bytes: partial SSE events / partial lines between reads.
  let buffer = "";
  // null until the format has been sniffed from the first non-empty chunk.
  let format = null;
  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    // stream:true keeps multi-byte characters split across chunks intact.
    const chunk = decoder.decode(value, { stream: true });
    buffer += chunk;
    if (format === null && buffer.trim().length > 0) {
      format = detectFormat(buffer);
      console.log("[DataStreamParser] detected format:", format, "| first 200 chars:", buffer.slice(0, 200));
    }
    if (format === "plain-text") {
      // No framing: flush the whole buffer as one text delta.
      const text = buffer;
      buffer = "";
      if (text) callbacks.onTextDelta?.(text);
      continue;
    }
    if (format === "ui-message-stream") {
      // Consume complete SSE events (terminated by a blank line) only;
      // a partial event stays in the buffer for the next read.
      while (true) {
        const eventEnd = buffer.indexOf("\n\n");
        if (eventEnd === -1) break;
        const eventBlock = buffer.slice(0, eventEnd);
        buffer = buffer.slice(eventEnd + 2);
        // An event may carry several "data:" lines; process each payload.
        const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
        for (const dataLine of dataLines) {
          processUIMessageStreamEvent(dataLine, callbacks);
        }
      }
      continue;
    }
    if (format === "data-stream") {
      // Legacy protocol may arrive bare or wrapped in SSE framing; decide
      // per read from the buffer's current leading bytes.
      const isSSEWrapped = buffer.trimStart().startsWith("data:");
      if (isSSEWrapped) {
        while (true) {
          const eventEnd = buffer.indexOf("\n\n");
          if (eventEnd === -1) break;
          const eventBlock = buffer.slice(0, eventEnd);
          buffer = buffer.slice(eventEnd + 2);
          const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
          for (const dl of dataLines) {
            const t = dl.trim();
            if (!t || t === "[DONE]") {
              // "[DONE]" terminates the stream; blanks are simply skipped.
              if (t === "[DONE]") callbacks.onFinish?.({});
              continue;
            }
            parseLegacyProtocolLine(t, callbacks);
          }
        }
      } else {
        // Bare protocol: one event per newline-terminated line.
        while (true) {
          const newlineIdx = buffer.indexOf("\n");
          if (newlineIdx === -1) break;
          const line = buffer.slice(0, newlineIdx).trim();
          buffer = buffer.slice(newlineIdx + 1);
          if (line) parseLegacyProtocolLine(line, callbacks);
        }
      }
      continue;
    }
  }
  // Flush any bytes the decoder still holds from an incomplete sequence.
  const tail = decoder.decode();
  if (tail) buffer += tail;
  // Drain whatever remains in the buffer once the stream has ended
  // (a final event/line without its terminator).
  if (buffer.trim()) {
    if (format === "plain-text") {
      callbacks.onTextDelta?.(buffer);
    } else if (format === "ui-message-stream") {
      const dataLines = buffer.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
      for (const dl of dataLines) {
        processUIMessageStreamEvent(dl, callbacks);
      }
    } else if (format === "data-stream") {
      parseLegacyProtocolLine(buffer.trim(), callbacks);
    }
  }
  callbacks.onFinish?.({});
}
1567
// Consume an AI data-stream HTTP response and incrementally assemble an
// assistant message: an accumulated text string plus an ordered list of
// tool-call / tool-result parts. `onUpdate` receives a defensive snapshot
// after every stream event so the caller can render progressive output.
// Resolves with the final accumulated state once the stream ends.
// NOTE(review): driven by `readDataStream` defined earlier in this module.
async function parseDataStreamToMessage(response, onUpdate) {
  let textContent = "";
  const parts = [];
  // toolCallId -> { toolCallId, toolName, argsText, args, state, result? }
  const toolCalls = /* @__PURE__ */ new Map();
  // Return the most recent text part, creating one if none exists yet.
  const ensureTextPart = () => {
    for (let i = parts.length - 1; i >= 0; i--) {
      if (parts[i].type === "text") {
        return parts[i];
      }
    }
    const textPart = { type: "text", text: "" };
    parts.push(textPart);
    return textPart;
  };
  // Locate the part (call or result) belonging to a given tool invocation.
  const findToolPartIndex = (toolCallId) => {
    return parts.findIndex((p) => (p.type === "tool-call" || p.type === "tool-result") && p.toolCallId === toolCallId);
  };
  // Snapshot copies (array spread, new Map) keep callers from mutating internals.
  const emitUpdate = () => {
    onUpdate({ textContent, parts: [...parts], toolCalls: new Map(toolCalls) });
  };
  await readDataStream(response, {
    onTextDelta(text) {
      textContent += text;
      const textPart = ensureTextPart();
      // The text part always mirrors the FULL accumulated text, not the delta.
      textPart.text = textContent;
      emitUpdate();
    },
    onToolCallStart(toolCallId, toolName) {
      // Register the in-flight call; args stream in incrementally as JSON text.
      const tracker = {
        toolCallId,
        toolName,
        argsText: "",
        args: void 0,
        state: "partial-call"
      };
      toolCalls.set(toolCallId, tracker);
      const part = {
        type: "tool-call",
        toolCallId,
        toolName,
        args: void 0,
        state: "partial-call"
      };
      parts.push(part);
      emitUpdate();
    },
    onToolCallDelta(toolCallId, argsTextDelta) {
      const tracker = toolCalls.get(toolCallId);
      if (tracker) {
        tracker.argsText += argsTextDelta;
        // Opportunistically parse the partial JSON; failures are expected
        // until the args text is complete, so they are deliberately ignored.
        try {
          tracker.args = JSON.parse(tracker.argsText);
        } catch {
        }
        const idx = findToolPartIndex(toolCallId);
        if (idx !== -1 && parts[idx].type === "tool-call") {
          parts[idx].args = tracker.args;
        }
        emitUpdate();
      }
    },
    onToolCallComplete(toolCallId, toolName, args) {
      const tracker = toolCalls.get(toolCallId);
      if (tracker) {
        tracker.state = "call";
        tracker.args = typeof args === "string" ? safeJsonParse(args) : args;
      } else {
        // Completion for a call we never saw start: register it now.
        toolCalls.set(toolCallId, {
          toolCallId,
          toolName,
          argsText: typeof args === "string" ? args : JSON.stringify(args),
          args: typeof args === "string" ? safeJsonParse(args) : args,
          state: "call"
        });
      }
      const idx = findToolPartIndex(toolCallId);
      if (idx !== -1) {
        parts[idx].state = "call";
        parts[idx].toolName = toolName;
        parts[idx].args = toolCalls.get(toolCallId).args;
      } else {
        parts.push({
          type: "tool-call",
          toolCallId,
          toolName,
          args: toolCalls.get(toolCallId).args,
          state: "call"
        });
      }
      emitUpdate();
    },
    onToolResult(toolCallId, result) {
      const tracker = toolCalls.get(toolCallId);
      if (tracker) {
        tracker.result = result;
        tracker.state = "result";
      }
      const idx = findToolPartIndex(toolCallId);
      if (idx !== -1) {
        // Replace the tool-call part in place with a tool-result part so the
        // original ordering of parts is preserved.
        const existing = parts[idx];
        const resultPart = {
          type: "tool-result",
          toolCallId,
          toolName: existing.toolName,
          args: existing.args,
          result,
          state: "result"
        };
        parts[idx] = resultPart;
      } else {
        // Result for an unknown call: append with best-effort metadata.
        parts.push({
          type: "tool-result",
          toolCallId,
          toolName: tracker?.toolName || "unknown",
          args: tracker?.args,
          result,
          state: "result"
        });
      }
      emitUpdate();
    },
    onError(error) {
      // Stream errors are logged but do not abort accumulation.
      console.error("[DataStreamParser] stream error:", error);
    },
    onStepFinish(_data) {
      emitUpdate();
    },
    onFinish(_data) {
      emitUpdate();
    }
  });
  return { textContent, parts, toolCalls };
}
1700
// Parse a JSON string, returning the raw input unchanged when it is not
// valid JSON (never throws).
function safeJsonParse(str) {
  let parsed;
  try {
    parsed = JSON.parse(str);
  } catch {
    parsed = str;
  }
  return parsed;
}
1707
+
1708
// Human-readable (zh-CN) display labels for agent tool names.
// Tools not listed here fall back to their raw name (see `toolDisplayName`
// inside `useAgentInvoke`).
const toolDisplayNames = {
  generateReport: "生成报告",
  searchKnowledge: "知识库检索",
  resolveInstanceTargets: "解析实例目标",
  getHistoryMetrics: "历史数据查询",
  getRealtimeMetrics: "实时数据查询",
  queryBitableData: "多维表格查询",
  searchUser: "搜索用户",
  createBitableRecord: "创建表格记录",
  timeTool: "时间工具",
  loadSkill: "加载技能",
  executeCommand: "执行命令",
  dataAnalyzer: "数据分析",
  dataPredictor: "数据预测"
};
1723
// Composable that performs agent invocations against `options.endpoint`.
// Responsibilities: abortable fetch, handling both JSON and streamed
// ("data-stream") responses, incremental TTS feeding, tool-call progress
// tracking, host-command execution via the bridge, and a bounded rolling
// conversation history. Returns reactive state plus
// invoke/abort/resetState/clearHistory controls.
function useAgentInvoke(options) {
  const { aiChatbotX, tts, bubble } = options;
  // Conversation context is dropped after this much idle time (default 2 min).
  const sessionTimeoutMs = options.sessionTimeoutMs ?? 12e4;
  // Keep at most this many user/assistant turn PAIRS (default 10 -> 20 messages).
  const maxHistoryTurns = options.maxHistoryTurns ?? 10;
  const isInvoking = ref(false);
  const currentTextContent = ref("");
  const currentToolParts = ref([]);
  // toolCallIds whose returned host commands are currently being executed.
  const executingTools = ref(/* @__PURE__ */ new Set());
  const conversationHistory = ref([]);
  let lastInteractionTime = 0;
  // Clear history when the previous interaction happened too long ago.
  const checkSessionTimeout = () => {
    if (lastInteractionTime > 0 && Date.now() - lastInteractionTime > sessionTimeoutMs) {
      conversationHistory.value = [];
    }
  };
  // Append one message and trim history to the last `maxHistoryTurns` pairs.
  const appendToHistory = (role, content) => {
    conversationHistory.value.push({ role, content });
    const maxLen = maxHistoryTurns * 2;
    if (conversationHistory.value.length > maxLen) {
      conversationHistory.value = conversationHistory.value.slice(-maxLen);
    }
  };
  const clearHistory = () => {
    conversationHistory.value = [];
  };
  let abortController = null;
  // True once any text or tool activity is available to display.
  const hasAnyContent = computed(() => {
    return !!(currentTextContent.value || currentToolParts.value.length > 0);
  });
  // Map internal tool names to their display labels, falling back to the raw name.
  const toolDisplayName = (name) => toolDisplayNames[name] || name;
  const resetState = () => {
    currentTextContent.value = "";
    currentToolParts.value = [];
    executingTools.value = /* @__PURE__ */ new Set();
  };
  // Execute host commands carried in a tool result shaped like
  // { commands: [{ name, args }] }, marking `toolCallId` as executing for
  // the duration (replacing the Set each time so Vue reactivity fires).
  const executeHostCommands = async (toolCallId, result) => {
    if (!result || typeof result !== "object") return;
    const commands = result.commands;
    if (!Array.isArray(commands) || commands.length === 0) return;
    try {
      executingTools.value = /* @__PURE__ */ new Set([...executingTools.value, toolCallId]);
      for (const cmd of commands) {
        const args = Array.isArray(cmd.args) ? cmd.args : [];
        try {
          await aiChatbotX.executeCommand(cmd.name, args);
        } catch (cmdErr) {
          // One failed command must not stop the remaining ones.
          console.error(`[AgentInvoke] 执行命令 ${cmd.name} 失败:`, cmdErr);
        }
      }
    } finally {
      const next = new Set(executingTools.value);
      next.delete(toolCallId);
      executingTools.value = next;
    }
  };
  // Best-effort extraction of displayable text from a JSON reply payload:
  // checks output/answer/message/result at the top level, then one level
  // under `data`, before falling back to the serialized payload.
  const parseAssistantText = (payload) => {
    if (!payload) return "";
    if (typeof payload === "string") return payload;
    if (typeof payload === "object") {
      const data = payload;
      const directText = data.output || data.answer || data.message || data.result;
      if (typeof directText === "string" && directText.trim()) return directText;
      if (data.data && typeof data.data === "object") {
        const nested = data.data;
        const nestedText = nested.output || nested.answer || nested.message || nested.result;
        if (typeof nestedText === "string" && nestedText.trim()) return nestedText;
      }
      return JSON.stringify(payload);
    }
    return String(payload);
  };
  // Send `question` to the agent endpoint and stream/render the reply.
  const invoke = async (question) => {
    const content = question.trim();
    if (!content) return;
    // A new question interrupts any in-flight request and current speech.
    abort();
    checkSessionTimeout();
    resetState();
    tts.stop();
    isInvoking.value = true;
    bubble.open();
    let prevTextLength = 0;
    // Guards against executing the same tool call's host commands twice.
    const processedToolResults = /* @__PURE__ */ new Set();
    abortController = new AbortController();
    // NOTE(review): `hostCommads` (sic) is the bridge API's actual method name.
    const commands = await aiChatbotX.hostCommads();
    const historyToSend = conversationHistory.value.length > 0 ? [...conversationHistory.value] : void 0;
    try {
      const response = await fetch(options.endpoint.value, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          input: content,
          projectId: options.projectId || "",
          commands: commands.length > 0 ? commands : void 0,
          messages: historyToSend
        }),
        signal: abortController.signal
      });
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      const contentType = response.headers.get("content-type") || "";
      const isJsonResponse = contentType.includes("application/json");
      if (isJsonResponse) {
        // Non-streaming reply: render and speak it in one shot.
        const data = await response.json();
        const reply = parseAssistantText(data) || "已收到,但没有返回可展示的文本内容。";
        currentTextContent.value = reply;
        tts.speak(reply);
        appendToHistory("user", content);
        appendToHistory("assistant", reply);
        if (data.toolResults && Array.isArray(data.toolResults)) {
          for (const tr of data.toolResults) {
            const toolPart = {
              type: "tool-result",
              toolCallId: `invoke-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
              toolName: tr.toolName,
              args: tr.args,
              result: tr.result,
              state: "result"
            };
            currentToolParts.value = [...currentToolParts.value, toolPart];
            if (tr.toolName === "executeCommand") {
              // Deliberately not awaited; progress surfaces via executingTools.
              executeHostCommands(toolPart.toolCallId, tr.result);
            }
          }
        }
      } else {
        // Streaming reply: feed only the new text delta to TTS on each update.
        await parseDataStreamToMessage(response, (result) => {
          currentTextContent.value = result.textContent;
          if (result.textContent.length > prevTextLength) {
            const delta = result.textContent.slice(prevTextLength);
            prevTextLength = result.textContent.length;
            tts.feed(delta);
          }
          const toolParts = result.parts.filter(
            (p) => p.type === "tool-call" || p.type === "tool-result"
          );
          currentToolParts.value = toolParts;
          for (const part of toolParts) {
            if (part.toolName === "executeCommand" && !processedToolResults.has(part.toolCallId)) {
              if (part.type === "tool-call" && part.state === "call" && part.args) {
                processedToolResults.add(part.toolCallId);
                executeHostCommands(part.toolCallId, part.args);
              } else if (part.type === "tool-result" && part.result) {
                processedToolResults.add(part.toolCallId);
                executeHostCommands(part.toolCallId, part.result);
              }
            }
          }
          bubble.scrollToBottom();
        });
        // Flush any text still buffered in the TTS sentence queue.
        tts.flush();
        const assistantReply = currentTextContent.value.trim();
        appendToHistory("user", content);
        if (assistantReply) {
          appendToHistory("assistant", assistantReply);
        }
        if (!assistantReply && currentToolParts.value.length === 0) {
          currentTextContent.value = "已收到,但没有返回可展示的文本内容。";
        }
      }
    } catch (error) {
      // A deliberate abort is not an error condition (finally still runs).
      if (error.name === "AbortError") {
        return;
      }
      console.error("[AgentInvoke] invoke failed:", error);
      tts.stop();
      currentTextContent.value = "请求失败,请检查服务地址或稍后重试。";
    } finally {
      isInvoking.value = false;
      abortController = null;
      lastInteractionTime = Date.now();
      bubble.scheduleDismiss();
    }
  };
  // Cancel the in-flight request (if any) and stop speech output.
  const abort = () => {
    if (abortController) {
      abortController.abort();
      abortController = null;
    }
    tts.stop();
    isInvoking.value = false;
  };
  return {
    isInvoking,
    currentTextContent,
    currentToolParts,
    executingTools,
    hasAnyContent,
    conversationHistory,
    toolDisplayName,
    invoke,
    abort,
    resetState,
    clearHistory
  };
}
1917
+
1918
// Hoisted static vnode props emitted by the Vue SFC compiler for the
// voice-assistant template (shared across renders to avoid re-allocation).
const _hoisted_1 = { class: "agent-bubble" };
const _hoisted_2 = {
  key: 0,
  class: "tool-steps"
};
const _hoisted_3 = { class: "tool-step__icon" };
// Spinner / check / error SVG shells for the three tool-step states.
const _hoisted_4 = {
  key: 0,
  class: "tool-step__spinner",
  width: "14",
  height: "14",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_5 = {
  key: 1,
  width: "14",
  height: "14",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_6 = {
  key: 2,
  width: "14",
  height: "14",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_7 = { class: "tool-step__name" };
const _hoisted_8 = {
  key: 0,
  class: "tool-step__tag tool-step__tag--exec"
};
const _hoisted_9 = {
  key: 1,
  class: "thinking-dots"
};
const _hoisted_10 = {
  key: 2,
  class: "agent-text"
};
const _hoisted_11 = {
  key: 0,
  class: "status-pill"
};
const _hoisted_12 = { class: "fab-avatar-wrapper" };
// Dynamic-prop key list for the FAB avatar <img> (only `src` varies).
const _hoisted_13 = ["src"];
// Fixed theme applied as the root element's data-theme attribute.
const currentTheme = "dark";
1966
// voice-assistant SFC (compiled): floating voice-assistant button with a
// wake-word listener, speech transcription, agent invocation, TTS playback,
// and a response bubble showing streamed text plus tool-call progress.
// The returned render function is Vue-compiler output; cache slots
// (_cache[n]) hold static subtrees and the click handler.
const _sfc_main = /* @__PURE__ */ defineComponent({
  __name: "voice-assistant",
  props: {
    xLogo: {},
    xTitle: {},
    xSize: {},
    xTheme: {},
    wakeWords: {},
    modelPath: {},
    projectId: {},
    invokeUrl: {},
    voiceConfig: {},
    bubbleSize: {},
    bubbleDismissDelay: {}
  },
  setup(__props) {
    const props = __props;
    const aiChatbotX = injectStrict(AiChatbotXKey);
    // Prop-supplied voice config wins; otherwise ask the bridge, tolerating failure.
    const getVoiceConfig = () => {
      if (props.voiceConfig) return props.voiceConfig;
      try {
        return aiChatbotX.voiceConfig();
      } catch {
        return null;
      }
    };
    // NOTE(review): hard-coded localhost fallback endpoint — presumably a
    // development default; confirm before shipping without `invokeUrl`.
    const endpoint = computed(() => {
      return props.invokeUrl || "http://localhost:3001/agent/zyy55sw40nrl801056m0o/stream-invoke";
    });
    // Phrases spoken (randomly chosen) when the wake word fires.
    const wakeResponses = ["您好"];
    const tts = useTTS(getVoiceConfig);
    // Forward-declared bridge: useAgentInvoke needs bubble callbacks before
    // useBubble exists (it in turn needs agent state); rebound below.
    const bubbleBridge = {
      open: () => {
      },
      scheduleDismiss: () => {
      },
      scrollToBottom: () => {
      }
    };
    const agent = useAgentInvoke({
      endpoint,
      projectId: props.projectId,
      aiChatbotX,
      tts: {
        speak: tts.speak,
        feed: tts.feed,
        flush: tts.flush,
        stop: tts.stop
      },
      bubble: {
        open: () => bubbleBridge.open(),
        scheduleDismiss: () => bubbleBridge.scheduleDismiss(),
        scrollToBottom: () => bubbleBridge.scrollToBottom()
      }
    });
    const bubble = useBubble({
      dismissDelay: props.bubbleDismissDelay,
      isSpeaking: tts.isSpeaking,
      isInvoking: agent.isInvoking,
      bubbleSize: props.bubbleSize
    });
    // Close the circular dependency: route bridge calls to the real bubble.
    bubbleBridge.open = bubble.open;
    bubbleBridge.scheduleDismiss = bubble.scheduleDismiss;
    bubbleBridge.scrollToBottom = bubble.scrollToBottom;
    const { show: showBubble, style: bubbleStyle, stackRef: bubbleStackRef } = bubble;
    // Once speech playback drains and no request is running, hide the bubble.
    tts.setOnQueueEmpty(() => {
      if (!agent.isInvoking.value) {
        bubble.scheduleDismiss();
      }
    });
    // Hard-stop everything the assistant is currently doing.
    const interruptCurrentResponse = () => {
      agent.abort();
      agent.resetState();
      tts.stop();
      bubble.hide();
    };
    const voice = useVoiceRecognition({
      modelPath: props.modelPath,
      wakeWords: props.wakeWords,
      getVoiceConfig,
      onWake: () => {
        // Wake word barges in on any ongoing reply, then acknowledges aloud.
        interruptCurrentResponse();
        tts.warmUpAudio();
        const text = wakeResponses[Math.floor(Math.random() * wakeResponses.length)];
        tts.speak(text);
      },
      onTranscriptionDone: (text) => {
        agent.invoke(text);
      }
    });
    // warmUpAudio runs inside the user gesture so later playback is allowed.
    const toggleVoiceMode = async (targetState) => {
      tts.warmUpAudio();
      await voice.toggleVoiceMode(targetState);
    };
    const { voiceStatus, transcriptionText, wakeAnimating } = voice;
    const { isInvoking, currentTextContent, currentToolParts, executingTools, hasAnyContent, toolDisplayName } = agent;
    // Expose imperative voice controls to the host through the bridge.
    aiChatbotX?.registerVoiceMethods({
      start: () => toggleVoiceMode(true),
      stop: () => toggleVoiceMode(false),
      openDialog: async () => Promise.resolve(),
      closeDialog: async () => Promise.resolve(),
      toggleCollapse: async () => Promise.resolve()
    });
    onBeforeUnmount(async () => {
      bubble.destroy();
      agent.abort();
      tts.destroy();
      await voice.destroy();
    });
    return (_ctx, _cache) => {
      return openBlock(), createElementBlock("div", {
        class: "voice-assistant",
        "data-theme": currentTheme
      }, [
        createVNode(Transition, { name: "bubble-fade" }, {
          default: withCtx(() => [
            unref(showBubble) ? (openBlock(), createElementBlock("div", {
              key: 0,
              class: "bubble-stack",
              ref_key: "bubbleStackRef",
              ref: bubbleStackRef,
              style: normalizeStyle(unref(bubbleStyle))
            }, [
              createElementVNode("div", _hoisted_1, [
                unref(currentToolParts).length > 0 ? (openBlock(), createElementBlock("div", _hoisted_2, [
                  (openBlock(true), createElementBlock(Fragment, null, renderList(unref(currentToolParts), (toolPart) => {
                    return openBlock(), createElementBlock("div", {
                      key: toolPart.toolCallId,
                      class: normalizeClass(["tool-step", {
                        "tool-step--loading": toolPart.state === "partial-call" || toolPart.state === "call",
                        "tool-step--done": toolPart.state === "result",
                        "tool-step--error": toolPart.state === "error",
                        "tool-step--executing": unref(executingTools).has(toolPart.toolCallId)
                      }])
                    }, [
                      createElementVNode("span", _hoisted_3, [
                        toolPart.state === "partial-call" || toolPart.state === "call" ? (openBlock(), createElementBlock("svg", _hoisted_4, [..._cache[1] || (_cache[1] = [
                          createElementVNode("circle", {
                            cx: "12",
                            cy: "12",
                            r: "10",
                            stroke: "currentColor",
                            "stroke-width": "2.5",
                            "stroke-linecap": "round",
                            "stroke-dasharray": "31.4 31.4"
                          }, null, -1)
                        ])])) : toolPart.state === "result" ? (openBlock(), createElementBlock("svg", _hoisted_5, [..._cache[2] || (_cache[2] = [
                          createElementVNode("path", {
                            d: "M20 6L9 17l-5-5",
                            stroke: "currentColor",
                            "stroke-width": "2.5",
                            "stroke-linecap": "round",
                            "stroke-linejoin": "round"
                          }, null, -1)
                        ])])) : toolPart.state === "error" ? (openBlock(), createElementBlock("svg", _hoisted_6, [..._cache[3] || (_cache[3] = [
                          createElementVNode("circle", {
                            cx: "12",
                            cy: "12",
                            r: "10",
                            stroke: "currentColor",
                            "stroke-width": "2"
                          }, null, -1),
                          createElementVNode("path", {
                            d: "M15 9l-6 6M9 9l6 6",
                            stroke: "currentColor",
                            "stroke-width": "2",
                            "stroke-linecap": "round"
                          }, null, -1)
                        ])])) : createCommentVNode("", true)
                      ]),
                      createElementVNode("span", _hoisted_7, toDisplayString(unref(toolDisplayName)(toolPart.toolName)), 1),
                      unref(executingTools).has(toolPart.toolCallId) ? (openBlock(), createElementBlock("span", _hoisted_8, "命令执行中")) : createCommentVNode("", true)
                    ], 2);
                  }), 128))
                ])) : createCommentVNode("", true),
                unref(isInvoking) && !unref(hasAnyContent) ? (openBlock(), createElementBlock("div", _hoisted_9, [..._cache[4] || (_cache[4] = [
                  createElementVNode("span", null, null, -1),
                  createElementVNode("span", null, null, -1),
                  createElementVNode("span", null, null, -1)
                ])])) : createCommentVNode("", true),
                unref(currentTextContent) ? (openBlock(), createElementBlock("div", _hoisted_10, toDisplayString(unref(currentTextContent)), 1)) : createCommentVNode("", true)
              ])
            ], 4)) : createCommentVNode("", true)
          ]),
          _: 1
        }),
        createElementVNode("div", {
          class: "assistant-fab",
          onClick: _cache[0] || (_cache[0] = ($event) => toggleVoiceMode())
        }, [
          unref(transcriptionText) || unref(isInvoking) ? (openBlock(), createElementBlock("div", _hoisted_11, toDisplayString(unref(isInvoking) ? "正在思考中..." : unref(transcriptionText)), 1)) : createCommentVNode("", true),
          createElementVNode("div", _hoisted_12, [
            createElementVNode("img", {
              src: __props.xLogo ? __props.xLogo : "/sime.png",
              alt: "voice assistant",
              style: normalizeStyle({
                width: `${__props.xSize?.width || 88}px`
              })
            }, null, 12, _hoisted_13),
            createVNode(Transition, { name: "indicator-fade" }, {
              default: withCtx(() => [
                unref(voiceStatus) === "listening" ? (openBlock(), createElementBlock("div", {
                  key: 0,
                  class: normalizeClass(["listening-badge", { "wake-active": unref(wakeAnimating) }])
                }, [..._cache[5] || (_cache[5] = [
                  createElementVNode("div", { class: "listening-waves" }, [
                    createElementVNode("div", { class: "wave wave-1" }),
                    createElementVNode("div", { class: "wave wave-2" }),
                    createElementVNode("div", { class: "wave wave-3" })
                  ], -1),
                  createElementVNode("div", { class: "listening-icon" }, [
                    createElementVNode("svg", {
                      width: "22",
                      height: "22",
                      viewBox: "0 0 24 24",
                      fill: "none"
                    }, [
                      createElementVNode("path", {
                        d: "M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z",
                        fill: "currentColor"
                      }),
                      createElementVNode("path", {
                        d: "M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5",
                        stroke: "currentColor",
                        "stroke-width": "2",
                        "stroke-linecap": "round"
                      })
                    ])
                  ], -1)
                ])], 2)) : createCommentVNode("", true)
              ]),
              _: 1
            })
          ])
        ])
      ]);
    };
  }
});
995
2205
 
996
- const simeX = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-c01a4dba"]]);
2206
+ const voiceAssistant = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-9e420a26"]]);
997
2207
 
998
- export { _sfc_main$3 as AiChatbotProvider, simeX as AiChatbotX, AiChatbotXKey, clientCommandKey, injectStrict };
2208
+ export { _sfc_main$4 as AiChatbotProvider, voiceAssistant as AiChatbotVoiceAssistant, simeX as AiChatbotX, AiChatbotXKey, clientCommandKey, injectStrict };
999
2209
  //# sourceMappingURL=sime-x-vue.mjs.map