@siact/sime-x-vue 0.0.14 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { defineComponent, ref, reactive, computed, watch, onMounted, openBlock, createElementBlock, normalizeClass, createElementVNode, toDisplayString, withModifiers, withDirectives, vModelText, Fragment, renderList, createCommentVNode, unref, createVNode, Transition, withCtx, nextTick, inject, shallowRef, provide, renderSlot, onBeforeUnmount, normalizeStyle } from 'vue';
1
+ import { defineComponent, ref, reactive, computed, watch, onMounted, openBlock, createElementBlock, normalizeClass, createElementVNode, toDisplayString, withModifiers, withDirectives, vModelText, Fragment, renderList, createCommentVNode, unref, createVNode, Transition, withCtx, nextTick, inject, onBeforeUnmount, normalizeStyle, withKeys, shallowRef, provide, renderSlot } from 'vue';
2
2
  import { Chat } from '@ai-sdk/vue';
3
3
  import { DefaultChatTransport } from 'ai';
4
4
  import { SpeechSynthesizerStandalone, WakeWordDetectorStandalone, SpeechTranscriberStandalone } from 'web-voice-kit';
@@ -88,30 +88,30 @@ function buildHistoryMessages(messages, excludeId) {
88
88
  return history;
89
89
  }
90
90
 
91
- const _hoisted_1$1 = {
91
+ const _hoisted_1$2 = {
92
92
  key: 0,
93
93
  class: "ai-chat__welcome"
94
94
  };
95
- const _hoisted_2$1 = { class: "ai-chat__welcome-header" };
96
- const _hoisted_3$1 = { class: "ai-chat__welcome-title" };
97
- const _hoisted_4$1 = { class: "ai-chat__welcome-desc" };
98
- const _hoisted_5$1 = { class: "ai-chat__input-area" };
99
- const _hoisted_6$1 = { class: "ai-chat__input-wrapper" };
100
- const _hoisted_7$1 = ["disabled"];
101
- const _hoisted_8$1 = {
95
+ const _hoisted_2$2 = { class: "ai-chat__welcome-header" };
96
+ const _hoisted_3$2 = { class: "ai-chat__welcome-title" };
97
+ const _hoisted_4$2 = { class: "ai-chat__welcome-desc" };
98
+ const _hoisted_5$2 = { class: "ai-chat__input-area" };
99
+ const _hoisted_6$2 = { class: "ai-chat__input-wrapper" };
100
+ const _hoisted_7$2 = ["disabled"];
101
+ const _hoisted_8$2 = {
102
102
  key: 0,
103
103
  class: "ai-chat__suggestions"
104
104
  };
105
- const _hoisted_9$1 = ["onClick"];
106
- const _hoisted_10$1 = { class: "ai-chat__messages-inner" };
107
- const _hoisted_11$1 = { class: "ai-chat__message-content" };
108
- const _hoisted_12$1 = ["innerHTML"];
109
- const _hoisted_13$1 = {
105
+ const _hoisted_9$2 = ["onClick"];
106
+ const _hoisted_10$2 = { class: "ai-chat__messages-inner" };
107
+ const _hoisted_11$2 = { class: "ai-chat__message-content" };
108
+ const _hoisted_12$2 = ["innerHTML"];
109
+ const _hoisted_13$2 = {
110
110
  key: 1,
111
111
  class: "ai-chat__reasoning"
112
112
  };
113
- const _hoisted_14 = ["onClick"];
114
- const _hoisted_15 = {
113
+ const _hoisted_14$1 = ["onClick"];
114
+ const _hoisted_15$1 = {
115
115
  key: 0,
116
116
  class: "ai-chat__reasoning-streaming"
117
117
  };
@@ -185,7 +185,7 @@ const _hoisted_29 = {
185
185
  };
186
186
  const _hoisted_30 = { class: "ai-chat__input-wrapper" };
187
187
  const _hoisted_31 = ["disabled"];
188
- const _sfc_main$2 = /* @__PURE__ */ defineComponent({
188
+ const _sfc_main$3 = /* @__PURE__ */ defineComponent({
189
189
  __name: "ai-chat",
190
190
  props: {
191
191
  api: {},
@@ -383,17 +383,17 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
383
383
  return openBlock(), createElementBlock("div", {
384
384
  class: normalizeClass(["ai-chat", { "ai-chat--full-width": __props.fullWidth }])
385
385
  }, [
386
- isEmpty.value ? (openBlock(), createElementBlock("div", _hoisted_1$1, [
387
- createElementVNode("div", _hoisted_2$1, [
388
- createElementVNode("h1", _hoisted_3$1, toDisplayString(__props.welcomeTitle), 1),
389
- createElementVNode("p", _hoisted_4$1, toDisplayString(__props.welcomeDescription), 1)
386
+ isEmpty.value ? (openBlock(), createElementBlock("div", _hoisted_1$2, [
387
+ createElementVNode("div", _hoisted_2$2, [
388
+ createElementVNode("h1", _hoisted_3$2, toDisplayString(__props.welcomeTitle), 1),
389
+ createElementVNode("p", _hoisted_4$2, toDisplayString(__props.welcomeDescription), 1)
390
390
  ]),
391
- createElementVNode("div", _hoisted_5$1, [
391
+ createElementVNode("div", _hoisted_5$2, [
392
392
  createElementVNode("form", {
393
393
  class: "ai-chat__form",
394
394
  onSubmit: withModifiers(handleSubmit, ["prevent"])
395
395
  }, [
396
- createElementVNode("div", _hoisted_6$1, [
396
+ createElementVNode("div", _hoisted_6$2, [
397
397
  withDirectives(createElementVNode("textarea", {
398
398
  ref_key: "textareaRef",
399
399
  ref: textareaRef,
@@ -429,16 +429,16 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
429
429
  }),
430
430
  createElementVNode("polygon", { points: "22 2 15 22 11 13 2 9 22 2" })
431
431
  ], -1)
432
- ])], 8, _hoisted_7$1)
432
+ ])], 8, _hoisted_7$2)
433
433
  ])
434
434
  ], 32),
435
- __props.suggestions.length > 0 ? (openBlock(), createElementBlock("div", _hoisted_8$1, [
435
+ __props.suggestions.length > 0 ? (openBlock(), createElementBlock("div", _hoisted_8$2, [
436
436
  (openBlock(true), createElementBlock(Fragment, null, renderList(__props.suggestions, (suggestion) => {
437
437
  return openBlock(), createElementBlock("button", {
438
438
  key: suggestion,
439
439
  class: "ai-chat__suggestion",
440
440
  onClick: ($event) => handleSuggestionClick(suggestion)
441
- }, toDisplayString(suggestion), 9, _hoisted_9$1);
441
+ }, toDisplayString(suggestion), 9, _hoisted_9$2);
442
442
  }), 128))
443
443
  ])) : createCommentVNode("", true)
444
444
  ])
@@ -449,7 +449,7 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
449
449
  class: "ai-chat__messages",
450
450
  onScroll: handleScroll
451
451
  }, [
452
- createElementVNode("div", _hoisted_10$1, [
452
+ createElementVNode("div", _hoisted_10$2, [
453
453
  (openBlock(true), createElementBlock(Fragment, null, renderList(unref(chat).messages, (message) => {
454
454
  return openBlock(), createElementBlock("div", {
455
455
  key: message.id,
@@ -463,13 +463,13 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
463
463
  key: 0,
464
464
  class: normalizeClass(["ai-chat__message", `ai-chat__message--${message.role}`])
465
465
  }, [
466
- createElementVNode("div", _hoisted_11$1, [
466
+ createElementVNode("div", _hoisted_11$2, [
467
467
  createElementVNode("div", {
468
468
  class: "ai-chat__message-text",
469
469
  innerHTML: renderMarkdown(part.text)
470
- }, null, 8, _hoisted_12$1)
470
+ }, null, 8, _hoisted_12$2)
471
471
  ])
472
- ], 2)) : part.type === "reasoning" ? (openBlock(), createElementBlock("div", _hoisted_13$1, [
472
+ ], 2)) : part.type === "reasoning" ? (openBlock(), createElementBlock("div", _hoisted_13$2, [
473
473
  createElementVNode("button", {
474
474
  class: "ai-chat__reasoning-trigger",
475
475
  onClick: ($event) => toggleReasoning(message.id)
@@ -486,8 +486,8 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
486
486
  createElementVNode("polyline", { points: "9 18 15 12 9 6" }, null, -1)
487
487
  ])], 2)),
488
488
  _cache[4] || (_cache[4] = createElementVNode("span", null, "思考过程", -1)),
489
- isStreamingMessage(message.id) && partIndex === message.parts.length - 1 ? (openBlock(), createElementBlock("span", _hoisted_15)) : createCommentVNode("", true)
490
- ], 8, _hoisted_14),
489
+ isStreamingMessage(message.id) && partIndex === message.parts.length - 1 ? (openBlock(), createElementBlock("span", _hoisted_15$1)) : createCommentVNode("", true)
490
+ ], 8, _hoisted_14$1),
491
491
  reasoningOpen[message.id] ? (openBlock(), createElementBlock("div", _hoisted_16, toDisplayString(part.text), 1)) : createCommentVNode("", true)
492
492
  ])) : isToolPart(part) ? (openBlock(), createElementBlock("div", _hoisted_17, [
493
493
  createElementVNode("div", {
@@ -692,1213 +692,1457 @@ const _export_sfc = (sfc, props) => {
692
692
  return target;
693
693
  };
694
694
 
695
- const aiChat = /* @__PURE__ */ _export_sfc(_sfc_main$2, [["__scopeId", "data-v-958fd919"]]);
695
+ const aiChat = /* @__PURE__ */ _export_sfc(_sfc_main$3, [["__scopeId", "data-v-958fd919"]]);
696
696
 
697
- class CommandManager {
698
- commands = /* @__PURE__ */ new Map();
699
- debug;
700
- constructor(options = {}) {
701
- this.debug = options.debug ?? false;
702
- }
703
- registerCommand(command) {
704
- this.commands.set(command.name, command);
705
- this.log("注册命令", `${command.name}: ${command.description}`);
706
- }
707
- unregisterCommand(name) {
708
- const deleted = this.commands.delete(name);
709
- if (deleted) {
710
- this.log("命令已注销", name);
697
+ const DATA_STREAM_LINE_RE = /^[0-9a-f]:/;
698
+ function detectFormat(firstChunk) {
699
+ const trimmed = firstChunk.trimStart();
700
+ if (trimmed.startsWith("data:")) {
701
+ const firstLine = trimmed.split("\n")[0];
702
+ const payload = firstLine.slice(5).trim();
703
+ try {
704
+ const parsed = JSON.parse(payload);
705
+ if (parsed && typeof parsed.type === "string") {
706
+ return "ui-message-stream";
707
+ }
708
+ } catch {
711
709
  }
712
- }
713
- async executeCommand(command, args = []) {
714
- const commandDef = this.commands.get(command);
715
- if (!commandDef) {
716
- throw new Error(`命令 "${command}" 未找到`);
710
+ if (DATA_STREAM_LINE_RE.test(payload)) {
711
+ return "data-stream";
717
712
  }
718
- this.log("执行命令", command, args);
719
- return await commandDef.handler(...args);
713
+ return "ui-message-stream";
720
714
  }
721
- getCommands() {
722
- return Array.from(this.commands.values()).map((cmd) => ({
723
- name: cmd.name,
724
- description: cmd.description,
725
- parameters: cmd.parameters
726
- }));
715
+ if (DATA_STREAM_LINE_RE.test(trimmed)) {
716
+ return "data-stream";
727
717
  }
728
- hasCommand(name) {
729
- return this.commands.has(name);
718
+ return "plain-text";
719
+ }
720
+ function processUIMessageStreamEvent(payload, callbacks) {
721
+ const trimmed = payload.trim();
722
+ if (!trimmed || trimmed === "[DONE]") {
723
+ callbacks.onFinish?.({});
724
+ return;
730
725
  }
731
- clear() {
732
- this.commands.clear();
733
- this.log("", "所有命令已清空");
726
+ let parsed;
727
+ try {
728
+ parsed = JSON.parse(trimmed);
729
+ } catch {
730
+ console.warn("[DataStreamParser] failed to parse UI message stream event:", trimmed.slice(0, 100));
731
+ return;
734
732
  }
735
- log(prefix, msg, ...args) {
736
- (/* @__PURE__ */ new Date()).toLocaleTimeString([], {
737
- hour: "2-digit",
738
- minute: "2-digit",
739
- second: "2-digit"
740
- });
741
- console.log(
742
- `%c ${prefix}`,
743
- "background:#7c3aed;color:white;padding:2px 6px;border-radius:3px 0 0 3px;font-weight:bold;",
744
- `${msg}`
745
- );
746
- if (args.length > 0) {
747
- console.log(...args);
748
- }
733
+ const type = parsed?.type;
734
+ if (!type) return;
735
+ switch (type) {
736
+ case "text-delta":
737
+ if (typeof parsed.delta === "string") {
738
+ callbacks.onTextDelta?.(parsed.delta);
739
+ }
740
+ break;
741
+ case "tool-input-start":
742
+ callbacks.onToolCallStart?.(parsed.toolCallId, parsed.toolName);
743
+ break;
744
+ case "tool-input-delta":
745
+ callbacks.onToolCallDelta?.(parsed.toolCallId, parsed.inputTextDelta);
746
+ break;
747
+ case "tool-input-available":
748
+ callbacks.onToolCallComplete?.(parsed.toolCallId, parsed.toolName, parsed.input);
749
+ break;
750
+ case "tool-output-available":
751
+ callbacks.onToolResult?.(parsed.toolCallId, parsed.output);
752
+ break;
753
+ case "finish-step":
754
+ callbacks.onStepFinish?.(parsed);
755
+ break;
756
+ case "finish":
757
+ callbacks.onFinish?.(parsed);
758
+ break;
759
+ case "error":
760
+ case "tool-output-error":
761
+ callbacks.onError?.(parsed.errorText || parsed.error || "Unknown error", parsed);
762
+ break;
763
+ case "start":
764
+ case "text-start":
765
+ case "text-end":
766
+ case "start-step":
767
+ case "reasoning-start":
768
+ case "reasoning-delta":
769
+ case "reasoning-end":
770
+ case "source-url":
771
+ case "source-document":
772
+ case "file":
773
+ case "abort":
774
+ break;
775
+ default:
776
+ if (type.startsWith("data-")) ; else {
777
+ console.log("[DataStreamParser] unhandled UI message stream type:", type);
778
+ }
779
+ break;
749
780
  }
750
781
  }
751
-
752
- const AiChatbotXKey = Symbol("sime-x");
753
- function injectStrict(key, defaultValue, treatDefaultAsFactory) {
754
- let result;
755
- if (defaultValue === void 0) {
756
- result = inject(key);
757
- } else if (treatDefaultAsFactory === true) {
758
- result = inject(key, defaultValue, true);
759
- } else {
760
- result = inject(key, defaultValue, false);
782
+ function parseLegacyProtocolLine(line, callbacks) {
783
+ if (!line || !DATA_STREAM_LINE_RE.test(line)) return;
784
+ const code = line[0];
785
+ const rawValue = line.slice(2);
786
+ let value;
787
+ try {
788
+ value = JSON.parse(rawValue);
789
+ } catch {
790
+ value = rawValue;
761
791
  }
762
- if (!result) {
763
- throw new Error(`Could not resolve ${key.description}`);
792
+ switch (code) {
793
+ case "0":
794
+ callbacks.onTextDelta?.(value);
795
+ break;
796
+ case "9":
797
+ callbacks.onToolCallStart?.(value.toolCallId, value.toolName);
798
+ break;
799
+ case "b":
800
+ callbacks.onToolCallDelta?.(value.toolCallId, value.argsTextDelta);
801
+ break;
802
+ case "c":
803
+ callbacks.onToolCallComplete?.(value.toolCallId, value.toolName, value.args);
804
+ break;
805
+ case "a":
806
+ callbacks.onToolResult?.(value.toolCallId, value.result);
807
+ break;
808
+ case "e":
809
+ callbacks.onStepFinish?.(value);
810
+ break;
811
+ case "d":
812
+ callbacks.onFinish?.(value);
813
+ break;
814
+ case "3":
815
+ callbacks.onError?.(value);
816
+ break;
764
817
  }
765
- return result;
766
818
  }
767
-
768
- const _sfc_main$1 = /* @__PURE__ */ defineComponent({
769
- __name: "sime-provider",
770
- props: {
771
- project: {},
772
- description: {},
773
- debug: { type: Boolean },
774
- chatbotUrl: {},
775
- appId: {},
776
- appToken: {},
777
- agentId: {}
778
- },
779
- setup(__props) {
780
- const props = __props;
781
- const commandManager = shallowRef(new CommandManager({ debug: props.debug ?? false }));
782
- const startListeningRef = shallowRef(async () => {
783
- });
784
- const stopListeningRef = shallowRef(async () => {
785
- });
786
- const stopBroadcastRef = shallowRef(async () => {
787
- });
788
- provide(AiChatbotXKey, {
789
- chatbotUrl: () => props.chatbotUrl,
790
- appId: () => props.appId,
791
- appToken: () => props.appToken,
792
- agentId: () => props.agentId,
793
- startListening: () => startListeningRef.value(),
794
- stopListening: () => stopListeningRef.value(),
795
- stopBroadcast: () => stopBroadcastRef.value(),
796
- registerVoiceMethods: (methods) => {
797
- if (methods.stopBroadcast) stopBroadcastRef.value = methods.stopBroadcast;
798
- if (methods.start) startListeningRef.value = methods.start;
799
- if (methods.stop) stopListeningRef.value = methods.stop;
800
- },
801
- getCommads: async () => commandManager.value.getCommands(),
802
- registerCommand: (cmd) => {
803
- commandManager.value.registerCommand(cmd);
804
- },
805
- unregisterCommand: (name) => {
806
- commandManager.value.unregisterCommand(name);
807
- },
808
- async executeCommand(commandName, args = []) {
809
- return await commandManager.value.executeCommand(commandName, args);
810
- }
811
- });
812
- return (_ctx, _cache) => {
813
- return renderSlot(_ctx.$slots, "default");
814
- };
815
- }
816
- });
817
-
818
- function useTTS(getVoiceConfig) {
819
- const isSpeaking = ref(false);
820
- const hasPendingAudio = ref(false);
821
- let instance = null;
822
- let initPromise = null;
823
- let audioCtx = null;
824
- let sentenceBuffer = "";
825
- const sentenceDelimiters = /[。!?;\n.!?;]/;
826
- const stripMarkdown = (text) => text.replace(/```[\s\S]*?```/g, "").replace(/\|[^\n]*\|/g, "").replace(/#{1,6}\s*/g, "").replace(/\*\*(.*?)\*\*/g, "$1").replace(/\*(.*?)\*/g, "$1").replace(/`([^`]*)`/g, "$1").replace(/\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/[-*+]\s+/g, "").replace(/>\s+/g, "").replace(/\n{2,}/g, "。").replace(/\n/g, ",").trim();
827
- const warmUpAudio = () => {
828
- if (!audioCtx || audioCtx.state === "closed") {
829
- try {
830
- audioCtx = new AudioContext();
831
- } catch {
832
- return;
833
- }
819
+ async function readDataStream(response, callbacks) {
820
+ if (!response.body) return;
821
+ const reader = response.body.getReader();
822
+ const decoder = new TextDecoder();
823
+ let buffer = "";
824
+ let format = null;
825
+ while (true) {
826
+ const { value, done } = await reader.read();
827
+ if (done) break;
828
+ const chunk = decoder.decode(value, { stream: true });
829
+ buffer += chunk;
830
+ if (format === null && buffer.trim().length > 0) {
831
+ format = detectFormat(buffer);
832
+ console.log("[DataStreamParser] detected format:", format, "| first 200 chars:", buffer.slice(0, 200));
834
833
  }
835
- if (audioCtx.state === "suspended") {
836
- audioCtx.resume();
834
+ if (format === "plain-text") {
835
+ const text = buffer;
836
+ buffer = "";
837
+ if (text) callbacks.onTextDelta?.(text);
838
+ continue;
837
839
  }
838
- };
839
- let onQueueEmptyCb = null;
840
- const ensureInstance = async () => {
841
- if (instance) return instance;
842
- if (initPromise) return initPromise;
843
- const vc = getVoiceConfig();
844
- if (!vc || !vc.apiSecret) {
845
- console.warn("[TTS] 缺少 voiceConfig apiSecret,语音播报已禁用");
846
- return null;
847
- }
848
- initPromise = (async () => {
849
- try {
850
- const tts = new SpeechSynthesizerStandalone({
851
- appId: vc.appId,
852
- apiKey: vc.ttsApiKey || vc.apiKey,
853
- apiSecret: vc.apiSecret,
854
- websocketUrl: vc.ttsWebsocketUrl || "wss://tts-api.xfyun.cn/v2/tts",
855
- vcn: vc.ttsVcn || "xiaoyan",
856
- speed: vc.speed || 55,
857
- volume: vc.volume || 90,
858
- pitch: vc.pitch || 50,
859
- aue: "raw",
860
- auf: "audio/L16;rate=16000",
861
- tte: "UTF8",
862
- autoPlay: true
863
- });
864
- tts.onStart(() => {
865
- isSpeaking.value = true;
866
- });
867
- tts.onEnd(() => {
868
- });
869
- tts.onQueueEmpty(() => {
870
- isSpeaking.value = false;
871
- hasPendingAudio.value = false;
872
- onQueueEmptyCb?.();
873
- });
874
- tts.onError((err) => {
875
- console.error("[TTS] Error:", err);
876
- isSpeaking.value = false;
877
- });
878
- if (audioCtx && audioCtx.state === "running") {
879
- tts.audioContext = audioCtx;
880
- tts.gainNode = audioCtx.createGain();
881
- tts.gainNode.connect(audioCtx.destination);
840
+ if (format === "ui-message-stream") {
841
+ while (true) {
842
+ const eventEnd = buffer.indexOf("\n\n");
843
+ if (eventEnd === -1) break;
844
+ const eventBlock = buffer.slice(0, eventEnd);
845
+ buffer = buffer.slice(eventEnd + 2);
846
+ const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
847
+ for (const dataLine of dataLines) {
848
+ processUIMessageStreamEvent(dataLine, callbacks);
882
849
  }
883
- instance = tts;
884
- initPromise = null;
885
- return tts;
886
- } catch (err) {
887
- console.error("[TTS] 初始化失败:", err);
888
- initPromise = null;
889
- return null;
890
850
  }
891
- })();
892
- return initPromise;
893
- };
894
- const speak = async (text) => {
895
- const clean = stripMarkdown(text);
896
- if (!clean.trim()) return;
897
- hasPendingAudio.value = true;
898
- const tts = await ensureInstance();
899
- if (!tts) return;
900
- try {
901
- tts.speak(clean);
902
- } catch (err) {
903
- console.error("[TTS] speak 失败:", err);
904
- }
905
- };
906
- const feed = (delta) => {
907
- sentenceBuffer += delta;
908
- while (true) {
909
- const match = sentenceBuffer.match(sentenceDelimiters);
910
- if (!match || match.index === void 0) break;
911
- const sentence = sentenceBuffer.slice(0, match.index + 1).trim();
912
- sentenceBuffer = sentenceBuffer.slice(match.index + 1);
913
- if (sentence.length > 0) speak(sentence);
851
+ continue;
914
852
  }
915
- };
916
- const flush = () => {
917
- const remaining = sentenceBuffer.trim();
918
- sentenceBuffer = "";
919
- if (remaining.length > 0) speak(remaining);
920
- };
921
- const stop = () => {
922
- sentenceBuffer = "";
923
- isSpeaking.value = false;
924
- hasPendingAudio.value = false;
925
- if (instance) {
926
- try {
927
- instance.stop();
928
- } catch {
853
+ if (format === "data-stream") {
854
+ const isSSEWrapped = buffer.trimStart().startsWith("data:");
855
+ if (isSSEWrapped) {
856
+ while (true) {
857
+ const eventEnd = buffer.indexOf("\n\n");
858
+ if (eventEnd === -1) break;
859
+ const eventBlock = buffer.slice(0, eventEnd);
860
+ buffer = buffer.slice(eventEnd + 2);
861
+ const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
862
+ for (const dl of dataLines) {
863
+ const t = dl.trim();
864
+ if (!t || t === "[DONE]") {
865
+ if (t === "[DONE]") callbacks.onFinish?.({});
866
+ continue;
867
+ }
868
+ parseLegacyProtocolLine(t, callbacks);
869
+ }
870
+ }
871
+ } else {
872
+ while (true) {
873
+ const newlineIdx = buffer.indexOf("\n");
874
+ if (newlineIdx === -1) break;
875
+ const line = buffer.slice(0, newlineIdx).trim();
876
+ buffer = buffer.slice(newlineIdx + 1);
877
+ if (line) parseLegacyProtocolLine(line, callbacks);
878
+ }
929
879
  }
880
+ continue;
930
881
  }
931
- };
932
- const setOnQueueEmpty = (cb) => {
933
- onQueueEmptyCb = cb;
934
- };
935
- const destroy = () => {
936
- stop();
937
- if (instance) {
938
- try {
939
- instance.destroy();
940
- } catch {
882
+ }
883
+ const tail = decoder.decode();
884
+ if (tail) buffer += tail;
885
+ if (buffer.trim()) {
886
+ if (format === "plain-text") {
887
+ callbacks.onTextDelta?.(buffer);
888
+ } else if (format === "ui-message-stream") {
889
+ const dataLines = buffer.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
890
+ for (const dl of dataLines) {
891
+ processUIMessageStreamEvent(dl, callbacks);
941
892
  }
942
- instance = null;
893
+ } else if (format === "data-stream") {
894
+ parseLegacyProtocolLine(buffer.trim(), callbacks);
943
895
  }
944
- if (audioCtx) {
945
- try {
946
- audioCtx.close();
947
- } catch {
896
+ }
897
+ callbacks.onFinish?.({});
898
+ }
899
+ async function parseDataStreamToMessage(response, onUpdate) {
900
+ let textContent = "";
901
+ const parts = [];
902
+ const toolCalls = /* @__PURE__ */ new Map();
903
+ const ensureTextPart = () => {
904
+ for (let i = parts.length - 1; i >= 0; i--) {
905
+ if (parts[i].type === "text") {
906
+ return parts[i];
948
907
  }
949
- audioCtx = null;
950
908
  }
909
+ const textPart = { type: "text", text: "" };
910
+ parts.push(textPart);
911
+ return textPart;
951
912
  };
952
- return {
953
- isSpeaking,
954
- hasPendingAudio,
955
- warmUpAudio,
956
- speak,
957
- feed,
958
- flush,
959
- stop,
960
- destroy,
961
- setOnQueueEmpty
962
- };
963
- }
964
-
965
- function useBubble(options = {}) {
966
- const visible = ref(false);
967
- const fadingOut = ref(false);
968
- const stackRef = ref(null);
969
- let dismissTimer = null;
970
- const hasOpened = ref(false);
971
- const isTTSActive = () => !!(options.isSpeaking?.value || options.hasPendingAudio?.value);
972
- const isBusy = () => !!(options.isInvoking?.value || isTTSActive());
973
- const show = computed(() => {
974
- if (!hasOpened.value) return false;
975
- if (isTTSActive()) return true;
976
- return visible.value && !fadingOut.value;
977
- });
978
- const style = computed(() => ({
979
- width: options.bubbleSize?.width || void 0,
980
- maxHeight: options.bubbleSize?.maxHeight || void 0
981
- }));
982
- const open = () => {
983
- cancelDismiss();
984
- fadingOut.value = false;
985
- visible.value = true;
986
- hasOpened.value = true;
913
+ const findToolPartIndex = (toolCallId) => {
914
+ return parts.findIndex((p) => (p.type === "tool-call" || p.type === "tool-result") && p.toolCallId === toolCallId);
987
915
  };
988
- const cancelDismiss = () => {
989
- if (dismissTimer) {
990
- clearTimeout(dismissTimer);
991
- dismissTimer = null;
992
- }
916
+ const emitUpdate = () => {
917
+ onUpdate({ textContent, parts: [...parts], toolCalls: new Map(toolCalls) });
993
918
  };
994
- const scheduleDismiss = () => {
995
- cancelDismiss();
996
- if (isBusy()) return;
997
- const delay = options.dismissDelay ?? 4e3;
998
- dismissTimer = setTimeout(() => {
999
- if (isBusy()) return;
1000
- fadingOut.value = true;
1001
- setTimeout(() => {
1002
- if (isBusy()) {
1003
- fadingOut.value = false;
1004
- return;
919
+ await readDataStream(response, {
920
+ onTextDelta(text) {
921
+ textContent += text;
922
+ const textPart = ensureTextPart();
923
+ textPart.text = textContent;
924
+ emitUpdate();
925
+ },
926
+ onToolCallStart(toolCallId, toolName) {
927
+ const tracker = {
928
+ toolCallId,
929
+ toolName,
930
+ argsText: "",
931
+ args: void 0,
932
+ state: "partial-call"
933
+ };
934
+ toolCalls.set(toolCallId, tracker);
935
+ const part = {
936
+ type: "tool-call",
937
+ toolCallId,
938
+ toolName,
939
+ args: void 0,
940
+ state: "partial-call"
941
+ };
942
+ parts.push(part);
943
+ emitUpdate();
944
+ },
945
+ onToolCallDelta(toolCallId, argsTextDelta) {
946
+ const tracker = toolCalls.get(toolCallId);
947
+ if (tracker) {
948
+ tracker.argsText += argsTextDelta;
949
+ try {
950
+ tracker.args = JSON.parse(tracker.argsText);
951
+ } catch {
1005
952
  }
1006
- visible.value = false;
1007
- fadingOut.value = false;
1008
- hasOpened.value = false;
1009
- }, 400);
1010
- }, delay);
1011
- };
1012
- const watchTTSRef = (ttsRef) => {
1013
- watch(ttsRef, (active) => {
1014
- if (active && hasOpened.value) {
1015
- cancelDismiss();
1016
- if (fadingOut.value) fadingOut.value = false;
1017
- } else if (!active && hasOpened.value && !isBusy()) {
1018
- scheduleDismiss();
953
+ const idx = findToolPartIndex(toolCallId);
954
+ if (idx !== -1 && parts[idx].type === "tool-call") {
955
+ parts[idx].args = tracker.args;
956
+ }
957
+ emitUpdate();
1019
958
  }
1020
- });
1021
- };
1022
- if (options.isSpeaking) watchTTSRef(options.isSpeaking);
1023
- if (options.hasPendingAudio) watchTTSRef(options.hasPendingAudio);
1024
- const hide = () => {
1025
- cancelDismiss();
1026
- fadingOut.value = false;
1027
- visible.value = false;
1028
- hasOpened.value = false;
1029
- };
1030
- const scrollToBottom = () => {
1031
- nextTick(() => {
1032
- if (stackRef.value) {
1033
- stackRef.value.scrollTop = stackRef.value.scrollHeight;
959
+ },
960
+ onToolCallComplete(toolCallId, toolName, args) {
961
+ const tracker = toolCalls.get(toolCallId);
962
+ if (tracker) {
963
+ tracker.state = "call";
964
+ tracker.args = typeof args === "string" ? safeJsonParse(args) : args;
965
+ } else {
966
+ toolCalls.set(toolCallId, {
967
+ toolCallId,
968
+ toolName,
969
+ argsText: typeof args === "string" ? args : JSON.stringify(args),
970
+ args: typeof args === "string" ? safeJsonParse(args) : args,
971
+ state: "call"
972
+ });
1034
973
  }
1035
- });
1036
- };
1037
- const destroy = () => {
1038
- cancelDismiss();
1039
- };
1040
- return {
1041
- visible,
1042
- fadingOut,
1043
- show,
1044
- style,
1045
- stackRef,
1046
- open,
1047
- hide,
1048
- cancelDismiss,
1049
- scheduleDismiss,
1050
- scrollToBottom,
1051
- destroy
1052
- };
1053
- }
1054
-
1055
- const ensureMicrophonePermission = async () => {
1056
- if (typeof navigator === "undefined" || typeof window === "undefined") {
1057
- console.log("当前环境不支持麦克风访问");
1058
- return false;
1059
- }
1060
- if (!navigator.mediaDevices?.getUserMedia || !navigator.mediaDevices?.enumerateDevices) {
1061
- console.log("当前环境不支持麦克风访问");
1062
- return false;
1063
- }
1064
- try {
1065
- const devices = await navigator.mediaDevices.enumerateDevices();
1066
- const audioInputDevices = devices.filter((device) => device.kind === "audioinput");
1067
- if (audioInputDevices.length === 0) {
1068
- console.log("未检测到麦克风设备,请连接麦克风后重试。");
1069
- return false;
1070
- }
1071
- if ("permissions" in navigator && navigator.permissions?.query) {
1072
- try {
1073
- const status = await navigator.permissions.query({ name: "microphone" });
1074
- if (status.state === "denied") {
1075
- console.log("麦克风权限被禁用,请在浏览器设置中开启。");
1076
- return false;
1077
- }
1078
- } catch (e) {
1079
- console.warn("Permission query not supported:", e);
974
+ const idx = findToolPartIndex(toolCallId);
975
+ if (idx !== -1) {
976
+ parts[idx].state = "call";
977
+ parts[idx].toolName = toolName;
978
+ parts[idx].args = toolCalls.get(toolCallId).args;
979
+ } else {
980
+ parts.push({
981
+ type: "tool-call",
982
+ toolCallId,
983
+ toolName,
984
+ args: toolCalls.get(toolCallId).args,
985
+ state: "call"
986
+ });
1080
987
  }
1081
- }
1082
- let stream = null;
1083
- try {
1084
- stream = await navigator.mediaDevices.getUserMedia({
1085
- audio: {
1086
- echoCancellation: true,
1087
- noiseSuppression: true,
1088
- autoGainControl: true
1089
- }
1090
- });
1091
- const audioTracks = stream.getAudioTracks();
1092
- if (audioTracks.length === 0) {
1093
- console.log("无法获取麦克风音频轨道。");
1094
- return false;
988
+ emitUpdate();
989
+ },
990
+ onToolResult(toolCallId, result) {
991
+ const tracker = toolCalls.get(toolCallId);
992
+ if (tracker) {
993
+ tracker.result = result;
994
+ tracker.state = "result";
1095
995
  }
1096
- const activeTrack = audioTracks[0];
1097
- if (!activeTrack.enabled || activeTrack.readyState !== "live") {
1098
- console.log("麦克风设备不可用,请检查设备连接。");
1099
- return false;
996
+ const idx = findToolPartIndex(toolCallId);
997
+ if (idx !== -1) {
998
+ const existing = parts[idx];
999
+ const resultPart = {
1000
+ type: "tool-result",
1001
+ toolCallId,
1002
+ toolName: existing.toolName,
1003
+ args: existing.args,
1004
+ result,
1005
+ state: "result"
1006
+ };
1007
+ parts[idx] = resultPart;
1008
+ } else {
1009
+ parts.push({
1010
+ type: "tool-result",
1011
+ toolCallId,
1012
+ toolName: tracker?.toolName || "unknown",
1013
+ args: tracker?.args,
1014
+ result,
1015
+ state: "result"
1016
+ });
1100
1017
  }
1101
- return true;
1102
- } finally {
1103
- if (stream) {
1104
- stream.getTracks().forEach((track) => track.stop());
1018
+ emitUpdate();
1019
+ },
1020
+ onError(error, data) {
1021
+ const toolCallId = data?.toolCallId;
1022
+ if (toolCallId) {
1023
+ toolCalls.delete(toolCallId);
1024
+ const idx = findToolPartIndex(toolCallId);
1025
+ if (idx !== -1) {
1026
+ parts.splice(idx, 1);
1027
+ emitUpdate();
1028
+ }
1105
1029
  }
1030
+ console.error("[DataStreamParser] stream error:", error);
1031
+ },
1032
+ onStepFinish(_data) {
1033
+ emitUpdate();
1034
+ },
1035
+ onFinish(_data) {
1036
+ emitUpdate();
1106
1037
  }
1107
- } catch (error) {
1108
- console.error("Microphone permission check failed", error);
1109
- if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
1110
- console.log("未检测到麦克风设备,请连接麦克风后重试。");
1111
- } else if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
1112
- console.log("麦克风权限被拒绝,请在浏览器设置中允许访问。");
1113
- } else if (error.name === "NotReadableError" || error.name === "TrackStartError") {
1114
- console.log("麦克风被其他应用占用或无法访问。");
1115
- } else {
1116
- console.log("无法访问麦克风,请检查设备连接和浏览器权限。");
1117
- }
1118
- return false;
1038
+ });
1039
+ return { textContent, parts, toolCalls };
1040
+ }
1041
+ function safeJsonParse(str) {
1042
+ try {
1043
+ return JSON.parse(str);
1044
+ } catch {
1045
+ return str;
1119
1046
  }
1120
- };
1047
+ }
1121
1048
 
1122
- function useVoiceRecognition(options) {
1123
- const voiceStatus = ref("standby");
1124
- const isTranscribing = ref(false);
1125
- const isInitializing = ref(false);
1126
- const transcriptionText = ref("");
1127
- const wakeAnimating = ref(false);
1128
- let detector = null;
1129
- let transcriber = null;
1130
- const initTranscriber = () => {
1131
- if (transcriber) return;
1132
- const vc = options.getVoiceConfig();
1133
- if (!vc || !vc.appId || !vc.apiKey || !vc.websocketUrl) {
1134
- console.error("[VoiceRecognition] 缺少 voiceConfig,无法初始化转写器");
1135
- return;
1049
/**
 * Display labels for known agent tools, keyed by tool name.
 *
 * The table has a null prototype so that looking up an arbitrary tool name
 * (tool names arrive from the server) can never resolve to an inherited
 * `Object.prototype` member such as `constructor` or `toString`.
 */
const toolDisplayNames = {
  __proto__: null,
  generateReport: "生成报告",
  searchKnowledge: "知识库检索",
  resolveInstanceTargets: "解析实例目标",
  getHistoryMetrics: "历史数据查询",
  getRealtimeMetrics: "实时数据查询",
  queryBitableData: "多维表格查询",
  searchUser: "搜索用户",
  createBitableRecord: "创建表格记录",
  timeTool: "时间工具",
  loadSkill: "加载技能",
  executeCommand: "执行命令",
  dataAnalyzer: "数据分析",
  dataPredictor: "数据预测"
};
1064
/**
 * Composable that sends a question to an agent endpoint, exposes the reply
 * (plain JSON or a streamed message) as reactive state, feeds the text to a
 * TTS sink, and runs host commands requested through tool calls/results.
 *
 * @param {object} options
 * @param {string} options.endpoint - URL the question is POSTed to.
 * @param {string} [options.appToken] - Sent as a Bearer token.
 * @param {string} [options.projectId] - Forwarded in the request body.
 * @param {object} options.aiChatbotX - Host bridge; `getCommads()` and
 *   `executeCommand(name, args)` are the members used here.
 * @param {object} options.tts - Sink with `speak`, `feed`, `flush`, `stop`.
 * @param {object} options.bubble - UI hooks `open`, `scheduleDismiss`,
 *   `scrollToBottom`.
 * @param {number} [options.sessionTimeoutMs=120000] - Idle time after which
 *   the stored conversation history is dropped on the next invoke.
 * @param {number} [options.maxHistoryTurns=10] - History is capped at twice
 *   this many entries.
 * @returns {object} Reactive state (`isInvoking`, `currentTextContent`,
 *   `currentToolParts`, `executingTools`, `hasAnyContent`,
 *   `conversationHistory`) plus `toolDisplayName`, `invoke`, `abort`,
 *   `resetState` and `clearHistory`.
 */
function useAgentInvoke(options) {
  const { aiChatbotX, tts, bubble } = options;
  const sessionTimeoutMs = options.sessionTimeoutMs ?? 12e4;
  const maxHistoryTurns = options.maxHistoryTurns ?? 10;
  const isInvoking = ref(false);
  const currentTextContent = ref("");
  const currentToolParts = ref([]);
  const executingTools = ref(/* @__PURE__ */ new Set());
  const conversationHistory = ref([]);
  let lastInteractionTime = 0;
  let abortController = null;
  // Bumped on every invoke() so an older, superseded invocation can tell
  // that it must no longer touch shared state when it finally settles.
  let invocationSeq = 0;

  const checkSessionTimeout = () => {
    if (lastInteractionTime > 0 && Date.now() - lastInteractionTime > sessionTimeoutMs) {
      conversationHistory.value = [];
    }
  };
  const appendToHistory = (role, content) => {
    conversationHistory.value.push({ role, content });
    const maxLen = maxHistoryTurns * 2;
    if (conversationHistory.value.length > maxLen) {
      conversationHistory.value = conversationHistory.value.slice(-maxLen);
    }
  };
  const clearHistory = () => {
    conversationHistory.value = [];
  };
  const hasAnyContent = computed(() => {
    return !!(currentTextContent.value || currentToolParts.value.length > 0);
  });
  // Own-property check: tool names come from the server, so an inherited
  // key such as "constructor" must fall back to the raw name.
  const toolDisplayName = (name) => Object.hasOwn(toolDisplayNames, name) && toolDisplayNames[name] || name;
  const resetState = () => {
    currentTextContent.value = "";
    currentToolParts.value = [];
    executingTools.value = /* @__PURE__ */ new Set();
  };

  // Pulls `{ commands: [{ name, args }] }` out of a payload, dropping
  // entries without a non-blank string name.
  const extractExecutableCommands = (payload) => {
    if (!payload || typeof payload !== "object") return [];
    const commands = payload.commands;
    if (!Array.isArray(commands) || commands.length === 0) return [];
    return commands.filter((cmd) => cmd && typeof cmd === "object" && typeof cmd.name === "string" && cmd.name.trim()).map((cmd) => ({
      name: cmd.name,
      args: Array.isArray(cmd.args) ? cmd.args : []
    }));
  };
  const buildCommandDefinitionMap = (commands) => {
    return new Map(commands.map((command) => [command.name, command]));
  };
  // Treats the tool itself as a command: array payloads are used as
  // positional args, object payloads are mapped by declared parameter order.
  const toExecutableCommand = (toolName, payload, commandDefinitions) => {
    const commandDefinition = commandDefinitions.get(toolName);
    if (!commandDefinition) {
      return null;
    }
    if (Array.isArray(payload)) {
      return { name: toolName, args: payload };
    }
    if (!payload || typeof payload !== "object") {
      return { name: toolName, args: [] };
    }
    const parameters = commandDefinition.parameters || [];
    return {
      name: toolName,
      args: parameters.map((parameter) => payload[parameter.name])
    };
  };
  const resolveExecutableCommands = (toolName, payload, commandDefinitions) => {
    const extractedCommands = extractExecutableCommands(payload);
    if (extractedCommands.length > 0) {
      return extractedCommands;
    }
    const directCommand = toExecutableCommand(toolName, payload, commandDefinitions);
    return directCommand ? [directCommand] : [];
  };
  // Runs the resolved commands sequentially. Resolves to false when nothing
  // was runnable; a failing command is logged and does not stop the rest.
  const executeHostCommands = async (toolCallId, toolName, payload, commandDefinitions) => {
    const commands = resolveExecutableCommands(toolName, payload, commandDefinitions);
    if (commands.length === 0) return false;
    try {
      executingTools.value = /* @__PURE__ */ new Set([...executingTools.value, toolCallId]);
      for (const cmd of commands) {
        try {
          await aiChatbotX.executeCommand(cmd.name, cmd.args);
        } catch (cmdErr) {
          console.error(`[AgentInvoke] 执行命令 ${cmd.name} 失败:`, cmdErr);
        }
      }
      return true;
    } finally {
      const next = new Set(executingTools.value);
      next.delete(toolCallId);
      executingTools.value = next;
    }
  };
  // Extracts displayable text from a JSON reply: the first non-blank string
  // among output/answer/message/result (top level, then `data`), otherwise
  // the serialized payload.
  const parseAssistantText = (payload) => {
    if (!payload) return "";
    if (typeof payload === "string") return payload;
    if (typeof payload === "object") {
      const data = payload;
      const directText = data.output || data.answer || data.message || data.result;
      if (typeof directText === "string" && directText.trim()) return directText;
      if (data.data && typeof data.data === "object") {
        const nested = data.data;
        const nestedText = nested.output || nested.answer || nested.message || nested.result;
        if (typeof nestedText === "string" && nestedText.trim()) return nestedText;
      }
      return JSON.stringify(payload);
    }
    return String(payload);
  };

  const invoke = async (question) => {
    const content = question.trim();
    if (!content) return;
    abort();
    const invocationId = ++invocationSeq;
    const isCurrent = () => invocationId === invocationSeq;
    checkSessionTimeout();
    resetState();
    tts.stop();
    isInvoking.value = true;
    bubble.open();
    let prevTextLength = 0;
    const processedToolResults = /* @__PURE__ */ new Set();
    const processingToolResults = /* @__PURE__ */ new Set();
    const controller = new AbortController();
    abortController = controller;

    // Starts host-command execution for each streamed tool part that maps
    // to a registered command and has not been handled yet.
    const dispatchToolParts = (toolParts, commandDefinitions) => {
      for (const part of toolParts) {
        const id = part.toolCallId;
        if (!commandDefinitions.has(part.toolName) || processedToolResults.has(id) || processingToolResults.has(id)) {
          continue;
        }
        let payload;
        if (part.type === "tool-call" && part.state === "call" && part.args) {
          payload = part.args;
        } else if (part.type === "tool-result" && part.result) {
          payload = part.result;
        } else {
          continue;
        }
        processingToolResults.add(id);
        // Fire-and-forget: executeHostCommands handles command errors itself.
        void executeHostCommands(id, part.toolName, payload, commandDefinitions).then((executed) => {
          if (executed) {
            processedToolResults.add(id);
          }
          processingToolResults.delete(id);
        });
      }
    };

    try {
      // Inside the try so a failing command lookup still resets isInvoking.
      const commands = await aiChatbotX.getCommads();
      const commandDefinitions = buildCommandDefinitionMap(commands);
      // NOTE: conversationHistory is maintained locally but is not sent
      // with the request.
      const response = await fetch(options.endpoint, {
        method: "POST",
        headers: { "Content-Type": "application/json", Authorization: `Bearer ${options.appToken || ""}` },
        body: JSON.stringify({
          input: content,
          projectId: options.projectId || "",
          commands: commands.length > 0 ? commands : void 0
        }),
        signal: controller.signal
      });
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      const contentType = response.headers.get("content-type") || "";
      const isJsonResponse = contentType.includes("application/json");
      if (isJsonResponse) {
        const data = await response.json();
        const reply = parseAssistantText(data) || "已收到,但没有返回可展示的文本内容。";
        currentTextContent.value = reply;
        tts.speak(reply);
        appendToHistory("user", content);
        appendToHistory("assistant", reply);
        // `data` may legitimately be null (a JSON `null` body).
        const toolResults = data?.toolResults;
        if (Array.isArray(toolResults)) {
          for (const tr of toolResults) {
            const toolPart = {
              type: "tool-result",
              toolCallId: `invoke-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
              toolName: tr.toolName,
              args: tr.args,
              result: tr.result,
              state: "result"
            };
            currentToolParts.value = [...currentToolParts.value, toolPart];
            if (commandDefinitions.has(tr.toolName)) {
              void executeHostCommands(toolPart.toolCallId, tr.toolName, tr.result, commandDefinitions);
            }
          }
        }
      } else {
        await parseDataStreamToMessage(response, (result) => {
          // A newer invoke() owns the shared state now; ignore late updates.
          if (!isCurrent()) return;
          currentTextContent.value = result.textContent;
          if (result.textContent.length > prevTextLength) {
            const delta = result.textContent.slice(prevTextLength);
            prevTextLength = result.textContent.length;
            tts.feed(delta);
          }
          const toolParts = result.parts.filter(
            (p) => p.type === "tool-call" || p.type === "tool-result"
          );
          currentToolParts.value = toolParts;
          dispatchToolParts(toolParts, commandDefinitions);
          bubble.scrollToBottom();
        });
        tts.flush();
        const assistantReply = currentTextContent.value.trim();
        appendToHistory("user", content);
        if (assistantReply) {
          appendToHistory("assistant", assistantReply);
        }
        if (!assistantReply && currentToolParts.value.length === 0) {
          currentTextContent.value = "已收到,但没有返回可展示的文本内容。";
        }
      }
    } catch (error) {
      if (error?.name === "AbortError") {
        return;
      }
      console.error("[AgentInvoke] invoke failed:", error);
      // Do not overwrite the state of a newer invocation.
      if (!isCurrent()) return;
      tts.stop();
      currentTextContent.value = "请求失败,请检查服务地址或稍后重试。";
    } finally {
      // Only the latest invocation may reset shared state; a superseded one
      // would otherwise clear isInvoking/abortController of its successor.
      if (isCurrent()) {
        isInvoking.value = false;
        abortController = null;
        lastInteractionTime = Date.now();
        bubble.scheduleDismiss();
      }
    }
  };
  // Cancels the in-flight request (if any) and silences TTS.
  const abort = () => {
    if (abortController) {
      abortController.abort();
      abortController = null;
    }
    tts.stop();
    isInvoking.value = false;
  };
  return {
    isInvoking,
    currentTextContent,
    currentToolParts,
    executingTools,
    hasAnyContent,
    conversationHistory,
    toolDisplayName,
    invoke,
    abort,
    resetState,
    clearHistory
  };
}
1317
+
1318
/**
 * Composable controlling the visibility of the reply bubble: opening it,
 * auto-dismissing it after a delay with a short fade-out phase, and keeping
 * it on screen while an invocation or TTS playback is active.
 *
 * @param {object} [options]
 * @param {number} [options.dismissDelay=4000] - Delay before the fade starts.
 * @param {object} [options.isInvoking] - Ref; truthy value blocks dismissal.
 * @param {object} [options.isSpeaking] - Ref; truthy value blocks dismissal.
 * @param {object} [options.hasPendingAudio] - Ref; truthy value blocks dismissal.
 * @param {object} [options.bubbleSize] - `{ width, maxHeight }` style values.
 * @returns {object} `visible`, `fadingOut`, `show`, `style`, `stackRef` and
 *   the `open`, `hide`, `cancelDismiss`, `scheduleDismiss`,
 *   `scrollToBottom`, `destroy` functions.
 */
function useBubble(options = {}) {
  const FADE_OUT_MS = 400;
  const visible = ref(false);
  const fadingOut = ref(false);
  const stackRef = ref(null);
  const hasOpened = ref(false);
  let dismissTimer = null;
  // Tracked separately so the fade-out phase can be cancelled as well.
  let fadeTimer = null;

  const isTTSActive = () => !!(options.isSpeaking?.value || options.hasPendingAudio?.value);
  const isBusy = () => !!(options.isInvoking?.value || isTTSActive());
  const show = computed(() => {
    if (!hasOpened.value) return false;
    if (isTTSActive()) return true;
    return visible.value && !fadingOut.value;
  });
  const style = computed(() => ({
    width: options.bubbleSize?.width || void 0,
    maxHeight: options.bubbleSize?.maxHeight || void 0
  }));

  const clearDismissTimer = () => {
    if (dismissTimer) {
      clearTimeout(dismissTimer);
      dismissTimer = null;
    }
  };
  // Cancels a pending dismissal, including a fade-out already under way.
  const cancelDismiss = () => {
    clearDismissTimer();
    if (fadeTimer) {
      clearTimeout(fadeTimer);
      fadeTimer = null;
      // The fade was interrupted, so the bubble must not stay half-hidden.
      fadingOut.value = false;
    }
  };
  const open = () => {
    cancelDismiss();
    fadingOut.value = false;
    visible.value = true;
    hasOpened.value = true;
  };
  const scheduleDismiss = () => {
    // A fade-out in progress already is the final phase of a dismissal;
    // let it finish instead of restarting the whole delay.
    if (fadeTimer) return;
    clearDismissTimer();
    if (isBusy()) return;
    const delay = options.dismissDelay ?? 4e3;
    dismissTimer = setTimeout(() => {
      dismissTimer = null;
      if (isBusy()) return;
      fadingOut.value = true;
      fadeTimer = setTimeout(() => {
        fadeTimer = null;
        if (isBusy()) {
          fadingOut.value = false;
          return;
        }
        visible.value = false;
        fadingOut.value = false;
        hasOpened.value = false;
      }, FADE_OUT_MS);
    }, delay);
  };
  // Keeps the bubble up while a TTS-related ref is active and re-arms the
  // dismissal once it goes inactive and nothing else is busy.
  const watchTTSRef = (ttsRef) => {
    watch(ttsRef, (active) => {
      if (active && hasOpened.value) {
        cancelDismiss();
        if (fadingOut.value) fadingOut.value = false;
      } else if (!active && hasOpened.value && !isBusy()) {
        scheduleDismiss();
      }
    });
  };
  if (options.isSpeaking) watchTTSRef(options.isSpeaking);
  if (options.hasPendingAudio) watchTTSRef(options.hasPendingAudio);

  const hide = () => {
    cancelDismiss();
    fadingOut.value = false;
    visible.value = false;
    hasOpened.value = false;
  };
  const scrollToBottom = () => {
    nextTick(() => {
      if (stackRef.value) {
        stackRef.value.scrollTop = stackRef.value.scrollHeight;
      }
    });
  };
  // Clears both timers so nothing fires after the owner is torn down.
  const destroy = () => {
    cancelDismiss();
  };
  return {
    visible,
    fadingOut,
    show,
    style,
    stackRef,
    open,
    hide,
    cancelDismiss,
    scheduleDismiss,
    scrollToBottom,
    destroy
  };
}
1283
1407
 
1284
- const DATA_STREAM_LINE_RE = /^[0-9a-f]:/;
1285
- function detectFormat(firstChunk) {
1286
- const trimmed = firstChunk.trimStart();
1287
- if (trimmed.startsWith("data:")) {
1288
- const firstLine = trimmed.split("\n")[0];
1289
- const payload = firstLine.slice(5).trim();
1290
- try {
1291
- const parsed = JSON.parse(payload);
1292
- if (parsed && typeof parsed.type === "string") {
1293
- return "ui-message-stream";
1294
- }
1295
- } catch {
1296
- }
1297
- if (DATA_STREAM_LINE_RE.test(payload)) {
1298
- return "data-stream";
1299
- }
1300
- return "ui-message-stream";
1408
// Injection key under which the host bridge object is provided.
const AiChatbotXKey = Symbol("sime-x");

/**
 * `inject()` wrapper that throws instead of returning a missing value.
 *
 * @param {symbol|string} key - Injection key.
 * @param {*} [defaultValue] - Fallback forwarded to `inject` when given.
 * @param {boolean} [treatDefaultAsFactory] - When exactly `true`, the
 *   fallback is passed to `inject` as a factory.
 * @returns {*} The resolved value.
 * @throws {Error} When the resolved value is falsy. Note that this also
 *   rejects provided values such as `0`, `""` or `false`.
 */
function injectStrict(key, defaultValue, treatDefaultAsFactory) {
  const result = defaultValue === void 0
    ? inject(key)
    : inject(key, defaultValue, treatDefaultAsFactory === true);
  if (!result) {
    // Injection keys may be plain strings, which have no `description`.
    const label = typeof key === "symbol" ? key.description ?? key.toString() : String(key);
    throw new Error(`Could not resolve ${label}`);
  }
  return result;
}
1307
- function processUIMessageStreamEvent(payload, callbacks) {
1308
- const trimmed = payload.trim();
1309
- if (!trimmed || trimmed === "[DONE]") {
1310
- callbacks.onFinish?.({});
1311
- return;
1312
- }
1313
- let parsed;
1314
- try {
1315
- parsed = JSON.parse(trimmed);
1316
- } catch {
1317
- console.warn("[DataStreamParser] failed to parse UI message stream event:", trimmed.slice(0, 100));
1318
- return;
1319
- }
1320
- const type = parsed?.type;
1321
- if (!type) return;
1322
- switch (type) {
1323
- case "text-delta":
1324
- if (typeof parsed.delta === "string") {
1325
- callbacks.onTextDelta?.(parsed.delta);
1423
+
1424
// Static props objects hoisted out of the command-test render function.
// The inline SVG icons all share the same 24x24 viewBox and differ only in
// their v-if branch key, pixel size and optional class.
const makeSvgIconProps = (key, size, className) => {
  const dimension = String(size);
  const shared = { width: dimension, height: dimension, viewBox: "0 0 24 24", fill: "none" };
  return className === void 0 ? { key, ...shared } : { key, class: className, ...shared };
};
const _hoisted_1$1 = { class: "agent-bubble" };
const _hoisted_2$1 = { key: 0, class: "tool-steps" };
const _hoisted_3$1 = { class: "tool-step__icon" };
const _hoisted_4$1 = makeSvgIconProps(0, 14, "tool-step__spinner");
const _hoisted_5$1 = makeSvgIconProps(1, 14);
const _hoisted_6$1 = makeSvgIconProps(2, 14);
const _hoisted_7$1 = { class: "tool-step__name" };
const _hoisted_8$1 = { key: 0, class: "tool-step__tag tool-step__tag--exec" };
const _hoisted_9$1 = { key: 1, class: "thinking-dots" };
const _hoisted_10$1 = { key: 2, class: "agent-text" };
const _hoisted_11$1 = { class: "input-bar" };
const _hoisted_12$1 = ["disabled"];
const _hoisted_13$1 = ["disabled"];
const _hoisted_14 = makeSvgIconProps(0, 18, "btn-spinner");
const _hoisted_15 = makeSvgIconProps(1, 18);
// Theme value written to the component root's data-theme attribute.
const currentTheme$1 = "dark";
1484
/**
 * "command-test" component: a text input that sends the typed instruction to
 * the agent's stream-invoke endpoint and shows tool steps plus the reply in a
 * bubble. TTS is stubbed out with no-op handlers.
 *
 * Props: `agentId`, `projectId`, `bubbleSize`, `bubbleDismissDelay`.
 */
const _sfc_main$2 = /* @__PURE__ */ defineComponent({
  __name: "command-test",
  props: {
    agentId: {},
    projectId: {},
    bubbleSize: {},
    bubbleDismissDelay: {}
  },
  setup(__props) {
    const props = __props;
    const aiChatbotX = injectStrict(AiChatbotXKey);
    const inputText = ref("");
    const noopTts = {
      speak: (_text) => {
      },
      feed: (_delta) => {
      },
      flush: () => {
      },
      stop: () => {
      }
    };
    // useAgentInvoke needs bubble hooks before useBubble exists (useBubble in
    // turn needs agent.isInvoking), so the hooks go through this late-bound
    // bridge that is filled in below.
    const bubbleBridge = {
      open: () => {
      },
      scheduleDismiss: () => {
      },
      scrollToBottom: () => {
      }
    };
    // Computed once at setup; later changes to props.agentId are not picked up.
    const endpoint = `/sime/proxy/organizations/${aiChatbotX.organizationId()}/agents/${props.agentId}/stream-invoke`;
    const agent = useAgentInvoke({
      endpoint,
      appToken: aiChatbotX.appToken(),
      projectId: props.projectId,
      aiChatbotX,
      tts: noopTts,
      bubble: {
        open: () => bubbleBridge.open(),
        scheduleDismiss: () => bubbleBridge.scheduleDismiss(),
        scrollToBottom: () => bubbleBridge.scrollToBottom()
      }
    });
    const bubble = useBubble({
      dismissDelay: props.bubbleDismissDelay ?? 8e3,
      isInvoking: agent.isInvoking,
      bubbleSize: props.bubbleSize
    });
    bubbleBridge.open = bubble.open;
    bubbleBridge.scheduleDismiss = bubble.scheduleDismiss;
    bubbleBridge.scrollToBottom = bubble.scrollToBottom;
    const { show: showBubble, style: bubbleStyle, stackRef: bubbleStackRef } = bubble;
    // Bound to both the Enter keydown and the button click.
    const handleSubmit = (event) => {
      // Enter pressed while an IME composition is active only confirms the
      // candidate text; it must not submit the half-typed instruction.
      if (event?.isComposing) return;
      const text = inputText.value.trim();
      if (!text || agent.isInvoking.value) return;
      inputText.value = "";
      agent.invoke(text);
    };
    const { isInvoking, currentTextContent, currentToolParts, executingTools, hasAnyContent, toolDisplayName } = agent;
    onBeforeUnmount(() => {
      bubble.destroy();
      agent.abort();
    });
    return (_ctx, _cache) => {
      return openBlock(), createElementBlock("div", {
        class: "command-test",
        "data-theme": currentTheme$1
      }, [
        createVNode(Transition, { name: "bubble-fade" }, {
          default: withCtx(() => [
            unref(showBubble) ? (openBlock(), createElementBlock("div", {
              key: 0,
              class: "bubble-stack",
              ref_key: "bubbleStackRef",
              ref: bubbleStackRef,
              style: normalizeStyle(unref(bubbleStyle))
            }, [
              createElementVNode("div", _hoisted_1$1, [
                unref(currentToolParts).length > 0 ? (openBlock(), createElementBlock("div", _hoisted_2$1, [
                  (openBlock(true), createElementBlock(Fragment, null, renderList(unref(currentToolParts), (toolPart) => {
                    return openBlock(), createElementBlock("div", {
                      key: toolPart.toolCallId,
                      class: normalizeClass(["tool-step", {
                        "tool-step--loading": toolPart.state === "partial-call" || toolPart.state === "call",
                        "tool-step--done": toolPart.state === "result",
                        "tool-step--error": toolPart.state === "error",
                        "tool-step--executing": unref(executingTools).has(toolPart.toolCallId)
                      }])
                    }, [
                      createElementVNode("span", _hoisted_3$1, [
                        toolPart.state === "partial-call" || toolPart.state === "call" ? (openBlock(), createElementBlock("svg", _hoisted_4$1, [..._cache[1] || (_cache[1] = [
                          createElementVNode("circle", {
                            cx: "12",
                            cy: "12",
                            r: "10",
                            stroke: "currentColor",
                            "stroke-width": "2.5",
                            "stroke-linecap": "round",
                            "stroke-dasharray": "31.4 31.4"
                          }, null, -1)
                        ])])) : toolPart.state === "result" ? (openBlock(), createElementBlock("svg", _hoisted_5$1, [..._cache[2] || (_cache[2] = [
                          createElementVNode("path", {
                            d: "M20 6L9 17l-5-5",
                            stroke: "currentColor",
                            "stroke-width": "2.5",
                            "stroke-linecap": "round",
                            "stroke-linejoin": "round"
                          }, null, -1)
                        ])])) : toolPart.state === "error" ? (openBlock(), createElementBlock("svg", _hoisted_6$1, [..._cache[3] || (_cache[3] = [
                          createElementVNode("circle", {
                            cx: "12",
                            cy: "12",
                            r: "10",
                            stroke: "currentColor",
                            "stroke-width": "2"
                          }, null, -1),
                          createElementVNode("path", {
                            d: "M15 9l-6 6M9 9l6 6",
                            stroke: "currentColor",
                            "stroke-width": "2",
                            "stroke-linecap": "round"
                          }, null, -1)
                        ])])) : createCommentVNode("", true)
                      ]),
                      createElementVNode("span", _hoisted_7$1, toDisplayString(unref(toolDisplayName)(toolPart.toolName)), 1),
                      unref(executingTools).has(toolPart.toolCallId) ? (openBlock(), createElementBlock("span", _hoisted_8$1, "命令执行中")) : createCommentVNode("", true)
                    ], 2);
                  }), 128))
                ])) : createCommentVNode("", true),
                unref(isInvoking) && !unref(hasAnyContent) ? (openBlock(), createElementBlock("div", _hoisted_9$1, [..._cache[4] || (_cache[4] = [
                  createElementVNode("span", null, null, -1),
                  createElementVNode("span", null, null, -1),
                  createElementVNode("span", null, null, -1)
                ])])) : createCommentVNode("", true),
                unref(currentTextContent) ? (openBlock(), createElementBlock("div", _hoisted_10$1, toDisplayString(unref(currentTextContent)), 1)) : createCommentVNode("", true)
              ])
            ], 4)) : createCommentVNode("", true)
          ]),
          _: 1
        }),
        createElementVNode("div", _hoisted_11$1, [
          withDirectives(createElementVNode("input", {
            "onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => inputText.value = $event),
            type: "text",
            class: "input-field",
            placeholder: "输入指令...",
            disabled: unref(isInvoking),
            onKeydown: withKeys(handleSubmit, ["enter"])
          }, null, 40, _hoisted_12$1), [
            [vModelText, inputText.value]
          ]),
          createElementVNode("button", {
            class: "submit-btn",
            disabled: unref(isInvoking) || !inputText.value.trim(),
            onClick: handleSubmit
          }, [
            unref(isInvoking) ? (openBlock(), createElementBlock("svg", _hoisted_14, [..._cache[5] || (_cache[5] = [
              createElementVNode("circle", {
                cx: "12",
                cy: "12",
                r: "10",
                stroke: "currentColor",
                "stroke-width": "2.5",
                "stroke-linecap": "round",
                "stroke-dasharray": "31.4 31.4"
              }, null, -1)
            ])])) : (openBlock(), createElementBlock("svg", _hoisted_15, [..._cache[6] || (_cache[6] = [
              createElementVNode("path", {
                d: "M22 2L11 13",
                stroke: "currentColor",
                "stroke-width": "2",
                "stroke-linecap": "round",
                "stroke-linejoin": "round"
              }, null, -1),
              createElementVNode("path", {
                d: "M22 2L15 22l-4-9-9-4 20-7z",
                stroke: "currentColor",
                "stroke-width": "2",
                "stroke-linecap": "round",
                "stroke-linejoin": "round"
              }, null, -1)
            ])]))
          ], 8, _hoisted_13$1)
        ])
      ]);
    };
  }
});

const commandTest = /* @__PURE__ */ _export_sfc(_sfc_main$2, [["__scopeId", "data-v-5c7468c4"]]);
1674
+
1675
+ class CommandManager {
1676
+ commands = /* @__PURE__ */ new Map();
1677
+ debug;
1678
+ constructor(options = {}) {
1679
+ this.debug = options.debug ?? false;
1378
1680
  }
1379
- switch (code) {
1380
- case "0":
1381
- callbacks.onTextDelta?.(value);
1382
- break;
1383
- case "9":
1384
- callbacks.onToolCallStart?.(value.toolCallId, value.toolName);
1385
- break;
1386
- case "b":
1387
- callbacks.onToolCallDelta?.(value.toolCallId, value.argsTextDelta);
1388
- break;
1389
- case "c":
1390
- callbacks.onToolCallComplete?.(value.toolCallId, value.toolName, value.args);
1391
- break;
1392
- case "a":
1393
- callbacks.onToolResult?.(value.toolCallId, value.result);
1394
- break;
1395
- case "e":
1396
- callbacks.onStepFinish?.(value);
1397
- break;
1398
- case "d":
1399
- callbacks.onFinish?.(value);
1400
- break;
1401
- case "3":
1402
- callbacks.onError?.(value);
1403
- break;
1681
+ registerCommand(command) {
1682
+ this.commands.set(command.name, command);
1683
+ this.log("注册命令", `${command.name}: ${command.description}`);
1404
1684
  }
1405
- }
1406
- async function readDataStream(response, callbacks) {
1407
- if (!response.body) return;
1408
- const reader = response.body.getReader();
1409
- const decoder = new TextDecoder();
1410
- let buffer = "";
1411
- let format = null;
1412
- while (true) {
1413
- const { value, done } = await reader.read();
1414
- if (done) break;
1415
- const chunk = decoder.decode(value, { stream: true });
1416
- buffer += chunk;
1417
- if (format === null && buffer.trim().length > 0) {
1418
- format = detectFormat(buffer);
1419
- console.log("[DataStreamParser] detected format:", format, "| first 200 chars:", buffer.slice(0, 200));
1420
- }
1421
- if (format === "plain-text") {
1422
- const text = buffer;
1423
- buffer = "";
1424
- if (text) callbacks.onTextDelta?.(text);
1425
- continue;
1426
- }
1427
- if (format === "ui-message-stream") {
1428
- while (true) {
1429
- const eventEnd = buffer.indexOf("\n\n");
1430
- if (eventEnd === -1) break;
1431
- const eventBlock = buffer.slice(0, eventEnd);
1432
- buffer = buffer.slice(eventEnd + 2);
1433
- const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
1434
- for (const dataLine of dataLines) {
1435
- processUIMessageStreamEvent(dataLine, callbacks);
1436
- }
1437
- }
1438
- continue;
1685
+ unregisterCommand(name) {
1686
+ const deleted = this.commands.delete(name);
1687
+ if (deleted) {
1688
+ this.log("命令已注销", name);
1439
1689
  }
1440
- if (format === "data-stream") {
1441
- const isSSEWrapped = buffer.trimStart().startsWith("data:");
1442
- if (isSSEWrapped) {
1443
- while (true) {
1444
- const eventEnd = buffer.indexOf("\n\n");
1445
- if (eventEnd === -1) break;
1446
- const eventBlock = buffer.slice(0, eventEnd);
1447
- buffer = buffer.slice(eventEnd + 2);
1448
- const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
1449
- for (const dl of dataLines) {
1450
- const t = dl.trim();
1451
- if (!t || t === "[DONE]") {
1452
- if (t === "[DONE]") callbacks.onFinish?.({});
1453
- continue;
1454
- }
1455
- parseLegacyProtocolLine(t, callbacks);
1456
- }
1457
- }
1458
- } else {
1459
- while (true) {
1460
- const newlineIdx = buffer.indexOf("\n");
1461
- if (newlineIdx === -1) break;
1462
- const line = buffer.slice(0, newlineIdx).trim();
1463
- buffer = buffer.slice(newlineIdx + 1);
1464
- if (line) parseLegacyProtocolLine(line, callbacks);
1465
- }
1466
- }
1467
- continue;
1690
+ }
1691
+ async executeCommand(command, args = []) {
1692
+ const commandDef = this.commands.get(command);
1693
+ if (!commandDef) {
1694
+ throw new Error(`命令 "${command}" 未找到`);
1468
1695
  }
1696
+ this.log("执行命令", command, args);
1697
+ return await commandDef.handler(...args);
1469
1698
  }
1470
- const tail = decoder.decode();
1471
- if (tail) buffer += tail;
1472
- if (buffer.trim()) {
1473
- if (format === "plain-text") {
1474
- callbacks.onTextDelta?.(buffer);
1475
- } else if (format === "ui-message-stream") {
1476
- const dataLines = buffer.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
1477
- for (const dl of dataLines) {
1478
- processUIMessageStreamEvent(dl, callbacks);
1479
- }
1480
- } else if (format === "data-stream") {
1481
- parseLegacyProtocolLine(buffer.trim(), callbacks);
1699
+ getCommands() {
1700
+ return Array.from(this.commands.values()).map((cmd) => ({
1701
+ name: cmd.name,
1702
+ description: cmd.description,
1703
+ parameters: cmd.parameters
1704
+ }));
1705
+ }
1706
+ hasCommand(name) {
1707
+ return this.commands.has(name);
1708
+ }
1709
+ clear() {
1710
+ this.commands.clear();
1711
+ this.log("", "所有命令已清空");
1712
+ }
1713
+ log(prefix, msg, ...args) {
1714
+ (/* @__PURE__ */ new Date()).toLocaleTimeString([], {
1715
+ hour: "2-digit",
1716
+ minute: "2-digit",
1717
+ second: "2-digit"
1718
+ });
1719
+ console.log(
1720
+ `%c ${prefix}`,
1721
+ "background:#7c3aed;color:white;padding:2px 6px;border-radius:3px 0 0 3px;font-weight:bold;",
1722
+ `${msg}`
1723
+ );
1724
+ if (args.length > 0) {
1725
+ console.log(...args);
1482
1726
  }
1483
1727
  }
1484
- callbacks.onFinish?.({});
1485
1728
  }
1486
- async function parseDataStreamToMessage(response, onUpdate) {
1487
- let textContent = "";
1488
- const parts = [];
1489
- const toolCalls = /* @__PURE__ */ new Map();
1490
- const ensureTextPart = () => {
1491
- for (let i = parts.length - 1; i >= 0; i--) {
1492
- if (parts[i].type === "text") {
1493
- return parts[i];
1729
+
1730
/**
 * `sime-provider`: renderless provider component.
 *
 * Provides, under `AiChatbotXKey`, accessors for the `appToken` and
 * `organizationId` props, voice-control entry points that delegate to whatever
 * implementation was registered through `registerVoiceMethods`, and a
 * command registry backed by a `CommandManager`. It renders only its default
 * slot.
 */
const _sfc_main$1 = /* @__PURE__ */ defineComponent({
  __name: "sime-provider",
  props: {
    appToken: {},
    organizationId: {}
  },
  setup(__props) {
    const props = __props;
    const commandManager = shallowRef(new CommandManager({ debug: false }));
    // Voice hooks start as async no-ops until a voice component registers real ones.
    const startListeningRef = shallowRef(async () => {
    });
    const stopListeningRef = shallowRef(async () => {
    });
    const stopBroadcastRef = shallowRef(async () => {
    });
    const getCommands = async () => commandManager.value.getCommands();
    provide(AiChatbotXKey, {
      appToken: () => props.appToken,
      organizationId: () => props.organizationId,
      startListening: () => startListeningRef.value(),
      stopListening: () => stopListeningRef.value(),
      stopBroadcast: () => stopBroadcastRef.value(),
      // Only the methods present on `methods` replace the current hooks.
      registerVoiceMethods: (methods) => {
        if (methods.stopBroadcast) stopBroadcastRef.value = methods.stopBroadcast;
        if (methods.start) startListeningRef.value = methods.start;
        if (methods.stop) stopListeningRef.value = methods.stop;
      },
      // `getCommads` is the historical (misspelled) key; it is kept so existing
      // consumers keep working. `getCommands` is the correctly spelled alias.
      getCommads: getCommands,
      getCommands,
      registerCommand: (cmd) => {
        commandManager.value.registerCommand(cmd);
      },
      unregisterCommand: (name) => {
        commandManager.value.unregisterCommand(name);
      },
      async executeCommand(commandName, args = []) {
        return await commandManager.value.executeCommand(commandName, args);
      }
    });
    return (_ctx, _cache) => {
      return renderSlot(_ctx.$slots, "default");
    };
  }
});
1772
+
1773
/**
 * Text-to-speech composable built on `SpeechSynthesizerStandalone`.
 *
 * The synthesizer is created lazily on the first `speak()`. Streaming text can
 * be pushed through `feed()` (split into sentences) and finished with
 * `flush()`.
 *
 * Fixes over the previous implementation:
 * - Initialisation is no longer cached in a promise that could never be
 *   cleared, so a failed init is retried on the next `speak()` and a
 *   destroyed instance is never handed out again.
 * - `hasPendingAudio` is reset when nothing can be played.
 * - `stop()` cancels `speak()` calls that were still waiting for the instance.
 * - `speed` / `volume` / `pitch` accept `0` (`??` instead of `||`).
 * - Full-width `!?;` are treated as sentence delimiters.
 * - Rejections from `AudioContext.resume()` / `close()` are handled.
 *
 * @param {() => (object|null|undefined)} getVoiceConfig - Returns the voice
 *   configuration (`appId`, `apiKey`/`ttsApiKey`, `apiSecret`, ...); a falsy
 *   result or a missing `apiSecret` disables speech output.
 * @returns {{isSpeaking: object, hasPendingAudio: object, warmUpAudio: Function,
 *   speak: Function, feed: Function, flush: Function, stop: Function,
 *   destroy: Function, setOnQueueEmpty: Function}}
 */
function useTTS(getVoiceConfig) {
  const isSpeaking = ref(false);
  const hasPendingAudio = ref(false);
  let instance = null;
  let audioCtx = null;
  let sentenceBuffer = "";
  let onQueueEmptyCb = null;
  // Bumped by stop(); a speak() that captured an older value must not play.
  let generation = 0;

  // ASCII and full-width sentence terminators.
  const sentenceDelimiters = /[。!?;\n.!?;]/;

  // Removes Markdown syntax that should not be read aloud.
  const stripMarkdown = (text) => String(text ?? "")
    .replace(/```[\s\S]*?```/g, "")
    .replace(/\|[^\n]*\|/g, "")
    .replace(/#{1,6}\s*/g, "")
    .replace(/\*\*(.*?)\*\*/g, "$1")
    .replace(/\*(.*?)\*/g, "$1")
    .replace(/`([^`]*)`/g, "$1")
    .replace(/\[([^\]]*)\]\([^)]*\)/g, "$1")
    .replace(/[-*+]\s+/g, "")
    .replace(/>\s+/g, "")
    .replace(/\n{2,}/g, "。")
    .replace(/\n/g, ",")
    .trim();

  // Creates (or resumes) the shared AudioContext; meant to be called from a user gesture.
  const warmUpAudio = () => {
    if (!audioCtx || audioCtx.state === "closed") {
      try {
        audioCtx = new AudioContext();
      } catch {
        // No Web Audio support: the synthesizer will fall back to its own context.
        return;
      }
    }
    if (audioCtx.state === "suspended") {
      audioCtx.resume().catch((err) => {
        console.warn("[TTS] AudioContext resume failed:", err);
      });
    }
  };

  // Builds and wires a synthesizer; throws if the constructor or wiring throws.
  const createInstance = (vc) => {
    const tts = new SpeechSynthesizerStandalone({
      appId: vc.appId,
      apiKey: vc.ttsApiKey || vc.apiKey,
      apiSecret: vc.apiSecret,
      websocketUrl: vc.ttsWebsocketUrl || "wss://tts-api.xfyun.cn/v2/tts",
      vcn: vc.ttsVcn || "xiaoyan",
      // `??` so that an explicit 0 is honoured instead of replaced by the default.
      speed: vc.speed ?? 55,
      volume: vc.volume ?? 90,
      pitch: vc.pitch ?? 50,
      aue: "raw",
      auf: "audio/L16;rate=16000",
      tte: "UTF8",
      autoPlay: true
    });
    tts.onStart(() => {
      isSpeaking.value = true;
    });
    tts.onEnd(() => {
    });
    tts.onQueueEmpty(() => {
      isSpeaking.value = false;
      hasPendingAudio.value = false;
      onQueueEmptyCb?.();
    });
    tts.onError((err) => {
      console.error("[TTS] Error:", err);
      isSpeaking.value = false;
    });
    // Reuse the context unlocked by warmUpAudio() when it is already running.
    if (audioCtx && audioCtx.state === "running") {
      tts.audioContext = audioCtx;
      tts.gainNode = audioCtx.createGain();
      tts.gainNode.connect(audioCtx.destination);
    }
    return tts;
  };

  // Returns the live synthesizer, creating it on demand; null when unavailable.
  const ensureInstance = async () => {
    if (instance) return instance;
    const vc = getVoiceConfig();
    if (!vc || !vc.apiSecret) {
      console.warn("[TTS] 缺少 voiceConfig 或 apiSecret,语音播报已禁用");
      return null;
    }
    try {
      instance = createInstance(vc);
    } catch (err) {
      console.error("[TTS] 初始化失败:", err);
      // Leave `instance` empty so the next speak() retries the initialisation.
      instance = null;
    }
    return instance;
  };

  // Queues `text` (Markdown stripped) for playback.
  const speak = async (text) => {
    const clean = stripMarkdown(text);
    if (!clean.trim()) return;
    hasPendingAudio.value = true;
    const token = generation;
    const tts = await ensureInstance();
    // stop() ran while we were waiting: drop this utterance.
    if (token !== generation) return;
    if (!tts) {
      hasPendingAudio.value = false;
      return;
    }
    try {
      tts.speak(clean);
    } catch (err) {
      console.error("[TTS] speak 失败:", err);
      // Nothing is playing, so no queue-empty event will clear the flag for us.
      if (!isSpeaking.value) hasPendingAudio.value = false;
    }
  };

  // Appends streamed text and speaks every complete sentence found so far.
  const feed = (delta) => {
    sentenceBuffer += delta;
    while (true) {
      const match = sentenceBuffer.match(sentenceDelimiters);
      if (!match || match.index === void 0) break;
      const sentence = sentenceBuffer.slice(0, match.index + 1).trim();
      sentenceBuffer = sentenceBuffer.slice(match.index + 1);
      if (sentence.length > 0) speak(sentence);
    }
  };

  // Speaks whatever is left in the sentence buffer.
  const flush = () => {
    const remaining = sentenceBuffer.trim();
    sentenceBuffer = "";
    if (remaining.length > 0) speak(remaining);
  };

  // Stops playback, clears buffered text and cancels pending speak() calls.
  const stop = () => {
    generation += 1;
    sentenceBuffer = "";
    isSpeaking.value = false;
    hasPendingAudio.value = false;
    if (instance) {
      try {
        instance.stop();
      } catch {
        // Best effort: the state above is already reset.
      }
    }
  };

  const setOnQueueEmpty = (cb) => {
    onQueueEmptyCb = cb;
  };

  // Releases the synthesizer and the AudioContext; a later speak() re-creates them.
  const destroy = () => {
    stop();
    if (instance) {
      try {
        instance.destroy();
      } catch {
        // Best effort teardown.
      }
      instance = null;
    }
    if (audioCtx) {
      if (audioCtx.state !== "closed") {
        // close() reports failures through its promise, not synchronously.
        audioCtx.close().catch((err) => {
          console.warn("[TTS] AudioContext close failed:", err);
        });
      }
      audioCtx = null;
    }
  };

  return {
    isSpeaking,
    hasPendingAudio,
    warmUpAudio,
    speak,
    feed,
    flush,
    stop,
    destroy,
    setOnQueueEmpty
  };
}
1919
+
1920
/**
 * Checks whether a microphone can be used right now.
 *
 * Steps: verify the browser APIs exist, require at least one `audioinput`
 * device, bail out if the Permissions API reports `denied`, then open a
 * short-lived audio stream and check that its first track is enabled and
 * live. Any stream that was opened is stopped again before returning. Every
 * failure is logged to the console.
 *
 * @returns {Promise<boolean>} `true` only when a live audio track was obtained.
 */
const ensureMicrophonePermission = async () => {
  const reportUnsupported = () => {
    console.log("当前环境不支持麦克风访问");
    return false;
  };
  if (typeof navigator === "undefined" || typeof window === "undefined") {
    return reportUnsupported();
  }
  if (!navigator.mediaDevices?.getUserMedia || !navigator.mediaDevices?.enumerateDevices) {
    return reportUnsupported();
  }
  try {
    const allDevices = await navigator.mediaDevices.enumerateDevices();
    const microphones = allDevices.filter(({ kind }) => kind === "audioinput");
    if (microphones.length === 0) {
      console.log("未检测到麦克风设备,请连接麦克风后重试。");
      return false;
    }
    const canQueryPermission = "permissions" in navigator && navigator.permissions?.query;
    if (canQueryPermission) {
      try {
        const status = await navigator.permissions.query({ name: "microphone" });
        if (status.state === "denied") {
          console.log("麦克风权限被禁用,请在浏览器设置中开启。");
          return false;
        }
      } catch (e) {
        // The query itself failing is not fatal; the getUserMedia probe below decides.
        console.warn("Permission query not supported:", e);
      }
    }
    let stream = null;
    try {
      const constraints = {
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true
        }
      };
      stream = await navigator.mediaDevices.getUserMedia(constraints);
      const tracks = stream.getAudioTracks();
      if (tracks.length === 0) {
        console.log("无法获取麦克风音频轨道。");
        return false;
      }
      const firstTrack = tracks[0];
      const usable = firstTrack.enabled && firstTrack.readyState === "live";
      if (!usable) {
        console.log("麦克风设备不可用,请检查设备连接。");
        return false;
      }
      return true;
    } finally {
      // The stream was only a probe: always release the device.
      if (stream) {
        for (const track of stream.getTracks()) {
          track.stop();
        }
      }
    }
  } catch (error) {
    console.error("Microphone permission check failed", error);
    const hints = new Map([
      ["NotFoundError", "未检测到麦克风设备,请连接麦克风后重试。"],
      ["DevicesNotFoundError", "未检测到麦克风设备,请连接麦克风后重试。"],
      ["NotAllowedError", "麦克风权限被拒绝,请在浏览器设置中允许访问。"],
      ["PermissionDeniedError", "麦克风权限被拒绝,请在浏览器设置中允许访问。"],
      ["NotReadableError", "麦克风被其他应用占用或无法访问。"],
      ["TrackStartError", "麦克风被其他应用占用或无法访问。"]
    ]);
    const errorName = error.name;
    console.log(hints.has(errorName) ? hints.get(errorName) : "无法访问麦克风,请检查设备连接和浏览器权限。");
    return false;
  }
};
1986
+
1987
/**
 * Wake-word listening plus speech transcription composable.
 *
 * A `WakeWordDetectorStandalone` listens for the wake words; when it fires, a
 * `SpeechTranscriberStandalone` is started and its final text is handed to
 * `options.onTranscriptionDone` once the transcriber auto-stops.
 *
 * Fixes over the previous implementation:
 * - Stopping no longer depends on the microphone permission probe, so
 *   listening can still be turned off after the mic was unplugged or denied.
 * - A non-boolean `targetState` is coerced to a boolean, so the "already in
 *   that state" check works for any truthy/falsy value.
 * - A failing detector setup is logged and leaves no half-configured detector
 *   behind instead of rejecting out of `toggleVoiceMode`.
 * - `destroy()` resets the reactive state so a later toggle starts cleanly.
 * - Overlapping wake animations no longer end early.
 *
 * @param {object} options
 * @param {() => (object|null|undefined)} options.getVoiceConfig - Returns the
 *   transcriber configuration (`appId`, `apiKey`, `websocketUrl`).
 * @param {string} [options.modelPath] - Wake-word model location; required for listening.
 * @param {string[]} [options.wakeWords] - Wake words; defaults to ["你好", "您好"].
 * @param {Function} [options.onWake] - Called when a wake word is detected.
 * @param {Function} [options.onTranscriptionDone] - Called with the final, non-blank text.
 * @returns {object} Reactive state refs and control functions.
 */
function useVoiceRecognition(options) {
  const voiceStatus = ref("standby");
  const isTranscribing = ref(false);
  const isInitializing = ref(false);
  const transcriptionText = ref("");
  const wakeAnimating = ref(false);
  let detector = null;
  let transcriber = null;
  let wakeAnimationTimer = null;

  // Lazily creates the transcriber and wires its callbacks; no-op without a full config.
  const initTranscriber = () => {
    if (transcriber) return;
    const vc = options.getVoiceConfig();
    if (!vc || !vc.appId || !vc.apiKey || !vc.websocketUrl) {
      console.error("[VoiceRecognition] 缺少 voiceConfig,无法初始化转写器");
      return;
    }
    transcriber = new SpeechTranscriberStandalone({
      appId: vc.appId,
      apiKey: vc.apiKey,
      websocketUrl: vc.websocketUrl,
      autoStop: {
        enabled: true,
        silenceTimeoutMs: 2e3,
        noSpeechTimeoutMs: 5e3,
        maxDurationMs: 45e3
      }
    });
    transcriber.onResult((result) => {
      transcriptionText.value = result.transcript || "";
    });
    transcriber.onAutoStop(async () => {
      // Capture the text first: stopping and clearing happen before the callback runs.
      const finalText = transcriptionText.value;
      await stopTranscribing();
      transcriptionText.value = "";
      if (finalText.trim()) {
        options.onTranscriptionDone?.(finalText);
      }
    });
    transcriber.onError((error) => {
      console.error("[VoiceRecognition] 转写错误:", error);
      stopTranscribing();
      transcriptionText.value = "";
    });
  };

  const startTranscribing = async () => {
    if (isTranscribing.value) return;
    if (!transcriber) initTranscriber();
    if (!transcriber) return;
    try {
      await transcriber.start();
      isTranscribing.value = true;
      transcriptionText.value = "";
    } catch (error) {
      console.error("[VoiceRecognition] 启动转写失败:", error);
    }
  };

  const stopTranscribing = async () => {
    if (!transcriber || !transcriber.isActive()) {
      isTranscribing.value = false;
      return;
    }
    try {
      await transcriber.stop();
    } catch (error) {
      console.error("[VoiceRecognition] 停止转写失败:", error);
    } finally {
      isTranscribing.value = false;
    }
  };

  // Creates and wires the wake-word detector; leaves `detector` null on any failure.
  const initDetector = () => {
    if (detector || isInitializing.value) return;
    if (!options.modelPath) {
      console.error("[VoiceRecognition] 未传入 modelPath,无法启用唤醒词");
      return;
    }
    isInitializing.value = true;
    try {
      const created = new WakeWordDetectorStandalone({
        modelPath: options.modelPath,
        sampleRate: 16e3,
        usePartial: true,
        autoReset: {
          enabled: true,
          resetDelayMs: 4e3
        }
      });
      created.setWakeWords(options.wakeWords || ["你好", "您好"]);
      created.onWake(async () => {
        wakeAnimating.value = true;
        options.onWake?.();
        await startTranscribing();
        // Restart the timer so a second wake does not cut the animation short.
        clearTimeout(wakeAnimationTimer);
        wakeAnimationTimer = setTimeout(() => {
          wakeAnimating.value = false;
        }, 1200);
      });
      created.onError((error) => {
        console.error("[VoiceRecognition] 唤醒监听错误:", error);
        voiceStatus.value = "standby";
        stopTranscribing();
      });
      // Publish only a fully configured detector.
      detector = created;
    } catch (error) {
      console.error("[VoiceRecognition] 唤醒词检测器初始化失败:", error);
      detector = null;
    } finally {
      isInitializing.value = false;
    }
  };

  /**
   * Starts or stops wake-word listening.
   * `targetState` undefined toggles; any other value is coerced to a boolean
   * (truthy = listen, falsy = standby). Only starting requires the microphone.
   */
  const toggleVoiceMode = async (targetState) => {
    if (isInitializing.value) return;
    const isListening = voiceStatus.value === "listening";
    const shouldStart = targetState !== void 0 ? Boolean(targetState) : !isListening;
    if (isListening === shouldStart) return;

    if (!shouldStart) {
      try {
        if (detector) await detector.stop();
      } catch (error) {
        console.error("[VoiceRecognition] 监听切换失败:", error);
      } finally {
        // Even if the detector refused to stop, never leave the transcriber running.
        voiceStatus.value = "standby";
        transcriptionText.value = "";
        await stopTranscribing();
      }
      return;
    }

    const permission = await ensureMicrophonePermission();
    if (!permission || isInitializing.value) return;
    // Another call may have started listening while the permission probe ran.
    if (voiceStatus.value === "listening") return;
    if (!detector) {
      initDetector();
      if (!detector) return;
    }
    try {
      await detector.start();
      voiceStatus.value = "listening";
    } catch (error) {
      console.error("[VoiceRecognition] 监听切换失败:", error);
      voiceStatus.value = "standby";
    }
  };

  const abortTranscription = async () => {
    transcriptionText.value = "";
    await stopTranscribing();
  };

  // Stops and drops both engines and returns the reactive state to its initial values.
  const destroy = async () => {
    clearTimeout(wakeAnimationTimer);
    wakeAnimationTimer = null;
    if (detector) {
      try {
        if (detector.isActive()) await detector.stop();
      } catch {
        // Best effort teardown: the reference is dropped either way.
      }
      detector = null;
    }
    if (transcriber) {
      try {
        if (transcriber.isActive()) await transcriber.stop();
      } catch {
        // Best effort teardown: the reference is dropped either way.
      }
      transcriber = null;
    }
    voiceStatus.value = "standby";
    isTranscribing.value = false;
    transcriptionText.value = "";
    wakeAnimating.value = false;
  };

  return {
    voiceStatus,
    isTranscribing,
    isInitializing,
    transcriptionText,
    wakeAnimating,
    startTranscribing,
    stopTranscribing,
    abortTranscription,
    toggleVoiceMode,
    destroy
  };
}
1904
2148
 
@@ -1960,6 +2204,7 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
1960
2204
  wakeWords: {},
1961
2205
  wakeResponses: {},
1962
2206
  modelPath: {},
2207
+ agentId: {},
1963
2208
  projectId: {},
1964
2209
  voiceConfig: {},
1965
2210
  bubbleSize: {},
@@ -1973,6 +2218,7 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
1973
2218
  return null;
1974
2219
  };
1975
2220
  const wakeResponses = computed(() => props.wakeResponses || ["在呢"]);
2221
+ const agentId = computed(() => props.agentId);
1976
2222
  const tts = useTTS(getVoiceConfig);
1977
2223
  const bubbleBridge = {
1978
2224
  open: () => {
@@ -1982,7 +2228,7 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
1982
2228
  scrollToBottom: () => {
1983
2229
  }
1984
2230
  };
1985
- const endpoint = `/sime/proxy/agent/${aiChatbotX.agentId()}/stream-invoke`;
2231
+ const endpoint = `/sime/proxy/organizations/${aiChatbotX.organizationId()}/agents/${agentId.value}/stream-invoke`;
1986
2232
  const agent = useAgentInvoke({
1987
2233
  endpoint,
1988
2234
  appToken: aiChatbotX.appToken(),
@@ -2179,7 +2425,7 @@ const _sfc_main = /* @__PURE__ */ defineComponent({
2179
2425
  }
2180
2426
  });
2181
2427
 
2182
- const voiceAssistant = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-76f6b7ef"]]);
2428
+ const voiceAssistant = /* @__PURE__ */ _export_sfc(_sfc_main, [["__scopeId", "data-v-59d72f34"]]);
2183
2429
 
2184
2430
  var clientCommandKey = /* @__PURE__ */ ((clientCommandKey2) => {
2185
2431
  clientCommandKey2["SET_THEME"] = "SiMeAgent_setTheme";
@@ -2192,5 +2438,5 @@ var clientCommandKey = /* @__PURE__ */ ((clientCommandKey2) => {
2192
2438
  return clientCommandKey2;
2193
2439
  })(clientCommandKey || {});
2194
2440
 
2195
- export { AgentChatTransport, aiChat as AiChat, _sfc_main$1 as AiChatbotProvider, voiceAssistant as AiChatbotVoiceAssistant, AiChatbotXKey, clientCommandKey, createAgentChatTransport, injectStrict };
2441
+ export { AgentChatTransport, aiChat as AiChat, commandTest as AiChatbotCommandTest, _sfc_main$1 as AiChatbotProvider, voiceAssistant as AiChatbotVoiceAssistant, AiChatbotXKey, clientCommandKey, createAgentChatTransport, injectStrict };
2196
2442
  //# sourceMappingURL=sime-x-vue.mjs.map