@siact/sime-x-vue 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { defineComponent, ref, reactive, computed, watch, onMounted, openBlock, createElementBlock, normalizeClass, createElementVNode, toDisplayString, withModifiers, withDirectives, vModelText, Fragment, renderList, createCommentVNode, unref, createVNode, Transition, withCtx, nextTick, inject, shallowRef, provide, renderSlot, onBeforeUnmount, normalizeStyle } from 'vue';
1
+ import { defineComponent, ref, reactive, computed, watch, onMounted, openBlock, createElementBlock, normalizeClass, createElementVNode, toDisplayString, withModifiers, withDirectives, vModelText, Fragment, renderList, createCommentVNode, unref, createVNode, Transition, withCtx, nextTick, inject, onBeforeUnmount, normalizeStyle, withKeys, shallowRef, provide, renderSlot } from 'vue';
2
2
  import { Chat } from '@ai-sdk/vue';
3
3
  import { DefaultChatTransport } from 'ai';
4
4
  import { SpeechSynthesizerStandalone, WakeWordDetectorStandalone, SpeechTranscriberStandalone } from 'web-voice-kit';
@@ -88,30 +88,30 @@ function buildHistoryMessages(messages, excludeId) {
88
88
  return history;
89
89
  }
90
90
 
91
- const _hoisted_1$1 = {
91
+ const _hoisted_1$2 = {
92
92
  key: 0,
93
93
  class: "ai-chat__welcome"
94
94
  };
95
- const _hoisted_2$1 = { class: "ai-chat__welcome-header" };
96
- const _hoisted_3$1 = { class: "ai-chat__welcome-title" };
97
- const _hoisted_4$1 = { class: "ai-chat__welcome-desc" };
98
- const _hoisted_5$1 = { class: "ai-chat__input-area" };
99
- const _hoisted_6$1 = { class: "ai-chat__input-wrapper" };
100
- const _hoisted_7$1 = ["disabled"];
101
- const _hoisted_8$1 = {
95
+ const _hoisted_2$2 = { class: "ai-chat__welcome-header" };
96
+ const _hoisted_3$2 = { class: "ai-chat__welcome-title" };
97
+ const _hoisted_4$2 = { class: "ai-chat__welcome-desc" };
98
+ const _hoisted_5$2 = { class: "ai-chat__input-area" };
99
+ const _hoisted_6$2 = { class: "ai-chat__input-wrapper" };
100
+ const _hoisted_7$2 = ["disabled"];
101
+ const _hoisted_8$2 = {
102
102
  key: 0,
103
103
  class: "ai-chat__suggestions"
104
104
  };
105
- const _hoisted_9$1 = ["onClick"];
106
- const _hoisted_10$1 = { class: "ai-chat__messages-inner" };
107
- const _hoisted_11$1 = { class: "ai-chat__message-content" };
108
- const _hoisted_12$1 = ["innerHTML"];
109
- const _hoisted_13$1 = {
105
+ const _hoisted_9$2 = ["onClick"];
106
+ const _hoisted_10$2 = { class: "ai-chat__messages-inner" };
107
+ const _hoisted_11$2 = { class: "ai-chat__message-content" };
108
+ const _hoisted_12$2 = ["innerHTML"];
109
+ const _hoisted_13$2 = {
110
110
  key: 1,
111
111
  class: "ai-chat__reasoning"
112
112
  };
113
- const _hoisted_14 = ["onClick"];
114
- const _hoisted_15 = {
113
+ const _hoisted_14$1 = ["onClick"];
114
+ const _hoisted_15$1 = {
115
115
  key: 0,
116
116
  class: "ai-chat__reasoning-streaming"
117
117
  };
@@ -185,7 +185,7 @@ const _hoisted_29 = {
185
185
  };
186
186
  const _hoisted_30 = { class: "ai-chat__input-wrapper" };
187
187
  const _hoisted_31 = ["disabled"];
188
- const _sfc_main$2 = /* @__PURE__ */ defineComponent({
188
+ const _sfc_main$3 = /* @__PURE__ */ defineComponent({
189
189
  __name: "ai-chat",
190
190
  props: {
191
191
  api: {},
@@ -383,17 +383,17 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
383
383
  return openBlock(), createElementBlock("div", {
384
384
  class: normalizeClass(["ai-chat", { "ai-chat--full-width": __props.fullWidth }])
385
385
  }, [
386
- isEmpty.value ? (openBlock(), createElementBlock("div", _hoisted_1$1, [
387
- createElementVNode("div", _hoisted_2$1, [
388
- createElementVNode("h1", _hoisted_3$1, toDisplayString(__props.welcomeTitle), 1),
389
- createElementVNode("p", _hoisted_4$1, toDisplayString(__props.welcomeDescription), 1)
386
+ isEmpty.value ? (openBlock(), createElementBlock("div", _hoisted_1$2, [
387
+ createElementVNode("div", _hoisted_2$2, [
388
+ createElementVNode("h1", _hoisted_3$2, toDisplayString(__props.welcomeTitle), 1),
389
+ createElementVNode("p", _hoisted_4$2, toDisplayString(__props.welcomeDescription), 1)
390
390
  ]),
391
- createElementVNode("div", _hoisted_5$1, [
391
+ createElementVNode("div", _hoisted_5$2, [
392
392
  createElementVNode("form", {
393
393
  class: "ai-chat__form",
394
394
  onSubmit: withModifiers(handleSubmit, ["prevent"])
395
395
  }, [
396
- createElementVNode("div", _hoisted_6$1, [
396
+ createElementVNode("div", _hoisted_6$2, [
397
397
  withDirectives(createElementVNode("textarea", {
398
398
  ref_key: "textareaRef",
399
399
  ref: textareaRef,
@@ -429,16 +429,16 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
429
429
  }),
430
430
  createElementVNode("polygon", { points: "22 2 15 22 11 13 2 9 22 2" })
431
431
  ], -1)
432
- ])], 8, _hoisted_7$1)
432
+ ])], 8, _hoisted_7$2)
433
433
  ])
434
434
  ], 32),
435
- __props.suggestions.length > 0 ? (openBlock(), createElementBlock("div", _hoisted_8$1, [
435
+ __props.suggestions.length > 0 ? (openBlock(), createElementBlock("div", _hoisted_8$2, [
436
436
  (openBlock(true), createElementBlock(Fragment, null, renderList(__props.suggestions, (suggestion) => {
437
437
  return openBlock(), createElementBlock("button", {
438
438
  key: suggestion,
439
439
  class: "ai-chat__suggestion",
440
440
  onClick: ($event) => handleSuggestionClick(suggestion)
441
- }, toDisplayString(suggestion), 9, _hoisted_9$1);
441
+ }, toDisplayString(suggestion), 9, _hoisted_9$2);
442
442
  }), 128))
443
443
  ])) : createCommentVNode("", true)
444
444
  ])
@@ -449,7 +449,7 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
449
449
  class: "ai-chat__messages",
450
450
  onScroll: handleScroll
451
451
  }, [
452
- createElementVNode("div", _hoisted_10$1, [
452
+ createElementVNode("div", _hoisted_10$2, [
453
453
  (openBlock(true), createElementBlock(Fragment, null, renderList(unref(chat).messages, (message) => {
454
454
  return openBlock(), createElementBlock("div", {
455
455
  key: message.id,
@@ -463,13 +463,13 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
463
463
  key: 0,
464
464
  class: normalizeClass(["ai-chat__message", `ai-chat__message--${message.role}`])
465
465
  }, [
466
- createElementVNode("div", _hoisted_11$1, [
466
+ createElementVNode("div", _hoisted_11$2, [
467
467
  createElementVNode("div", {
468
468
  class: "ai-chat__message-text",
469
469
  innerHTML: renderMarkdown(part.text)
470
- }, null, 8, _hoisted_12$1)
470
+ }, null, 8, _hoisted_12$2)
471
471
  ])
472
- ], 2)) : part.type === "reasoning" ? (openBlock(), createElementBlock("div", _hoisted_13$1, [
472
+ ], 2)) : part.type === "reasoning" ? (openBlock(), createElementBlock("div", _hoisted_13$2, [
473
473
  createElementVNode("button", {
474
474
  class: "ai-chat__reasoning-trigger",
475
475
  onClick: ($event) => toggleReasoning(message.id)
@@ -486,8 +486,8 @@ const _sfc_main$2 = /* @__PURE__ */ defineComponent({
486
486
  createElementVNode("polyline", { points: "9 18 15 12 9 6" }, null, -1)
487
487
  ])], 2)),
488
488
  _cache[4] || (_cache[4] = createElementVNode("span", null, "思考过程", -1)),
489
- isStreamingMessage(message.id) && partIndex === message.parts.length - 1 ? (openBlock(), createElementBlock("span", _hoisted_15)) : createCommentVNode("", true)
490
- ], 8, _hoisted_14),
489
+ isStreamingMessage(message.id) && partIndex === message.parts.length - 1 ? (openBlock(), createElementBlock("span", _hoisted_15$1)) : createCommentVNode("", true)
490
+ ], 8, _hoisted_14$1),
491
491
  reasoningOpen[message.id] ? (openBlock(), createElementBlock("div", _hoisted_16, toDisplayString(part.text), 1)) : createCommentVNode("", true)
492
492
  ])) : isToolPart(part) ? (openBlock(), createElementBlock("div", _hoisted_17, [
493
493
  createElementVNode("div", {
@@ -692,1206 +692,1457 @@ const _export_sfc = (sfc, props) => {
692
692
  return target;
693
693
  };
694
694
 
695
- const aiChat = /* @__PURE__ */ _export_sfc(_sfc_main$2, [["__scopeId", "data-v-958fd919"]]);
695
+ const aiChat = /* @__PURE__ */ _export_sfc(_sfc_main$3, [["__scopeId", "data-v-958fd919"]]);
696
696
 
697
- class CommandManager {
698
- commands = /* @__PURE__ */ new Map();
699
- debug;
700
- constructor(options = {}) {
701
- this.debug = options.debug ?? false;
702
- }
703
- registerCommand(command) {
704
- this.commands.set(command.name, command);
705
- this.log("注册命令", `${command.name}: ${command.description}`);
706
- }
707
- unregisterCommand(name) {
708
- const deleted = this.commands.delete(name);
709
- if (deleted) {
710
- this.log("命令已注销", name);
697
+ const DATA_STREAM_LINE_RE = /^[0-9a-f]:/;
698
+ function detectFormat(firstChunk) {
699
+ const trimmed = firstChunk.trimStart();
700
+ if (trimmed.startsWith("data:")) {
701
+ const firstLine = trimmed.split("\n")[0];
702
+ const payload = firstLine.slice(5).trim();
703
+ try {
704
+ const parsed = JSON.parse(payload);
705
+ if (parsed && typeof parsed.type === "string") {
706
+ return "ui-message-stream";
707
+ }
708
+ } catch {
711
709
  }
712
- }
713
- async executeCommand(command, args = []) {
714
- const commandDef = this.commands.get(command);
715
- if (!commandDef) {
716
- throw new Error(`命令 "${command}" 未找到`);
710
+ if (DATA_STREAM_LINE_RE.test(payload)) {
711
+ return "data-stream";
717
712
  }
718
- this.log("执行命令", command, args);
719
- return await commandDef.handler(...args);
713
+ return "ui-message-stream";
720
714
  }
721
- getCommands() {
722
- return Array.from(this.commands.values()).map((cmd) => ({
723
- name: cmd.name,
724
- description: cmd.description,
725
- parameters: cmd.parameters
726
- }));
715
+ if (DATA_STREAM_LINE_RE.test(trimmed)) {
716
+ return "data-stream";
727
717
  }
728
- hasCommand(name) {
729
- return this.commands.has(name);
718
+ return "plain-text";
719
+ }
720
+ function processUIMessageStreamEvent(payload, callbacks) {
721
+ const trimmed = payload.trim();
722
+ if (!trimmed || trimmed === "[DONE]") {
723
+ callbacks.onFinish?.({});
724
+ return;
730
725
  }
731
- clear() {
732
- this.commands.clear();
733
- this.log("", "所有命令已清空");
726
+ let parsed;
727
+ try {
728
+ parsed = JSON.parse(trimmed);
729
+ } catch {
730
+ console.warn("[DataStreamParser] failed to parse UI message stream event:", trimmed.slice(0, 100));
731
+ return;
734
732
  }
735
- log(prefix, msg, ...args) {
736
- (/* @__PURE__ */ new Date()).toLocaleTimeString([], {
737
- hour: "2-digit",
738
- minute: "2-digit",
739
- second: "2-digit"
740
- });
741
- console.log(
742
- `%c ${prefix}`,
743
- "background:#7c3aed;color:white;padding:2px 6px;border-radius:3px 0 0 3px;font-weight:bold;",
744
- `${msg}`
745
- );
746
- if (args.length > 0) {
747
- console.log(...args);
748
- }
733
+ const type = parsed?.type;
734
+ if (!type) return;
735
+ switch (type) {
736
+ case "text-delta":
737
+ if (typeof parsed.delta === "string") {
738
+ callbacks.onTextDelta?.(parsed.delta);
739
+ }
740
+ break;
741
+ case "tool-input-start":
742
+ callbacks.onToolCallStart?.(parsed.toolCallId, parsed.toolName);
743
+ break;
744
+ case "tool-input-delta":
745
+ callbacks.onToolCallDelta?.(parsed.toolCallId, parsed.inputTextDelta);
746
+ break;
747
+ case "tool-input-available":
748
+ callbacks.onToolCallComplete?.(parsed.toolCallId, parsed.toolName, parsed.input);
749
+ break;
750
+ case "tool-output-available":
751
+ callbacks.onToolResult?.(parsed.toolCallId, parsed.output);
752
+ break;
753
+ case "finish-step":
754
+ callbacks.onStepFinish?.(parsed);
755
+ break;
756
+ case "finish":
757
+ callbacks.onFinish?.(parsed);
758
+ break;
759
+ case "error":
760
+ case "tool-output-error":
761
+ callbacks.onError?.(parsed.errorText || parsed.error || "Unknown error", parsed);
762
+ break;
763
+ case "start":
764
+ case "text-start":
765
+ case "text-end":
766
+ case "start-step":
767
+ case "reasoning-start":
768
+ case "reasoning-delta":
769
+ case "reasoning-end":
770
+ case "source-url":
771
+ case "source-document":
772
+ case "file":
773
+ case "abort":
774
+ break;
775
+ default:
776
+ if (type.startsWith("data-")) ; else {
777
+ console.log("[DataStreamParser] unhandled UI message stream type:", type);
778
+ }
779
+ break;
749
780
  }
750
781
  }
751
-
752
- const AiChatbotXKey = Symbol("sime-x");
753
- function injectStrict(key, defaultValue, treatDefaultAsFactory) {
754
- let result;
755
- if (defaultValue === void 0) {
756
- result = inject(key);
757
- } else if (treatDefaultAsFactory === true) {
758
- result = inject(key, defaultValue, true);
759
- } else {
760
- result = inject(key, defaultValue, false);
782
+ function parseLegacyProtocolLine(line, callbacks) {
783
+ if (!line || !DATA_STREAM_LINE_RE.test(line)) return;
784
+ const code = line[0];
785
+ const rawValue = line.slice(2);
786
+ let value;
787
+ try {
788
+ value = JSON.parse(rawValue);
789
+ } catch {
790
+ value = rawValue;
761
791
  }
762
- if (!result) {
763
- throw new Error(`Could not resolve ${key.description}`);
792
+ switch (code) {
793
+ case "0":
794
+ callbacks.onTextDelta?.(value);
795
+ break;
796
+ case "9":
797
+ callbacks.onToolCallStart?.(value.toolCallId, value.toolName);
798
+ break;
799
+ case "b":
800
+ callbacks.onToolCallDelta?.(value.toolCallId, value.argsTextDelta);
801
+ break;
802
+ case "c":
803
+ callbacks.onToolCallComplete?.(value.toolCallId, value.toolName, value.args);
804
+ break;
805
+ case "a":
806
+ callbacks.onToolResult?.(value.toolCallId, value.result);
807
+ break;
808
+ case "e":
809
+ callbacks.onStepFinish?.(value);
810
+ break;
811
+ case "d":
812
+ callbacks.onFinish?.(value);
813
+ break;
814
+ case "3":
815
+ callbacks.onError?.(value);
816
+ break;
764
817
  }
765
- return result;
766
818
  }
767
-
768
- const _sfc_main$1 = /* @__PURE__ */ defineComponent({
769
- __name: "sime-provider",
770
- props: {
771
- appToken: {},
772
- organizationId: {}
773
- },
774
- setup(__props) {
775
- const props = __props;
776
- const commandManager = shallowRef(new CommandManager({ debug: false }));
777
- const startListeningRef = shallowRef(async () => {
778
- });
779
- const stopListeningRef = shallowRef(async () => {
780
- });
781
- const stopBroadcastRef = shallowRef(async () => {
782
- });
783
- provide(AiChatbotXKey, {
784
- appToken: () => props.appToken,
785
- organizationId: () => props.organizationId,
786
- startListening: () => startListeningRef.value(),
787
- stopListening: () => stopListeningRef.value(),
788
- stopBroadcast: () => stopBroadcastRef.value(),
789
- registerVoiceMethods: (methods) => {
790
- if (methods.stopBroadcast) stopBroadcastRef.value = methods.stopBroadcast;
791
- if (methods.start) startListeningRef.value = methods.start;
792
- if (methods.stop) stopListeningRef.value = methods.stop;
793
- },
794
- getCommads: async () => commandManager.value.getCommands(),
795
- registerCommand: (cmd) => {
796
- commandManager.value.registerCommand(cmd);
797
- },
798
- unregisterCommand: (name) => {
799
- commandManager.value.unregisterCommand(name);
800
- },
801
- async executeCommand(commandName, args = []) {
802
- return await commandManager.value.executeCommand(commandName, args);
803
- }
804
- });
805
- return (_ctx, _cache) => {
806
- return renderSlot(_ctx.$slots, "default");
807
- };
808
- }
809
- });
810
-
811
- function useTTS(getVoiceConfig) {
812
- const isSpeaking = ref(false);
813
- const hasPendingAudio = ref(false);
814
- let instance = null;
815
- let initPromise = null;
816
- let audioCtx = null;
817
- let sentenceBuffer = "";
818
- const sentenceDelimiters = /[。!?;\n.!?;]/;
819
- const stripMarkdown = (text) => text.replace(/```[\s\S]*?```/g, "").replace(/\|[^\n]*\|/g, "").replace(/#{1,6}\s*/g, "").replace(/\*\*(.*?)\*\*/g, "$1").replace(/\*(.*?)\*/g, "$1").replace(/`([^`]*)`/g, "$1").replace(/\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/[-*+]\s+/g, "").replace(/>\s+/g, "").replace(/\n{2,}/g, "。").replace(/\n/g, ",").trim();
820
- const warmUpAudio = () => {
821
- if (!audioCtx || audioCtx.state === "closed") {
822
- try {
823
- audioCtx = new AudioContext();
824
- } catch {
825
- return;
826
- }
819
+ async function readDataStream(response, callbacks) {
820
+ if (!response.body) return;
821
+ const reader = response.body.getReader();
822
+ const decoder = new TextDecoder();
823
+ let buffer = "";
824
+ let format = null;
825
+ while (true) {
826
+ const { value, done } = await reader.read();
827
+ if (done) break;
828
+ const chunk = decoder.decode(value, { stream: true });
829
+ buffer += chunk;
830
+ if (format === null && buffer.trim().length > 0) {
831
+ format = detectFormat(buffer);
832
+ console.log("[DataStreamParser] detected format:", format, "| first 200 chars:", buffer.slice(0, 200));
827
833
  }
828
- if (audioCtx.state === "suspended") {
829
- audioCtx.resume();
834
+ if (format === "plain-text") {
835
+ const text = buffer;
836
+ buffer = "";
837
+ if (text) callbacks.onTextDelta?.(text);
838
+ continue;
830
839
  }
831
- };
832
- let onQueueEmptyCb = null;
833
- const ensureInstance = async () => {
834
- if (instance) return instance;
835
- if (initPromise) return initPromise;
836
- const vc = getVoiceConfig();
837
- if (!vc || !vc.apiSecret) {
838
- console.warn("[TTS] 缺少 voiceConfig apiSecret,语音播报已禁用");
839
- return null;
840
- }
841
- initPromise = (async () => {
842
- try {
843
- const tts = new SpeechSynthesizerStandalone({
844
- appId: vc.appId,
845
- apiKey: vc.ttsApiKey || vc.apiKey,
846
- apiSecret: vc.apiSecret,
847
- websocketUrl: vc.ttsWebsocketUrl || "wss://tts-api.xfyun.cn/v2/tts",
848
- vcn: vc.ttsVcn || "xiaoyan",
849
- speed: vc.speed || 55,
850
- volume: vc.volume || 90,
851
- pitch: vc.pitch || 50,
852
- aue: "raw",
853
- auf: "audio/L16;rate=16000",
854
- tte: "UTF8",
855
- autoPlay: true
856
- });
857
- tts.onStart(() => {
858
- isSpeaking.value = true;
859
- });
860
- tts.onEnd(() => {
861
- });
862
- tts.onQueueEmpty(() => {
863
- isSpeaking.value = false;
864
- hasPendingAudio.value = false;
865
- onQueueEmptyCb?.();
866
- });
867
- tts.onError((err) => {
868
- console.error("[TTS] Error:", err);
869
- isSpeaking.value = false;
870
- });
871
- if (audioCtx && audioCtx.state === "running") {
872
- tts.audioContext = audioCtx;
873
- tts.gainNode = audioCtx.createGain();
874
- tts.gainNode.connect(audioCtx.destination);
840
+ if (format === "ui-message-stream") {
841
+ while (true) {
842
+ const eventEnd = buffer.indexOf("\n\n");
843
+ if (eventEnd === -1) break;
844
+ const eventBlock = buffer.slice(0, eventEnd);
845
+ buffer = buffer.slice(eventEnd + 2);
846
+ const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
847
+ for (const dataLine of dataLines) {
848
+ processUIMessageStreamEvent(dataLine, callbacks);
875
849
  }
876
- instance = tts;
877
- initPromise = null;
878
- return tts;
879
- } catch (err) {
880
- console.error("[TTS] 初始化失败:", err);
881
- initPromise = null;
882
- return null;
883
850
  }
884
- })();
885
- return initPromise;
886
- };
887
- const speak = async (text) => {
888
- const clean = stripMarkdown(text);
889
- if (!clean.trim()) return;
890
- hasPendingAudio.value = true;
891
- const tts = await ensureInstance();
892
- if (!tts) return;
893
- try {
894
- tts.speak(clean);
895
- } catch (err) {
896
- console.error("[TTS] speak 失败:", err);
897
- }
898
- };
899
- const feed = (delta) => {
900
- sentenceBuffer += delta;
901
- while (true) {
902
- const match = sentenceBuffer.match(sentenceDelimiters);
903
- if (!match || match.index === void 0) break;
904
- const sentence = sentenceBuffer.slice(0, match.index + 1).trim();
905
- sentenceBuffer = sentenceBuffer.slice(match.index + 1);
906
- if (sentence.length > 0) speak(sentence);
851
+ continue;
907
852
  }
908
- };
909
- const flush = () => {
910
- const remaining = sentenceBuffer.trim();
911
- sentenceBuffer = "";
912
- if (remaining.length > 0) speak(remaining);
913
- };
914
- const stop = () => {
915
- sentenceBuffer = "";
916
- isSpeaking.value = false;
917
- hasPendingAudio.value = false;
918
- if (instance) {
919
- try {
920
- instance.stop();
921
- } catch {
853
+ if (format === "data-stream") {
854
+ const isSSEWrapped = buffer.trimStart().startsWith("data:");
855
+ if (isSSEWrapped) {
856
+ while (true) {
857
+ const eventEnd = buffer.indexOf("\n\n");
858
+ if (eventEnd === -1) break;
859
+ const eventBlock = buffer.slice(0, eventEnd);
860
+ buffer = buffer.slice(eventEnd + 2);
861
+ const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
862
+ for (const dl of dataLines) {
863
+ const t = dl.trim();
864
+ if (!t || t === "[DONE]") {
865
+ if (t === "[DONE]") callbacks.onFinish?.({});
866
+ continue;
867
+ }
868
+ parseLegacyProtocolLine(t, callbacks);
869
+ }
870
+ }
871
+ } else {
872
+ while (true) {
873
+ const newlineIdx = buffer.indexOf("\n");
874
+ if (newlineIdx === -1) break;
875
+ const line = buffer.slice(0, newlineIdx).trim();
876
+ buffer = buffer.slice(newlineIdx + 1);
877
+ if (line) parseLegacyProtocolLine(line, callbacks);
878
+ }
922
879
  }
880
+ continue;
923
881
  }
924
- };
925
- const setOnQueueEmpty = (cb) => {
926
- onQueueEmptyCb = cb;
927
- };
928
- const destroy = () => {
929
- stop();
930
- if (instance) {
931
- try {
932
- instance.destroy();
933
- } catch {
882
+ }
883
+ const tail = decoder.decode();
884
+ if (tail) buffer += tail;
885
+ if (buffer.trim()) {
886
+ if (format === "plain-text") {
887
+ callbacks.onTextDelta?.(buffer);
888
+ } else if (format === "ui-message-stream") {
889
+ const dataLines = buffer.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
890
+ for (const dl of dataLines) {
891
+ processUIMessageStreamEvent(dl, callbacks);
934
892
  }
935
- instance = null;
893
+ } else if (format === "data-stream") {
894
+ parseLegacyProtocolLine(buffer.trim(), callbacks);
936
895
  }
937
- if (audioCtx) {
938
- try {
939
- audioCtx.close();
940
- } catch {
896
+ }
897
+ callbacks.onFinish?.({});
898
+ }
899
+ async function parseDataStreamToMessage(response, onUpdate) {
900
+ let textContent = "";
901
+ const parts = [];
902
+ const toolCalls = /* @__PURE__ */ new Map();
903
+ const ensureTextPart = () => {
904
+ for (let i = parts.length - 1; i >= 0; i--) {
905
+ if (parts[i].type === "text") {
906
+ return parts[i];
941
907
  }
942
- audioCtx = null;
943
908
  }
909
+ const textPart = { type: "text", text: "" };
910
+ parts.push(textPart);
911
+ return textPart;
944
912
  };
945
- return {
946
- isSpeaking,
947
- hasPendingAudio,
948
- warmUpAudio,
949
- speak,
950
- feed,
951
- flush,
952
- stop,
953
- destroy,
954
- setOnQueueEmpty
955
- };
956
- }
957
-
958
- function useBubble(options = {}) {
959
- const visible = ref(false);
960
- const fadingOut = ref(false);
961
- const stackRef = ref(null);
962
- let dismissTimer = null;
963
- const hasOpened = ref(false);
964
- const isTTSActive = () => !!(options.isSpeaking?.value || options.hasPendingAudio?.value);
965
- const isBusy = () => !!(options.isInvoking?.value || isTTSActive());
966
- const show = computed(() => {
967
- if (!hasOpened.value) return false;
968
- if (isTTSActive()) return true;
969
- return visible.value && !fadingOut.value;
970
- });
971
- const style = computed(() => ({
972
- width: options.bubbleSize?.width || void 0,
973
- maxHeight: options.bubbleSize?.maxHeight || void 0
974
- }));
975
- const open = () => {
976
- cancelDismiss();
977
- fadingOut.value = false;
978
- visible.value = true;
979
- hasOpened.value = true;
913
+ const findToolPartIndex = (toolCallId) => {
914
+ return parts.findIndex((p) => (p.type === "tool-call" || p.type === "tool-result") && p.toolCallId === toolCallId);
980
915
  };
981
- const cancelDismiss = () => {
982
- if (dismissTimer) {
983
- clearTimeout(dismissTimer);
984
- dismissTimer = null;
985
- }
916
+ const emitUpdate = () => {
917
+ onUpdate({ textContent, parts: [...parts], toolCalls: new Map(toolCalls) });
986
918
  };
987
- const scheduleDismiss = () => {
988
- cancelDismiss();
989
- if (isBusy()) return;
990
- const delay = options.dismissDelay ?? 4e3;
991
- dismissTimer = setTimeout(() => {
992
- if (isBusy()) return;
993
- fadingOut.value = true;
994
- setTimeout(() => {
995
- if (isBusy()) {
996
- fadingOut.value = false;
997
- return;
919
+ await readDataStream(response, {
920
+ onTextDelta(text) {
921
+ textContent += text;
922
+ const textPart = ensureTextPart();
923
+ textPart.text = textContent;
924
+ emitUpdate();
925
+ },
926
+ onToolCallStart(toolCallId, toolName) {
927
+ const tracker = {
928
+ toolCallId,
929
+ toolName,
930
+ argsText: "",
931
+ args: void 0,
932
+ state: "partial-call"
933
+ };
934
+ toolCalls.set(toolCallId, tracker);
935
+ const part = {
936
+ type: "tool-call",
937
+ toolCallId,
938
+ toolName,
939
+ args: void 0,
940
+ state: "partial-call"
941
+ };
942
+ parts.push(part);
943
+ emitUpdate();
944
+ },
945
+ onToolCallDelta(toolCallId, argsTextDelta) {
946
+ const tracker = toolCalls.get(toolCallId);
947
+ if (tracker) {
948
+ tracker.argsText += argsTextDelta;
949
+ try {
950
+ tracker.args = JSON.parse(tracker.argsText);
951
+ } catch {
998
952
  }
999
- visible.value = false;
1000
- fadingOut.value = false;
1001
- hasOpened.value = false;
1002
- }, 400);
1003
- }, delay);
1004
- };
1005
- const watchTTSRef = (ttsRef) => {
1006
- watch(ttsRef, (active) => {
1007
- if (active && hasOpened.value) {
1008
- cancelDismiss();
1009
- if (fadingOut.value) fadingOut.value = false;
1010
- } else if (!active && hasOpened.value && !isBusy()) {
1011
- scheduleDismiss();
953
+ const idx = findToolPartIndex(toolCallId);
954
+ if (idx !== -1 && parts[idx].type === "tool-call") {
955
+ parts[idx].args = tracker.args;
956
+ }
957
+ emitUpdate();
1012
958
  }
1013
- });
1014
- };
1015
- if (options.isSpeaking) watchTTSRef(options.isSpeaking);
1016
- if (options.hasPendingAudio) watchTTSRef(options.hasPendingAudio);
1017
- const hide = () => {
1018
- cancelDismiss();
1019
- fadingOut.value = false;
1020
- visible.value = false;
1021
- hasOpened.value = false;
1022
- };
1023
- const scrollToBottom = () => {
1024
- nextTick(() => {
1025
- if (stackRef.value) {
1026
- stackRef.value.scrollTop = stackRef.value.scrollHeight;
959
+ },
960
+ onToolCallComplete(toolCallId, toolName, args) {
961
+ const tracker = toolCalls.get(toolCallId);
962
+ if (tracker) {
963
+ tracker.state = "call";
964
+ tracker.args = typeof args === "string" ? safeJsonParse(args) : args;
965
+ } else {
966
+ toolCalls.set(toolCallId, {
967
+ toolCallId,
968
+ toolName,
969
+ argsText: typeof args === "string" ? args : JSON.stringify(args),
970
+ args: typeof args === "string" ? safeJsonParse(args) : args,
971
+ state: "call"
972
+ });
1027
973
  }
1028
- });
1029
- };
1030
- const destroy = () => {
1031
- cancelDismiss();
1032
- };
1033
- return {
1034
- visible,
1035
- fadingOut,
1036
- show,
1037
- style,
1038
- stackRef,
1039
- open,
1040
- hide,
1041
- cancelDismiss,
1042
- scheduleDismiss,
1043
- scrollToBottom,
1044
- destroy
1045
- };
1046
- }
1047
-
1048
- const ensureMicrophonePermission = async () => {
1049
- if (typeof navigator === "undefined" || typeof window === "undefined") {
1050
- console.log("当前环境不支持麦克风访问");
1051
- return false;
1052
- }
1053
- if (!navigator.mediaDevices?.getUserMedia || !navigator.mediaDevices?.enumerateDevices) {
1054
- console.log("当前环境不支持麦克风访问");
1055
- return false;
1056
- }
1057
- try {
1058
- const devices = await navigator.mediaDevices.enumerateDevices();
1059
- const audioInputDevices = devices.filter((device) => device.kind === "audioinput");
1060
- if (audioInputDevices.length === 0) {
1061
- console.log("未检测到麦克风设备,请连接麦克风后重试。");
1062
- return false;
1063
- }
1064
- if ("permissions" in navigator && navigator.permissions?.query) {
1065
- try {
1066
- const status = await navigator.permissions.query({ name: "microphone" });
1067
- if (status.state === "denied") {
1068
- console.log("麦克风权限被禁用,请在浏览器设置中开启。");
1069
- return false;
1070
- }
1071
- } catch (e) {
1072
- console.warn("Permission query not supported:", e);
974
+ const idx = findToolPartIndex(toolCallId);
975
+ if (idx !== -1) {
976
+ parts[idx].state = "call";
977
+ parts[idx].toolName = toolName;
978
+ parts[idx].args = toolCalls.get(toolCallId).args;
979
+ } else {
980
+ parts.push({
981
+ type: "tool-call",
982
+ toolCallId,
983
+ toolName,
984
+ args: toolCalls.get(toolCallId).args,
985
+ state: "call"
986
+ });
1073
987
  }
1074
- }
1075
- let stream = null;
1076
- try {
1077
- stream = await navigator.mediaDevices.getUserMedia({
1078
- audio: {
1079
- echoCancellation: true,
1080
- noiseSuppression: true,
1081
- autoGainControl: true
1082
- }
1083
- });
1084
- const audioTracks = stream.getAudioTracks();
1085
- if (audioTracks.length === 0) {
1086
- console.log("无法获取麦克风音频轨道。");
1087
- return false;
988
+ emitUpdate();
989
+ },
990
+ onToolResult(toolCallId, result) {
991
+ const tracker = toolCalls.get(toolCallId);
992
+ if (tracker) {
993
+ tracker.result = result;
994
+ tracker.state = "result";
1088
995
  }
1089
- const activeTrack = audioTracks[0];
1090
- if (!activeTrack.enabled || activeTrack.readyState !== "live") {
1091
- console.log("麦克风设备不可用,请检查设备连接。");
1092
- return false;
996
+ const idx = findToolPartIndex(toolCallId);
997
+ if (idx !== -1) {
998
+ const existing = parts[idx];
999
+ const resultPart = {
1000
+ type: "tool-result",
1001
+ toolCallId,
1002
+ toolName: existing.toolName,
1003
+ args: existing.args,
1004
+ result,
1005
+ state: "result"
1006
+ };
1007
+ parts[idx] = resultPart;
1008
+ } else {
1009
+ parts.push({
1010
+ type: "tool-result",
1011
+ toolCallId,
1012
+ toolName: tracker?.toolName || "unknown",
1013
+ args: tracker?.args,
1014
+ result,
1015
+ state: "result"
1016
+ });
1093
1017
  }
1094
- return true;
1095
- } finally {
1096
- if (stream) {
1097
- stream.getTracks().forEach((track) => track.stop());
1018
+ emitUpdate();
1019
+ },
1020
+ onError(error, data) {
1021
+ const toolCallId = data?.toolCallId;
1022
+ if (toolCallId) {
1023
+ toolCalls.delete(toolCallId);
1024
+ const idx = findToolPartIndex(toolCallId);
1025
+ if (idx !== -1) {
1026
+ parts.splice(idx, 1);
1027
+ emitUpdate();
1028
+ }
1098
1029
  }
1030
+ console.error("[DataStreamParser] stream error:", error);
1031
+ },
1032
+ onStepFinish(_data) {
1033
+ emitUpdate();
1034
+ },
1035
+ onFinish(_data) {
1036
+ emitUpdate();
1099
1037
  }
1100
- } catch (error) {
1101
- console.error("Microphone permission check failed", error);
1102
- if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
1103
- console.log("未检测到麦克风设备,请连接麦克风后重试。");
1104
- } else if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
1105
- console.log("麦克风权限被拒绝,请在浏览器设置中允许访问。");
1106
- } else if (error.name === "NotReadableError" || error.name === "TrackStartError") {
1107
- console.log("麦克风被其他应用占用或无法访问。");
1108
- } else {
1109
- console.log("无法访问麦克风,请检查设备连接和浏览器权限。");
1110
- }
1111
- return false;
1038
+ });
1039
+ return { textContent, parts, toolCalls };
1040
+ }
1041
+ function safeJsonParse(str) {
1042
+ try {
1043
+ return JSON.parse(str);
1044
+ } catch {
1045
+ return str;
1112
1046
  }
1113
- };
1047
+ }
1114
1048
 
1115
- function useVoiceRecognition(options) {
1116
- const voiceStatus = ref("standby");
1117
- const isTranscribing = ref(false);
1118
- const isInitializing = ref(false);
1119
- const transcriptionText = ref("");
1120
- const wakeAnimating = ref(false);
1121
- let detector = null;
1122
- let transcriber = null;
1123
- const initTranscriber = () => {
1124
- if (transcriber) return;
1125
- const vc = options.getVoiceConfig();
1126
- if (!vc || !vc.appId || !vc.apiKey || !vc.websocketUrl) {
1127
- console.error("[VoiceRecognition] 缺少 voiceConfig,无法初始化转写器");
1128
- return;
1049
/**
 * Display labels (Chinese UI text) for the agent tool names known to this
 * package, keyed by tool name.
 *
 * The table has no prototype and is frozen: a plain `toolDisplayNames[name]`
 * lookup therefore cannot resolve inherited members such as `constructor` or
 * `toString` when a tool name comes from the server, and the shared table
 * cannot be mutated by accident.
 */
const toolDisplayNames = Object.freeze(
  Object.assign(Object.create(null), {
    generateReport: "生成报告",
    searchKnowledge: "知识库检索",
    resolveInstanceTargets: "解析实例目标",
    getHistoryMetrics: "历史数据查询",
    getRealtimeMetrics: "实时数据查询",
    queryBitableData: "多维表格查询",
    searchUser: "搜索用户",
    createBitableRecord: "创建表格记录",
    timeTool: "时间工具",
    loadSkill: "加载技能",
    executeCommand: "执行命令",
    dataAnalyzer: "数据分析",
    dataPredictor: "数据预测"
  })
);
1064
/**
 * Composable that sends a user question to the agent endpoint and exposes the
 * reply (text, tool steps, host commands being executed) as reactive state.
 *
 * The response is handled in one of two ways, chosen by its content type:
 * `application/json` bodies are read in one go, anything else is handed to
 * `parseDataStreamToMessage` and applied incrementally.
 *
 * @param {object} options
 * @param {string} options.endpoint - URL the question is POSTed to.
 * @param {string} [options.appToken] - Sent as a Bearer token; empty when omitted.
 * @param {string} [options.projectId] - Sent in the request body; empty when omitted.
 * @param {object} options.aiChatbotX - Host bridge; `getCommads()` and
 *   `executeCommand(name, args)` are used.
 * @param {object} options.tts - Speech sink; `speak`, `feed`, `flush` and `stop` are used.
 * @param {object} options.bubble - Bubble controls; `open`, `scrollToBottom` and
 *   `scheduleDismiss` are used.
 * @param {number} [options.sessionTimeoutMs=120000] - Idle time after which the
 *   recorded history is dropped on the next call.
 * @param {number} [options.maxHistoryTurns=10] - History is capped at twice this
 *   many entries.
 * @returns {object} `isInvoking`, `currentTextContent`, `currentToolParts`,
 *   `executingTools`, `hasAnyContent`, `conversationHistory` (reactive state)
 *   plus `toolDisplayName`, `invoke`, `abort`, `resetState`, `clearHistory`.
 */
function useAgentInvoke(options) {
  const { aiChatbotX, tts, bubble } = options;
  const sessionTimeoutMs = options.sessionTimeoutMs ?? 12e4;
  const maxHistoryTurns = options.maxHistoryTurns ?? 10;
  const isInvoking = ref(false);
  const currentTextContent = ref("");
  const currentToolParts = ref([]);
  const executingTools = ref(new Set());
  const conversationHistory = ref([]);
  let lastInteractionTime = 0;
  let abortController = null;
  // Incremented on every invoke(); lets an older, superseded call detect that
  // it no longer owns the shared state.
  let invocationSeq = 0;

  const hasAnyContent = computed(() => {
    return !!(currentTextContent.value || currentToolParts.value.length > 0);
  });

  // Own-property lookup so names like "constructor" fall back to the raw name.
  const toolDisplayName = (name) => {
    return Object.hasOwn(toolDisplayNames, name) ? toolDisplayNames[name] || name : name;
  };

  // Drop the recorded history once the previous interaction is older than the timeout.
  const checkSessionTimeout = () => {
    if (lastInteractionTime > 0 && Date.now() - lastInteractionTime > sessionTimeoutMs) {
      conversationHistory.value = [];
    }
  };

  // Record one history entry and keep only the newest `maxHistoryTurns * 2`.
  // NOTE: the history is recorded but is not part of the request body.
  const appendToHistory = (role, content) => {
    conversationHistory.value.push({ role, content });
    const maxLen = maxHistoryTurns * 2;
    if (conversationHistory.value.length > maxLen) {
      // slice(-0) would return the whole array, so a zero cap is handled explicitly.
      conversationHistory.value = maxLen > 0 ? conversationHistory.value.slice(-maxLen) : [];
    }
  };

  const clearHistory = () => {
    conversationHistory.value = [];
  };

  const resetState = () => {
    currentTextContent.value = "";
    currentToolParts.value = [];
    executingTools.value = new Set();
  };

  // Pull `{ name, args }` pairs out of a payload shaped like `{ commands: [...] }`.
  const extractExecutableCommands = (payload) => {
    if (!payload || typeof payload !== "object") return [];
    const commands = payload.commands;
    if (!Array.isArray(commands) || commands.length === 0) return [];
    return commands
      .filter((cmd) => cmd && typeof cmd === "object" && typeof cmd.name === "string" && cmd.name.trim())
      .map((cmd) => ({
        name: cmd.name,
        args: Array.isArray(cmd.args) ? cmd.args : []
      }));
  };

  const buildCommandDefinitionMap = (commands) => {
    return new Map(commands.map((command) => [command.name, command]));
  };

  // Treat the tool itself as a host command: an array payload is used as the
  // argument list, an object payload is mapped onto the declared parameter order.
  const toExecutableCommand = (toolName, payload, commandDefinitions) => {
    const commandDefinition = commandDefinitions.get(toolName);
    if (!commandDefinition) {
      return null;
    }
    if (Array.isArray(payload)) {
      return { name: toolName, args: payload };
    }
    if (!payload || typeof payload !== "object") {
      return { name: toolName, args: [] };
    }
    const parameters = Array.isArray(commandDefinition.parameters) ? commandDefinition.parameters : [];
    return {
      name: toolName,
      args: parameters.map((parameter) => payload[parameter.name])
    };
  };

  // An explicit `commands` list in the payload wins over the direct tool mapping.
  const resolveExecutableCommands = (toolName, payload, commandDefinitions) => {
    const extractedCommands = extractExecutableCommands(payload);
    if (extractedCommands.length > 0) {
      return extractedCommands;
    }
    const directCommand = toExecutableCommand(toolName, payload, commandDefinitions);
    return directCommand ? [directCommand] : [];
  };

  // Run the host commands for one tool call, one after another. The tool call
  // id is listed in `executingTools` while they run. A failing command is
  // logged and does not stop the remaining ones.
  // Resolves to false when there was nothing to run, true otherwise.
  const executeHostCommands = async (toolCallId, toolName, payload, commandDefinitions) => {
    const commands = resolveExecutableCommands(toolName, payload, commandDefinitions);
    if (commands.length === 0) return false;
    try {
      executingTools.value = new Set([...executingTools.value, toolCallId]);
      for (const cmd of commands) {
        try {
          await aiChatbotX.executeCommand(cmd.name, cmd.args);
        } catch (cmdErr) {
          console.error(`[AgentInvoke] 执行命令 ${cmd.name} 失败:`, cmdErr);
        }
      }
      return true;
    } finally {
      const next = new Set(executingTools.value);
      next.delete(toolCallId);
      executingTools.value = next;
    }
  };

  // Find displayable text in a JSON reply: output / answer / message / result,
  // first on the payload, then on `payload.data`; otherwise the serialized payload.
  const parseAssistantText = (payload) => {
    if (!payload) return "";
    if (typeof payload === "string") return payload;
    if (typeof payload === "object") {
      const directText = payload.output || payload.answer || payload.message || payload.result;
      if (typeof directText === "string" && directText.trim()) return directText;
      if (payload.data && typeof payload.data === "object") {
        const nested = payload.data;
        const nestedText = nested.output || nested.answer || nested.message || nested.result;
        if (typeof nestedText === "string" && nestedText.trim()) return nestedText;
      }
      return JSON.stringify(payload);
    }
    return String(payload);
  };

  // Cancel the request in flight (if any) and stop speech output.
  const abort = () => {
    if (abortController) {
      abortController.abort();
      abortController = null;
    }
    tts.stop();
    isInvoking.value = false;
  };

  /**
   * Ask the agent one question. Any call still in flight is aborted first.
   * Blank input is ignored. Failures are logged and reported through
   * `currentTextContent`; the returned promise does not reject for them.
   *
   * @param {string} question
   * @returns {Promise<void>}
   */
  const invoke = async (question) => {
    const content = question.trim();
    if (!content) return;
    abort();
    checkSessionTimeout();
    resetState();
    tts.stop();
    isInvoking.value = true;
    bubble.open();

    const invocationId = ++invocationSeq;
    const isCurrent = () => invocationId === invocationSeq;
    const controller = new AbortController();
    abortController = controller;
    let prevTextLength = 0;
    const processedToolResults = new Set();
    const processingToolResults = new Set();

    try {
      // `getCommads` (sic) is the method name exposed by the host bridge.
      // It is awaited inside the try so a failing host cannot leave
      // `isInvoking` stuck at true.
      const hostCommands = await aiChatbotX.getCommads();
      const commands = Array.isArray(hostCommands) ? hostCommands : [];
      const commandDefinitions = buildCommandDefinitionMap(commands);

      // Run the host commands for one streamed tool part at most once; the id
      // is parked in `processingToolResults` while the commands are running.
      const runToolPartOnce = async (toolCallId, toolName, payload) => {
        processingToolResults.add(toolCallId);
        try {
          const executed = await executeHostCommands(toolCallId, toolName, payload, commandDefinitions);
          if (executed) {
            processedToolResults.add(toolCallId);
          }
        } catch (toolErr) {
          console.error("[AgentInvoke] invoke failed:", toolErr);
        } finally {
          processingToolResults.delete(toolCallId);
        }
      };

      const response = await fetch(options.endpoint, {
        method: "POST",
        headers: { "Content-Type": "application/json", Authorization: `Bearer ${options.appToken || ""}` },
        body: JSON.stringify({
          input: content,
          projectId: options.projectId || "",
          commands: commands.length > 0 ? commands : void 0
        }),
        signal: controller.signal
      });
      if (!response.ok) throw new Error(`HTTP ${response.status}`);
      const contentType = response.headers.get("content-type") || "";

      if (contentType.includes("application/json")) {
        const data = await response.json();
        if (!isCurrent()) return;
        const reply = parseAssistantText(data) || "已收到,但没有返回可展示的文本内容。";
        currentTextContent.value = reply;
        tts.speak(reply);
        appendToHistory("user", content);
        appendToHistory("assistant", reply);
        if (data.toolResults && Array.isArray(data.toolResults)) {
          for (const tr of data.toolResults) {
            const toolPart = {
              type: "tool-result",
              toolCallId: `invoke-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
              toolName: tr.toolName,
              args: tr.args,
              result: tr.result,
              state: "result"
            };
            currentToolParts.value = [...currentToolParts.value, toolPart];
            if (commandDefinitions.has(tr.toolName)) {
              // Deliberately not awaited: command execution runs alongside the reply.
              void executeHostCommands(toolPart.toolCallId, tr.toolName, tr.result, commandDefinitions).catch(
                (toolErr) => console.error("[AgentInvoke] invoke failed:", toolErr)
              );
            }
          }
        }
      } else {
        await parseDataStreamToMessage(response, (result) => {
          // A newer invoke() owns the shared state now.
          if (!isCurrent()) return;
          currentTextContent.value = result.textContent;
          if (result.textContent.length > prevTextLength) {
            // Only the newly arrived text is fed to speech.
            const delta = result.textContent.slice(prevTextLength);
            prevTextLength = result.textContent.length;
            tts.feed(delta);
          }
          const toolParts = result.parts.filter(
            (p) => p.type === "tool-call" || p.type === "tool-result"
          );
          currentToolParts.value = toolParts;
          for (const part of toolParts) {
            const alreadyHandled = processedToolResults.has(part.toolCallId) || processingToolResults.has(part.toolCallId);
            if (!commandDefinitions.has(part.toolName) || alreadyHandled) continue;
            if (part.type === "tool-call" && part.state === "call" && part.args) {
              void runToolPartOnce(part.toolCallId, part.toolName, part.args);
            } else if (part.type === "tool-result" && part.result) {
              void runToolPartOnce(part.toolCallId, part.toolName, part.result);
            }
          }
          bubble.scrollToBottom();
        });
        if (!isCurrent()) return;
        tts.flush();
        const assistantReply = currentTextContent.value.trim();
        appendToHistory("user", content);
        if (assistantReply) {
          appendToHistory("assistant", assistantReply);
        }
        if (!assistantReply && currentToolParts.value.length === 0) {
          currentTextContent.value = "已收到,但没有返回可展示的文本内容。";
        }
      }
    } catch (error) {
      // Aborted or superseded calls end silently; the newer call reports its own outcome.
      if (error?.name === "AbortError" || !isCurrent()) {
        return;
      }
      console.error("[AgentInvoke] invoke failed:", error);
      tts.stop();
      currentTextContent.value = "请求失败,请检查服务地址或稍后重试。";
    } finally {
      // Only the latest call may reset the shared flags. Otherwise the cleanup
      // of an aborted call would clear `isInvoking` / `abortController` while
      // its successor is still running.
      if (isCurrent()) {
        isInvoking.value = false;
        abortController = null;
        lastInteractionTime = Date.now();
        bubble.scheduleDismiss();
      }
    }
  };

  return {
    isInvoking,
    currentTextContent,
    currentToolParts,
    executingTools,
    hasAnyContent,
    conversationHistory,
    toolDisplayName,
    invoke,
    abort,
    resetState,
    clearHistory
  };
}
1317
+
1318
/**
 * Composable that controls the visibility of the reply bubble: open it, keep
 * it up while the agent or speech output is busy, then fade it out after a delay.
 *
 * @param {object} [options]
 * @param {number} [options.dismissDelay=4000] - Milliseconds to wait before the fade starts.
 * @param {{value: boolean}} [options.isInvoking] - Ref; truthy keeps the bubble from dismissing.
 * @param {{value: boolean}} [options.isSpeaking] - Ref; truthy keeps the bubble shown.
 * @param {{value: boolean}} [options.hasPendingAudio] - Ref; truthy keeps the bubble shown.
 * @param {{width?: string, maxHeight?: string}} [options.bubbleSize] - Copied into `style`.
 * @returns {object} `visible`, `fadingOut`, `show`, `style`, `stackRef` (reactive)
 *   plus `open`, `hide`, `cancelDismiss`, `scheduleDismiss`, `scrollToBottom`, `destroy`.
 */
function useBubble(options = {}) {
  // Duration of the fade-out phase between "fading" and "hidden".
  const FADE_OUT_MS = 400;
  const visible = ref(false);
  const fadingOut = ref(false);
  const stackRef = ref(null);
  const hasOpened = ref(false);
  let dismissTimer = null;
  // Tracked separately so open() / hide() / destroy() can cancel a fade that
  // has already started. An untracked fade timer would hide a bubble that was
  // reopened in the meantime and would write state after destroy().
  let fadeTimer = null;

  const isTTSActive = () => !!(options.isSpeaking?.value || options.hasPendingAudio?.value);
  const isBusy = () => !!(options.isInvoking?.value || isTTSActive());

  const show = computed(() => {
    if (!hasOpened.value) return false;
    if (isTTSActive()) return true;
    return visible.value && !fadingOut.value;
  });
  const style = computed(() => ({
    width: options.bubbleSize?.width || void 0,
    maxHeight: options.bubbleSize?.maxHeight || void 0
  }));

  // Cancel the pending dismiss delay. A fade already in progress is left alone.
  const cancelDismiss = () => {
    if (dismissTimer !== null) {
      clearTimeout(dismissTimer);
      dismissTimer = null;
    }
  };
  const cancelFade = () => {
    if (fadeTimer !== null) {
      clearTimeout(fadeTimer);
      fadeTimer = null;
    }
  };

  const open = () => {
    cancelDismiss();
    cancelFade();
    fadingOut.value = false;
    visible.value = true;
    hasOpened.value = true;
  };

  // (Re)start the dismiss countdown; does nothing while the agent or TTS is busy.
  const scheduleDismiss = () => {
    cancelDismiss();
    if (isBusy()) return;
    const delay = options.dismissDelay ?? 4e3;
    dismissTimer = setTimeout(() => {
      dismissTimer = null;
      if (isBusy()) return;
      fadingOut.value = true;
      cancelFade();
      fadeTimer = setTimeout(() => {
        fadeTimer = null;
        if (isBusy()) {
          // Work resumed during the fade: bring the bubble back.
          fadingOut.value = false;
          return;
        }
        visible.value = false;
        fadingOut.value = false;
        hasOpened.value = false;
      }, FADE_OUT_MS);
    }, delay);
  };

  // Keep the bubble up while a TTS flag is set; restart the countdown once it clears.
  const watchTTSRef = (ttsRef) => {
    watch(ttsRef, (active) => {
      if (active && hasOpened.value) {
        cancelDismiss();
        cancelFade();
        if (fadingOut.value) fadingOut.value = false;
      } else if (!active && hasOpened.value && !isBusy()) {
        scheduleDismiss();
      }
    });
  };
  if (options.isSpeaking) watchTTSRef(options.isSpeaking);
  if (options.hasPendingAudio) watchTTSRef(options.hasPendingAudio);

  const hide = () => {
    cancelDismiss();
    cancelFade();
    fadingOut.value = false;
    visible.value = false;
    hasOpened.value = false;
  };

  // Scroll the bubble container to its end after the next DOM update.
  const scrollToBottom = () => {
    nextTick(() => {
      if (stackRef.value) {
        stackRef.value.scrollTop = stackRef.value.scrollHeight;
      }
    });
  };

  // Drop every pending timer; call when the owning component goes away.
  const destroy = () => {
    cancelDismiss();
    cancelFade();
  };

  return {
    visible,
    fadingOut,
    show,
    style,
    stackRef,
    open,
    hide,
    cancelDismiss,
    scheduleDismiss,
    scrollToBottom,
    destroy
  };
}
1276
1407
 
1277
- const DATA_STREAM_LINE_RE = /^[0-9a-f]:/;
1278
- function detectFormat(firstChunk) {
1279
- const trimmed = firstChunk.trimStart();
1280
- if (trimmed.startsWith("data:")) {
1281
- const firstLine = trimmed.split("\n")[0];
1282
- const payload = firstLine.slice(5).trim();
1283
- try {
1284
- const parsed = JSON.parse(payload);
1285
- if (parsed && typeof parsed.type === "string") {
1286
- return "ui-message-stream";
1287
- }
1288
- } catch {
1289
- }
1290
- if (DATA_STREAM_LINE_RE.test(payload)) {
1291
- return "data-stream";
1292
- }
1293
- return "ui-message-stream";
1408
/** Injection key under which the host bridge object is provided. */
const AiChatbotXKey = Symbol("sime-x");

/**
 * `inject()` that throws instead of handing back a missing value.
 *
 * Only `undefined` / `null` count as "not provided", so legitimately falsy
 * provided values (`0`, `""`, `false`) are returned rather than rejected.
 *
 * @param {symbol|string} key - Injection key.
 * @param {*} [defaultValue] - Fallback value, or a factory for it.
 * @param {boolean} [treatDefaultAsFactory] - When `true`, `defaultValue` is
 *   passed to `inject` as a factory.
 * @returns {*} The injected (or default) value.
 * @throws {Error} When nothing was provided and no usable default exists.
 */
function injectStrict(key, defaultValue, treatDefaultAsFactory) {
  let result;
  // The call arity is kept distinct on purpose: the default is only forwarded
  // to `inject` when the caller actually supplied one.
  if (defaultValue === void 0) {
    result = inject(key);
  } else if (treatDefaultAsFactory === true) {
    result = inject(key, defaultValue, true);
  } else {
    result = inject(key, defaultValue, false);
  }
  if (result === void 0 || result === null) {
    // String keys have no `description`; name them directly.
    const label = typeof key === "symbol" ? (key.description ?? key.toString()) : String(key);
    throw new Error(`Could not resolve ${label}`);
  }
  return result;
}
1300
- function processUIMessageStreamEvent(payload, callbacks) {
1301
- const trimmed = payload.trim();
1302
- if (!trimmed || trimmed === "[DONE]") {
1303
- callbacks.onFinish?.({});
1304
- return;
1305
- }
1306
- let parsed;
1307
- try {
1308
- parsed = JSON.parse(trimmed);
1309
- } catch {
1310
- console.warn("[DataStreamParser] failed to parse UI message stream event:", trimmed.slice(0, 100));
1311
- return;
1312
- }
1313
- const type = parsed?.type;
1314
- if (!type) return;
1315
- switch (type) {
1316
- case "text-delta":
1317
- if (typeof parsed.delta === "string") {
1318
- callbacks.onTextDelta?.(parsed.delta);
1423
+
1424
// Static vnode props hoisted out of the "command-test" render function.
const _hoisted_1$1 = { class: "agent-bubble" };
const _hoisted_2$1 = {
  key: 0,
  class: "tool-steps"
};
const _hoisted_3$1 = { class: "tool-step__icon" };
const _hoisted_4$1 = {
  key: 0,
  class: "tool-step__spinner",
  width: "14",
  height: "14",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_5$1 = {
  key: 1,
  width: "14",
  height: "14",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_6$1 = {
  key: 2,
  width: "14",
  height: "14",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_7$1 = { class: "tool-step__name" };
const _hoisted_8$1 = {
  key: 0,
  class: "tool-step__tag tool-step__tag--exec"
};
const _hoisted_9$1 = {
  key: 1,
  class: "thinking-dots"
};
const _hoisted_10$1 = {
  key: 2,
  class: "agent-text"
};
const _hoisted_11$1 = { class: "input-bar" };
const _hoisted_12$1 = ["disabled"];
const _hoisted_13$1 = ["disabled"];
const _hoisted_14 = {
  key: 0,
  class: "btn-spinner",
  width: "18",
  height: "18",
  viewBox: "0 0 24 24",
  fill: "none"
};
const _hoisted_15 = {
  key: 1,
  width: "18",
  height: "18",
  viewBox: "0 0 24 24",
  fill: "none"
};
const currentTheme$1 = "dark";

/**
 * "command-test" component: a text input with a submit button that sends the
 * typed instruction to the agent, plus a bubble showing the tool steps, a
 * "thinking" indicator and the reply text.
 *
 * Props: `agentId` (used in the endpoint URL), `projectId` (sent with every
 * request), `bubbleSize` and `bubbleDismissDelay` (default 8000 ms), both
 * passed to `useBubble`.
 */
const _sfc_main$2 = /* @__PURE__ */ defineComponent({
  __name: "command-test",
  props: {
    agentId: {},
    projectId: {},
    bubbleSize: {},
    bubbleDismissDelay: {}
  },
  setup(__props) {
    const props = __props;
    const aiChatbotX = injectStrict(AiChatbotXKey);
    const inputText = ref("");
    // This component has no speech output; the agent still expects a TTS sink.
    const noopTts = {
      speak: (_text) => {
      },
      feed: (_delta) => {
      },
      flush: () => {
      },
      stop: () => {
      }
    };
    // The agent needs bubble controls and the bubble needs `agent.isInvoking`,
    // so the agent is wired to this bridge first and the real bubble functions
    // are filled in once the bubble exists.
    const bubbleBridge = {
      open: () => {
      },
      scheduleDismiss: () => {
      },
      scrollToBottom: () => {
      }
    };
    const endpoint = `/sime/proxy/organizations/${aiChatbotX.organizationId()}/agents/${props.agentId}/stream-invoke`;
    const agent = useAgentInvoke({
      endpoint,
      appToken: aiChatbotX.appToken(),
      projectId: props.projectId,
      aiChatbotX,
      tts: noopTts,
      bubble: {
        open: () => bubbleBridge.open(),
        scheduleDismiss: () => bubbleBridge.scheduleDismiss(),
        scrollToBottom: () => bubbleBridge.scrollToBottom()
      }
    });
    const bubble = useBubble({
      dismissDelay: props.bubbleDismissDelay ?? 8e3,
      isInvoking: agent.isInvoking,
      bubbleSize: props.bubbleSize
    });
    bubbleBridge.open = bubble.open;
    bubbleBridge.scheduleDismiss = bubble.scheduleDismiss;
    bubbleBridge.scrollToBottom = bubble.scrollToBottom;
    const { show: showBubble, style: bubbleStyle, stackRef: bubbleStackRef } = bubble;
    // Shared by the Enter key and the submit button.
    const handleSubmit = (event) => {
      // Enter pressed while an IME composition is active only confirms the
      // candidate text and must not submit. keyCode 229 covers browsers that
      // report the confirming keydown with `isComposing` already false.
      if (event?.isComposing || event?.keyCode === 229) return;
      const text = inputText.value.trim();
      if (!text || agent.isInvoking.value) return;
      inputText.value = "";
      // Fire and forget, but never leave a rejection unhandled.
      Promise.resolve(agent.invoke(text)).catch((error) => {
        console.error("[command-test] invoke failed:", error);
      });
    };
    const { isInvoking, currentTextContent, currentToolParts, executingTools, hasAnyContent, toolDisplayName } = agent;
    onBeforeUnmount(() => {
      bubble.destroy();
      agent.abort();
    });
    return (_ctx, _cache) => {
      return openBlock(), createElementBlock("div", {
        class: "command-test",
        "data-theme": currentTheme$1
      }, [
        createVNode(Transition, { name: "bubble-fade" }, {
          default: withCtx(() => [
            unref(showBubble) ? (openBlock(), createElementBlock("div", {
              key: 0,
              class: "bubble-stack",
              ref_key: "bubbleStackRef",
              ref: bubbleStackRef,
              style: normalizeStyle(unref(bubbleStyle))
            }, [
              createElementVNode("div", _hoisted_1$1, [
                unref(currentToolParts).length > 0 ? (openBlock(), createElementBlock("div", _hoisted_2$1, [
                  (openBlock(true), createElementBlock(Fragment, null, renderList(unref(currentToolParts), (toolPart) => {
                    return openBlock(), createElementBlock("div", {
                      key: toolPart.toolCallId,
                      class: normalizeClass(["tool-step", {
                        "tool-step--loading": toolPart.state === "partial-call" || toolPart.state === "call",
                        "tool-step--done": toolPart.state === "result",
                        "tool-step--error": toolPart.state === "error",
                        "tool-step--executing": unref(executingTools).has(toolPart.toolCallId)
                      }])
                    }, [
                      createElementVNode("span", _hoisted_3$1, [
                        toolPart.state === "partial-call" || toolPart.state === "call" ? (openBlock(), createElementBlock("svg", _hoisted_4$1, [..._cache[1] || (_cache[1] = [
                          createElementVNode("circle", {
                            cx: "12",
                            cy: "12",
                            r: "10",
                            stroke: "currentColor",
                            "stroke-width": "2.5",
                            "stroke-linecap": "round",
                            "stroke-dasharray": "31.4 31.4"
                          }, null, -1)
                        ])])) : toolPart.state === "result" ? (openBlock(), createElementBlock("svg", _hoisted_5$1, [..._cache[2] || (_cache[2] = [
                          createElementVNode("path", {
                            d: "M20 6L9 17l-5-5",
                            stroke: "currentColor",
                            "stroke-width": "2.5",
                            "stroke-linecap": "round",
                            "stroke-linejoin": "round"
                          }, null, -1)
                        ])])) : toolPart.state === "error" ? (openBlock(), createElementBlock("svg", _hoisted_6$1, [..._cache[3] || (_cache[3] = [
                          createElementVNode("circle", {
                            cx: "12",
                            cy: "12",
                            r: "10",
                            stroke: "currentColor",
                            "stroke-width": "2"
                          }, null, -1),
                          createElementVNode("path", {
                            d: "M15 9l-6 6M9 9l6 6",
                            stroke: "currentColor",
                            "stroke-width": "2",
                            "stroke-linecap": "round"
                          }, null, -1)
                        ])])) : createCommentVNode("", true)
                      ]),
                      createElementVNode("span", _hoisted_7$1, toDisplayString(unref(toolDisplayName)(toolPart.toolName)), 1),
                      unref(executingTools).has(toolPart.toolCallId) ? (openBlock(), createElementBlock("span", _hoisted_8$1, "命令执行中")) : createCommentVNode("", true)
                    ], 2);
                  }), 128))
                ])) : createCommentVNode("", true),
                unref(isInvoking) && !unref(hasAnyContent) ? (openBlock(), createElementBlock("div", _hoisted_9$1, [..._cache[4] || (_cache[4] = [
                  createElementVNode("span", null, null, -1),
                  createElementVNode("span", null, null, -1),
                  createElementVNode("span", null, null, -1)
                ])])) : createCommentVNode("", true),
                unref(currentTextContent) ? (openBlock(), createElementBlock("div", _hoisted_10$1, toDisplayString(unref(currentTextContent)), 1)) : createCommentVNode("", true)
              ])
            ], 4)) : createCommentVNode("", true)
          ]),
          _: 1
        }),
        createElementVNode("div", _hoisted_11$1, [
          withDirectives(createElementVNode("input", {
            "onUpdate:modelValue": _cache[0] || (_cache[0] = ($event) => inputText.value = $event),
            type: "text",
            class: "input-field",
            placeholder: "输入指令...",
            disabled: unref(isInvoking),
            onKeydown: withKeys(handleSubmit, ["enter"])
          }, null, 40, _hoisted_12$1), [
            [vModelText, inputText.value]
          ]),
          createElementVNode("button", {
            class: "submit-btn",
            disabled: unref(isInvoking) || !inputText.value.trim(),
            onClick: handleSubmit
          }, [
            unref(isInvoking) ? (openBlock(), createElementBlock("svg", _hoisted_14, [..._cache[5] || (_cache[5] = [
              createElementVNode("circle", {
                cx: "12",
                cy: "12",
                r: "10",
                stroke: "currentColor",
                "stroke-width": "2.5",
                "stroke-linecap": "round",
                "stroke-dasharray": "31.4 31.4"
              }, null, -1)
            ])])) : (openBlock(), createElementBlock("svg", _hoisted_15, [..._cache[6] || (_cache[6] = [
              createElementVNode("path", {
                d: "M22 2L11 13",
                stroke: "currentColor",
                "stroke-width": "2",
                "stroke-linecap": "round",
                "stroke-linejoin": "round"
              }, null, -1),
              createElementVNode("path", {
                d: "M22 2L15 22l-4-9-9-4 20-7z",
                stroke: "currentColor",
                "stroke-width": "2",
                "stroke-linecap": "round",
                "stroke-linejoin": "round"
              }, null, -1)
            ])]))
          ], 8, _hoisted_13$1)
        ])
      ]);
    };
  }
});

// The component with its scoped-style id attached.
const commandTest = /* @__PURE__ */ _export_sfc(_sfc_main$2, [["__scopeId", "data-v-5c7468c4"]]);
1674
+
1675
+ class CommandManager {
1676
+ commands = /* @__PURE__ */ new Map();
1677
+ debug;
1678
+ constructor(options = {}) {
1679
+ this.debug = options.debug ?? false;
1371
1680
  }
1372
- switch (code) {
1373
- case "0":
1374
- callbacks.onTextDelta?.(value);
1375
- break;
1376
- case "9":
1377
- callbacks.onToolCallStart?.(value.toolCallId, value.toolName);
1378
- break;
1379
- case "b":
1380
- callbacks.onToolCallDelta?.(value.toolCallId, value.argsTextDelta);
1381
- break;
1382
- case "c":
1383
- callbacks.onToolCallComplete?.(value.toolCallId, value.toolName, value.args);
1384
- break;
1385
- case "a":
1386
- callbacks.onToolResult?.(value.toolCallId, value.result);
1387
- break;
1388
- case "e":
1389
- callbacks.onStepFinish?.(value);
1390
- break;
1391
- case "d":
1392
- callbacks.onFinish?.(value);
1393
- break;
1394
- case "3":
1395
- callbacks.onError?.(value);
1396
- break;
1681
+ registerCommand(command) {
1682
+ this.commands.set(command.name, command);
1683
+ this.log("注册命令", `${command.name}: ${command.description}`);
1397
1684
  }
1398
- }
1399
- async function readDataStream(response, callbacks) {
1400
- if (!response.body) return;
1401
- const reader = response.body.getReader();
1402
- const decoder = new TextDecoder();
1403
- let buffer = "";
1404
- let format = null;
1405
- while (true) {
1406
- const { value, done } = await reader.read();
1407
- if (done) break;
1408
- const chunk = decoder.decode(value, { stream: true });
1409
- buffer += chunk;
1410
- if (format === null && buffer.trim().length > 0) {
1411
- format = detectFormat(buffer);
1412
- console.log("[DataStreamParser] detected format:", format, "| first 200 chars:", buffer.slice(0, 200));
1413
- }
1414
- if (format === "plain-text") {
1415
- const text = buffer;
1416
- buffer = "";
1417
- if (text) callbacks.onTextDelta?.(text);
1418
- continue;
1419
- }
1420
- if (format === "ui-message-stream") {
1421
- while (true) {
1422
- const eventEnd = buffer.indexOf("\n\n");
1423
- if (eventEnd === -1) break;
1424
- const eventBlock = buffer.slice(0, eventEnd);
1425
- buffer = buffer.slice(eventEnd + 2);
1426
- const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
1427
- for (const dataLine of dataLines) {
1428
- processUIMessageStreamEvent(dataLine, callbacks);
1429
- }
1430
- }
1431
- continue;
1685
+ unregisterCommand(name) {
1686
+ const deleted = this.commands.delete(name);
1687
+ if (deleted) {
1688
+ this.log("命令已注销", name);
1432
1689
  }
1433
- if (format === "data-stream") {
1434
- const isSSEWrapped = buffer.trimStart().startsWith("data:");
1435
- if (isSSEWrapped) {
1436
- while (true) {
1437
- const eventEnd = buffer.indexOf("\n\n");
1438
- if (eventEnd === -1) break;
1439
- const eventBlock = buffer.slice(0, eventEnd);
1440
- buffer = buffer.slice(eventEnd + 2);
1441
- const dataLines = eventBlock.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
1442
- for (const dl of dataLines) {
1443
- const t = dl.trim();
1444
- if (!t || t === "[DONE]") {
1445
- if (t === "[DONE]") callbacks.onFinish?.({});
1446
- continue;
1447
- }
1448
- parseLegacyProtocolLine(t, callbacks);
1449
- }
1450
- }
1451
- } else {
1452
- while (true) {
1453
- const newlineIdx = buffer.indexOf("\n");
1454
- if (newlineIdx === -1) break;
1455
- const line = buffer.slice(0, newlineIdx).trim();
1456
- buffer = buffer.slice(newlineIdx + 1);
1457
- if (line) parseLegacyProtocolLine(line, callbacks);
1458
- }
1459
- }
1460
- continue;
1690
+ }
1691
+ async executeCommand(command, args = []) {
1692
+ const commandDef = this.commands.get(command);
1693
+ if (!commandDef) {
1694
+ throw new Error(`命令 "${command}" 未找到`);
1461
1695
  }
1696
+ this.log("执行命令", command, args);
1697
+ return await commandDef.handler(...args);
1462
1698
  }
1463
- const tail = decoder.decode();
1464
- if (tail) buffer += tail;
1465
- if (buffer.trim()) {
1466
- if (format === "plain-text") {
1467
- callbacks.onTextDelta?.(buffer);
1468
- } else if (format === "ui-message-stream") {
1469
- const dataLines = buffer.split(/\r?\n/).filter((l) => l.startsWith("data:")).map((l) => l.slice(5).trimStart());
1470
- for (const dl of dataLines) {
1471
- processUIMessageStreamEvent(dl, callbacks);
1472
- }
1473
- } else if (format === "data-stream") {
1474
- parseLegacyProtocolLine(buffer.trim(), callbacks);
1699
+ getCommands() {
1700
+ return Array.from(this.commands.values()).map((cmd) => ({
1701
+ name: cmd.name,
1702
+ description: cmd.description,
1703
+ parameters: cmd.parameters
1704
+ }));
1705
+ }
1706
+ hasCommand(name) {
1707
+ return this.commands.has(name);
1708
+ }
1709
+ clear() {
1710
+ this.commands.clear();
1711
+ this.log("", "所有命令已清空");
1712
+ }
1713
+ log(prefix, msg, ...args) {
1714
+ (/* @__PURE__ */ new Date()).toLocaleTimeString([], {
1715
+ hour: "2-digit",
1716
+ minute: "2-digit",
1717
+ second: "2-digit"
1718
+ });
1719
+ console.log(
1720
+ `%c ${prefix}`,
1721
+ "background:#7c3aed;color:white;padding:2px 6px;border-radius:3px 0 0 3px;font-weight:bold;",
1722
+ `${msg}`
1723
+ );
1724
+ if (args.length > 0) {
1725
+ console.log(...args);
1475
1726
  }
1476
1727
  }
1477
- callbacks.onFinish?.({});
1478
1728
  }
1479
- async function parseDataStreamToMessage(response, onUpdate) {
1480
- let textContent = "";
1481
- const parts = [];
1482
- const toolCalls = /* @__PURE__ */ new Map();
1483
- const ensureTextPart = () => {
1484
- for (let i = parts.length - 1; i >= 0; i--) {
1485
- if (parts[i].type === "text") {
1486
- return parts[i];
1729
+
1730
+ const _sfc_main$1 = /* @__PURE__ */ defineComponent({
1731
+ __name: "sime-provider",
1732
+ props: {
1733
+ appToken: {},
1734
+ organizationId: {}
1735
+ },
1736
+ setup(__props) {
1737
+ const props = __props;
1738
+ const commandManager = shallowRef(new CommandManager({ debug: false }));
1739
+ const startListeningRef = shallowRef(async () => {
1740
+ });
1741
+ const stopListeningRef = shallowRef(async () => {
1742
+ });
1743
+ const stopBroadcastRef = shallowRef(async () => {
1744
+ });
1745
+ provide(AiChatbotXKey, {
1746
+ appToken: () => props.appToken,
1747
+ organizationId: () => props.organizationId,
1748
+ startListening: () => startListeningRef.value(),
1749
+ stopListening: () => stopListeningRef.value(),
1750
+ stopBroadcast: () => stopBroadcastRef.value(),
1751
+ registerVoiceMethods: (methods) => {
1752
+ if (methods.stopBroadcast) stopBroadcastRef.value = methods.stopBroadcast;
1753
+ if (methods.start) startListeningRef.value = methods.start;
1754
+ if (methods.stop) stopListeningRef.value = methods.stop;
1755
+ },
1756
+ getCommads: async () => commandManager.value.getCommands(),
1757
+ registerCommand: (cmd) => {
1758
+ commandManager.value.registerCommand(cmd);
1759
+ },
1760
+ unregisterCommand: (name) => {
1761
+ commandManager.value.unregisterCommand(name);
1762
+ },
1763
+ async executeCommand(commandName, args = []) {
1764
+ return await commandManager.value.executeCommand(commandName, args);
1765
+ }
1766
+ });
1767
+ return (_ctx, _cache) => {
1768
+ return renderSlot(_ctx.$slots, "default");
1769
+ };
1770
+ }
1771
+ });
1772
+
1773
+ function useTTS(getVoiceConfig) {
1774
+ const isSpeaking = ref(false);
1775
+ const hasPendingAudio = ref(false);
1776
+ let instance = null;
1777
+ let initPromise = null;
1778
+ let audioCtx = null;
1779
+ let sentenceBuffer = "";
1780
+ const sentenceDelimiters = /[。!?;\n.!?;]/;
1781
+ const stripMarkdown = (text) => text.replace(/```[\s\S]*?```/g, "").replace(/\|[^\n]*\|/g, "").replace(/#{1,6}\s*/g, "").replace(/\*\*(.*?)\*\*/g, "$1").replace(/\*(.*?)\*/g, "$1").replace(/`([^`]*)`/g, "$1").replace(/\[([^\]]*)\]\([^)]*\)/g, "$1").replace(/[-*+]\s+/g, "").replace(/>\s+/g, "").replace(/\n{2,}/g, "。").replace(/\n/g, ",").trim();
1782
+ const warmUpAudio = () => {
1783
+ if (!audioCtx || audioCtx.state === "closed") {
1784
+ try {
1785
+ audioCtx = new AudioContext();
1786
+ } catch {
1787
+ return;
1487
1788
  }
1488
1789
  }
1489
- const textPart = { type: "text", text: "" };
1490
- parts.push(textPart);
1491
- return textPart;
1492
- };
1493
- const findToolPartIndex = (toolCallId) => {
1494
- return parts.findIndex((p) => (p.type === "tool-call" || p.type === "tool-result") && p.toolCallId === toolCallId);
1495
- };
1496
- const emitUpdate = () => {
1497
- onUpdate({ textContent, parts: [...parts], toolCalls: new Map(toolCalls) });
1790
+ if (audioCtx.state === "suspended") {
1791
+ audioCtx.resume();
1792
+ }
1498
1793
  };
1499
- await readDataStream(response, {
1500
- onTextDelta(text) {
1501
- textContent += text;
1502
- const textPart = ensureTextPart();
1503
- textPart.text = textContent;
1504
- emitUpdate();
1505
- },
1506
- onToolCallStart(toolCallId, toolName) {
1507
- const tracker = {
1508
- toolCallId,
1509
- toolName,
1510
- argsText: "",
1511
- args: void 0,
1512
- state: "partial-call"
1513
- };
1514
- toolCalls.set(toolCallId, tracker);
1515
- const part = {
1516
- type: "tool-call",
1517
- toolCallId,
1518
- toolName,
1519
- args: void 0,
1520
- state: "partial-call"
1521
- };
1522
- parts.push(part);
1523
- emitUpdate();
1524
- },
1525
- onToolCallDelta(toolCallId, argsTextDelta) {
1526
- const tracker = toolCalls.get(toolCallId);
1527
- if (tracker) {
1528
- tracker.argsText += argsTextDelta;
1529
- try {
1530
- tracker.args = JSON.parse(tracker.argsText);
1531
- } catch {
1532
- }
1533
- const idx = findToolPartIndex(toolCallId);
1534
- if (idx !== -1 && parts[idx].type === "tool-call") {
1535
- parts[idx].args = tracker.args;
1536
- }
1537
- emitUpdate();
1538
- }
1539
- },
1540
- onToolCallComplete(toolCallId, toolName, args) {
1541
- const tracker = toolCalls.get(toolCallId);
1542
- if (tracker) {
1543
- tracker.state = "call";
1544
- tracker.args = typeof args === "string" ? safeJsonParse(args) : args;
1545
- } else {
1546
- toolCalls.set(toolCallId, {
1547
- toolCallId,
1548
- toolName,
1549
- argsText: typeof args === "string" ? args : JSON.stringify(args),
1550
- args: typeof args === "string" ? safeJsonParse(args) : args,
1551
- state: "call"
1552
- });
1553
- }
1554
- const idx = findToolPartIndex(toolCallId);
1555
- if (idx !== -1) {
1556
- parts[idx].state = "call";
1557
- parts[idx].toolName = toolName;
1558
- parts[idx].args = toolCalls.get(toolCallId).args;
1559
- } else {
1560
- parts.push({
1561
- type: "tool-call",
1562
- toolCallId,
1563
- toolName,
1564
- args: toolCalls.get(toolCallId).args,
1565
- state: "call"
1566
- });
1567
- }
1568
- emitUpdate();
1569
- },
1570
- onToolResult(toolCallId, result) {
1571
- const tracker = toolCalls.get(toolCallId);
1572
- if (tracker) {
1573
- tracker.result = result;
1574
- tracker.state = "result";
1575
- }
1576
- const idx = findToolPartIndex(toolCallId);
1577
- if (idx !== -1) {
1578
- const existing = parts[idx];
1579
- const resultPart = {
1580
- type: "tool-result",
1581
- toolCallId,
1582
- toolName: existing.toolName,
1583
- args: existing.args,
1584
- result,
1585
- state: "result"
1586
- };
1587
- parts[idx] = resultPart;
1588
- } else {
1589
- parts.push({
1590
- type: "tool-result",
1591
- toolCallId,
1592
- toolName: tracker?.toolName || "unknown",
1593
- args: tracker?.args,
1594
- result,
1595
- state: "result"
1596
- });
1597
- }
1598
- emitUpdate();
1599
- },
1600
- onError(error, data) {
1601
- const toolCallId = data?.toolCallId;
1602
- if (toolCallId) {
1603
- toolCalls.delete(toolCallId);
1604
- const idx = findToolPartIndex(toolCallId);
1605
- if (idx !== -1) {
1606
- parts.splice(idx, 1);
1607
- emitUpdate();
1608
- }
1609
- }
1610
- console.error("[DataStreamParser] stream error:", error);
1611
- },
1612
- onStepFinish(_data) {
1613
- emitUpdate();
1614
- },
1615
- onFinish(_data) {
1616
- emitUpdate();
1794
+ let onQueueEmptyCb = null;
1795
+ const ensureInstance = async () => {
1796
+ if (instance) return instance;
1797
+ if (initPromise) return initPromise;
1798
+ const vc = getVoiceConfig();
1799
+ if (!vc || !vc.apiSecret) {
1800
+ console.warn("[TTS] 缺少 voiceConfig 或 apiSecret,语音播报已禁用");
1801
+ return null;
1617
1802
  }
1618
- });
1619
- return { textContent, parts, toolCalls };
1620
- }
1621
- function safeJsonParse(str) {
1622
- try {
1623
- return JSON.parse(str);
1624
- } catch {
1625
- return str;
1626
- }
1627
- }
1628
-
1629
- const toolDisplayNames = {
1630
- generateReport: "生成报告",
1631
- searchKnowledge: "知识库检索",
1632
- resolveInstanceTargets: "解析实例目标",
1633
- getHistoryMetrics: "历史数据查询",
1634
- getRealtimeMetrics: "实时数据查询",
1635
- queryBitableData: "多维表格查询",
1636
- searchUser: "搜索用户",
1637
- createBitableRecord: "创建表格记录",
1638
- timeTool: "时间工具",
1639
- loadSkill: "加载技能",
1640
- executeCommand: "执行命令",
1641
- dataAnalyzer: "数据分析",
1642
- dataPredictor: "数据预测"
1643
- };
1644
- function useAgentInvoke(options) {
1645
- const { aiChatbotX, tts, bubble } = options;
1646
- const sessionTimeoutMs = options.sessionTimeoutMs ?? 12e4;
1647
- const maxHistoryTurns = options.maxHistoryTurns ?? 10;
1648
- const isInvoking = ref(false);
1649
- const currentTextContent = ref("");
1650
- const currentToolParts = ref([]);
1651
- const executingTools = ref(/* @__PURE__ */ new Set());
1652
- const conversationHistory = ref([]);
1653
- let lastInteractionTime = 0;
1654
- const checkSessionTimeout = () => {
1655
- if (lastInteractionTime > 0 && Date.now() - lastInteractionTime > sessionTimeoutMs) {
1656
- conversationHistory.value = [];
1803
+ initPromise = (async () => {
1804
+ try {
1805
+ const tts = new SpeechSynthesizerStandalone({
1806
+ appId: vc.appId,
1807
+ apiKey: vc.ttsApiKey || vc.apiKey,
1808
+ apiSecret: vc.apiSecret,
1809
+ websocketUrl: vc.ttsWebsocketUrl || "wss://tts-api.xfyun.cn/v2/tts",
1810
+ vcn: vc.ttsVcn || "xiaoyan",
1811
+ speed: vc.speed || 55,
1812
+ volume: vc.volume || 90,
1813
+ pitch: vc.pitch || 50,
1814
+ aue: "raw",
1815
+ auf: "audio/L16;rate=16000",
1816
+ tte: "UTF8",
1817
+ autoPlay: true
1818
+ });
1819
+ tts.onStart(() => {
1820
+ isSpeaking.value = true;
1821
+ });
1822
+ tts.onEnd(() => {
1823
+ });
1824
+ tts.onQueueEmpty(() => {
1825
+ isSpeaking.value = false;
1826
+ hasPendingAudio.value = false;
1827
+ onQueueEmptyCb?.();
1828
+ });
1829
+ tts.onError((err) => {
1830
+ console.error("[TTS] Error:", err);
1831
+ isSpeaking.value = false;
1832
+ });
1833
+ if (audioCtx && audioCtx.state === "running") {
1834
+ tts.audioContext = audioCtx;
1835
+ tts.gainNode = audioCtx.createGain();
1836
+ tts.gainNode.connect(audioCtx.destination);
1837
+ }
1838
+ instance = tts;
1839
+ initPromise = null;
1840
+ return tts;
1841
+ } catch (err) {
1842
+ console.error("[TTS] 初始化失败:", err);
1843
+ initPromise = null;
1844
+ return null;
1845
+ }
1846
+ })();
1847
+ return initPromise;
1848
+ };
1849
+ const speak = async (text) => {
1850
+ const clean = stripMarkdown(text);
1851
+ if (!clean.trim()) return;
1852
+ hasPendingAudio.value = true;
1853
+ const tts = await ensureInstance();
1854
+ if (!tts) return;
1855
+ try {
1856
+ tts.speak(clean);
1857
+ } catch (err) {
1858
+ console.error("[TTS] speak 失败:", err);
1657
1859
  }
1658
1860
  };
1659
- const appendToHistory = (role, content) => {
1660
- conversationHistory.value.push({ role, content });
1661
- const maxLen = maxHistoryTurns * 2;
1662
- if (conversationHistory.value.length > maxLen) {
1663
- conversationHistory.value = conversationHistory.value.slice(-maxLen);
1861
+ const feed = (delta) => {
1862
+ sentenceBuffer += delta;
1863
+ while (true) {
1864
+ const match = sentenceBuffer.match(sentenceDelimiters);
1865
+ if (!match || match.index === void 0) break;
1866
+ const sentence = sentenceBuffer.slice(0, match.index + 1).trim();
1867
+ sentenceBuffer = sentenceBuffer.slice(match.index + 1);
1868
+ if (sentence.length > 0) speak(sentence);
1664
1869
  }
1665
1870
  };
1666
- const clearHistory = () => {
1667
- conversationHistory.value = [];
1871
+ const flush = () => {
1872
+ const remaining = sentenceBuffer.trim();
1873
+ sentenceBuffer = "";
1874
+ if (remaining.length > 0) speak(remaining);
1668
1875
  };
1669
- let abortController = null;
1670
- const hasAnyContent = computed(() => {
1671
- return !!(currentTextContent.value || currentToolParts.value.length > 0);
1672
- });
1673
- const toolDisplayName = (name) => toolDisplayNames[name] || name;
1674
- const resetState = () => {
1675
- currentTextContent.value = "";
1676
- currentToolParts.value = [];
1677
- executingTools.value = /* @__PURE__ */ new Set();
1876
+ const stop = () => {
1877
+ sentenceBuffer = "";
1878
+ isSpeaking.value = false;
1879
+ hasPendingAudio.value = false;
1880
+ if (instance) {
1881
+ try {
1882
+ instance.stop();
1883
+ } catch {
1884
+ }
1885
+ }
1678
1886
  };
1679
- const extractExecutableCommands = (payload) => {
1680
- if (!payload || typeof payload !== "object") return [];
1681
- const commands = payload.commands;
1682
- if (!Array.isArray(commands) || commands.length === 0) return [];
1683
- return commands.filter((cmd) => cmd && typeof cmd === "object" && typeof cmd.name === "string" && cmd.name.trim()).map((cmd) => ({
1684
- name: cmd.name,
1685
- args: Array.isArray(cmd.args) ? cmd.args : []
1686
- }));
1887
+ const setOnQueueEmpty = (cb) => {
1888
+ onQueueEmptyCb = cb;
1687
1889
  };
1688
- const buildCommandDefinitionMap = (commands) => {
1689
- return new Map(commands.map((command) => [command.name, command]));
1890
+ const destroy = () => {
1891
+ stop();
1892
+ if (instance) {
1893
+ try {
1894
+ instance.destroy();
1895
+ } catch {
1896
+ }
1897
+ instance = null;
1898
+ }
1899
+ if (audioCtx) {
1900
+ try {
1901
+ audioCtx.close();
1902
+ } catch {
1903
+ }
1904
+ audioCtx = null;
1905
+ }
1690
1906
  };
1691
- const toExecutableCommand = (toolName, payload, commandDefinitions) => {
1692
- const commandDefinition = commandDefinitions.get(toolName);
1693
- if (!commandDefinition) {
1694
- return null;
1907
+ return {
1908
+ isSpeaking,
1909
+ hasPendingAudio,
1910
+ warmUpAudio,
1911
+ speak,
1912
+ feed,
1913
+ flush,
1914
+ stop,
1915
+ destroy,
1916
+ setOnQueueEmpty
1917
+ };
1918
+ }
1919
+
1920
+ const ensureMicrophonePermission = async () => {
1921
+ if (typeof navigator === "undefined" || typeof window === "undefined") {
1922
+ console.log("当前环境不支持麦克风访问");
1923
+ return false;
1924
+ }
1925
+ if (!navigator.mediaDevices?.getUserMedia || !navigator.mediaDevices?.enumerateDevices) {
1926
+ console.log("当前环境不支持麦克风访问");
1927
+ return false;
1928
+ }
1929
+ try {
1930
+ const devices = await navigator.mediaDevices.enumerateDevices();
1931
+ const audioInputDevices = devices.filter((device) => device.kind === "audioinput");
1932
+ if (audioInputDevices.length === 0) {
1933
+ console.log("未检测到麦克风设备,请连接麦克风后重试。");
1934
+ return false;
1695
1935
  }
1696
- const parameters = commandDefinition.parameters || [];
1697
- if (Array.isArray(payload)) {
1698
- return {
1699
- name: toolName,
1700
- args: payload
1701
- };
1936
+ if ("permissions" in navigator && navigator.permissions?.query) {
1937
+ try {
1938
+ const status = await navigator.permissions.query({ name: "microphone" });
1939
+ if (status.state === "denied") {
1940
+ console.log("麦克风权限被禁用,请在浏览器设置中开启。");
1941
+ return false;
1942
+ }
1943
+ } catch (e) {
1944
+ console.warn("Permission query not supported:", e);
1945
+ }
1702
1946
  }
1703
- if (!payload || typeof payload !== "object") {
1704
- return {
1705
- name: toolName,
1706
- args: []
1707
- };
1947
+ let stream = null;
1948
+ try {
1949
+ stream = await navigator.mediaDevices.getUserMedia({
1950
+ audio: {
1951
+ echoCancellation: true,
1952
+ noiseSuppression: true,
1953
+ autoGainControl: true
1954
+ }
1955
+ });
1956
+ const audioTracks = stream.getAudioTracks();
1957
+ if (audioTracks.length === 0) {
1958
+ console.log("无法获取麦克风音频轨道。");
1959
+ return false;
1960
+ }
1961
+ const activeTrack = audioTracks[0];
1962
+ if (!activeTrack.enabled || activeTrack.readyState !== "live") {
1963
+ console.log("麦克风设备不可用,请检查设备连接。");
1964
+ return false;
1965
+ }
1966
+ return true;
1967
+ } finally {
1968
+ if (stream) {
1969
+ stream.getTracks().forEach((track) => track.stop());
1970
+ }
1971
+ }
1972
+ } catch (error) {
1973
+ console.error("Microphone permission check failed", error);
1974
+ if (error.name === "NotFoundError" || error.name === "DevicesNotFoundError") {
1975
+ console.log("未检测到麦克风设备,请连接麦克风后重试。");
1976
+ } else if (error.name === "NotAllowedError" || error.name === "PermissionDeniedError") {
1977
+ console.log("麦克风权限被拒绝,请在浏览器设置中允许访问。");
1978
+ } else if (error.name === "NotReadableError" || error.name === "TrackStartError") {
1979
+ console.log("麦克风被其他应用占用或无法访问。");
1980
+ } else {
1981
+ console.log("无法访问麦克风,请检查设备连接和浏览器权限。");
1982
+ }
1983
+ return false;
1984
+ }
1985
+ };
1986
+
1987
+ function useVoiceRecognition(options) {
1988
+ const voiceStatus = ref("standby");
1989
+ const isTranscribing = ref(false);
1990
+ const isInitializing = ref(false);
1991
+ const transcriptionText = ref("");
1992
+ const wakeAnimating = ref(false);
1993
+ let detector = null;
1994
+ let transcriber = null;
1995
+ const initTranscriber = () => {
1996
+ if (transcriber) return;
1997
+ const vc = options.getVoiceConfig();
1998
+ if (!vc || !vc.appId || !vc.apiKey || !vc.websocketUrl) {
1999
+ console.error("[VoiceRecognition] 缺少 voiceConfig,无法初始化转写器");
2000
+ return;
2001
+ }
2002
+ transcriber = new SpeechTranscriberStandalone({
2003
+ appId: vc.appId,
2004
+ apiKey: vc.apiKey,
2005
+ websocketUrl: vc.websocketUrl,
2006
+ autoStop: {
2007
+ enabled: true,
2008
+ silenceTimeoutMs: 2e3,
2009
+ noSpeechTimeoutMs: 5e3,
2010
+ maxDurationMs: 45e3
2011
+ }
2012
+ });
2013
+ transcriber.onResult((result) => {
2014
+ transcriptionText.value = result.transcript || "";
2015
+ });
2016
+ transcriber.onAutoStop(async () => {
2017
+ const finalText = transcriptionText.value;
2018
+ await stopTranscribing();
2019
+ transcriptionText.value = "";
2020
+ if (finalText.trim()) {
2021
+ options.onTranscriptionDone?.(finalText);
2022
+ }
2023
+ });
2024
+ transcriber.onError((error) => {
2025
+ console.error("[VoiceRecognition] 转写错误:", error);
2026
+ stopTranscribing();
2027
+ transcriptionText.value = "";
2028
+ });
2029
+ };
2030
+ const startTranscribing = async () => {
2031
+ if (isTranscribing.value) return;
2032
+ if (!transcriber) initTranscriber();
2033
+ if (!transcriber) return;
2034
+ try {
2035
+ await transcriber.start();
2036
+ isTranscribing.value = true;
2037
+ transcriptionText.value = "";
2038
+ } catch (error) {
2039
+ console.error("[VoiceRecognition] 启动转写失败:", error);
1708
2040
  }
1709
- const payloadRecord = payload;
1710
- return {
1711
- name: toolName,
1712
- args: parameters.map((parameter) => payloadRecord[parameter.name])
1713
- };
1714
2041
  };
1715
- const resolveExecutableCommands = (toolName, payload, commandDefinitions) => {
1716
- const extractedCommands = extractExecutableCommands(payload);
1717
- if (extractedCommands.length > 0) {
1718
- return extractedCommands;
2042
+ const stopTranscribing = async () => {
2043
+ if (!transcriber || !transcriber.isActive()) {
2044
+ isTranscribing.value = false;
2045
+ return;
2046
+ }
2047
+ try {
2048
+ await transcriber.stop();
2049
+ } catch (error) {
2050
+ console.error("[VoiceRecognition] 停止转写失败:", error);
2051
+ } finally {
2052
+ isTranscribing.value = false;
1719
2053
  }
1720
- const directCommand = toExecutableCommand(toolName, payload, commandDefinitions);
1721
- return directCommand ? [directCommand] : [];
1722
2054
  };
1723
- const executeHostCommands = async (toolCallId, toolName, payload, commandDefinitions) => {
1724
- const commands = resolveExecutableCommands(toolName, payload, commandDefinitions);
1725
- if (commands.length === 0) return false;
2055
+ const initDetector = () => {
2056
+ if (detector || isInitializing.value) return;
2057
+ if (!options.modelPath) {
2058
+ console.error("[VoiceRecognition] 未传入 modelPath,无法启用唤醒词");
2059
+ return;
2060
+ }
2061
+ isInitializing.value = true;
1726
2062
  try {
1727
- executingTools.value = /* @__PURE__ */ new Set([...executingTools.value, toolCallId]);
1728
- for (const cmd of commands) {
1729
- try {
1730
- await aiChatbotX.executeCommand(cmd.name, cmd.args);
1731
- } catch (cmdErr) {
1732
- console.error(`[AgentInvoke] 执行命令 ${cmd.name} 失败:`, cmdErr);
2063
+ detector = new WakeWordDetectorStandalone({
2064
+ modelPath: options.modelPath,
2065
+ sampleRate: 16e3,
2066
+ usePartial: true,
2067
+ autoReset: {
2068
+ enabled: true,
2069
+ resetDelayMs: 4e3
1733
2070
  }
1734
- }
1735
- return true;
2071
+ });
2072
+ detector.setWakeWords(options.wakeWords || ["你好", "您好"]);
2073
+ detector.onWake(async () => {
2074
+ wakeAnimating.value = true;
2075
+ options.onWake?.();
2076
+ await startTranscribing();
2077
+ setTimeout(() => {
2078
+ wakeAnimating.value = false;
2079
+ }, 1200);
2080
+ });
2081
+ detector.onError((error) => {
2082
+ console.error("[VoiceRecognition] 唤醒监听错误:", error);
2083
+ voiceStatus.value = "standby";
2084
+ stopTranscribing();
2085
+ });
1736
2086
  } finally {
1737
- const next = new Set(executingTools.value);
1738
- next.delete(toolCallId);
1739
- executingTools.value = next;
2087
+ isInitializing.value = false;
1740
2088
  }
1741
2089
  };
1742
- const parseAssistantText = (payload) => {
1743
- if (!payload) return "";
1744
- if (typeof payload === "string") return payload;
1745
- if (typeof payload === "object") {
1746
- const data = payload;
1747
- const directText = data.output || data.answer || data.message || data.result;
1748
- if (typeof directText === "string" && directText.trim()) return directText;
1749
- if (data.data && typeof data.data === "object") {
1750
- const nested = data.data;
1751
- const nestedText = nested.output || nested.answer || nested.message || nested.result;
1752
- if (typeof nestedText === "string" && nestedText.trim()) return nestedText;
1753
- }
1754
- return JSON.stringify(payload);
2090
+ const toggleVoiceMode = async (targetState) => {
2091
+ const permission = await ensureMicrophonePermission();
2092
+ if (!permission || isInitializing.value) return;
2093
+ if (!detector) {
2094
+ initDetector();
2095
+ if (!detector) return;
1755
2096
  }
1756
- return String(payload);
1757
- };
1758
- const invoke = async (question) => {
1759
- const content = question.trim();
1760
- if (!content) return;
1761
- abort();
1762
- checkSessionTimeout();
1763
- resetState();
1764
- tts.stop();
1765
- isInvoking.value = true;
1766
- bubble.open();
1767
- let prevTextLength = 0;
1768
- const processedToolResults = /* @__PURE__ */ new Set();
1769
- const processingToolResults = /* @__PURE__ */ new Set();
1770
- abortController = new AbortController();
1771
- const commands = await aiChatbotX.getCommads();
1772
- const commandDefinitions = buildCommandDefinitionMap(commands);
1773
- conversationHistory.value.length > 0 ? [...conversationHistory.value] : void 0;
2097
+ const isListening = voiceStatus.value === "listening";
2098
+ const shouldStart = targetState !== void 0 ? targetState : !isListening;
2099
+ if (isListening === shouldStart) return;
1774
2100
  try {
1775
- const response = await fetch(options.endpoint, {
1776
- method: "POST",
1777
- headers: { "Content-Type": "application/json", Authorization: `Bearer ${options.appToken || ""}` },
1778
- body: JSON.stringify({
1779
- input: content,
1780
- projectId: options.projectId || "",
1781
- commands: commands.length > 0 ? commands : void 0
1782
- // messages: historyToSend,
1783
- }),
1784
- signal: abortController.signal
1785
- });
1786
- if (!response.ok) throw new Error(`HTTP ${response.status}`);
1787
- const contentType = response.headers.get("content-type") || "";
1788
- const isJsonResponse = contentType.includes("application/json");
1789
- if (isJsonResponse) {
1790
- const data = await response.json();
1791
- const reply = parseAssistantText(data) || "已收到,但没有返回可展示的文本内容。";
1792
- currentTextContent.value = reply;
1793
- tts.speak(reply);
1794
- appendToHistory("user", content);
1795
- appendToHistory("assistant", reply);
1796
- if (data.toolResults && Array.isArray(data.toolResults)) {
1797
- for (const tr of data.toolResults) {
1798
- const toolPart = {
1799
- type: "tool-result",
1800
- toolCallId: `invoke-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
1801
- toolName: tr.toolName,
1802
- args: tr.args,
1803
- result: tr.result,
1804
- state: "result"
1805
- };
1806
- currentToolParts.value = [...currentToolParts.value, toolPart];
1807
- if (commandDefinitions.has(tr.toolName)) {
1808
- void executeHostCommands(toolPart.toolCallId, tr.toolName, tr.result, commandDefinitions);
1809
- }
1810
- }
1811
- }
2101
+ if (shouldStart) {
2102
+ await detector.start();
2103
+ voiceStatus.value = "listening";
1812
2104
  } else {
1813
- await parseDataStreamToMessage(response, (result) => {
1814
- currentTextContent.value = result.textContent;
1815
- if (result.textContent.length > prevTextLength) {
1816
- const delta = result.textContent.slice(prevTextLength);
1817
- prevTextLength = result.textContent.length;
1818
- tts.feed(delta);
1819
- }
1820
- const toolParts = result.parts.filter(
1821
- (p) => p.type === "tool-call" || p.type === "tool-result"
1822
- );
1823
- currentToolParts.value = toolParts;
1824
- for (const part of toolParts) {
1825
- if (commandDefinitions.has(part.toolName) && !processedToolResults.has(part.toolCallId) && !processingToolResults.has(part.toolCallId)) {
1826
- if (part.type === "tool-call" && part.state === "call" && part.args) {
1827
- processingToolResults.add(part.toolCallId);
1828
- void executeHostCommands(part.toolCallId, part.toolName, part.args, commandDefinitions).then(
1829
- (executed) => {
1830
- if (executed) {
1831
- processedToolResults.add(part.toolCallId);
1832
- }
1833
- processingToolResults.delete(part.toolCallId);
1834
- }
1835
- );
1836
- } else if (part.type === "tool-result" && part.result) {
1837
- processingToolResults.add(part.toolCallId);
1838
- void executeHostCommands(part.toolCallId, part.toolName, part.result, commandDefinitions).then(
1839
- (executed) => {
1840
- if (executed) {
1841
- processedToolResults.add(part.toolCallId);
1842
- }
1843
- processingToolResults.delete(part.toolCallId);
1844
- }
1845
- );
1846
- }
1847
- }
1848
- }
1849
- bubble.scrollToBottom();
1850
- });
1851
- tts.flush();
1852
- const assistantReply = currentTextContent.value.trim();
1853
- appendToHistory("user", content);
1854
- if (assistantReply) {
1855
- appendToHistory("assistant", assistantReply);
1856
- }
1857
- if (!assistantReply && currentToolParts.value.length === 0) {
1858
- currentTextContent.value = "已收到,但没有返回可展示的文本内容。";
1859
- }
2105
+ await detector.stop();
2106
+ voiceStatus.value = "standby";
2107
+ transcriptionText.value = "";
2108
+ await stopTranscribing();
1860
2109
  }
1861
2110
  } catch (error) {
1862
- if (error.name === "AbortError") {
1863
- return;
1864
- }
1865
- console.error("[AgentInvoke] invoke failed:", error);
1866
- tts.stop();
1867
- currentTextContent.value = "请求失败,请检查服务地址或稍后重试。";
1868
- } finally {
1869
- isInvoking.value = false;
1870
- abortController = null;
1871
- lastInteractionTime = Date.now();
1872
- bubble.scheduleDismiss();
2111
+ console.error("[VoiceRecognition] 监听切换失败:", error);
2112
+ voiceStatus.value = "standby";
1873
2113
  }
1874
2114
  };
1875
- const abort = () => {
1876
- if (abortController) {
1877
- abortController.abort();
1878
- abortController = null;
2115
+ const abortTranscription = async () => {
2116
+ transcriptionText.value = "";
2117
+ await stopTranscribing();
2118
+ };
2119
+ const destroy = async () => {
2120
+ if (detector) {
2121
+ try {
2122
+ if (detector.isActive()) await detector.stop();
2123
+ } catch {
2124
+ }
2125
+ detector = null;
2126
+ }
2127
+ if (transcriber) {
2128
+ try {
2129
+ if (transcriber.isActive()) await transcriber.stop();
2130
+ } catch {
2131
+ }
2132
+ transcriber = null;
1879
2133
  }
1880
- tts.stop();
1881
- isInvoking.value = false;
1882
2134
  };
1883
2135
  return {
1884
- isInvoking,
1885
- currentTextContent,
1886
- currentToolParts,
1887
- executingTools,
1888
- hasAnyContent,
1889
- conversationHistory,
1890
- toolDisplayName,
1891
- invoke,
1892
- abort,
1893
- resetState,
1894
- clearHistory
2136
+ voiceStatus,
2137
+ isTranscribing,
2138
+ isInitializing,
2139
+ transcriptionText,
2140
+ wakeAnimating,
2141
+ startTranscribing,
2142
+ stopTranscribing,
2143
+ abortTranscription,
2144
+ toggleVoiceMode,
2145
+ destroy
1895
2146
  };
1896
2147
  }
1897
2148
 
@@ -2187,5 +2438,5 @@ var clientCommandKey = /* @__PURE__ */ ((clientCommandKey2) => {
2187
2438
  return clientCommandKey2;
2188
2439
  })(clientCommandKey || {});
2189
2440
 
2190
- export { AgentChatTransport, aiChat as AiChat, _sfc_main$1 as AiChatbotProvider, voiceAssistant as AiChatbotVoiceAssistant, AiChatbotXKey, clientCommandKey, createAgentChatTransport, injectStrict };
2441
+ export { AgentChatTransport, aiChat as AiChat, commandTest as AiChatbotCommandTest, _sfc_main$1 as AiChatbotProvider, voiceAssistant as AiChatbotVoiceAssistant, AiChatbotXKey, clientCommandKey, createAgentChatTransport, injectStrict };
2191
2442
  //# sourceMappingURL=sime-x-vue.mjs.map