qt-human 4.1.2 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1880,6 +1880,13 @@ interface VoiceConfig {
1880
1880
  /** 是否要求唤醒词说两遍(如"小德小德"),默认 false(向后兼容) */
1881
1881
  doubleWakeWord?: boolean
1882
1882
 
1883
+ /**
1884
+ * 默认 false。true=唤醒词门控强模式:IDLE 下每轮必须带唤醒词前缀,旁人语音忽略。
1885
+ * 门控强弱(单次 vs 唤醒词说两遍)由 `doubleWakeWord` 治理,且其默认值各模块不同:
1886
+ * VoiceConversationE2E 默认 doubleWakeWord=true,VoiceConversation 默认 false。
1887
+ */
1888
+ wakePerTurn?: boolean
1889
+
1883
1890
  /** 唤醒确认语,如"在呢"。唤醒后通过直接 TTS 播报(不走 LLM,不入历史) */
1884
1891
  wakeConfirmText?: string
1885
1892
 
@@ -2959,164 +2966,6 @@ declare class CameraSDK {
2959
2966
  destroy(): void;
2960
2967
  }
2961
2968
 
2962
- declare class Human extends Emittery implements IHuman {
2963
- name: string;
2964
- core: ThreeJSApp | null;
2965
- private config;
2966
- loading?: Loading;
2967
- private audio;
2968
- private container;
2969
- recorder: any | null;
2970
- private recorderMessageHandler;
2971
- private voiceToTextManager;
2972
- private status;
2973
- private chatOrchestrator;
2974
- private onAnyEvent;
2975
- private characterData;
2976
- private isNotRender;
2977
- private mode;
2978
- private gatewayService;
2979
- framePlayer: FramePlayer | null;
2980
- camera: CameraSDK | null;
2981
- get sessionId(): string;
2982
- enableProactiveTTSPlayback(enable?: boolean): void;
2983
- private _proactiveAudioCtx;
2984
- private voiceInteractionConfig;
2985
- private voiceInteractionState;
2986
- private voiceInteractionExitTimer;
2987
- private exitPromptFired;
2988
- private secondExitTimer;
2989
- private exitTimerTurnId;
2990
- private voiceInteractionConversationId;
2991
- private voiceInteractionProcessing;
2992
- private voiceInteractionLastText;
2993
- private voiceInteractionLastTextTime;
2994
- private voiceInteractionTTSMode;
2995
- private voiceInteractionWebRTCDisabled;
2996
- private ttsState;
2997
- private eventBusHandlers;
2998
- private eventBusModule;
2999
- private _destroyed;
3000
- private updateTokenWarnedOnce;
3001
- private degradedNotice;
3002
- private static readonly IDLE_LOOP_MODE;
3003
- private static readonly BARGE_IN_MIN_TEXT_LENGTH;
3004
- private static readonly BARGE_IN_COOLDOWN_MS;
3005
- private static readonly INTERRUPT_INTENT_WORDS;
3006
- private static readonly GREETING_ONLY_WORDS;
3007
- private static readonly CHAR_HOMOPHONES;
3008
- private static generateNameVariants;
3009
- private static normalizeASRText;
3010
- private isDoubleWakeWord;
3011
- private speakDirect;
3012
- private lastBargeInTime;
3013
- private bargeInInProgress;
3014
- private voiceInteractionAskId;
3015
- private isAudioDataDiscarding;
3016
- private audioDiscardingTimer;
3017
- private static readonly AUDIO_DISCARDING_TIMEOUT_MS;
3018
- private isFirstUserInput;
3019
- private static readonly CONTINUATION_WINDOW_MS;
3020
- private static readonly MIN_VALID_TEXT_LENGTH;
3021
- private static readonly NOISE_WORDS;
3022
- private asrTextBuffer;
3023
- private asrSilenceTimer;
3024
- private pendingASRFinal;
3025
- private static readonly ASR_SILENCE_TIMEOUT_MS;
3026
- private static readonly DUPLICATE_TEXT_WINDOW_MS;
3027
- private pendingMergeText;
3028
- private textMergeTimer;
3029
- private textMergeStartTime;
3030
- private static readonly TEXT_MERGE_DELAY_MS;
3031
- private static readonly TEXT_MERGE_MAX_DELAY_MS;
3032
- private ttsPlaySafetyTimer;
3033
- private static readonly DEFAULT_TTS_PLAY_SAFETY_TIMEOUT_MS;
3034
- private framePlayerPlayEndHandler;
3035
- private audioDebugger;
3036
- private webrtcLipSync;
3037
- constructor(config: HumanConfig);
3038
- private mergeConfig;
3039
- private setupContainer;
3040
- private initFramePlayer;
3041
- private clearTTSPlaySafetyTimer;
3042
- private replaceFramePlayerPlayEndHandler;
3043
- private initGatewayService;
3044
- private setupGatewayEventForwarding;
3045
- private refreshGatewayToken;
3046
- setCharacterData(characterId: string): Promise<boolean>;
3047
- setCharacter(characterId: string, configuration?: Partial<HumanConfig>): Promise<boolean>;
3048
- speak(text: string, callback?: Function): Promise<StreamAudioPlayInfo>;
3049
- askCommand(params: AskParams, callback: (msgs: IResultData<string[]>) => void): Promise<void>;
3050
- askStream(params: AskParams, callback: (msgs: Result<ChatMessage$1[]>) => void, options?: {
3051
- skipStop?: boolean;
3052
- }): Promise<Result<AskStreamResponse>>;
3053
- ask(params: AskParams, callback: (result: Result<ChatMessage$1>) => void): Promise<Result<AskResponse>>;
3054
- speakByAudioShapes(uris: string[], shapes: Array<number[][]>): Promise<boolean>;
3055
- speakByAudioStreamShapes(audios: ArrayBuffer[], shapes: number[][], rate?: number): Promise<boolean>;
3056
- speakByAudio(uri: string, shapes: number[][]): Promise<boolean>;
3057
- startVoice2Text(callback?: (data: Voice2TextResult) => void): Promise<IResultData<RecorderPermission>>;
3058
- stopVoice2Text(callback?: (data: Voice2TextResult) => void): Promise<Result<null>>;
3059
- sendToolAction(tool: string, action: string, data: object): void;
3060
- stop(): Promise<boolean>;
3061
- private stopPlayback;
3062
- getStatus(): Status;
3063
- private initStorage;
3064
- muteAudio(isMute?: boolean): void;
3065
- muteAction(isMute?: boolean): void;
3066
- private dealVoice2Text;
3067
- enableAudio(): Promise<void>;
3068
- enableAudioDebug(enabled?: boolean): void;
3069
- getDebugAudioInfo(): {
3070
- sampleRate: number;
3071
- totalSize: number;
3072
- chunks: number;
3073
- };
3074
- downloadDebugAudio(filename?: string): void;
3075
- playDebugAudio(sampleRateOverride?: number): Promise<void>;
3076
- playDebugAudio2(sampleRateOverride?: number, streamMode?: boolean): Promise<void>;
3077
- private playIdleAnimation;
3078
- private finalizeTTSPlayback;
3079
- destroy(): Promise<void>;
3080
- refreshAuthToken(token: string): Promise<void>;
3081
- updateToken(token: string): void;
3082
- setAngleView({ camera }: AngleViewConfig): void;
3083
- playAction(code: string, opts?: PlayActionOptions): Promise<void>;
3084
- getModelInfo(): ModelInfo | null;
3085
- openVoiceInteraction(config: VoiceConfig): Promise<boolean>;
3086
- closeVoiceInteraction(): Promise<boolean>;
3087
- wakeVoiceInteraction(): void;
3088
- sleepVoiceInteraction(): void;
3089
- private createVoiceInteractionWebRTC;
3090
- private handleVoiceInteractionASR;
3091
- private flushPendingASRIfReady;
3092
- private processCompleteASRText;
3093
- private flushMergedText;
3094
- private sendTextToProcess;
3095
- private checkKeywords;
3096
- private containsInterruptIntent;
3097
- private triggerBargeIn;
3098
- private isNoiseText;
3099
- private handleVoiceInteractionWakeup;
3100
- private handleVoiceInteractionSleep;
3101
- private handleVoiceInteractionInterrupt;
3102
- private handleVoiceInteractionAsk;
3103
- private resetVoiceInteractionExitTimer;
3104
- offlineRender(files: CharacterModel[]): Promise<boolean>;
3105
- startRender(): Promise<boolean>;
3106
- stopRender(): Promise<boolean>;
3107
- closeRecorder(): Promise<boolean>;
3108
- openRecorder(): Promise<Result<RecorderPermission>>;
3109
- openCamera(config: CameraConfig): Promise<boolean>;
3110
- startFaceRecognition(config?: FaceRecognitionConfig): Promise<boolean>;
3111
- stopFaceRecognition(): Promise<boolean>;
3112
- startBodyDetect(): Promise<boolean>;
3113
- stopBodyDetect(): Promise<boolean>;
3114
- startSmileDetection(config?: SmileDetectionConfig): Promise<boolean>;
3115
- stopSmileDetection(): boolean;
3116
- closeCamera(): void;
3117
- }
3118
- //# sourceMappingURL=human.d.ts.map
3119
-
3120
2969
  interface GatewayConfig {
3121
2970
  url: string;
3122
2971
  token: string;
@@ -3418,6 +3267,9 @@ interface VoiceConversationEvents {
3418
3267
  wakeConfirm: {
3419
3268
  text: string;
3420
3269
  };
3270
+ audioLevel: {
3271
+ level: number;
3272
+ };
3421
3273
  }
3422
3274
  interface VoiceConversationConfig {
3423
3275
  gateway: GatewayService;
@@ -3440,6 +3292,7 @@ interface VoiceConversationConfig {
3440
3292
  enableInterrupt?: boolean;
3441
3293
  cooldownDuration?: number;
3442
3294
  doubleWakeWord?: boolean;
3295
+ wakePerTurn?: boolean;
3443
3296
  wakeConfirmText?: string;
3444
3297
  syntheticTexts?: string[];
3445
3298
  gracefulExitEnabled?: boolean;
@@ -3468,7 +3321,7 @@ interface VoiceConversationConfig {
3468
3321
  onResponse?: (text: string) => void;
3469
3322
  onError?: (error: VoiceError$1) => void;
3470
3323
  }
3471
- interface InternalConfig extends Required<Omit<VoiceConversationConfig, 'onStateChange' | 'onTranscript' | 'onResponse' | 'onError' | 'animations' | 'interruptAck' | 'interruptFeedback' | 'enableInterrupt' | 'cooldownDuration' | 'doubleWakeWord' | 'wakeConfirmText' | 'syntheticTexts' | 'gracefulExitEnabled' | 'gracefulExitPrompt' | 'gracefulExitPhase2Prompt' | 'gracefulExitPhase2Silence' | 'gracefulExitPhase3Silence' | 'bargeInStrictMode' | 'continuousWindowMs' | 'ttsTransport' | 'voiceEngine' | 'bargeInCooldownMs' | 'llmTimeoutMs' | 'llmMaxRetry' | 'webrtcMaxReconnectAttempts' | 'webrtcReconnectInterval' | 'vadGateWindowMs' | 'vadGraceMs' | 'interruptFollowUpWindowMs' | 'interruptWordFilterMs'>> {
3324
+ interface InternalConfig extends Required<Omit<VoiceConversationConfig, 'onStateChange' | 'onTranscript' | 'onResponse' | 'onError' | 'animations' | 'interruptAck' | 'interruptFeedback' | 'enableInterrupt' | 'cooldownDuration' | 'doubleWakeWord' | 'wakePerTurn' | 'wakeConfirmText' | 'syntheticTexts' | 'gracefulExitEnabled' | 'gracefulExitPrompt' | 'gracefulExitPhase2Prompt' | 'gracefulExitPhase2Silence' | 'gracefulExitPhase3Silence' | 'bargeInStrictMode' | 'continuousWindowMs' | 'ttsTransport' | 'voiceEngine' | 'bargeInCooldownMs' | 'llmTimeoutMs' | 'llmMaxRetry' | 'webrtcMaxReconnectAttempts' | 'webrtcReconnectInterval' | 'vadGateWindowMs' | 'vadGraceMs' | 'interruptFollowUpWindowMs' | 'interruptWordFilterMs'>> {
3472
3325
  onStateChange?: (state: VoiceState$1, prevState: VoiceState$1) => void;
3473
3326
  onTranscript?: (text: string, isFinal: boolean) => void;
3474
3327
  onResponse?: (text: string) => void;
@@ -3482,6 +3335,7 @@ interface InternalConfig extends Required<Omit<VoiceConversationConfig, 'onState
3482
3335
  interruptFeedback?: string;
3483
3336
  enableInterrupt: boolean;
3484
3337
  doubleWakeWord: boolean;
3338
+ wakePerTurn: boolean;
3485
3339
  wakeConfirmText?: string;
3486
3340
  syntheticTexts: string[];
3487
3341
  gracefulExitEnabled: boolean;
@@ -3609,6 +3463,7 @@ declare class VoiceConversation extends Emittery {
3609
3463
  sleep(): void;
3610
3464
  suspendASR(): void;
3611
3465
  resumeASR(): void;
3466
+ interrupt(): Promise<void>;
3612
3467
  private isGracefulExitPromptContext;
3613
3468
  private shouldCaptureGracefulExitFollowUp;
3614
3469
  private bufferGracefulExitFollowUp;
@@ -3695,6 +3550,167 @@ declare class VoiceConversation extends Emittery {
3695
3550
  private emitError;
3696
3551
  }
3697
3552
 
3553
+ declare class Human extends Emittery implements IHuman {
3554
+ name: string;
3555
+ core: ThreeJSApp | null;
3556
+ private config;
3557
+ loading?: Loading;
3558
+ private audio;
3559
+ private container;
3560
+ recorder: any | null;
3561
+ private recorderMessageHandler;
3562
+ private voiceToTextManager;
3563
+ private status;
3564
+ private chatOrchestrator;
3565
+ private onAnyEvent;
3566
+ private characterData;
3567
+ private isNotRender;
3568
+ private mode;
3569
+ private gatewayService;
3570
+ framePlayer: FramePlayer | null;
3571
+ camera: CameraSDK | null;
3572
+ get sessionId(): string;
3573
+ enableProactiveTTSPlayback(enable?: boolean): void;
3574
+ private _proactiveAudioCtx;
3575
+ private voiceInteractionConfig;
3576
+ private voiceInteractionState;
3577
+ private voiceInteractionExitTimer;
3578
+ private exitPromptFired;
3579
+ private secondExitTimer;
3580
+ private exitTimerTurnId;
3581
+ private voiceInteractionConversationId;
3582
+ private voiceInteractionProcessing;
3583
+ private voiceInteractionLastText;
3584
+ private voiceInteractionLastTextTime;
3585
+ private voiceInteractionTTSMode;
3586
+ private voiceInteractionWebRTCDisabled;
3587
+ private ttsState;
3588
+ private eventBusHandlers;
3589
+ private eventBusModule;
3590
+ private _destroyed;
3591
+ private updateTokenWarnedOnce;
3592
+ private degradedNotice;
3593
+ private static readonly IDLE_LOOP_MODE;
3594
+ private static readonly BARGE_IN_MIN_TEXT_LENGTH;
3595
+ private static readonly BARGE_IN_COOLDOWN_MS;
3596
+ private static readonly INTERRUPT_INTENT_WORDS;
3597
+ private static readonly GREETING_ONLY_WORDS;
3598
+ private static readonly CHAR_HOMOPHONES;
3599
+ private static generateNameVariants;
3600
+ private static normalizeASRText;
3601
+ private isDoubleWakeWord;
3602
+ private speakDirect;
3603
+ private lastBargeInTime;
3604
+ private bargeInInProgress;
3605
+ private voiceInteractionAskId;
3606
+ private isAudioDataDiscarding;
3607
+ private audioDiscardingTimer;
3608
+ private static readonly AUDIO_DISCARDING_TIMEOUT_MS;
3609
+ private isFirstUserInput;
3610
+ private static readonly CONTINUATION_WINDOW_MS;
3611
+ private static readonly MIN_VALID_TEXT_LENGTH;
3612
+ private static readonly NOISE_WORDS;
3613
+ private asrTextBuffer;
3614
+ private asrSilenceTimer;
3615
+ private pendingASRFinal;
3616
+ private static readonly ASR_SILENCE_TIMEOUT_MS;
3617
+ private static readonly DUPLICATE_TEXT_WINDOW_MS;
3618
+ private pendingMergeText;
3619
+ private textMergeTimer;
3620
+ private textMergeStartTime;
3621
+ private static readonly TEXT_MERGE_DELAY_MS;
3622
+ private static readonly TEXT_MERGE_MAX_DELAY_MS;
3623
+ private ttsPlaySafetyTimer;
3624
+ private static readonly DEFAULT_TTS_PLAY_SAFETY_TIMEOUT_MS;
3625
+ private framePlayerPlayEndHandler;
3626
+ private audioDebugger;
3627
+ private webrtcLipSync;
3628
+ constructor(config: HumanConfig);
3629
+ private mergeConfig;
3630
+ private setupContainer;
3631
+ private initFramePlayer;
3632
+ private clearTTSPlaySafetyTimer;
3633
+ private replaceFramePlayerPlayEndHandler;
3634
+ private initGatewayService;
3635
+ private setupGatewayEventForwarding;
3636
+ private refreshGatewayToken;
3637
+ setCharacterData(characterId: string): Promise<boolean>;
3638
+ setCharacter(characterId: string, configuration?: Partial<HumanConfig>): Promise<boolean>;
3639
+ speak(text: string, callback?: Function): Promise<StreamAudioPlayInfo>;
3640
+ askCommand(params: AskParams, callback: (msgs: IResultData<string[]>) => void): Promise<void>;
3641
+ askStream(params: AskParams, callback: (msgs: Result<ChatMessage$1[]>) => void, options?: {
3642
+ skipStop?: boolean;
3643
+ }): Promise<Result<AskStreamResponse>>;
3644
+ ask(params: AskParams, callback: (result: Result<ChatMessage$1>) => void): Promise<Result<AskResponse>>;
3645
+ speakByAudioShapes(uris: string[], shapes: Array<number[][]>): Promise<boolean>;
3646
+ speakByAudioStreamShapes(audios: ArrayBuffer[], shapes: number[][], rate?: number): Promise<boolean>;
3647
+ speakByAudio(uri: string, shapes: number[][]): Promise<boolean>;
3648
+ startVoice2Text(callback?: (data: Voice2TextResult) => void): Promise<IResultData<RecorderPermission>>;
3649
+ stopVoice2Text(callback?: (data: Voice2TextResult) => void): Promise<Result<null>>;
3650
+ sendToolAction(tool: string, action: string, data: object): void;
3651
+ stop(): Promise<boolean>;
3652
+ private stopPlayback;
3653
+ getStatus(): Status;
3654
+ private initStorage;
3655
+ muteAudio(isMute?: boolean): void;
3656
+ muteAction(isMute?: boolean): void;
3657
+ private dealVoice2Text;
3658
+ enableAudio(): Promise<void>;
3659
+ enableAudioDebug(enabled?: boolean): void;
3660
+ getDebugAudioInfo(): {
3661
+ sampleRate: number;
3662
+ totalSize: number;
3663
+ chunks: number;
3664
+ };
3665
+ downloadDebugAudio(filename?: string): void;
3666
+ playDebugAudio(sampleRateOverride?: number): Promise<void>;
3667
+ playDebugAudio2(sampleRateOverride?: number, streamMode?: boolean): Promise<void>;
3668
+ private playIdleAnimation;
3669
+ private finalizeTTSPlayback;
3670
+ destroy(): Promise<void>;
3671
+ refreshAuthToken(token: string): Promise<void>;
3672
+ updateToken(token: string): void;
3673
+ setAngleView({ camera }: AngleViewConfig): void;
3674
+ playAction(code: string, opts?: PlayActionOptions): Promise<void>;
3675
+ getModelInfo(): ModelInfo | null;
3676
+ createVoiceConversation(config?: Omit<VoiceConversationConfig, 'gateway' | 'renderer' | 'framePlayer' | 'characterId'> & {
3677
+ characterId?: string;
3678
+ }): Promise<VoiceConversation>;
3679
+ openVoiceInteraction(config: VoiceConfig): Promise<boolean>;
3680
+ closeVoiceInteraction(): Promise<boolean>;
3681
+ wakeVoiceInteraction(): void;
3682
+ sleepVoiceInteraction(): void;
3683
+ private createVoiceInteractionWebRTC;
3684
+ private handleVoiceInteractionASR;
3685
+ private flushPendingASRIfReady;
3686
+ private processCompleteASRText;
3687
+ private flushMergedText;
3688
+ private sendTextToProcess;
3689
+ private checkKeywords;
3690
+ private containsInterruptIntent;
3691
+ private triggerBargeIn;
3692
+ private isNoiseText;
3693
+ private handleVoiceInteractionWakeup;
3694
+ private handleVoiceInteractionSleep;
3695
+ private handleVoiceInteractionInterrupt;
3696
+ private handleVoiceInteractionAsk;
3697
+ private resetVoiceInteractionExitTimer;
3698
+ offlineRender(files: CharacterModel[]): Promise<boolean>;
3699
+ startRender(): Promise<boolean>;
3700
+ stopRender(): Promise<boolean>;
3701
+ closeRecorder(): Promise<boolean>;
3702
+ openRecorder(): Promise<Result<RecorderPermission>>;
3703
+ openCamera(config: CameraConfig): Promise<boolean>;
3704
+ startFaceRecognition(config?: FaceRecognitionConfig): Promise<boolean>;
3705
+ stopFaceRecognition(): Promise<boolean>;
3706
+ startBodyDetect(): Promise<boolean>;
3707
+ stopBodyDetect(): Promise<boolean>;
3708
+ startSmileDetection(config?: SmileDetectionConfig): Promise<boolean>;
3709
+ stopSmileDetection(): boolean;
3710
+ closeCamera(): void;
3711
+ }
3712
+ //# sourceMappingURL=human.d.ts.map
3713
+
3698
3714
  type VoiceState = 'SLEEPING' | 'IDLE' | 'LISTENING' | 'THINKING' | 'SPEAKING';
3699
3715
  interface VoiceError {
3700
3716
  code: 'WEBRTC_FAILED' | 'ASR_ERROR' | 'LLM_TIMEOUT' | 'LLM_ERROR' | 'TTS_FAILED' | 'GATEWAY_DISCONNECTED';
@@ -3713,6 +3729,7 @@ interface VoiceConversationE2EConfig {
3713
3729
  autoWake?: boolean;
3714
3730
  echoDetection?: boolean;
3715
3731
  doubleWakeWord?: boolean;
3732
+ wakePerTurn?: boolean;
3716
3733
  bargeInStrictMode?: boolean;
3717
3734
  continuousWindowMs?: number;
3718
3735
  gracefulExitEnabled?: boolean;
@@ -293,6 +293,13 @@ export interface VoiceConfig {
293
293
  /** 是否要求唤醒词说两遍(如"小德小德"),默认 false(向后兼容) */
294
294
  doubleWakeWord?: boolean
295
295
 
296
+ /**
297
+ * 默认 false。true=唤醒词门控强模式:IDLE 下每轮必须带唤醒词前缀,旁人语音忽略。
298
+ * 门控强弱(单次 vs 唤醒词说两遍)由 `doubleWakeWord` 治理,且其默认值各模块不同:
299
+ * VoiceConversationE2E 默认 doubleWakeWord=true,VoiceConversation 默认 false。
300
+ */
301
+ wakePerTurn?: boolean
302
+
296
303
  /** 唤醒确认语,如"在呢"。唤醒后通过直接 TTS 播报(不走 LLM,不入历史) */
297
304
  wakeConfirmText?: string
298
305
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "qt-human",
3
- "version": "4.1.2",
3
+ "version": "4.3.0",
4
4
  "description": "A JavaScript SDK for 3D digital human rendering and interaction",
5
5
  "publishConfig": {
6
6
  "registry": "https://registry.npmjs.org/"