@bdky/aaas-pilot-kit 1.0.8 → 1.1.0
This diff compares two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- package/dist/index.cjs.js +255 -23
- package/dist/index.cjs.js.LICENSE.txt +9 -0
- package/dist/index.esm.js +254 -22
- package/dist/index.esm.js.LICENSE.txt +9 -0
- package/dist/index.umd.js +254 -22
- package/dist/index.umd.js.LICENSE.txt +9 -0
- package/dist/ky-aaas-pilot-kit.umd.js +254 -22
- package/dist/ky-aaas-pilot-kit.umd.js.LICENSE.txt +9 -0
- package/dist/libs/aaas-pilot-kit/src/index.d.ts +2 -0
- package/dist/libs/aaas-pilot-kit/src/lib/DI/types.d.ts +1 -0
- package/dist/libs/aaas-pilot-kit/src/lib/constants/azure.d.ts +423 -0
- package/dist/libs/aaas-pilot-kit/src/lib/service/conversation/conversationService.d.ts +24 -0
- package/dist/libs/aaas-pilot-kit/src/lib/service/conversation/types.d.ts +37 -0
- package/dist/libs/aaas-pilot-kit/src/lib/service/rtc-asr/asr/azureAsrService.d.ts +108 -0
- package/dist/libs/aaas-pilot-kit/src/lib/service/rtc-asr/asr/baseAsrService.d.ts +38 -3
- package/dist/libs/aaas-pilot-kit/src/lib/service/rtc-asr/signal/noop.d.ts +54 -0
- package/dist/libs/aaas-pilot-kit/src/lib/utils/AudioDeviceDetector.d.ts +3 -1
- package/dist/libs/aaas-pilot-kit/src/lib/utils/EnergyBasedGate.d.ts +240 -0
- package/dist/libs/aaas-pilot-kit/src/lib/utils/MediaStreamManager.d.ts +209 -0
- package/dist/libs/aaas-pilot-kit/src/lib/utils/audio-processing/AudioContextLifecycle.d.ts +110 -0
- package/dist/libs/aaas-pilot-kit/src/lib/utils/audio-processing/AudioWorkletCapability.d.ts +95 -0
- package/dist/libs/aaas-pilot-kit/src/lib/utils/audio-processing/FloatAPICompat.d.ts +103 -0
- package/dist/libs/aaas-pilot-kit/src/types/config.d.ts +358 -4
- package/package.json +23 -1
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file AudioWorklet 能力检测
|
|
3
|
+
* @description 检测浏览器是否支持 AudioWorklet API
|
|
4
|
+
*
|
|
5
|
+
* 浏览器支持情况:
|
|
6
|
+
* - Chrome 66+ ✅
|
|
7
|
+
* - Edge 79+ ✅
|
|
8
|
+
* - Safari 14.1+ ✅
|
|
9
|
+
* - Firefox 76+ ✅
|
|
10
|
+
* - iOS Safari 14.5+ ✅
|
|
11
|
+
* - Android Chrome 66+ ✅
|
|
12
|
+
*
|
|
13
|
+
* 降级场景:
|
|
14
|
+
* - Safari < 14.1 → ScriptProcessorNode
|
|
15
|
+
* - iOS Safari < 14.5 → ScriptProcessorNode
|
|
16
|
+
* - Android WebView (部分) → ScriptProcessorNode
|
|
17
|
+
*/
|
|
18
|
+
/**
|
|
19
|
+
* AudioWorklet 能力检测结果
|
|
20
|
+
*/
|
|
21
|
+
export interface IAudioWorkletCapability {
|
|
22
|
+
/**
|
|
23
|
+
* 是否支持 AudioWorklet
|
|
24
|
+
*/
|
|
25
|
+
supported: boolean;
|
|
26
|
+
/**
|
|
27
|
+
* 不支持的原因 (仅当 supported=false 时)
|
|
28
|
+
*/
|
|
29
|
+
reason?: 'no-audio-worklet' | 'no-add-module' | 'no-constructor' | 'context-error' | 'test-load-failed';
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* AudioWorklet 能力检测工具
|
|
33
|
+
*
|
|
34
|
+
* @example
|
|
35
|
+
* ```typescript
|
|
36
|
+
* const context = new AudioContext();
|
|
37
|
+
* const capability = await AudioWorkletCapability.check(context);
|
|
38
|
+
*
|
|
39
|
+
* if (capability.supported) {
|
|
40
|
+
* // 使用 AudioWorklet
|
|
41
|
+
* await context.audioWorklet.addModule('processor.js');
|
|
42
|
+
* const node = new AudioWorkletNode(context, 'my-processor');
|
|
43
|
+
* }
|
|
44
|
+
* else {
|
|
45
|
+
* // 降级到 ScriptProcessorNode
|
|
46
|
+
* console.warn('AudioWorklet not supported:', capability.reason);
|
|
47
|
+
* const node = context.createScriptProcessor(2048, 1, 1);
|
|
48
|
+
* }
|
|
49
|
+
* ```
|
|
50
|
+
*/
|
|
51
|
+
export declare class AudioWorkletCapability {
|
|
52
|
+
/**
|
|
53
|
+
* 缓存的检测结果 (避免重复检测)
|
|
54
|
+
*/
|
|
55
|
+
private static cachedResult;
|
|
56
|
+
/**
|
|
57
|
+
* 检测 AudioWorklet 支持情况
|
|
58
|
+
*
|
|
59
|
+
* @param context - AudioContext 实例
|
|
60
|
+
* @returns 检测结果
|
|
61
|
+
*
|
|
62
|
+
* @remarks
|
|
63
|
+
* - 首次调用会执行实际检测并缓存结果
|
|
64
|
+
* - 后续调用直接返回缓存结果
|
|
65
|
+
* - 调用 reset() 可清除缓存
|
|
66
|
+
*/
|
|
67
|
+
static check: (context: AudioContext) => Promise<IAudioWorkletCapability>;
|
|
68
|
+
/**
|
|
69
|
+
* 重置缓存的检测结果
|
|
70
|
+
*
|
|
71
|
+
* @remarks
|
|
72
|
+
* 调用此方法后,下次 check() 会重新执行检测
|
|
73
|
+
* 一般用于测试场景
|
|
74
|
+
*/
|
|
75
|
+
static reset: () => void;
|
|
76
|
+
/**
|
|
77
|
+
* 同步检测 (仅基础属性检查,不执行加载测试)
|
|
78
|
+
*
|
|
79
|
+
* @param context - AudioContext 实例
|
|
80
|
+
* @returns 是否可能支持 AudioWorklet
|
|
81
|
+
*
|
|
82
|
+
* @remarks
|
|
83
|
+
* - 此方法仅检查 API 存在性,不测试实际加载
|
|
84
|
+
* - 返回 true 不保证 100% 可用,建议使用异步的 check()
|
|
85
|
+
* - 用于快速预判,避免不必要的异步等待
|
|
86
|
+
*/
|
|
87
|
+
static checkSync: (context: AudioContext) => boolean;
|
|
88
|
+
/**
|
|
89
|
+
* 执行实际的能力检测
|
|
90
|
+
*
|
|
91
|
+
* @param context - AudioContext 实例
|
|
92
|
+
* @returns 检测结果
|
|
93
|
+
*/
|
|
94
|
+
private static readonly performCheck;
|
|
95
|
+
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Float API 兼容层
|
|
3
|
+
* @description 为老版本浏览器(特别是 iOS Safari 10-13)提供 Float API 的降级方案
|
|
4
|
+
*
|
|
5
|
+
* 问题背景:
|
|
6
|
+
* - AnalyserNode.getFloatTimeDomainData() 和 getFloatFrequencyData() 在老版本 iOS/Android 上不存在
|
|
7
|
+
* - 直接调用会导致 TypeError 崩溃
|
|
8
|
+
* - 需要降级到 getByteTimeDomainData() 和 getByteFrequencyData() 并进行格式转换
|
|
9
|
+
*/
|
|
10
|
+
/**
|
|
11
|
+
* 兼容性包装: getFloatTimeDomainData
|
|
12
|
+
* 优先使用 Float API,降级到 Byte API 并转换
|
|
13
|
+
*
|
|
14
|
+
* @param analyser - AnalyserNode 实例
|
|
15
|
+
* @param output - 输出缓冲区 (Float32Array)
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```typescript
|
|
19
|
+
* const analyser = context.createAnalyser();
|
|
20
|
+
* const timeData = new Float32Array(analyser.fftSize);
|
|
21
|
+
* getFloatTimeDomainDataCompat(analyser, timeData);
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export declare function getFloatTimeDomainDataCompat(analyser: AnalyserNode, output: Float32Array<ArrayBuffer>): void;
|
|
25
|
+
/**
|
|
26
|
+
* 兼容性包装: getFloatFrequencyData
|
|
27
|
+
* 优先使用 Float API,降级到 Byte API 并转换
|
|
28
|
+
*
|
|
29
|
+
* @param analyser - AnalyserNode 实例
|
|
30
|
+
* @param output - 输出缓冲区 (Float32Array)
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```typescript
|
|
34
|
+
* const analyser = context.createAnalyser();
|
|
35
|
+
* const freqData = new Float32Array(analyser.frequencyBinCount);
|
|
36
|
+
* getFloatFrequencyDataCompat(analyser, freqData);
|
|
37
|
+
* ```
|
|
38
|
+
*/
|
|
39
|
+
export declare function getFloatFrequencyDataCompat(analyser: AnalyserNode, output: Float32Array): void;
|
|
40
|
+
/**
|
|
41
|
+
* 直接从 Float32Array 采样计算 RMS (均方根)
|
|
42
|
+
* 比使用 AnalyserNode 更可靠,避免分支不更新的问题
|
|
43
|
+
*
|
|
44
|
+
* @param samples - 音频采样数组
|
|
45
|
+
* @returns RMS 值 (0.0 到 1.0+)
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```typescript
|
|
49
|
+
* const inputData = event.inputBuffer.getChannelData(0);
|
|
50
|
+
* const rms = calculateRMSDirect(inputData);
|
|
51
|
+
* const energyDb = rmsToDb(rms);
|
|
52
|
+
* ```
|
|
53
|
+
*/
|
|
54
|
+
export declare function calculateRMSDirect(samples: Float32Array): number;
|
|
55
|
+
/**
|
|
56
|
+
* 将 RMS 值转换为 dBFS (分贝满刻度)
|
|
57
|
+
*
|
|
58
|
+
* @param rms - RMS 值 (0.0 到 1.0+)
|
|
59
|
+
* @returns dBFS 值 (-100 到 0+)
|
|
60
|
+
*
|
|
61
|
+
* @remarks
|
|
62
|
+
* - 0 dBFS = 最大值 (RMS = 1.0)
|
|
63
|
+
* - -∞ dBFS = 静音 (RMS = 0.0)
|
|
64
|
+
* - 实际下限设为 -100 dBFS
|
|
65
|
+
*
|
|
66
|
+
* @example
|
|
67
|
+
* ```typescript
|
|
68
|
+
* rmsToDb(1.0) // 0 dBFS (最大)
|
|
69
|
+
* rmsToDb(0.5) // -6.02 dBFS
|
|
70
|
+
* rmsToDb(0.1) // -20 dBFS
|
|
71
|
+
* rmsToDb(0.00001) // -100 dBFS (下限)
|
|
72
|
+
* ```
|
|
73
|
+
*/
|
|
74
|
+
export declare function rmsToDb(rms: number): number;
|
|
75
|
+
/**
|
|
76
|
+
* 将 dBFS 转换为线性增益系数
|
|
77
|
+
*
|
|
78
|
+
* @param db - dBFS 值
|
|
79
|
+
* @returns 线性增益 (0.0 到 1.0+)
|
|
80
|
+
*
|
|
81
|
+
* @example
|
|
82
|
+
* ```typescript
|
|
83
|
+
* dbToGain(0) // 1.0 (无衰减)
|
|
84
|
+
* dbToGain(-6) // 0.501 (约 50%)
|
|
85
|
+
* dbToGain(-20) // 0.1 (10%)
|
|
86
|
+
* dbToGain(-40) // 0.01 (1%)
|
|
87
|
+
* ```
|
|
88
|
+
*/
|
|
89
|
+
export declare function dbToGain(db: number): number;
|
|
90
|
+
/**
|
|
91
|
+
* 将线性增益转换为 dB
|
|
92
|
+
*
|
|
93
|
+
* @param gain - 线性增益 (0.0 到 1.0+)
|
|
94
|
+
* @returns dB 值
|
|
95
|
+
*
|
|
96
|
+
* @example
|
|
97
|
+
* ```typescript
|
|
98
|
+
* gainToDb(1.0) // 0 dB
|
|
99
|
+
* gainToDb(0.5) // -6.02 dB
|
|
100
|
+
* gainToDb(2.0) // 6.02 dB (放大)
|
|
101
|
+
* ```
|
|
102
|
+
*/
|
|
103
|
+
export declare function gainToDb(gain: number): number;
|
|
@@ -2,6 +2,7 @@ import type { Newable } from 'inversify';
|
|
|
2
2
|
import type { AgentConfig } from './agent';
|
|
3
3
|
import type { Env } from './common';
|
|
4
4
|
import { BaseAgentService, NBaseAgentService } from '../lib/service/agent/baseAgentService';
|
|
5
|
+
import type { NBaseAsrService } from '../lib/service/rtc-asr/asr/baseAsrService';
|
|
5
6
|
import { type ReplacementRule } from '../lib/utils/applyTextReplacements';
|
|
6
7
|
import { NPictureClientDigitalHumanService } from '../lib/service/digital-human/PictureClientDigitalHumanService';
|
|
7
8
|
import Emittery from 'emittery';
|
|
@@ -11,9 +12,247 @@ import { NAbstractDigitalHumanService } from '../lib/service/digital-human/BaseD
|
|
|
11
12
|
import { IAsrMessageEventPayload, IBusyEventPayload, IInterruptEventPayload, IReplyStartEventPayload } from '../lib/controller';
|
|
12
13
|
import { NDigitalHumanService } from '../lib/service/digital-human/CloudDigitalHumanService';
|
|
13
14
|
import { Language, type LanguageCode } from '../lib/utils/language-mapper';
|
|
15
|
+
import type { AzureSpeechRegionValue, AzureSpeechLanguageValue } from '../lib/constants/azure';
|
|
14
16
|
type TFormatter = (input: string) => string;
|
|
15
17
|
export { Language };
|
|
16
18
|
export type { LanguageCode };
|
|
19
|
+
/**
|
|
20
|
+
* ASR 提供商类型
|
|
21
|
+
*/
|
|
22
|
+
export type AsrProvider = 'baidu' | 'azure';
|
|
23
|
+
/**
|
|
24
|
+
* ASR 通用配置(所有提供商共享)
|
|
25
|
+
*/
|
|
26
|
+
export interface ICommonAsrConfig {
|
|
27
|
+
/**
|
|
28
|
+
* 🎙️【选填】音频约束配置
|
|
29
|
+
* 用于控制 getUserMedia 的音频处理特性
|
|
30
|
+
*
|
|
31
|
+
* 直接使用 Web 标准 `MediaTrackConstraints` 类型,支持所有标准音频约束参数。
|
|
32
|
+
*
|
|
33
|
+
* ⚙️ 三大核心特性:
|
|
34
|
+
* - `echoCancellation`(回声消除):消除扬声器播放的声音被麦克风再次录入
|
|
35
|
+
* - `noiseSuppression`(降噪):过滤背景噪音
|
|
36
|
+
* - `autoGainControl`(自动增益):自动调节音量,保持稳定输出
|
|
37
|
+
*
|
|
38
|
+
* 📚 更多可用参数参考:https://developer.mozilla.org/en-US/docs/Web/API/MediaTrackConstraints
|
|
39
|
+
*
|
|
40
|
+
* @default
|
|
41
|
+
* ```typescript
|
|
42
|
+
* {
|
|
43
|
+
* echoCancellation: true,
|
|
44
|
+
* noiseSuppression: true,
|
|
45
|
+
* autoGainControl: true
|
|
46
|
+
* }
|
|
47
|
+
* ```
|
|
48
|
+
*
|
|
49
|
+
* @example 自定义配置
|
|
50
|
+
* ```typescript
|
|
51
|
+
* {
|
|
52
|
+
* echoCancellation: true,
|
|
53
|
+
* noiseSuppression: true,
|
|
54
|
+
* autoGainControl: false, // 禁用自动增益
|
|
55
|
+
* sampleRate: { ideal: 48000 }, // 48kHz 高质量采样
|
|
56
|
+
* channelCount: { exact: 1 }, // 强制单声道
|
|
57
|
+
* deviceId: 'specific-device-id' // 指定设备
|
|
58
|
+
* }
|
|
59
|
+
* ```
|
|
60
|
+
*/
|
|
61
|
+
audioConstraints?: MediaTrackConstraints;
|
|
62
|
+
/**
|
|
63
|
+
* 🔊【选填】启用外部播放抑制(移动端推荐)
|
|
64
|
+
* 用于解决移动端外放音频(如 TTS 播报)被 ASR 误识别的问题
|
|
65
|
+
*
|
|
66
|
+
* 工作原理:
|
|
67
|
+
* - 实时监测麦克风音频能量
|
|
68
|
+
* - 外部音频播放期间,提高 ASR 激活阈值
|
|
69
|
+
* - 只有当麦克风能量显著高于阈值时,才允许 ASR 识别
|
|
70
|
+
*
|
|
71
|
+
* 💡 使用场景:
|
|
72
|
+
* - 移动端用户外放播报(不使用耳机)
|
|
73
|
+
* - 浏览器回声消除效果不佳的场景
|
|
74
|
+
* - 需要在播报过程中仍然支持语音打断
|
|
75
|
+
*
|
|
76
|
+
* ⚠️ 注意:
|
|
77
|
+
* - 会增加约 10-20ms 音频处理延迟
|
|
78
|
+
* - 环境噪音过大可能影响效果
|
|
79
|
+
* - 用户需要比正常情况稍大声说话
|
|
80
|
+
*
|
|
81
|
+
* @default false
|
|
82
|
+
*/
|
|
83
|
+
enablePlaybackSuppression?: boolean;
|
|
84
|
+
/**
|
|
85
|
+
* 🎚️【选填】外部播放抑制配置(仅当 enablePlaybackSuppression=true 时生效)
|
|
86
|
+
*/
|
|
87
|
+
playbackSuppressionConfig?: {
|
|
88
|
+
/**
|
|
89
|
+
* 抑制启用时的能量倍数阈值
|
|
90
|
+
* 麦克风能量必须是基准的 N 倍才能通过
|
|
91
|
+
* 值越大,抑制越激进(可能漏识别用户语音)
|
|
92
|
+
* @default 3.0
|
|
93
|
+
*/
|
|
94
|
+
energyMultiplier?: number;
|
|
95
|
+
/**
|
|
96
|
+
* 空闲期间的基准能量阈值(dBFS)
|
|
97
|
+
* 低于此值的信号被视为静音或微弱回声
|
|
98
|
+
* @default -45
|
|
99
|
+
*/
|
|
100
|
+
idleThreshold?: number;
|
|
101
|
+
/**
|
|
102
|
+
* 能量平滑系数(0-1)
|
|
103
|
+
* 值越大,能量变化越平滑(降低噪声干扰)
|
|
104
|
+
* @default 0.7
|
|
105
|
+
*/
|
|
106
|
+
smoothingFactor?: number;
|
|
107
|
+
/**
|
|
108
|
+
* 抑制启用延迟(毫秒)
|
|
109
|
+
* 启用抑制后,延迟多久才启用高阈值
|
|
110
|
+
* @default 100
|
|
111
|
+
*/
|
|
112
|
+
activationDelay?: number;
|
|
113
|
+
/**
|
|
114
|
+
* 抑制关闭恢复延迟(毫秒)
|
|
115
|
+
* 停止抑制后,延迟多久恢复正常阈值
|
|
116
|
+
* @default 500
|
|
117
|
+
*/
|
|
118
|
+
recoveryDelay?: number;
|
|
119
|
+
};
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* 百度 ASR 配置
|
|
123
|
+
*/
|
|
124
|
+
export interface IBaiduAsrConfig extends ICommonAsrConfig {
|
|
125
|
+
/**
|
|
126
|
+
* ASR 语音端点检测(VAD)的静音超时时长(单位:毫秒)
|
|
127
|
+
* @default 600
|
|
128
|
+
*/
|
|
129
|
+
asrVad?: number;
|
|
130
|
+
/**
|
|
131
|
+
* 语言配置
|
|
132
|
+
* @default Language.CHINESE
|
|
133
|
+
*/
|
|
134
|
+
lang?: LanguageCode;
|
|
135
|
+
}
|
|
136
|
+
/**
|
|
137
|
+
* Azure Speech 配置
|
|
138
|
+
*/
|
|
139
|
+
export interface IAzureSpeechConfig extends ICommonAsrConfig {
|
|
140
|
+
/**
|
|
141
|
+
* Azure Speech Service 订阅密钥
|
|
142
|
+
* @example "YOUR_KEY" // 请勿在文档或代码中写入真实密钥
|
|
143
|
+
*/
|
|
144
|
+
subscriptionKey: string;
|
|
145
|
+
/**
|
|
146
|
+
* Azure 服务区域
|
|
147
|
+
*
|
|
148
|
+
* 🎯 推荐使用:
|
|
149
|
+
* - 亚太地区:AzureSpeechRegion.SOUTHEAST_ASIA(新加坡)、AzureSpeechRegion.EAST_ASIA(香港)
|
|
150
|
+
* - 日本:AzureSpeechRegion.JAPAN_EAST(东京)
|
|
151
|
+
* - 韩国:AzureSpeechRegion.KOREA_CENTRAL(首尔)
|
|
152
|
+
* - 欧美:AzureSpeechRegion.WEST_EUROPE(荷兰)、AzureSpeechRegion.EAST_US(美国东部)
|
|
153
|
+
*
|
|
154
|
+
* @example AzureSpeechRegion.SOUTHEAST_ASIA
|
|
155
|
+
* @example 'southeastasia' // 也支持字符串(向后兼容)
|
|
156
|
+
*/
|
|
157
|
+
region: AzureSpeechRegionValue | string;
|
|
158
|
+
/**
|
|
159
|
+
* 识别语言列表(支持多语言自动检测,最多 4 种)
|
|
160
|
+
*
|
|
161
|
+
* 🎯 常用语言:
|
|
162
|
+
* - 中文:AzureSpeechLanguage.CHINESE_SIMPLIFIED_CN
|
|
163
|
+
* - 英文:AzureSpeechLanguage.ENGLISH_US
|
|
164
|
+
* - 日文:AzureSpeechLanguage.JAPANESE_JP
|
|
165
|
+
* - 韩文:AzureSpeechLanguage.KOREAN_KR
|
|
166
|
+
*
|
|
167
|
+
* @default [AzureSpeechLanguage.ENGLISH_US]
|
|
168
|
+
* @example [AzureSpeechLanguage.CHINESE_SIMPLIFIED_CN, AzureSpeechLanguage.ENGLISH_US]
|
|
169
|
+
* @example ['zh-CN', 'en-US'] // 也支持字符串(向后兼容)
|
|
170
|
+
*/
|
|
171
|
+
languages?: Array<AzureSpeechLanguageValue | string>;
|
|
172
|
+
/**
|
|
173
|
+
* 自定义短语列表(Phrase List)
|
|
174
|
+
* 用于提升特定词汇的识别准确率
|
|
175
|
+
* @example ['客悦 ONE', 'Galaxy S25', 'iPhone 17Pro']
|
|
176
|
+
*/
|
|
177
|
+
phraseList?: string[];
|
|
178
|
+
/**
|
|
179
|
+
* 短语权重(1-10,默认 2)
|
|
180
|
+
*/
|
|
181
|
+
phraseWeight?: number;
|
|
182
|
+
/**
|
|
183
|
+
* 初始静音超时(毫秒)
|
|
184
|
+
* @default 30000
|
|
185
|
+
*/
|
|
186
|
+
initialSilenceTimeoutMs?: number;
|
|
187
|
+
/**
|
|
188
|
+
* 结束静音超时(毫秒)
|
|
189
|
+
* @default 30000
|
|
190
|
+
*/
|
|
191
|
+
endSilenceTimeoutMs?: number;
|
|
192
|
+
/**
|
|
193
|
+
* 分段静音超时(毫秒)
|
|
194
|
+
* @default 1000
|
|
195
|
+
*/
|
|
196
|
+
segmentationSilenceTimeoutMs?: number;
|
|
197
|
+
/**
|
|
198
|
+
* 是否启用音频日志
|
|
199
|
+
* @default false
|
|
200
|
+
*/
|
|
201
|
+
enableAudioLogging?: boolean;
|
|
202
|
+
/**
|
|
203
|
+
* 自定义端点 ID(如使用 Custom Speech)
|
|
204
|
+
*/
|
|
205
|
+
customEndpointId?: string;
|
|
206
|
+
/**
|
|
207
|
+
* 高级配置参数(用于传递任意 Azure SDK 配置)
|
|
208
|
+
* @example { recognitionMode: 'Interactive' }
|
|
209
|
+
*/
|
|
210
|
+
advancedConfig?: Record<string, any>;
|
|
211
|
+
/**
|
|
212
|
+
* 连接超时时间(毫秒)
|
|
213
|
+
* 用于控制连接服务的超时时间
|
|
214
|
+
* 如果不传入此参数,则不会执行超时逻辑
|
|
215
|
+
* @example 10000 - 10秒超时
|
|
216
|
+
*/
|
|
217
|
+
connectionTimeoutMs?: number;
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* ASR 配置(判别联合类型)
|
|
221
|
+
*
|
|
222
|
+
* 当 provider 为 'baidu' 时,config 自动推断为 IBaiduAsrConfig
|
|
223
|
+
* 当 provider 为 'azure' 时,config 自动推断为 IAzureSpeechConfig
|
|
224
|
+
*
|
|
225
|
+
* @example 百度 ASR
|
|
226
|
+
* ```typescript
|
|
227
|
+
* {
|
|
228
|
+
* asr: {
|
|
229
|
+
* provider: 'baidu',
|
|
230
|
+
* config: { asrVad: 600, lang: Language.CHINESE }
|
|
231
|
+
* }
|
|
232
|
+
* }
|
|
233
|
+
* ```
|
|
234
|
+
*
|
|
235
|
+
* @example Azure ASR
|
|
236
|
+
* ```typescript
|
|
237
|
+
* {
|
|
238
|
+
* asr: {
|
|
239
|
+
* provider: 'azure',
|
|
240
|
+
* config: {
|
|
241
|
+
* subscriptionKey: 'YOUR_KEY',
|
|
242
|
+
* region: 'southeastasia',
|
|
243
|
+
* languages: ['zh-CN', 'en-US']
|
|
244
|
+
* }
|
|
245
|
+
* }
|
|
246
|
+
* }
|
|
247
|
+
* ```
|
|
248
|
+
*/
|
|
249
|
+
export type AsrConfig = {
|
|
250
|
+
provider: 'baidu';
|
|
251
|
+
config: IBaiduAsrConfig;
|
|
252
|
+
} | {
|
|
253
|
+
provider: 'azure';
|
|
254
|
+
config: IAzureSpeechConfig;
|
|
255
|
+
};
|
|
17
256
|
export interface IOptions<AS extends BaseAgentService = BaseAgentService> {
|
|
18
257
|
/**
|
|
19
258
|
* 🆔【必填】数字员工形象 ID —— 从平台获取的唯一形象资源标识
|
|
@@ -197,6 +436,46 @@ export interface IOptions<AS extends BaseAgentService = BaseAgentService> {
|
|
|
197
436
|
*/
|
|
198
437
|
ttsModel?: 'turbo_v2' | undefined;
|
|
199
438
|
/**
|
|
439
|
+
* 🎙️【选填】ASR 配置(推荐使用)
|
|
440
|
+
* —— 使用判别联合类型,根据 provider 自动推断 config 类型
|
|
441
|
+
*
|
|
442
|
+
* @example 百度 ASR
|
|
443
|
+
* ```typescript
|
|
444
|
+
* {
|
|
445
|
+
* asr: {
|
|
446
|
+
* provider: 'baidu',
|
|
447
|
+
* config: { asrVad: 600, lang: Language.CHINESE }
|
|
448
|
+
* }
|
|
449
|
+
* }
|
|
450
|
+
* ```
|
|
451
|
+
*
|
|
452
|
+
* @example Azure ASR
|
|
453
|
+
* ```typescript
|
|
454
|
+
* {
|
|
455
|
+
* asr: {
|
|
456
|
+
* provider: 'azure',
|
|
457
|
+
* config: {
|
|
458
|
+
* subscriptionKey: 'YOUR_KEY',
|
|
459
|
+
* region: 'southeastasia',
|
|
460
|
+
* languages: ['zh-CN', 'en-US']
|
|
461
|
+
* }
|
|
462
|
+
* }
|
|
463
|
+
* }
|
|
464
|
+
* ```
|
|
465
|
+
*/
|
|
466
|
+
asr?: AsrConfig;
|
|
467
|
+
/**
|
|
468
|
+
* @deprecated 请使用 asr.config.asrVad,将在 v2.0 移除
|
|
469
|
+
*
|
|
470
|
+
* 迁移示例:
|
|
471
|
+
* ```typescript
|
|
472
|
+
* // 旧写法
|
|
473
|
+
* { asrVad: 600 }
|
|
474
|
+
*
|
|
475
|
+
* // 新写法
|
|
476
|
+
* { asr: { provider: 'baidu', config: { asrVad: 600 } } }
|
|
477
|
+
* ```
|
|
478
|
+
*
|
|
200
479
|
* ASR 语音端点检测(VAD)的静音超时时长(单位:毫秒)
|
|
201
480
|
* —— 用于判断用户"说完一句话"的停顿阈值。
|
|
202
481
|
*
|
|
@@ -232,6 +511,57 @@ export interface IOptions<AS extends BaseAgentService = BaseAgentService> {
|
|
|
232
511
|
* @default true
|
|
233
512
|
*/
|
|
234
513
|
checkAudioDeviceBeforeStart?: boolean;
|
|
514
|
+
/**
|
|
515
|
+
* 🎙️【选填】麦克风检测失败时的处理策略
|
|
516
|
+
*
|
|
517
|
+
* 当 checkAudioDeviceBeforeStart=true 且麦克风检测失败时的行为:
|
|
518
|
+
*
|
|
519
|
+
* - 'error' (默认) → 抛出 AsrInitializationError,终止启动
|
|
520
|
+
* - 'warn' → 在控制台输出警告,禁用语音输入,继续启动(仅文本模式)
|
|
521
|
+
* - 'silent' → 静默降级为仅文本模式,不输出警告(仍会发出事件)
|
|
522
|
+
* - 'prompt' → 调用 onMicrophoneCheckFailed 回调,让开发者自定义交互逻辑
|
|
523
|
+
*
|
|
524
|
+
* 💡 使用场景:
|
|
525
|
+
* - 'error' → 严格要求语音功能的场景(如语音客服)
|
|
526
|
+
* - 'warn' → 语音为可选功能,允许文本输入兜底
|
|
527
|
+
* - 'silent' → 自动降级,不干扰用户体验
|
|
528
|
+
* - 'prompt' → 需要显示自定义对话框让用户确认
|
|
529
|
+
*
|
|
530
|
+
* ⚠️ 注意:
|
|
531
|
+
* - 降级后用户仍可通过 input(text) 方法进行文本输入
|
|
532
|
+
* - 系统会发出 device_check_completed 和 microphone_available 事件
|
|
533
|
+
* - ASR 服务会被自动禁用(controller.asrService.disabled = true)
|
|
534
|
+
*
|
|
535
|
+
* @default 'error'
|
|
536
|
+
*/
|
|
537
|
+
microphoneFailureHandling?: 'error' | 'warn' | 'silent' | 'prompt';
|
|
538
|
+
/**
|
|
539
|
+
* 🔔【选填】麦克风检测失败时的自定义处理回调
|
|
540
|
+
*
|
|
541
|
+
* 仅当 microphoneFailureHandling='prompt' 时生效。
|
|
542
|
+
*
|
|
543
|
+
* 💡 典型用途:
|
|
544
|
+
* - 显示自定义确认对话框确认是否继续流程
|
|
545
|
+
* - 根据错误类型展示不同的提示信息
|
|
546
|
+
* - 记录用户选择到分析系统
|
|
547
|
+
*
|
|
548
|
+
* @param result - 设备检测结果,包含错误详情、设备列表、权限状态等
|
|
549
|
+
* @returns Promise<boolean> - true: 继续启动(文本模式),false: 终止启动
|
|
550
|
+
*
|
|
551
|
+
* @example
|
|
552
|
+
* ```typescript
|
|
553
|
+
* onMicrophoneCheckFailed: async (result) => {
|
|
554
|
+
* // result.userMessage 已包含用户友好的错误提示
|
|
555
|
+
* const userChoice = await showDialog({
|
|
556
|
+
* title: '麦克风不可用',
|
|
557
|
+
* message: `${result.userMessage}\n\n您可以选择继续使用文本输入。`,
|
|
558
|
+
* buttons: ['继续(仅文本)', '取消']
|
|
559
|
+
* });
|
|
560
|
+
* return userChoice === 'continue';
|
|
561
|
+
* }
|
|
562
|
+
* ```
|
|
563
|
+
*/
|
|
564
|
+
onMicrophoneCheckFailed?: (result: NBaseAsrService.IDeviceCheckResult) => Promise<boolean>;
|
|
235
565
|
/**
|
|
236
566
|
* 🌐【选填】运行环境
|
|
237
567
|
* - 'development' → 开发调试(日志全开)
|
|
@@ -278,6 +608,17 @@ export interface IOptions<AS extends BaseAgentService = BaseAgentService> {
|
|
|
278
608
|
*/
|
|
279
609
|
autoChromaKey?: boolean;
|
|
280
610
|
/**
|
|
611
|
+
* @deprecated 请使用 asr.config.lang,将在 v2.0 移除
|
|
612
|
+
*
|
|
613
|
+
* 迁移示例:
|
|
614
|
+
* ```typescript
|
|
615
|
+
* // 旧写法
|
|
616
|
+
* { lang: Language.CHINESE }
|
|
617
|
+
*
|
|
618
|
+
* // 新写法
|
|
619
|
+
* { asr: { provider: 'baidu', config: { lang: Language.CHINESE } } }
|
|
620
|
+
* ```
|
|
621
|
+
*
|
|
281
622
|
* 🌐【选填】语言配置
|
|
282
623
|
*
|
|
283
624
|
* 用于配置 ASR 语音识别和 TTS 语音合成的语言
|
|
@@ -325,21 +666,20 @@ export declare const DEFAULT_OPTIONS: {
|
|
|
325
666
|
readonly minSplitLen: 5;
|
|
326
667
|
readonly ttsModel: "turbo_v2";
|
|
327
668
|
readonly env: "production";
|
|
328
|
-
readonly asrVad: 600;
|
|
329
669
|
readonly interruptible: true;
|
|
330
670
|
readonly rendererMode: "cloud";
|
|
331
671
|
readonly autoChromaKey: true;
|
|
332
672
|
readonly typeDelay: 163;
|
|
333
673
|
readonly enTypeDelay: 45;
|
|
334
|
-
readonly lang: "zh";
|
|
335
674
|
readonly inactivityPrompt: "您这么久没讲话,是不是有其它事情要忙,那我先挂断了";
|
|
336
675
|
readonly getMountContainer: () => HTMLElement;
|
|
337
676
|
readonly hotWordReplacementRules: ReplacementRule[];
|
|
338
677
|
readonly checkAudioDeviceBeforeStart: true;
|
|
678
|
+
readonly microphoneFailureHandling: "error";
|
|
339
679
|
readonly rtcID: "appreimunmd7utp";
|
|
340
680
|
};
|
|
341
681
|
type DefaultedOptionKeys = keyof typeof DEFAULT_OPTIONS;
|
|
342
|
-
export type IResolvedOptions<AS extends BaseAgentService = BaseAgentService> = IOptions<AS> & Required<Pick<IOptions<AS>, DefaultedOptionKeys>>;
|
|
682
|
+
export type IResolvedOptions<AS extends BaseAgentService = BaseAgentService> = IOptions<AS> & Required<Pick<IOptions<AS>, DefaultedOptionKeys>> & Required<Pick<IOptions<AS>, 'asr' | 'asrVad' | 'lang'>>;
|
|
343
683
|
export interface IAaaSPilotKitEmitter<AS extends BaseAgentService = BaseAgentService> {
|
|
344
684
|
/**
|
|
345
685
|
* 🔇【状态事件】静音状态变更
|
|
@@ -754,7 +1094,7 @@ export interface IAaaSPilotKitController<AS extends BaseAgentService = BaseAgent
|
|
|
754
1094
|
*/
|
|
755
1095
|
activateManually(): Promise<void>;
|
|
756
1096
|
/**
|
|
757
|
-
* 🕰️ 重置会话超时计时器 ——
|
|
1097
|
+
* 🕰️ 重置会话超时计时器 —— "我还在用,别退出!"
|
|
758
1098
|
*
|
|
759
1099
|
* 🔹 行为:
|
|
760
1100
|
* - 重新从 0 开始计时 opts.timeoutSec
|
|
@@ -765,4 +1105,18 @@ export interface IAaaSPilotKitController<AS extends BaseAgentService = BaseAgent
|
|
|
765
1105
|
* @returns this(支持链式调用)
|
|
766
1106
|
*/
|
|
767
1107
|
keepAlive(): IAaaSPilotKitController;
|
|
1108
|
+
/**
|
|
1109
|
+
* 🎤 播放开场白(Prologue)
|
|
1110
|
+
*
|
|
1111
|
+
* 🔹 行为:
|
|
1112
|
+
* - 若 opts.prologue 有值 → 直接播放自定义开场白
|
|
1113
|
+
* - 若未配置 → 调用 Agent 接口获取默认开场白
|
|
1114
|
+
*
|
|
1115
|
+
* 💡 适用场景:
|
|
1116
|
+
* - mount 完成后自动播放欢迎语(一般在 ready 之后调用)
|
|
1117
|
+
* - 手动激活(activateManually)后播放
|
|
1118
|
+
*
|
|
1119
|
+
* ⚠️ 注意:通常不需要手动调用,框架会在合适时机自动触发
|
|
1120
|
+
*/
|
|
1121
|
+
playPrologue(): void;
|
|
768
1122
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bdky/aaas-pilot-kit",
|
|
3
|
-
"version": "1.0.8",
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "百度数字员工基础套件 - AI智能体、语音识别、数字人渲染全链路SDK,事件驱动、框架无关",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"digital-employee",
|
|
7
|
+
"digital-human",
|
|
8
|
+
"数字员工",
|
|
9
|
+
"数字人",
|
|
10
|
+
"ai-assistant",
|
|
11
|
+
"chatbot",
|
|
12
|
+
"conversational-ai",
|
|
13
|
+
"speech-recognition",
|
|
14
|
+
"asr",
|
|
15
|
+
"语音识别",
|
|
16
|
+
"tts",
|
|
17
|
+
"text-to-speech",
|
|
18
|
+
"语音合成",
|
|
19
|
+
"avatar",
|
|
20
|
+
"baidu",
|
|
21
|
+
"百度",
|
|
22
|
+
"客悦"
|
|
23
|
+
],
|
|
4
24
|
"license": "MIT",
|
|
25
|
+
"author": "Baidu Keyue Team <zhangwenxi@baidu.com>",
|
|
5
26
|
"contributors": [
|
|
6
27
|
{
|
|
7
28
|
"name": "zhangwenxi",
|
|
@@ -49,6 +70,7 @@
|
|
|
49
70
|
"fast-xml-parser": "^5.2.5",
|
|
50
71
|
"inversify": "^7.10.4",
|
|
51
72
|
"ky": "^1.14.0",
|
|
73
|
+
"microsoft-cognitiveservices-speech-sdk": "^1.47.0",
|
|
52
74
|
"p-defer": "^4.0.1",
|
|
53
75
|
"p-queue": "^9.0.0",
|
|
54
76
|
"reflect-metadata": "^0.2.2",
|