@digipair/skill-web-voice-activity-detection 0.89.0 → 0.91.0-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.swcrc +28 -0
- package/README.md +7 -0
- package/eslint.config.mjs +22 -0
- package/package.json +21 -5
- package/rollup.config.cjs +28 -0
- package/src/handlebars.d.ts +1 -0
- package/src/lib/skill-web-voice-activity-detection.spec.ts +7 -0
- package/src/lib/skill-web-voice-activity-detection.ts +119 -0
- package/tsconfig.json +13 -0
- package/tsconfig.lib.json +19 -0
- package/index.cjs.d.ts +0 -1
- package/index.cjs.js +0 -110
- package/index.esm.js +0 -28072
- package/libs/skill-web-voice-activity-detection/src/lib/skill-web-voice-activity-detection.d.ts +0 -5
- /package/{index.d.ts → src/index.d.ts} +0 -0
- /package/{libs/skill-web-voice-activity-detection/src/index.d.ts → src/index.ts} +0 -0
- /package/{schema.fr.json → src/schema.fr.json} +0 -0
- /package/{schema.json → src/schema.json} +0 -0
package/.swcrc
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"jsc": {
|
|
3
|
+
"target": "es2017",
|
|
4
|
+
"parser": {
|
|
5
|
+
"syntax": "typescript",
|
|
6
|
+
"decorators": true,
|
|
7
|
+
"dynamicImport": true
|
|
8
|
+
},
|
|
9
|
+
"transform": {
|
|
10
|
+
"decoratorMetadata": true,
|
|
11
|
+
"legacyDecorator": true
|
|
12
|
+
},
|
|
13
|
+
"keepClassNames": true,
|
|
14
|
+
"externalHelpers": true,
|
|
15
|
+
"loose": true
|
|
16
|
+
},
|
|
17
|
+
"module": {
|
|
18
|
+
"type": "es6"
|
|
19
|
+
},
|
|
20
|
+
"sourceMaps": true,
|
|
21
|
+
"exclude": [
|
|
22
|
+
"jest.config.ts",
|
|
23
|
+
".*\\.spec.tsx?$",
|
|
24
|
+
".*\\.test.tsx?$",
|
|
25
|
+
"./src/jest-setup.ts$",
|
|
26
|
+
"./**/jest-setup.ts$"
|
|
27
|
+
]
|
|
28
|
+
}
|
package/README.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import baseConfig from '../../eslint.config.mjs';
|
|
2
|
+
|
|
3
|
+
// Tooling configs listed in the `ignoredFiles` option of @nx/dependency-checks.
const ignoredFiles = [
  '{projectRoot}/eslint.config.{js,cjs,mjs}',
  '{projectRoot}/rollup.config.{js,ts,mjs,mts,cjs,cts}',
];

// JSON files are linted through the jsonc parser, loaded lazily via dynamic import.
const jsoncParser = await import('jsonc-eslint-parser');

// Override applied to every JSON file in this project.
const jsonFilesConfig = {
  files: ['**/*.json'],
  rules: {
    '@nx/dependency-checks': ['error', { ignoredFiles }],
  },
  languageOptions: {
    parser: jsoncParser,
  },
};

// Workspace-wide rules first, then the project-specific JSON override.
export default [...baseConfig, jsonFilesConfig];
|
package/package.json
CHANGED
|
@@ -1,12 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@digipair/skill-web-voice-activity-detection",
|
|
3
|
-
"version": "0.89.0",
|
|
3
|
+
"version": "0.91.0-0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"main": "dist/libs/skill-web-voice-activity-detection/index.cjs.js",
|
|
6
|
+
"module": "dist/libs/skill-web-voice-activity-detection/index.esm.js",
|
|
7
|
+
"types": "dist/libs/skill-web-voice-activity-detection/index.esm.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
"./package.json": "./libs/skill-web-voice-activity-detection/package.json",
|
|
10
|
+
".": {
|
|
11
|
+
"development": "./dist/libs/skill-web-voice-activity-detection/src/index.ts",
|
|
12
|
+
"types": "./dist/libs/skill-web-voice-activity-detection/index.esm.d.ts",
|
|
13
|
+
"import": "./dist/libs/skill-web-voice-activity-detection/index.esm.js",
|
|
14
|
+
"default": "./dist/libs/skill-web-voice-activity-detection/index.cjs.js"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
4
17
|
"keywords": [
|
|
5
18
|
"digipair",
|
|
6
19
|
"web",
|
|
7
20
|
"util"
|
|
8
21
|
],
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
22
|
+
"nx": {
|
|
23
|
+
"name": "skill-web-voice-activity-detection"
|
|
24
|
+
},
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@digipair/engine": "0.91.0-0"
|
|
27
|
+
}
|
|
28
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
const { withNx } = require('@nx/rollup/with-nx');
|
|
2
|
+
|
|
3
|
+
// Static files copied next to the bundles: the package manifest and every
// JSON file that sits directly in src/.
const copiedAssets = [
  { input: 'libs/skill-web-voice-activity-detection/', glob: 'package.json', output: '.' },
  { input: 'libs/skill-web-voice-activity-detection/src/', glob: '*.json', output: '.' },
];

// Options handed to the Nx rollup wrapper.
const nxBuildOptions = {
  main: 'libs/skill-web-voice-activity-detection/src/index.ts',
  outputPath: 'dist/libs/skill-web-voice-activity-detection',
  tsConfig: 'libs/skill-web-voice-activity-detection/tsconfig.lib.json',
  compiler: 'swc',
  format: ['esm', 'cjs'],
  assets: copiedAssets,
};

const rollupOverrides = {
  // Provide additional rollup configuration here. See: https://rollupjs.org/configuration-options
  // e.g.
  // output: { sourcemap: true },
};

module.exports = withNx(nxBuildOptions, rollupOverrides);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Shorthand ambient module declaration: allows importing
// 'handlebars/dist/handlebars.min.js' without type definitions; everything
// imported from it is typed as `any`.
declare module 'handlebars/dist/handlebars.min.js';
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
// The module exports the four skill entry points below; it has no
// `skillWebVoiceActivityDetection` export, so the generated placeholder test
// could never pass.
import { destroy, listen, pause, start } from './skill-web-voice-activity-detection';

describe('skill-web-voice-activity-detection', () => {
  it('exposes the skill entry points as functions', () => {
    for (const entryPoint of [listen, start, pause, destroy]) {
      expect(typeof entryPoint).toBe('function');
    }
  });

  it('forwards start, pause and destroy to the vad instance', async () => {
    const calls: string[] = [];
    const vad = {
      start: () => {
        calls.push('start');
      },
      pause: () => {
        calls.push('pause');
      },
      destroy: () => {
        calls.push('destroy');
      },
    };

    // `vad` may come from the params or, as a fallback, from the context.
    await start({ vad }, [], {});
    await pause({}, [], { vad });
    await destroy({}, [], { vad });

    expect(calls).toEqual(['start', 'pause', 'destroy']);
  });
});
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/* eslint-disable @typescript-eslint/no-unused-vars */
|
|
2
|
+
import { PinsSettings, executePinsList } from '@digipair/engine';
|
|
3
|
+
|
|
4
|
+
// add a script tag to the document
|
|
5
|
+
/**
 * Appends a `<script>` element for `src` to `document.body` and waits for it
 * to load.
 *
 * @param src URL of the script to load.
 * @returns A promise that resolves with the `load` event once the script has
 *   loaded, and rejects with an `Error` naming `src` when the browser reports
 *   a load failure (previously the promise stayed pending forever).
 */
async function addScript(src: string): Promise<unknown> {
  const script = document.createElement('script');
  script.src = src;
  // Dynamically inserted scripts default to async; turning it off keeps
  // execution in insertion order.
  script.async = false;

  return new Promise((resolve, reject) => {
    // Handlers are attached before insertion so no event can be missed.
    script.onload = event => resolve(event);
    script.onerror = () => {
      // Drop the dead tag so a later attempt is not mistaken for a loaded script.
      script.remove();
      reject(new Error(`Failed to load script: ${src}`));
    };

    document.body.appendChild(script);
  });
}
|
|
15
|
+
|
|
16
|
+
/**
 * Voice-activity-detection skill. `listen` loads the onnxruntime and vad-web
 * scripts, reads `MicVAD` and `utils` from the `vad` global on `window`, and
 * wires the VAD callbacks to pins lists. `start`, `pause` and `destroy`
 * forward to a VAD instance created by `listen`.
 */
class VADService {
  /**
   * Script loads started by this class, keyed by URL. Concurrent `listen`
   * calls share one promise; otherwise the second call would see the
   * half-loaded tag in the DOM and continue before the script has run.
   */
  private static readonly scriptLoads = new Map<string, Promise<unknown>>();

  /** Loads `src` at most once and resolves once its globals can be used. */
  private static loadScriptOnce(src: string): Promise<unknown> {
    const known = VADService.scriptLoads.get(src);
    if (known) {
      return known;
    }

    // A matching tag that did not go through this cache is treated as loaded.
    if (document.querySelector(`script[src="${src}"]`)) {
      return Promise.resolve();
    }

    const load = addScript(src).catch((error: unknown) => {
      // Forget failed loads so a later call can try again.
      VADService.scriptLoads.delete(src);
      throw error;
    });
    VADService.scriptLoads.set(src, load);

    return load;
  }

  /** Picks the VAD instance from `params.vad`, falling back to `context.vad`. */
  private static resolveVad(params: any, context: any): any {
    const { vad = context.vad } = params;

    if (vad == null) {
      throw new TypeError(
        'No VAD instance available: pass `vad` in the params or call from a `listen` callback context',
      );
    }

    return vad;
  }

  /**
   * Creates a `MicVAD` instance.
   *
   * @param params Options. `stream` defaults to a mono microphone stream
   *   requested through `getUserMedia`. `onFrameProcessed`, `onVADMisfire`,
   *   `onSpeechStart` and `onSpeechEnd` are pins lists run from the matching
   *   VAD callback. `baseAssetPath` and `onnxWASMBasePath` default to paths
   *   under `__DIGIPAIR_CONFIG__.BASE_URL`. All remaining options are
   *   forwarded unchanged to `MicVAD.new`.
   * @param _pinsSettingsList Unused.
   * @param context Execution context; it is spread into the context of every
   *   callback pins list together with `vad` (and `probabilities`/`frame` or
   *   `audio` where applicable). `context.__PATH__` prefixes the callback path.
   * @returns The value resolved by `MicVAD.new`.
   * @throws Error when a default asset path is needed but
   *   `__DIGIPAIR_CONFIG__.BASE_URL` is not set, or when the `vad` global is
   *   missing after the scripts were loaded.
   */
  async listen(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
    const globalInstance: any = typeof window === 'undefined' ? global : window;
    const config = globalInstance.__DIGIPAIR_CONFIG__;

    // Only evaluated when the caller did not supply the corresponding path.
    const configuredUrl = (path: string): string => {
      if (config == null || config.BASE_URL == null) {
        throw new Error(
          '__DIGIPAIR_CONFIG__.BASE_URL is not set: pass baseAssetPath and onnxWASMBasePath explicitly',
        );
      }
      return `${config.BASE_URL}/${path}`;
    };

    // Options are validated before the microphone is touched.
    const {
      onFrameProcessed = [],
      onVADMisfire = [],
      onSpeechStart = [],
      onSpeechEnd = [],
      model = 'legacy',
      additionalAudioConstraints = {},
      baseAssetPath = configuredUrl('@ricky0123/vad-web@0.0.22/dist/'),
      onnxWASMBasePath = configuredUrl('onnxruntime-web@1.18.0/dist/'),
      submitUserSpeechOnPause = false,
      positiveSpeechThreshold = 0.5,
      negativeSpeechThreshold = 0.35,
      preSpeechPadFrames = 1,
      redemptionFrames = 8,
      frameSamples = 1536,
      minSpeechFrames = 3,
    } = params;

    // Remember whether the stream is ours, so only a self-acquired one is released on failure.
    const ownsStream = params.stream === undefined;
    const stream: any = ownsStream
      ? await navigator.mediaDevices.getUserMedia({
          audio: {
            channelCount: 1,
            echoCancellation: true,
            autoGainControl: true,
            noiseSuppression: true,
          },
        })
      : params.stream;

    try {
      // Loaded one after the other: the runtime script first, then the vad-web bundle.
      await VADService.loadScriptOnce(`${onnxWASMBasePath}ort.js`);
      await VADService.loadScriptOnce(`${baseAssetPath}bundle.min.js`);

      const vadGlobal = (window as any)['vad'];
      if (vadGlobal == null) {
        throw new Error(`The "vad" global is missing after loading ${baseAssetPath}bundle.min.js`);
      }
      const { MicVAD, utils } = vadGlobal;

      // NOTE(review): the value returned by executePinsList is not awaited in
      // the callbacks below — confirm that failures are reported by the engine.
      const vad = await MicVAD.new({
        stream,
        onFrameProcessed: (probabilities: any, frame: Float32Array) => {
          executePinsList(onFrameProcessed, { ...context, probabilities, frame, vad }, `${context.__PATH__}.onFrameProcessed`);
        },
        onVADMisfire: () => {
          executePinsList(onVADMisfire, { ...context, vad }, `${context.__PATH__}.onVADMisfire`);
        },
        onSpeechStart: () => {
          executePinsList(onSpeechStart, { ...context, vad }, `${context.__PATH__}.onSpeechStart`);
        },
        onSpeechEnd: (buffer: any) => {
          // The captured samples are exposed to the pins as a WAV data URL.
          const wavBuffer = utils.encodeWAV(buffer);
          const base64 = utils.arrayBufferToBase64(wavBuffer);
          const audio = `data:audio/wav;base64,${base64}`;

          executePinsList(onSpeechEnd, { ...context, audio, vad }, `${context.__PATH__}.onSpeechEnd`);
        },
        additionalAudioConstraints,
        positiveSpeechThreshold,
        negativeSpeechThreshold,
        redemptionFrames,
        frameSamples,
        preSpeechPadFrames,
        minSpeechFrames,
        model,
        baseAssetPath,
        onnxWASMBasePath,
        submitUserSpeechOnPause,
      });

      return vad;
    } catch (error) {
      // Do not leave the microphone open when the VAD could not be created.
      if (ownsStream) {
        stream.getTracks().forEach((track: MediaStreamTrack) => track.stop());
      }
      throw error;
    }
  }

  /**
   * Calls `start()` on the VAD taken from `params.vad` or `context.vad`.
   *
   * @throws TypeError when neither source provides a VAD instance.
   */
  async start(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
    return VADService.resolveVad(params, context).start();
  }

  /**
   * Calls `pause()` on the VAD taken from `params.vad` or `context.vad`.
   *
   * @throws TypeError when neither source provides a VAD instance.
   */
  async pause(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
    return VADService.resolveVad(params, context).pause();
  }

  /**
   * Calls `destroy()` on the VAD taken from `params.vad` or `context.vad`.
   *
   * @throws TypeError when neither source provides a VAD instance.
   */
  async destroy(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
    return VADService.resolveVad(params, context).destroy();
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Skill entry point: runs `VADService.listen` on a fresh service instance
 * and returns its promise.
 */
export const listen = (params: any, pinsSettingsList: PinsSettings[], context: any) => {
  const service = new VADService();
  return service.listen(params, pinsSettingsList, context);
};
|
|
111
|
+
|
|
112
|
+
/**
 * Skill entry point: runs `VADService.start` on a fresh service instance
 * and returns its promise.
 */
export const start = (params: any, pinsSettingsList: PinsSettings[], context: any) => {
  const service = new VADService();
  return service.start(params, pinsSettingsList, context);
};
|
|
114
|
+
|
|
115
|
+
/**
 * Skill entry point: runs `VADService.pause` on a fresh service instance
 * and returns its promise.
 */
export const pause = (params: any, pinsSettingsList: PinsSettings[], context: any) => {
  const service = new VADService();
  return service.pause(params, pinsSettingsList, context);
};
|
|
117
|
+
|
|
118
|
+
/**
 * Skill entry point: runs `VADService.destroy` on a fresh service instance
 * and returns its promise.
 */
export const destroy = (params: any, pinsSettingsList: PinsSettings[], context: any) => {
  const service = new VADService();
  return service.destroy(params, pinsSettingsList, context);
};
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../tsconfig.base.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"rootDir": "src",
|
|
5
|
+
"outDir": "dist",
|
|
6
|
+
"tsBuildInfoFile": "dist/tsconfig.lib.tsbuildinfo",
|
|
7
|
+
"emitDeclarationOnly": true,
|
|
8
|
+
"module": "esnext",
|
|
9
|
+
"moduleResolution": "node",
|
|
10
|
+
"forceConsistentCasingInFileNames": true,
|
|
11
|
+
"types": ["node"]
|
|
12
|
+
},
|
|
13
|
+
"include": ["src/**/*.ts"],
|
|
14
|
+
"references": [
|
|
15
|
+
{
|
|
16
|
+
"path": "../engine/tsconfig.lib.json"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
}
|
package/index.cjs.d.ts
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export * from "./src/index";
|
package/index.cjs.js
DELETED
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
-
|
|
5
|
-
var engine = require('@digipair/engine');
|
|
6
|
-
|
|
7
|
-
function _extends() {
|
|
8
|
-
_extends = Object.assign || function assign(target) {
|
|
9
|
-
for(var i = 1; i < arguments.length; i++){
|
|
10
|
-
var source = arguments[i];
|
|
11
|
-
for(var key in source)if (Object.prototype.hasOwnProperty.call(source, key)) target[key] = source[key];
|
|
12
|
-
}
|
|
13
|
-
return target;
|
|
14
|
-
};
|
|
15
|
-
return _extends.apply(this, arguments);
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
// add a script tag to the document
|
|
19
|
-
async function addScript(src) {
|
|
20
|
-
const script = document.createElement('script');
|
|
21
|
-
script.src = src;
|
|
22
|
-
script.async = false;
|
|
23
|
-
document.body.appendChild(script);
|
|
24
|
-
return new Promise((resolve)=>{
|
|
25
|
-
script.onload = resolve;
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
let VADService = class VADService {
|
|
29
|
-
async listen(params, _pinsSettingsList, context) {
|
|
30
|
-
const globalInstance = typeof window === 'undefined' ? global : window;
|
|
31
|
-
const config = globalInstance.__DIGIPAIR_CONFIG__;
|
|
32
|
-
const { stream = await navigator.mediaDevices.getUserMedia({
|
|
33
|
-
audio: {
|
|
34
|
-
channelCount: 1,
|
|
35
|
-
echoCancellation: true,
|
|
36
|
-
autoGainControl: true,
|
|
37
|
-
noiseSuppression: true
|
|
38
|
-
}
|
|
39
|
-
}), onFrameProcessed = [], onVADMisfire = [], onSpeechStart = [], onSpeechEnd = [], model = 'legacy', additionalAudioConstraints = {}, baseAssetPath = `${config.BASE_URL}/@ricky0123/vad-web@0.0.22/dist/`, onnxWASMBasePath = `${config.BASE_URL}/onnxruntime-web@1.18.0/dist/`, submitUserSpeechOnPause = false, positiveSpeechThreshold = 0.5, negativeSpeechThreshold = 0.35, preSpeechPadFrames = 1, redemptionFrames = 8, frameSamples = 1536, minSpeechFrames = 3 } = params;
|
|
40
|
-
if (!document.querySelector(`script[src="${onnxWASMBasePath}ort.js"]`)) {
|
|
41
|
-
await addScript(`${onnxWASMBasePath}ort.js`);
|
|
42
|
-
}
|
|
43
|
-
if (!document.querySelector(`script[src="${baseAssetPath}bundle.min.js"]`)) {
|
|
44
|
-
await addScript(`${baseAssetPath}bundle.min.js`);
|
|
45
|
-
}
|
|
46
|
-
const { MicVAD, utils } = window['vad'];
|
|
47
|
-
const vad = await MicVAD.new({
|
|
48
|
-
stream,
|
|
49
|
-
onFrameProcessed: (probabilities, frame)=>{
|
|
50
|
-
engine.executePinsList(onFrameProcessed, _extends({}, context, {
|
|
51
|
-
probabilities,
|
|
52
|
-
frame,
|
|
53
|
-
vad
|
|
54
|
-
}), `${context.__PATH__}.onFrameProcessed`);
|
|
55
|
-
},
|
|
56
|
-
onVADMisfire: ()=>{
|
|
57
|
-
engine.executePinsList(onVADMisfire, _extends({}, context, {
|
|
58
|
-
vad
|
|
59
|
-
}), `${context.__PATH__}.onVADMisfire`);
|
|
60
|
-
},
|
|
61
|
-
onSpeechStart: ()=>{
|
|
62
|
-
engine.executePinsList(onSpeechStart, _extends({}, context, {
|
|
63
|
-
vad
|
|
64
|
-
}), `${context.__PATH__}.onSpeechStart`);
|
|
65
|
-
},
|
|
66
|
-
onSpeechEnd: (buffer)=>{
|
|
67
|
-
const wavBuffer = utils.encodeWAV(buffer);
|
|
68
|
-
const base64 = utils.arrayBufferToBase64(wavBuffer);
|
|
69
|
-
const audio = `data:audio/wav;base64,${base64}`;
|
|
70
|
-
engine.executePinsList(onSpeechEnd, _extends({}, context, {
|
|
71
|
-
audio,
|
|
72
|
-
vad
|
|
73
|
-
}), `${context.__PATH__}.onSpeechEnd`);
|
|
74
|
-
},
|
|
75
|
-
additionalAudioConstraints,
|
|
76
|
-
positiveSpeechThreshold,
|
|
77
|
-
negativeSpeechThreshold,
|
|
78
|
-
redemptionFrames,
|
|
79
|
-
frameSamples,
|
|
80
|
-
preSpeechPadFrames,
|
|
81
|
-
minSpeechFrames,
|
|
82
|
-
model,
|
|
83
|
-
baseAssetPath,
|
|
84
|
-
onnxWASMBasePath,
|
|
85
|
-
submitUserSpeechOnPause
|
|
86
|
-
});
|
|
87
|
-
return vad;
|
|
88
|
-
}
|
|
89
|
-
async start(params, _pinsSettingsList, context) {
|
|
90
|
-
const { vad = context.vad } = params;
|
|
91
|
-
return vad.start();
|
|
92
|
-
}
|
|
93
|
-
async pause(params, _pinsSettingsList, context) {
|
|
94
|
-
const { vad = context.vad } = params;
|
|
95
|
-
return vad.pause();
|
|
96
|
-
}
|
|
97
|
-
async destroy(params, _pinsSettingsList, context) {
|
|
98
|
-
const { vad = context.vad } = params;
|
|
99
|
-
return vad.destroy();
|
|
100
|
-
}
|
|
101
|
-
};
|
|
102
|
-
const listen = (params, pinsSettingsList, context)=>new VADService().listen(params, pinsSettingsList, context);
|
|
103
|
-
const start = (params, pinsSettingsList, context)=>new VADService().start(params, pinsSettingsList, context);
|
|
104
|
-
const pause = (params, pinsSettingsList, context)=>new VADService().pause(params, pinsSettingsList, context);
|
|
105
|
-
const destroy = (params, pinsSettingsList, context)=>new VADService().destroy(params, pinsSettingsList, context);
|
|
106
|
-
|
|
107
|
-
exports.destroy = destroy;
|
|
108
|
-
exports.listen = listen;
|
|
109
|
-
exports.pause = pause;
|
|
110
|
-
exports.start = start;
|