@fonoster/autopilot 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/Autopilot.d.ts +8 -0
- package/dist/Autopilot.js +46 -18
- package/dist/assistants/AssistantSchema.d.ts +25 -0
- package/dist/assistants/AssistantSchema.js +32 -0
- package/dist/assistants/index.d.ts +2 -0
- package/dist/assistants/index.js +4 -1
- package/dist/assistants/loadAndValidateAssistant.d.ts +3 -0
- package/dist/assistants/loadAndValidateAssistant.js +46 -0
- package/dist/assistants/types.d.ts +8 -4
- package/dist/machine/machine.d.ts +75 -20
- package/dist/machine/machine.js +77 -46
- package/dist/machine/types.d.ts +9 -1
- package/dist/runner.d.ts +2 -0
- package/dist/{demo.js → runner.js} +10 -21
- package/dist/types.d.ts +2 -9
- package/dist/vad/SileroVadModel.d.ts +15 -0
- package/dist/vad/SileroVadModel.js +65 -0
- package/dist/vad/chunkToFloat32Array.d.ts +2 -0
- package/dist/vad/chunkToFloat32Array.js +25 -0
- package/dist/vad/index.d.ts +3 -0
- package/dist/vad/index.js +38 -0
- package/dist/vad/makeVad.d.ts +2 -0
- package/dist/vad/makeVad.js +83 -0
- package/dist/vad/micVadTest.js +48 -0
- package/dist/vad/types.d.ts +17 -0
- package/dist/vad/types.js +2 -0
- package/package.json +13 -6
- package/silero_vad.onnx +0 -0
- package/dist/assistants/examples.d.ts +0 -10
- package/dist/assistants/examples.js +0 -96
- /package/dist/{demo.d.ts → vad/micVadTest.d.ts} +0 -0
package/README.md
CHANGED
@@ -1,3 +1,3 @@
 <a href="https://gitpod.io/#https://github.com/fonoster/fonoster"> <img src="https://img.shields.io/badge/Contribute%20with-Gitpod-908a85?logo=gitpod" alt="Contribute with Gitpod" />
 
-This module is part of the [Fonoster](https://fonoster.com) project. By itself, it does not do much. It is intended to be used as a dependency for other modules. For more information about the project, please visit [https://github.com/fonoster/fonoster](https://github.com/fonoster/fonoster)
+This module is part of the [Fonoster](https://fonoster.com) project. By itself, it does not do much. It is intended to be used as a dependency for other modules. For more information about the project, please visit [https://github.com/fonoster/fonoster](https://github.com/fonoster/fonoster).
package/dist/Autopilot.d.ts
CHANGED
@@ -1,7 +1,15 @@
 import { AutopilotConfig } from "./types";
 declare class Autopilot {
     private config;
+    private assistant;
+    private actor;
+    private voice;
     constructor(config: AutopilotConfig);
     start(): void;
+    private createActor;
+    private subscribeToActorState;
+    private setupVoiceStream;
+    private handleVoicePayload;
+    private setupSpeechGathering;
 }
 export { Autopilot };
package/dist/Autopilot.js
CHANGED
@@ -24,34 +24,62 @@ const logger_1 = require("@fonoster/logger");
 const xstate_1 = require("xstate");
 const assistants_1 = require("./assistants");
 const machine_1 = require("./machine/machine");
+const vad_1 = require("./vad");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
 class Autopilot {
     constructor(config) {
         this.config = config;
-        this.
+        this.assistant = (0, assistants_1.makeAssistant)(config.assistantConfig);
+        this.actor = this.createActor();
+        this.voice = config.voice;
     }
     start() {
-
-
-
-
-
-
-
-
+        this.actor.start();
+        this.setupSpeechGathering();
+        this.setupVoiceStream();
+        this.subscribeToActorState();
+    }
+    createActor() {
+        const { voice } = this.config;
+        const { firstMessage } = this.config.assistantConfig;
+        return (0, xstate_1.createActor)(machine_1.machine, {
+            input: { firstMessage, voice, assistant: this.assistant }
         });
-
-
+    }
+    subscribeToActorState() {
+        this.actor.subscribe((state) => {
            logger.verbose("actor's new state is", { state: state.value });
        });
-
-
+    }
+    async setupVoiceStream() {
+        const stream = await this.config.voice.stream({
+            direction: common_1.StreamDirection.OUT
+        });
+        const vad = await (0, vad_1.makeVad)();
+        stream.onPayload(this.handleVoicePayload(vad));
+    }
+    handleVoicePayload(vad) {
+        return async (payload) => {
+            try {
+                // TODO: Investigate why we need to cast this to Float32Array
+                const data = payload.data;
+                await vad(data, (event) => {
+                    if (event === "SPEECH_START" || event === "SPEECH_END") {
+                        this.actor.send({ type: event });
+                    }
+                });
+            }
+            catch (err) {
+                logger.error("an error occurred while processing vad", err);
+            }
+        };
+    }
+    async setupSpeechGathering() {
+        const stream = await this.voice.sgather({
            source: common_1.StreamGatherSource.SPEECH
-        })
-
-
-            actor.send({ type: "HUMAN_PROMPT", speech: payload.speech });
-        });
+        });
+        stream.onPayload((payload) => {
+            this.actor.send({ type: "HUMAN_PROMPT", speech: payload.speech });
        });
    }
 }
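The rewritten Autopilot feeds one XState actor from two independent sources: the media stream runs through the VAD and surfaces only as SPEECH_START/SPEECH_END boundary events, while the speech gatherer delivers transcripts as HUMAN_PROMPT events. A minimal sketch of that fan-in, with hypothetical stream shapes (only the event names and the VAD callback contract come from the diff above; the stream types are stand-ins, not the @fonoster/voice API):

```typescript
// Sketch of the two event feeds converging on one actor.
type AutopilotEvent =
  | { type: "SPEECH_START" }
  | { type: "SPEECH_END" }
  | { type: "HUMAN_PROMPT"; speech: string };

type Vad = (chunk: Float32Array, cb: (event: string) => void) => Promise<void>;

function wireStreams(
  actor: { send: (event: AutopilotEvent) => void },
  audioChunks: AsyncIterable<Float32Array>, // hypothetical audio feed
  transcripts: AsyncIterable<string>,       // hypothetical transcript feed
  vad: Vad
) {
  // Raw audio: only speech boundaries reach the state machine.
  void (async () => {
    for await (const chunk of audioChunks) {
      await vad(chunk, (event) => {
        if (event === "SPEECH_START" || event === "SPEECH_END") {
          actor.send({ type: event });
        }
      });
    }
  })();
  // Transcripts: every utterance becomes a HUMAN_PROMPT.
  void (async () => {
    for await (const speech of transcripts) {
      actor.send({ type: "HUMAN_PROMPT", speech });
    }
  })();
}
```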
package/dist/assistants/AssistantSchema.d.ts
ADDED

@@ -0,0 +1,25 @@
+import { z } from "zod";
+import { Model } from "./types";
+declare const AssistantSchema: z.ZodObject<{
+    name: z.ZodString;
+    firstMessage: z.ZodString;
+    systemTemplate: z.ZodString;
+    model: z.ZodNativeEnum<typeof Model>;
+    temperature: z.ZodNumber;
+    maxTokens: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    systemTemplate: string;
+    model: Model;
+    temperature: number;
+    maxTokens: number;
+    name: string;
+    firstMessage: string;
+}, {
+    systemTemplate: string;
+    model: Model;
+    temperature: number;
+    maxTokens: number;
+    name: string;
+    firstMessage: string;
+}>;
+export { AssistantSchema };

package/dist/assistants/AssistantSchema.js
ADDED

@@ -0,0 +1,32 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.AssistantSchema = void 0;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const zod_1 = require("zod");
+const types_1 = require("./types");
+const AssistantSchema = zod_1.z.object({
+    name: zod_1.z.string(),
+    firstMessage: zod_1.z.string(),
+    systemTemplate: zod_1.z.string(),
+    model: zod_1.z.nativeEnum(types_1.Model),
+    temperature: zod_1.z.number(),
+    maxTokens: zod_1.z.number()
+});
+exports.AssistantSchema = AssistantSchema;
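Since AssistantSchema is a plain zod object, it can also be used directly; a small sketch (the import path assumes the re-export added in the next file, and the literal values are made up):

```typescript
import { AssistantSchema } from "@fonoster/autopilot"; // assumed re-export path

const candidate = {
  name: "Front Desk",
  firstMessage: "Hi! How can I help you today?",
  systemTemplate: "You are a friendly phone assistant.",
  model: "gpt-4o-mini", // must be a value of the Model enum
  temperature: 0.7,
  maxTokens: 100
};

// safeParse reports schema violations without throwing, which is handy
// when you do not want the loader's hard process.exit(1) behavior.
const result = AssistantSchema.safeParse(candidate);
if (!result.success) {
  console.error(result.error.issues);
}
```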
package/dist/assistants/index.js
CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.makeAssistant = void 0;
+exports.loadAndValidateAssistant = exports.makeAssistant = void 0;
 /*
  * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
  * http://github.com/fonoster/fonoster
@@ -35,4 +35,7 @@ exports.makeAssistant = void 0;
  */
 var assistants_1 = require("./assistants");
 Object.defineProperty(exports, "makeAssistant", { enumerable: true, get: function () { return assistants_1.makeAssistant; } });
+var loadAndValidateAssistant_1 = require("./loadAndValidateAssistant");
+Object.defineProperty(exports, "loadAndValidateAssistant", { enumerable: true, get: function () { return loadAndValidateAssistant_1.loadAndValidateAssistant; } });
+__exportStar(require("./AssistantSchema"), exports);
 __exportStar(require("./types"), exports);
package/dist/assistants/loadAndValidateAssistant.js
ADDED

@@ -0,0 +1,46 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.loadAndValidateAssistant = loadAndValidateAssistant;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const fs_1 = __importDefault(require("fs"));
+const logger_1 = require("@fonoster/logger");
+const AssistantSchema_1 = require("./AssistantSchema");
+const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
+function loadAndValidateAssistant(path) {
+    if (!fs_1.default.existsSync(path)) {
+        logger.error("assistant file not found", { path });
+        process.exit(1);
+    }
+    try {
+        const fileContent = fs_1.default.readFileSync(path, "utf8");
+        const assistant = JSON.parse(fileContent);
+        return AssistantSchema_1.AssistantSchema.parse(assistant);
+    }
+    catch (e) {
+        logger.error("error parsing or validating assistant file", {
+            path,
+            error: e.message
+        });
+        process.exit(1);
+    }
+}
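Usage is a single call; note that the function terminates the process on a missing file or a schema violation rather than throwing. A hedged sketch (the file path and import specifier are hypothetical):

```typescript
import { loadAndValidateAssistant } from "@fonoster/autopilot"; // assumed export path

// Exits with code 1 if the file is missing or fails AssistantSchema.parse.
const assistant = loadAndValidateAssistant("./assistants/front-desk.json");
console.log(`loaded assistant: ${assistant.name}`);
```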
package/dist/assistants/types.d.ts
CHANGED

@@ -3,11 +3,15 @@ declare enum Model {
     GPT_4 = "gpt-4",
     GPT_4O_MINI = "gpt-4o-mini"
 }
-type
-
+type AssistantFromJson = {
+    name: string;
+    firstMessage: string;
+    systemTemplate: string;
     model: Model;
     temperature: number;
     maxTokens: number;
-    systemTemplate: string;
 };
-
+type AssistantConfig = AssistantFromJson & {
+    apiKey: string;
+};
+export { AssistantFromJson, AssistantConfig, Model };
package/dist/machine/machine.d.ts
CHANGED

@@ -1,9 +1,17 @@
-
+declare const machine: import("xstate").StateMachine<{
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("../assistants/assistants").Assistant;
+    playbackRef: string;
+    speechBuffer: string;
+    speechResponseStartTime: number;
+    speechResponseTime: number;
 }, {
-    type: "
+    type: "SPEECH_START";
+} | {
+    type: "SPEECH_END";
+} | {
+    type: "SESSION_END";
 } | {
     type: "HUMAN_PROMPT";
     speech: string;
@@ -12,8 +20,12 @@ export declare const machine: import("xstate").StateMachine<{
         type: "sendGreeting";
         params: unknown;
     };
-
-        type: "
+    interruptMachineSpeaking: {
+        type: "interruptMachineSpeaking";
+        params: unknown;
+    };
+    appendSpeech: {
+        type: "appendSpeech";
         params: unknown;
     };
     processHumanRequest: {
@@ -24,7 +36,10 @@ export declare const machine: import("xstate").StateMachine<{
         type: "hangup";
         params: unknown;
     };
-}>,
+}>, {
+    type: "hasSpeechBuffer";
+    params: unknown;
+}, never, "hangup" | "welcome" | "machineListening" | "humanSpeaking", string, {
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("../assistants/assistants").Assistant;
@@ -48,13 +63,25 @@ export declare const machine: import("xstate").StateMachine<{
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("../assistants/assistants").Assistant;
+    playbackRef: string;
+    speechBuffer: string;
+    speechResponseStartTime: number;
+    speechResponseTime: number;
 }, {
-    type: "
+    type: "SPEECH_START";
+} | {
+    type: "SPEECH_END";
+} | {
+    type: "SESSION_END";
 } | {
     type: "HUMAN_PROMPT";
     speech: string;
 }, Record<string, import("xstate").AnyActorRef | undefined>, import("xstate").StateValue, string, unknown, any, any>, {
-    type: "
+    type: "SPEECH_START";
+} | {
+    type: "SPEECH_END";
+} | {
+    type: "SESSION_END";
 } | {
     type: "HUMAN_PROMPT";
     speech: string;
@@ -63,46 +90,74 @@ export declare const machine: import("xstate").StateMachine<{
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("@langchain/core/runnables").Runnable<any, string, import("@langchain/core/runnables").RunnableConfig>;
+    playbackRef: string;
+    speechBuffer: string;
+    speechResponseStartTime: number;
+    speechResponseTime: number;
 };
 readonly id: "fnAI";
 readonly initial: "welcome";
 readonly states: {
     readonly welcome: {
-        readonly always: {
-            readonly target: "active";
-        };
         readonly entry: {
             readonly type: "sendGreeting";
         };
+        readonly always: {
+            readonly target: "machineListening";
+        };
         readonly description: "The initial state where the AI greets the Human.";
     };
-    readonly
+    readonly machineListening: {
         readonly on: {
+            readonly SPEECH_START: {
+                readonly target: "humanSpeaking";
+                readonly description: "This must be triggered by a VAD or similar system.";
+            };
             readonly HUMAN_PROMPT: {
-                readonly target: "active";
                 readonly actions: {
-                    readonly type: "
+                    readonly type: "appendSpeech";
                 };
-                readonly description: "
+                readonly description: "Appends the speech to the buffer.";
             };
         };
-        readonly description: "The state where the AI is actively
+        readonly description: "The state where the AI is actively listening in conversation.";
     };
     readonly humanSpeaking: {
-        readonly always: {
-            readonly target: "active";
-        };
         readonly entry: {
-            readonly type: "
+            readonly type: "interruptMachineSpeaking";
+        };
+        readonly on: {
+            readonly HUMAN_PROMPT: {
+                readonly actions: {
+                    readonly type: "appendSpeech";
+                };
+                readonly description: "Appends the speech to the buffer.";
+            };
+            readonly SPEECH_END: {
+                readonly target: "machineListening";
+                readonly actions: {
+                    readonly type: "processHumanRequest";
+                };
+                readonly guard: {
+                    readonly type: "hasSpeechBuffer";
+                };
+                readonly description: "This must be triggered by a VAD or similar system.";
+            };
        };
        readonly description: "The state where the AI detects Human speech while it is speaking.";
    };
    readonly hangup: {
        readonly type: "final";
-        readonly description: "The final state where the AI terminates the conversation due to inactivity.";
        readonly entry: {
            readonly type: "hangup";
        };
+        readonly on: {
+            readonly SESSION_END: {
+                readonly target: "hangup";
+            };
+        };
+        readonly description: "The final state where the AI terminates the conversation due to inactivity.";
    };
 };
 }>;
+export { machine };
package/dist/machine/machine.js
CHANGED
@@ -1,13 +1,4 @@
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.machine = void 0;
 /*
@@ -28,84 +19,124 @@ exports.machine = void 0;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+const common_1 = require("@fonoster/common");
 const logger_1 = require("@fonoster/logger");
+const uuid_1 = require("uuid");
 const xstate_1 = require("xstate");
 const types_1 = require("./types");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-
+const machine = (0, xstate_1.setup)({
     types: types_1.types,
     actions: {
-        sendGreeting: function (
-
-
-
+        sendGreeting: async function ({ context }) {
+            await context.voice.answer();
+            await context.voice.say(context.firstMessage, {
+                playbackRef: context.playbackRef
            });
        },
-
-            logger.verbose("
-
-        processHumanRequest: function (_a) {
-            return __awaiter(this, arguments, void 0, function* ({ context, event }) {
-                const speech = event.speech;
-                logger.verbose("human request", { speech });
-                const response = yield context.assistant.invoke({
-                    text: speech
-                });
-                logger.verbose("assistant response", { response });
-                yield context.voice.say(response);
+        interruptMachineSpeaking: async function ({ context }) {
+            logger.verbose("interrupting the machine", {
+                playbackRef: context.playbackRef
            });
+            await context.voice.playbackControl(context.playbackRef, common_1.PlaybackControlAction.STOP);
+        },
+        appendSpeech: function ({ context, event }) {
+            const speech = event.speech;
+            context.speechBuffer = (context.speechBuffer || "") + " " + speech;
+            context.speechResponseStartTime = Date.now();
+            logger.verbose("appended speech", { speechBuffer: context.speechBuffer });
        },
-
-
-
+        processHumanRequest: async function ({ context }) {
+            const speech = context.speechBuffer.trim();
+            logger.verbose("processing human request", { speech });
+            const response = await context.assistant.invoke({
+                text: speech
+            });
+            const speechResponseTime = Date.now() - context.speechResponseStartTime;
+            context.speechResponseTime = speechResponseTime;
+            logger.verbose("assistant response", {
+                response,
+                responseTime: speechResponseTime
            });
+            await context.voice.say(response, { playbackRef: context.playbackRef });
+            // Clear the speech buffer and reset response timing
+            context.speechBuffer = "";
+            context.speechResponseStartTime = 0;
+        },
+        hangup: async function ({ context }) {
+            await context.voice.hangup();
+        }
+    },
+    guards: {
+        hasSpeechBuffer: function ({ context }) {
+            return context.speechBuffer?.trim().length > 0;
        }
    }
 }).createMachine({
     context: ({ input }) => ({
         firstMessage: input.firstMessage,
         voice: input.voice,
-        assistant: input.assistant
+        assistant: input.assistant,
+        playbackRef: (0, uuid_1.v4)(),
+        speechBuffer: "",
+        speechResponseStartTime: 0,
+        speechResponseTime: 0
     }),
     id: "fnAI",
     initial: "welcome",
     states: {
         welcome: {
-            always: {
-                target: "active"
-            },
             entry: {
                 type: "sendGreeting"
             },
+            always: {
+                target: "machineListening"
+            },
             description: "The initial state where the AI greets the Human."
         },
-
+        machineListening: {
             on: {
+                SPEECH_START: {
+                    target: "humanSpeaking",
+                    description: "This must be triggered by a VAD or similar system."
+                },
                 HUMAN_PROMPT: {
-
-
-                        type: "processHumanRequest"
-                    },
-                    description: "This must be triggered when speech to text ends."
+                    actions: { type: "appendSpeech" },
+                    description: "Appends the speech to the buffer."
                }
            },
-            description: "The state where the AI is actively
+            description: "The state where the AI is actively listening in conversation."
        },
        humanSpeaking: {
-            always: {
-                target: "active"
-            },
            entry: {
-                type: "
+                type: "interruptMachineSpeaking"
+            },
+            on: {
+                HUMAN_PROMPT: {
+                    actions: { type: "appendSpeech" },
+                    description: "Appends the speech to the buffer."
+                },
+                SPEECH_END: {
+                    target: "machineListening",
+                    actions: { type: "processHumanRequest" },
+                    guard: { type: "hasSpeechBuffer" },
+                    description: "This must be triggered by a VAD or similar system."
+                }
            },
            description: "The state where the AI detects Human speech while it is speaking."
        },
        hangup: {
            type: "final",
-            description: "The final state where the AI terminates the conversation due to inactivity.",
            entry: {
                type: "hangup"
-            }
+            },
+            on: {
+                SESSION_END: {
+                    target: "hangup"
+                }
+            },
+            description: "The final state where the AI terminates the conversation due to inactivity."
        }
    }
 });
+exports.machine = machine;
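The machine can be exercised in isolation with the standard xstate v5 actor API; a sketch with hypothetical voice/assistant test doubles (only the event names, state names, and `input` shape come from the code above; the real types are VoiceResponse and a LangChain Runnable):

```typescript
import { createActor } from "xstate";
import { machine } from "./machine/machine"; // path within dist; adjust as needed

// Minimal test doubles standing in for the real voice session and assistant.
const voiceStub = {
  answer: async () => {},
  say: async (_text: string, _opts?: { playbackRef?: string }) => {},
  playbackControl: async (_ref: string, _action: string) => {},
  hangup: async () => {}
};
const assistantStub = {
  invoke: async ({ text }: { text: string }) => `You said: ${text}`
};

const actor = createActor(machine, {
  input: {
    firstMessage: "Hello!",
    voice: voiceStub as never,        // cast: stub omits most VoiceResponse members
    assistant: assistantStub as never
  }
});
actor.subscribe((state) => console.log(state.value));

actor.start();                        // welcome -> (sendGreeting) -> machineListening
actor.send({ type: "SPEECH_START" }); // -> humanSpeaking, interrupts playback
actor.send({ type: "HUMAN_PROMPT", speech: "What are your hours?" }); // buffered
actor.send({ type: "SPEECH_END" });   // hasSpeechBuffer passes -> processHumanRequest
```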
package/dist/machine/types.d.ts
CHANGED
@@ -5,6 +5,10 @@ declare const types: {
         firstMessage: string;
         voice: VoiceResponse;
         assistant: Assistant;
+        playbackRef: string;
+        speechBuffer: string;
+        speechResponseStartTime: number;
+        speechResponseTime: number;
     };
     input: {
         firstMessage: string;
@@ -12,7 +16,11 @@ declare const types: {
         assistant: Assistant;
     };
     events: {
-        type: "
+        type: "SPEECH_START";
+    } | {
+        type: "SPEECH_END";
+    } | {
+        type: "SESSION_END";
     } | {
         type: "HUMAN_PROMPT";
         speech: string;
package/dist/runner.d.ts
ADDED

package/dist/{demo.js → runner.js}
CHANGED
@@ -1,13 +1,5 @@
+#!/usr/bin/env node
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
@@ -32,26 +24,23 @@ Object.defineProperty(exports, "__esModule", { value: true });
  */
 const logger_1 = require("@fonoster/logger");
 const voice_1 = __importDefault(require("@fonoster/voice"));
-const
-const types_1 = require("./assistants/types");
+const assistants_1 = require("./assistants");
 const Autopilot_1 = require("./Autopilot");
 const envs_1 = require("./envs");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-
-new voice_1.default({ skipIdentity
+const skipIdentity = process.env.NODE_ENV === "dev";
+new voice_1.default({ skipIdentity }).listen(async (req, voice) => {
     const { ingressNumber, sessionRef, appRef } = req;
     logger.verbose("voice request", { ingressNumber, sessionRef, appRef });
-    const
+    const assistantPath = process.argv[2];
+    const assistant = (0, assistants_1.loadAndValidateAssistant)(assistantPath);
+    logger.verbose("interacting with assistant", { name: assistant.name });
     const autopilot = new Autopilot_1.Autopilot({
         voice,
-        firstMessage,
         assistantConfig: {
-
-
-            temperature: 0.9,
-            maxTokens: 100,
-            systemTemplate
+            ...assistant,
+            apiKey: envs_1.OPENAI_API_KEY
        }
    });
    autopilot.start();
-})
+});
package/dist/types.d.ts
CHANGED
@@ -1,14 +1,7 @@
 import { VoiceResponse } from "@fonoster/voice";
-import {
+import { AssistantConfig } from "./assistants/types";
 type AutopilotConfig = {
     voice: VoiceResponse;
-
-    assistantConfig: {
-        apiKey: string;
-        model: Model;
-        temperature: number;
-        maxTokens: number;
-        systemTemplate: string;
-    };
+    assistantConfig: AssistantConfig;
 };
 export { AutopilotConfig };
package/dist/vad/SileroVadModel.d.ts
ADDED

@@ -0,0 +1,15 @@
+import { ONNXRuntimeAPI, SpeechProbabilities } from "./types";
+declare class SileroVadModel {
+    private ort;
+    private pathToModel;
+    _session: any;
+    _h: unknown;
+    _c: unknown;
+    _sr: unknown;
+    constructor(ort: ONNXRuntimeAPI, pathToModel: string);
+    static new: (ort: ONNXRuntimeAPI, pathToModel: string) => Promise<SileroVadModel>;
+    init(): Promise<void>;
+    process(audioFrame: Float32Array): Promise<SpeechProbabilities>;
+    resetState(): void;
+}
+export { SileroVadModel };
package/dist/vad/SileroVadModel.js
ADDED

@@ -0,0 +1,65 @@
+"use strict";
+var _a;
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.SileroVadModel = void 0;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const fs_1 = require("fs");
+class SileroVadModel {
+    constructor(ort, pathToModel) {
+        this.ort = ort;
+        this.pathToModel = pathToModel;
+    }
+    async init() {
+        const modelArrayBuffer = (0, fs_1.readFileSync)(this.pathToModel).buffer;
+        this._session = await this.ort.InferenceSession.create(modelArrayBuffer);
+        this._sr = new this.ort.Tensor("int64", [16000n]);
+        this.resetState();
+    }
+    async process(audioFrame) {
+        const t = new this.ort.Tensor("float32", audioFrame, [
+            1,
+            audioFrame.length
+        ]);
+        const inputs = {
+            input: t,
+            h: this._h,
+            c: this._c,
+            sr: this._sr
+        };
+        const out = await this._session.run(inputs);
+        this._h = out.hn;
+        this._c = out.cn;
+        const [isSpeech] = out.output.data;
+        const notSpeech = 1 - isSpeech;
+        return { notSpeech, isSpeech };
+    }
+    resetState() {
+        const zeroes = Array(2 * 64).fill(0);
+        this._h = new this.ort.Tensor("float32", zeroes, [2, 1, 64]);
+        this._c = new this.ort.Tensor("float32", zeroes, [2, 1, 64]);
+    }
+}
+exports.SileroVadModel = SileroVadModel;
+_a = SileroVadModel;
+SileroVadModel.new = async (ort, pathToModel) => {
+    const model = new _a(ort, pathToModel);
+    await model.init();
+    return model;
+};
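The model keeps its recurrent state (`_h`, `_c`) between calls, so frames from one audio stream must be processed in order, and `resetState()` should separate unrelated streams. A direct-use sketch (the import path and model path are assumptions; the one-second 16 kHz frame size follows the package's own usage in makeVad below):

```typescript
import * as ort from "onnxruntime-node";
import { SileroVadModel } from "@fonoster/autopilot"; // assumed re-export from dist/vad

async function main() {
  // The package ships silero_vad.onnx at its root; this path is assumed.
  const model = await SileroVadModel.new(ort, "./silero_vad.onnx");

  const frame = new Float32Array(16000); // one second of 16 kHz silence
  const { isSpeech, notSpeech } = await model.process(frame);
  console.log({ isSpeech, notSpeech }); // complementary probabilities

  model.resetState(); // clear LSTM state before a new, unrelated stream
}

main().catch(console.error);
```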
package/dist/vad/chunkToFloat32Array.js
ADDED

@@ -0,0 +1,25 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.chunkToFloat32Array = chunkToFloat32Array;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+function chunkToFloat32Array(chunk) {
+    const int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / Int16Array.BYTES_PER_ELEMENT);
+    return new Float32Array(Array.from(int16Array, (sample) => sample / 32768.0));
+}
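The conversion reinterprets the incoming byte chunk as signed 16-bit PCM and normalizes each sample into [-1, 1) by dividing by 32768. A small self-check with hand-picked values:

```typescript
// PCM16 -> Float32 normalization, mirroring chunkToFloat32Array above.
const samples = Int16Array.from([-32768, -16384, 0, 16384, 32767]);
const chunk = Buffer.from(samples.buffer); // stand-in for a network audio chunk

const int16 = new Int16Array(
  chunk.buffer,
  chunk.byteOffset,
  chunk.byteLength / Int16Array.BYTES_PER_ELEMENT
);
const floats = Float32Array.from(int16, (sample) => sample / 32768.0);

console.log(Array.from(floats)); // [-1, -0.5, 0, 0.5, ~0.99997]
```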
package/dist/vad/index.js
ADDED

@@ -0,0 +1,38 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/* eslint-disable no-loops/no-loops */
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+__exportStar(require("./SileroVadModel"), exports);
+__exportStar(require("./types"), exports);
+__exportStar(require("./makeVad"), exports);
package/dist/vad/makeVad.js
ADDED

@@ -0,0 +1,83 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.makeVad = makeVad;
+/* eslint-disable no-loops/no-loops */
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const path_1 = require("path");
+const ort = __importStar(require("onnxruntime-node"));
+const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
+const SileroVadModel_1 = require("./SileroVadModel");
+const BUFFER_SIZE = 16000;
+async function makeVad(pathToModel) {
+    const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
+    const silero = await SileroVadModel_1.SileroVadModel.new(ort, effectivePath);
+    let audioBuffer = [];
+    let isSpeechActive = false;
+    return async function process(chunk, callback) {
+        const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
+        audioBuffer.push(...float32Array);
+        const processBuffer = async (buffer) => {
+            if (buffer.length < BUFFER_SIZE)
+                return buffer;
+            const audioFrame = buffer.slice(0, BUFFER_SIZE);
+            const remainingBuffer = buffer.slice(BUFFER_SIZE);
+            const result = await silero.process(new Float32Array(audioFrame));
+            if (result.isSpeech > 0.5) {
+                if (!isSpeechActive) {
+                    isSpeechActive = true;
+                    callback("SPEECH_START", {});
+                    return processBuffer(remainingBuffer);
+                }
+            }
+            else {
+                if (isSpeechActive) {
+                    isSpeechActive = false;
+                    callback("SPEECH_END", {});
+                    return processBuffer(remainingBuffer);
+                }
+            }
+            return processBuffer(remainingBuffer);
+        };
+        audioBuffer = await processBuffer(audioBuffer);
+    };
+}
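Worth noting from the code above: makeVad accumulates samples and only runs Silero once it holds a full BUFFER_SIZE frame (16000 samples, i.e. one second at 16 kHz), draining the buffer frame by frame through the recursive processBuffer. SPEECH_START and SPEECH_END fire only when the 0.5 probability threshold is crossed in the corresponding direction, not on every chunk, so downstream consumers see clean boundary events at roughly one-second granularity.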
package/dist/vad/micVadTest.js
ADDED

@@ -0,0 +1,48 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const logger_1 = require("@fonoster/logger");
+const node_record_lpcm16_1 = __importDefault(require("node-record-lpcm16"));
+const makeVad_1 = require("./makeVad");
+const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
+async function main() {
+    const vad = await (0, makeVad_1.makeVad)();
+    // Start recording from the default microphone
+    const mic = node_record_lpcm16_1.default
+        .record({
+        sampleRate: 16000, // 16 kHz sample rate
+        channels: 1,
+        threshold: 0.5
+    })
+        .stream();
+    mic.on("data", async (data) => {
+        const chunk = new Float32Array(data.buffer);
+        await vad(chunk, (event, _data) => {
+            logger.info("vad event:", { event, data: _data });
+        });
+    });
+    mic.on("error", (err) => {
+        logger.error("an error occurred:", { err });
+    });
+}
+main().catch(logger.error);
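micVadTest.js is a standalone harness rather than part of the library surface: it records from the default microphone via node-record-lpcm16 (added as a devDependency in the package.json change below) and logs raw VAD events, which is useful for sanity-checking the bundled Silero model locally.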
package/dist/vad/types.d.ts
ADDED

@@ -0,0 +1,17 @@
+type Vad = (chunk: Float32Array, cb: (event: string) => void) => Promise<void>;
+type SpeechProbabilities = {
+    notSpeech: number;
+    isSpeech: number;
+};
+type ONNXRuntimeAPI = {
+    InferenceSession: {
+        create(modelArrayBuffer: ArrayBuffer): Promise<unknown>;
+    };
+    Tensor: {
+        new (type: "int64", dims: [16000n]): unknown;
+        new (type: "float32", data: number[], dims: [2, 1, 64]): unknown;
+        new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
+        new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
+    };
+};
+export { SpeechProbabilities, ONNXRuntimeAPI, Vad };
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.7.
+  "version": "0.7.3",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -16,8 +16,12 @@
     "build": "tsc -b tsconfig.json",
     "clean": "rimraf ./dist node_modules tsconfig.tsbuildinfo"
   },
+  "bin": {
+    "autopilot": "./dist/runner.js"
+  },
   "files": [
-    "dist"
+    "dist",
+    "silero_vad.onnx"
   ],
   "publishConfig": {
     "access": "public"
@@ -30,14 +34,17 @@
     "url": "https://github.com/fonoster/fonoster/issues"
   },
   "dependencies": {
-    "@fonoster/logger": "^0.7.
-    "@fonoster/voice": "^0.7.
+    "@fonoster/logger": "^0.7.2",
+    "@fonoster/voice": "^0.7.3",
     "@langchain/openai": "^0.2.7",
     "dotenv": "^16.4.5",
-    "
+    "onnxruntime-node": "^1.19.0",
+    "xstate": "^5.17.3",
+    "zod": "^3.23.8"
   },
   "devDependencies": {
+    "node-record-lpcm16": "^1.0.1",
     "typescript": "^5.5.4"
   },
-  "gitHead": "
+  "gitHead": "97aa6649691819fe8a704b96bc62b1b142162393"
 }
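Two packaging changes stand out: the new `bin` entry exposes the runner as an `autopilot` command, so after an install something like `autopilot ./assistant.json` starts the voice server with the assistant file passed via `process.argv[2]`; and `silero_vad.onnx` is added to `files` so the model ships in the tarball where `makeVad` resolves it relative to `dist`. The exact invocation and file path here are illustrative.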
package/silero_vad.onnx
ADDED
Binary file
package/dist/assistants/examples.js
REMOVED

@@ -1,96 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.OLIVIA_AI_PHONE_ASSISTANT = exports.RESTAURANT_PHONE_ASSISTANT = void 0;
-/*
- * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-exports.RESTAURANT_PHONE_ASSISTANT = {
-    name: "Restaurant Phone Assistant",
-    firstMessage: "Hello, I'm Martha the AI assistant from Restaurant La Casa. How can I help you today?",
-    template: `
-## La casa AI Phone Assistant (Martha)
-
-### Mission Statement
-
-To assist users in navigating our restaurant's offerings. This includes providing information on the menu,
-handling reservations, and updating on special events.
-
-### Interaction Modes
-
-- **Age Restriction:** None; suitable for all ages.
-- **Meal Options:** Brunch, Lunch, Dinner.
-- **Special Notes:** Users should specify any dietary restrictions or preferences.
-
-### Available Links
-
-Since you are a AI Phone assistant, you can't browse the web. However, you can send links to the user's phone via SMS.
-Here are some useful links for Restaurant La Casa:
-
-- [Menu Information](https://www.lacasarestaurant.com/menu)
-- [Make a Reservation](https://www.lacasarestaurant.com/reservations)
-- [Special Events Details](https://www.lacasarestaurant.com/events)
-
-### Hours of Operation
-
-- **Brunch:** 9:00 AM - 12:00 PM
-- **Lunch:** 12:00 PM - 3:00 PM
-- **Dinner:** 5:00 PM - 10:00 PM
-
-### Special Instructions
-
-Provide accurate and timely responses to user inquiries. Stay on brand and maintain a friendly and professional tone.
-Only provide information that is relevant to the user's request. If the user asks for something that is not within the scope of the system,
-politely inform them that you are unable to assist with that request.
-
-In case of any medical emergency instruct the user to call 911 immediately.
-
-Make sure all response are readable by a TTS engine. For example, when reading the hours of operation, say "Brunch is served from 9:00 AM to 12:00 PM.".
-Similarly, when providing links, say "I have sent you a link to the menu."
-`
-};
-exports.OLIVIA_AI_PHONE_ASSISTANT = {
-    name: "Olivia AI Phone Assistant",
-    firstMessage: "Hello, I'm Olivia your friendly AI. What would you like to chat about today?",
-    template: `
-## Olivia the friendly AI
-
-### Mission Statement
-
-Olivia is designed to help users by researching topics, bringing news updates, and telling engaging stories. Our goal is to provide accurate information and captivating narratives in a timely manner.
-
-### Available Links
-
-As an AI, Olivia can browse the web and retrieve information in real-time. Here are some resources that Olivia may use to enhance the user experience:
-
-- [Latest News](https://www.news.com)
-- [Popular Topics](https://www.populartopics.com)
-- [Story Archive](https://www.storyarchive.com)
-
-### Special Instructions
-
-Provide accurate and up-to-date information on requested topics. Maintain a friendly, engaging, and creative tone. Ensure that all responses are clear and easy to understand. If a topic or request is beyond Olivia's capabilities, politely inform the user and suggest alternative sources if possible.
-
-In case of urgent or critical news, inform users to consult trusted news sources immediately.
-
-Make sure all responses are concise and engaging. For instance, when delivering news updates, say "Here’s the latest update on the topic." When telling stories, ensure the narrative is compelling and well-structured.
-
-When telling story, begin with conversation starters like "Sure, I have a great story for you!" or "Let me tell you a fascinating tale!".
-
-When you finish a story, or news summary, end with a conversation prompt like "What do you think?" or "Would you like to hear more?".
-`
-};

package/dist/{demo.d.ts → vad/micVadTest.d.ts}
RENAMED
File without changes