@fonoster/autopilot 0.8.43 → 0.8.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,9 +14,6 @@ The autopilot module provides a way to create Voice AI applications in Fonoster.
14
14
  * [Adding Tools](#configuring-the-tools)
15
15
  * [What's Next](#whats-next)
16
16
 
17
- > [!WARNING]
18
- > This is an overview of Fonoster's Autopilot. This module is still under heavy development, and the API is subject to change. This overview implements OpenAI and Groq models, but we are working to bring support to additional providers. If you have any suggestions or feedback, please let us know.
19
-
20
17
  ## Installation
21
18
 
22
19
  ```sh-session
@@ -126,6 +126,9 @@ const machine = (0, xstate_1.setup)({
126
126
  return context.idleTimeoutCount + 1 > context.maxIdleTimeoutCount;
127
127
  },
128
128
  hasSpeechResult: function ({ context }) {
129
+ logger.verbose("called hasSpeechResult guard", {
130
+ speechBuffer: context.speechBuffer
131
+ });
129
132
  return context.speechBuffer;
130
133
  },
131
134
  isSpeaking: function ({ context }) {
@@ -239,11 +242,11 @@ const machine = (0, xstate_1.setup)({
239
242
  entry: { type: "cleanSpeech" },
240
243
  on: {
241
244
  SPEECH_START: {
242
- target: "waitingForUserRequest",
245
+ target: "listeningToUser",
243
246
  description: "Event from VAD system."
244
247
  },
245
248
  SPEECH_RESULT: {
246
- target: "waitingForUserRequest",
249
+ target: "listeningToUser",
247
250
  description: "User started speaking before SPEECH_START event",
248
251
  actions: [{ type: "appendSpeech" }]
249
252
  }
@@ -267,24 +270,19 @@ const machine = (0, xstate_1.setup)({
267
270
  }
268
271
  },
269
272
  idleTransition: {
273
+ // This intermediate state is necessary to ensure the IDLE_TIMEOUT
274
+ // event is properly reset and retriggered when returning to idle.
275
+ // Without it, the timer would not restart correctly.
270
276
  always: {
271
277
  target: "idle"
272
278
  }
273
279
  },
274
- waitingForUserRequest: {
275
- always: {
276
- target: "updatingSpeech"
277
- },
280
+ listeningToUser: {
278
281
  entry: [
279
282
  { type: "interruptPlayback" },
280
283
  { type: "resetIdleTimeoutCount" },
281
284
  { type: "setSpeaking" }
282
- ]
283
- },
284
- hangup: {
285
- type: "final"
286
- },
287
- updatingSpeech: {
285
+ ],
288
286
  on: {
289
287
  SPEECH_END: [
290
288
  {
@@ -305,6 +303,14 @@ const machine = (0, xstate_1.setup)({
305
303
  actions: { type: "appendSpeech" },
306
304
  guard: "isSpeaking",
307
305
  description: "Just append the speech result when actively speaking"
306
+ },
307
+ {
308
+ target: "processingUserRequest",
309
+ guard: (0, xstate_1.not)("isSpeaking"),
310
+ actions: [
311
+ { type: "appendSpeech" }
312
+ ],
313
+ description: "Append final speech and process the request"
308
314
  }
309
315
  ]
310
316
  }
@@ -312,7 +318,7 @@ const machine = (0, xstate_1.setup)({
312
318
  waitingForSpeechTimeout: {
313
319
  on: {
314
320
  SPEECH_START: {
315
- target: "waitingForUserRequest",
321
+ target: "listeningToUser",
316
322
  description: "User started speaking again"
317
323
  },
318
324
  SPEECH_RESULT: {
@@ -335,22 +341,16 @@ const machine = (0, xstate_1.setup)({
335
341
  ]
336
342
  }
337
343
  },
344
+ hangup: {
345
+ type: "final"
346
+ },
338
347
  processingUserRequest: {
339
348
  on: {
340
349
  SPEECH_START: {
341
- target: "waitingForUserRequest",
350
+ target: "listeningToUser",
342
351
  description: "Event from VAD or similar system.",
343
- actions: [{ type: "interruptPlayback" }, { type: "cleanSpeech" }]
352
+ actions: [{ type: "cleanSpeech" }]
344
353
  },
345
- SPEECH_RESULT: {
346
- target: "waitingForUserRequest",
347
- description: "User interrupted with new speech",
348
- actions: [
349
- { type: "interruptPlayback" },
350
- { type: "cleanSpeech" },
351
- { type: "appendSpeech" }
352
- ]
353
- }
354
354
  },
355
355
  invoke: {
356
356
  src: "doProcessUserRequest",
@@ -362,12 +362,7 @@ const machine = (0, xstate_1.setup)({
362
362
  },
363
363
  systemError: {
364
364
  entry: "announceSystemError",
365
- after: {
366
- SYSTEM_ERROR_RECOVERY_TIMEOUT: {
367
- target: "idle",
368
- actions: "resetState"
369
- }
370
- }
365
+ target: "idle"
371
366
  }
372
367
  }
373
368
  });
@@ -59,7 +59,7 @@ const ort = __importStar(require("onnxruntime-node"));
59
59
  const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
60
60
  const SileroVadModel_1 = require("./SileroVadModel");
61
61
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
62
- const BUFFER_SIZE = 16000;
62
+ const BUFFER_SIZE = 8000;
63
63
  async function makeVad(params) {
64
64
  const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
65
65
  const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
@@ -51,8 +51,8 @@ class SileroVadModel {
51
51
  sr: this._sr
52
52
  };
53
53
  const out = await this._session.run(inputs);
54
- this._state = out["stateN"];
55
- const [isSpeech] = out["output"].data;
54
+ this._state = out.stateN;
55
+ const [isSpeech] = out.output.data;
56
56
  const notSpeech = 1 - isSpeech;
57
57
  return { notSpeech, isSpeech };
58
58
  }
@@ -54,7 +54,7 @@ exports.createVad = createVad;
54
54
  * limitations under the License.
55
55
  */
56
56
  const ort = __importStar(require("onnxruntime-node"));
57
- const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
57
+ const chunkToFloat32Array_1 = require("../vad/chunkToFloat32Array");
58
58
  const SileroVadModel_1 = require("./SileroVadModel");
59
59
  const BUFFER_SIZE = 512;
60
60
  async function createVad(params) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/autopilot",
3
- "version": "0.8.43",
3
+ "version": "0.8.45",
4
4
  "description": "Voice AI for the Fonoster platform",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -33,11 +33,11 @@
33
33
  },
34
34
  "dependencies": {
35
35
  "@aws-sdk/client-s3": "^3.712.0",
36
- "@fonoster/common": "^0.8.43",
37
- "@fonoster/logger": "^0.8.43",
38
- "@fonoster/sdk": "^0.8.43",
39
- "@fonoster/types": "^0.8.43",
40
- "@fonoster/voice": "^0.8.43",
36
+ "@fonoster/common": "^0.8.45",
37
+ "@fonoster/logger": "^0.8.45",
38
+ "@fonoster/sdk": "^0.8.45",
39
+ "@fonoster/types": "^0.8.45",
40
+ "@fonoster/voice": "^0.8.45",
41
41
  "@langchain/community": "^0.3.19",
42
42
  "@langchain/core": "^0.3.23",
43
43
  "@langchain/groq": "^0.1.2",
@@ -55,5 +55,5 @@
55
55
  "devDependencies": {
56
56
  "typescript": "^5.5.4"
57
57
  },
58
- "gitHead": "b6971e5e184c3deb7d2f133bf99abaa70cc6ccaa"
58
+ "gitHead": "5aabc6054bebd038f165d8614a2fd843dafdc1bb"
59
59
  }
@@ -1,2 +0,0 @@
1
- declare function chunkToFloat32Array(chunk: Uint8Array): Float32Array;
2
- export { chunkToFloat32Array };
@@ -1,41 +0,0 @@
1
- "use strict";
2
- /*
3
- * Copyright (C) 2025 by Fonoster Inc (https://fonoster.com)
4
- * http://github.com/fonoster/fonoster
5
- *
6
- * This file is part of Fonoster
7
- *
8
- * Licensed under the MIT License (the "License");
9
- * you may not use this file except in compliance with
10
- * the License. You may obtain a copy of the License at
11
- *
12
- * https://opensource.org/licenses/MIT
13
- *
14
- * Unless required by applicable law or agreed to in writing, software
15
- * distributed under the License is distributed on an "AS IS" BASIS,
16
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
- * See the License for the specific language governing permissions and
18
- * limitations under the License.
19
- */
20
- Object.defineProperty(exports, "__esModule", { value: true });
21
- exports.chunkToFloat32Array = chunkToFloat32Array;
22
- // This version of the chunkToFloat32Array accounts for the case where
23
- // the byteOffset is misaligned.
24
- //
25
- // Q. Would it be the same if we just created a new Uint8Array from the chunk?
26
- function chunkToFloat32Array(chunk) {
27
- let int16Array;
28
- const alignedByteOffset = chunk.byteOffset % Int16Array.BYTES_PER_ELEMENT === 0;
29
- if (alignedByteOffset) {
30
- int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / Int16Array.BYTES_PER_ELEMENT);
31
- }
32
- else {
33
- const alignedChunk = new Uint8Array(chunk);
34
- int16Array = new Int16Array(alignedChunk.buffer, alignedChunk.byteOffset, alignedChunk.byteLength / Int16Array.BYTES_PER_ELEMENT);
35
- }
36
- const floatArray = new Float32Array(int16Array.length);
37
- for (let i = 0; i < int16Array.length; i++) {
38
- floatArray[i] = int16Array[i] / 32768.0;
39
- }
40
- return floatArray;
41
- }