hume 0.13.4 → 0.13.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/.mock/definition/empathic-voice/__package__.yml +8 -3
  2. package/.mock/definition/tts/__package__.yml +4 -0
  3. package/.mock/definition/tts/streamInput.yml +30 -2
  4. package/.mock/fern.config.json +1 -1
  5. package/Client.js +10 -3
  6. package/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -2
  7. package/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -0
  8. package/api/resources/index.d.ts +1 -1
  9. package/api/resources/index.js +2 -2
  10. package/api/resources/tts/types/SnippetAudioChunk.d.ts +5 -0
  11. package/dist/Client.js +10 -3
  12. package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -2
  13. package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -0
  14. package/dist/api/resources/index.d.ts +1 -1
  15. package/dist/api/resources/index.js +2 -2
  16. package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +5 -0
  17. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +1 -1
  18. package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +1 -1
  19. package/dist/serialization/resources/empathicVoice/types/SessionSettings.d.ts +1 -0
  20. package/dist/serialization/resources/empathicVoice/types/SessionSettings.js +1 -0
  21. package/dist/serialization/resources/index.d.ts +1 -1
  22. package/dist/serialization/resources/index.js +2 -2
  23. package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +1 -0
  24. package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +1 -0
  25. package/dist/version.d.ts +1 -1
  26. package/dist/version.js +1 -1
  27. package/dist/wrapper/SilenceFiller.d.ts +85 -0
  28. package/dist/wrapper/SilenceFiller.js +203 -0
  29. package/dist/wrapper/collate.d.ts +36 -0
  30. package/dist/wrapper/collate.js +126 -0
  31. package/dist/wrapper/index.d.ts +2 -0
  32. package/dist/wrapper/index.js +5 -1
  33. package/package.json +1 -1
  34. package/reference.md +702 -702
  35. package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +1 -1
  36. package/serialization/resources/empathicVoice/types/ReturnConfig.js +1 -1
  37. package/serialization/resources/empathicVoice/types/SessionSettings.d.ts +1 -0
  38. package/serialization/resources/empathicVoice/types/SessionSettings.js +1 -0
  39. package/serialization/resources/index.d.ts +1 -1
  40. package/serialization/resources/index.js +2 -2
  41. package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +1 -0
  42. package/serialization/resources/tts/types/SnippetAudioChunk.js +1 -0
  43. package/version.d.ts +1 -1
  44. package/version.js +1 -1
  45. package/wrapper/SilenceFiller.d.ts +85 -0
  46. package/wrapper/SilenceFiller.js +203 -0
  47. package/wrapper/collate.d.ts +36 -0
  48. package/wrapper/collate.js +126 -0
  49. package/wrapper/index.d.ts +2 -0
  50. package/wrapper/index.js +5 -1
package/.mock/definition/empathic-voice/__package__.yml CHANGED
@@ -351,6 +351,11 @@ types:
          session-specific details. For more guidance, see our [guide on using
          dynamic
          variables](/docs/speech-to-speech-evi/features/dynamic-variables).
+     voice_id:
+       type: optional<string>
+       docs: >-
+         Allows you to change the voice during an active chat. Updating the
+         voice does not affect chat context or conversation history.
    source:
      openapi: evi-asyncapi.json
  Tool:
@@ -1526,12 +1531,12 @@ types:
          Version numbers are integer values representing different iterations
          of the Config. Each update to the Config increments its version
          number.
-     tools:
-       type: optional<list<optional<ReturnUserDefinedTool>>>
-       docs: List of user-defined tools associated with this Config.
      version_description:
        type: optional<string>
        docs: An optional description of the Config version.
+     tools:
+       type: optional<list<optional<ReturnUserDefinedTool>>>
+       docs: List of user-defined tools associated with this Config.
      language_model:
        type: optional<ReturnLanguageModel>
        docs: >-
package/.mock/definition/tts/__package__.yml CHANGED
@@ -317,7 +317,11 @@ types:
    source:
      openapi: tts-openapi.json
  SnippetAudioChunk:
+   docs: Metadata for a chunk of generated audio.
    properties:
+     request_id:
+       type: string
+       docs: ID of the initiating request.
      generation_id:
        type: string
        docs: >-
package/.mock/definition/tts/streamInput.yml CHANGED
@@ -25,12 +25,40 @@ channel:
      type: optional<boolean>
      default: true
      docs: >-
-       Accelerates processing to reduce streaming latency.Incurs approximately
-       10% additional cost while preserving full voice quality.
+       Enables ultra-low latency streaming, significantly reducing the time
+       until the first audio chunk is received. Recommended for real-time
+       applications requiring immediate audio playback. For further details,
+       see our documentation on [instant
+       mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
    no_binary:
      type: optional<boolean>
      default: false
      docs: If enabled, no binary websocket messages will be sent to the client.
+   access_token:
+     type: optional<string>
+     default: ''
+     docs: >-
+       Access token used for authenticating the client. If not provided, an
+       `api_key` must be provided to authenticate.
+
+
+       The access token is generated using both an API key and a Secret key,
+       which provides an additional layer of security compared to using just an
+       API key.
+
+
+       For more details, refer to the [Authentication Strategies
+       Guide](/docs/introduction/api-key#authentication-strategies).
+   api_key:
+     type: optional<string>
+     default: ''
+     docs: >-
+       API key used for authenticating the client. If not provided, an
+       `access_token` must be provided to authenticate.
+
+
+       For more details, refer to the [Authentication Strategies
+       Guide](/docs/introduction/api-key#authentication-strategies).
    messages:
      publish:
        origin: client
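
Together with instant mode, these new channel parameters let a raw WebSocket client authenticate without the SDK wrapper. A minimal sketch, assuming the stream-input socket is reached at `wss://api.hume.ai/v0/tts/stream/input` and that `access_token` (or `api_key`) and the instant-mode flag are passed as query parameters on the handshake URL; only the field names and their semantics come from the YAML above:

```typescript
import WebSocket from "ws";

// Assumed endpoint path; the `access_token` and `instant_mode` parameter
// names are taken from the channel definition above. Use `api_key=...`
// instead of `access_token=...` for API-key auth.
const url =
    "wss://api.hume.ai/v0/tts/stream/input" +
    `?access_token=${process.env.HUME_ACCESS_TOKEN}` +
    "&instant_mode=true";

const socket = new WebSocket(url);
socket.on("open", () => {
    // Hypothetical publish message; the real message shape is defined
    // elsewhere in streamInput.yml and is not reproduced in this hunk.
    socket.send(JSON.stringify({ text: "Hello from stream input." }));
});
```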
package/.mock/fern.config.json CHANGED
@@ -1,4 +1,4 @@
  {
    "organization" : "hume",
-   "version" : "0.66.15"
+   "version" : "0.76.2"
  }
package/Client.js CHANGED
@@ -40,13 +40,20 @@ const Client_1 = require("./api/resources/tts/client/Client");
  const Client_2 = require("./api/resources/empathicVoice/client/Client");
  const Client_3 = require("./api/resources/expressionMeasurement/client/Client");
  const version_1 = require("./version");
- const fetcherThatAddsHeaders = (fetcherToWrap) => {
+ const customFetcher = (fetcherToWrap, accessToken) => {
      return (args) => {
-         var _a;
+         var _a, _b;
          const newArgs = Object.assign({}, args);
          newArgs.headers = (_a = newArgs.headers) !== null && _a !== void 0 ? _a : {};
          ((newArgs.headers["X-Hume-Client-Name"] = "typescript_sdk"),
              (newArgs.headers["X-Hume-Client-Version"] = version_1.SDK_VERSION));
+         if (accessToken) {
+             const supplied = core.Supplier.get(accessToken);
+             if (supplied) {
+                 newArgs.headers = (_b = newArgs.headers) !== null && _b !== void 0 ? _b : {};
+                 newArgs.headers["Authorization"] = `Bearer ${supplied}`;
+             }
+         }
          return fetcherToWrap(args);
      };
  };
@@ -55,7 +62,7 @@ class HumeClient {
          var _a;
          this._options = _options;
          const defaultFetcher = (_a = _options.fetcher) !== null && _a !== void 0 ? _a : core.fetcher;
-         this._options.fetcher = fetcherThatAddsHeaders(defaultFetcher);
+         this._options.fetcher = customFetcher(defaultFetcher, _options.accessToken);
      }
      get tts() {
          var _a;
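
The practical effect: a client-level `accessToken` option now flows into every request as a bearer token. A minimal sketch of the two auth styles (the `apiKey` option predates this release; only the `accessToken` wiring is new here):

```typescript
import { HumeClient } from "hume";

// API-key auth, unchanged from 0.13.4.
const viaApiKey = new HumeClient({ apiKey: process.env.HUME_API_KEY! });

// Token auth: the wrapped fetcher above attaches
// `Authorization: Bearer <token>` to each request. Because the value is
// read through `core.Supplier.get`, a supplier function that returns a
// fresh token should also work (inferred from the diff, not verified).
const viaToken = new HumeClient({ accessToken: process.env.HUME_ACCESS_TOKEN! });
```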
package/api/resources/empathicVoice/types/ReturnConfig.d.ts CHANGED
@@ -18,10 +18,10 @@ export interface ReturnConfig {
       * Version numbers are integer values representing different iterations of the Config. Each update to the Config increments its version number.
       */
      version?: number;
-     /** List of user-defined tools associated with this Config. */
-     tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
      /** An optional description of the Config version. */
      versionDescription?: string;
+     /** List of user-defined tools associated with this Config. */
+     tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
      /**
       * The supplemental language model associated with this Config.
       *
package/api/resources/empathicVoice/types/SessionSettings.d.ts CHANGED
@@ -75,4 +75,6 @@ export interface SessionSettings {
       * Using this field, you can personalize responses based on session-specific details. For more guidance, see our [guide on using dynamic variables](/docs/speech-to-speech-evi/features/dynamic-variables).
       */
      variables?: Record<string, Hume.empathicVoice.SessionSettingsVariablesValue>;
+     /** Allows you to change the voice during an active chat. Updating the voice does not affect chat context or conversation history. */
+     voiceId?: string;
  }
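
A short sketch of what the new field enables, assuming the EVI chat socket's `sendSessionSettings` helper; only the `voiceId` field itself is confirmed by this diff:

```typescript
import { HumeClient } from "hume";

const hume = new HumeClient({ apiKey: process.env.HUME_API_KEY! });
const socket = hume.empathicVoice.chat.connect();

socket.on("open", () => {
    // Swap voices mid-conversation; per the docs above, chat context and
    // conversation history are preserved.
    socket.sendSessionSettings({
        voiceId: "your-voice-uuid", // hypothetical ID from your voice library
    });
});
```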
package/api/resources/index.d.ts CHANGED
@@ -1,3 +1,3 @@
- export * as tts from "./tts";
  export * as empathicVoice from "./empathicVoice";
+ export * as tts from "./tts";
  export * as expressionMeasurement from "./expressionMeasurement";
package/api/resources/index.js CHANGED
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
      };
  })();
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.expressionMeasurement = exports.empathicVoice = exports.tts = void 0;
- exports.tts = __importStar(require("./tts"));
+ exports.expressionMeasurement = exports.tts = exports.empathicVoice = void 0;
  exports.empathicVoice = __importStar(require("./empathicVoice"));
+ exports.tts = __importStar(require("./tts"));
  exports.expressionMeasurement = __importStar(require("./expressionMeasurement"));
package/api/resources/tts/types/SnippetAudioChunk.d.ts CHANGED
@@ -2,7 +2,12 @@
   * This file was auto-generated by Fern from our API Definition.
   */
  import * as Hume from "../../../index";
+ /**
+  * Metadata for a chunk of generated audio.
+  */
  export interface SnippetAudioChunk {
+     /** ID of the initiating request. */
+     requestId: string;
      /** The generation ID of the parent snippet that this chunk corresponds to. */
      generationId: string;
      /** The ID of the parent snippet that this chunk corresponds to. */
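
With `requestId` on every chunk, a consumer can multiplex several TTS requests over one connection and reassemble audio per request. A sketch; the handler wiring and the base64-encoded `audio` field are assumptions, not shown in this hunk:

```typescript
import { Hume } from "hume";

const audioByRequest = new Map<string, Buffer[]>();

function onSnippetAudioChunk(chunk: Hume.tts.SnippetAudioChunk): void {
    // Group chunks by the request that produced them.
    const buffers = audioByRequest.get(chunk.requestId) ?? [];
    buffers.push(Buffer.from(chunk.audio, "base64")); // `audio` assumed base64-encoded
    audioByRequest.set(chunk.requestId, buffers);
}
```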
package/dist/Client.js CHANGED
@@ -40,13 +40,20 @@ const Client_1 = require("./api/resources/tts/client/Client");
  const Client_2 = require("./api/resources/empathicVoice/client/Client");
  const Client_3 = require("./api/resources/expressionMeasurement/client/Client");
  const version_1 = require("./version");
- const fetcherThatAddsHeaders = (fetcherToWrap) => {
+ const customFetcher = (fetcherToWrap, accessToken) => {
      return (args) => {
-         var _a;
+         var _a, _b;
          const newArgs = Object.assign({}, args);
          newArgs.headers = (_a = newArgs.headers) !== null && _a !== void 0 ? _a : {};
          ((newArgs.headers["X-Hume-Client-Name"] = "typescript_sdk"),
              (newArgs.headers["X-Hume-Client-Version"] = version_1.SDK_VERSION));
+         if (accessToken) {
+             const supplied = core.Supplier.get(accessToken);
+             if (supplied) {
+                 newArgs.headers = (_b = newArgs.headers) !== null && _b !== void 0 ? _b : {};
+                 newArgs.headers["Authorization"] = `Bearer ${supplied}`;
+             }
+         }
          return fetcherToWrap(args);
      };
  };
@@ -55,7 +62,7 @@ class HumeClient {
          var _a;
          this._options = _options;
          const defaultFetcher = (_a = _options.fetcher) !== null && _a !== void 0 ? _a : core.fetcher;
-         this._options.fetcher = fetcherThatAddsHeaders(defaultFetcher);
+         this._options.fetcher = customFetcher(defaultFetcher, _options.accessToken);
      }
      get tts() {
          var _a;
package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts CHANGED
@@ -18,10 +18,10 @@ export interface ReturnConfig {
       * Version numbers are integer values representing different iterations of the Config. Each update to the Config increments its version number.
       */
      version?: number;
-     /** List of user-defined tools associated with this Config. */
-     tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
      /** An optional description of the Config version. */
      versionDescription?: string;
+     /** List of user-defined tools associated with this Config. */
+     tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
      /**
       * The supplemental language model associated with this Config.
       *
package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts CHANGED
@@ -75,4 +75,6 @@ export interface SessionSettings {
       * Using this field, you can personalize responses based on session-specific details. For more guidance, see our [guide on using dynamic variables](/docs/speech-to-speech-evi/features/dynamic-variables).
       */
      variables?: Record<string, Hume.empathicVoice.SessionSettingsVariablesValue>;
+     /** Allows you to change the voice during an active chat. Updating the voice does not affect chat context or conversation history. */
+     voiceId?: string;
  }
package/dist/api/resources/index.d.ts CHANGED
@@ -1,3 +1,3 @@
- export * as tts from "./tts";
  export * as empathicVoice from "./empathicVoice";
+ export * as tts from "./tts";
  export * as expressionMeasurement from "./expressionMeasurement";
package/dist/api/resources/index.js CHANGED
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
      };
  })();
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.expressionMeasurement = exports.empathicVoice = exports.tts = void 0;
- exports.tts = __importStar(require("./tts"));
+ exports.expressionMeasurement = exports.tts = exports.empathicVoice = void 0;
  exports.empathicVoice = __importStar(require("./empathicVoice"));
+ exports.tts = __importStar(require("./tts"));
  exports.expressionMeasurement = __importStar(require("./expressionMeasurement"));
package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts CHANGED
@@ -2,7 +2,12 @@
   * This file was auto-generated by Fern from our API Definition.
   */
  import * as Hume from "../../../index";
+ /**
+  * Metadata for a chunk of generated audio.
+  */
  export interface SnippetAudioChunk {
+     /** ID of the initiating request. */
+     requestId: string;
      /** The generation ID of the parent snippet that this chunk corresponds to. */
      generationId: string;
      /** The ID of the parent snippet that this chunk corresponds to. */
package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts CHANGED
@@ -20,8 +20,8 @@ export declare namespace ReturnConfig {
          name?: string | null;
          id?: string | null;
          version?: number | null;
-         tools?: (ReturnUserDefinedTool.Raw | null | undefined)[] | null;
          version_description?: string | null;
+         tools?: (ReturnUserDefinedTool.Raw | null | undefined)[] | null;
          language_model?: ReturnLanguageModel.Raw | null;
          builtin_tools?: (ReturnBuiltinTool.Raw | null | undefined)[] | null;
          evi_version?: string | null;
package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js CHANGED
@@ -52,8 +52,8 @@ exports.ReturnConfig = core.serialization.object({
      name: core.serialization.string().optional(),
      id: core.serialization.string().optional(),
      version: core.serialization.number().optional(),
-     tools: core.serialization.list(ReturnUserDefinedTool_1.ReturnUserDefinedTool.optional()).optional(),
      versionDescription: core.serialization.property("version_description", core.serialization.string().optional()),
+     tools: core.serialization.list(ReturnUserDefinedTool_1.ReturnUserDefinedTool.optional()).optional(),
      languageModel: core.serialization.property("language_model", ReturnLanguageModel_1.ReturnLanguageModel.optional()),
      builtinTools: core.serialization.property("builtin_tools", core.serialization.list(ReturnBuiltinTool_1.ReturnBuiltinTool.optional()).optional()),
      eviVersion: core.serialization.property("evi_version", core.serialization.string().optional()),
package/dist/serialization/resources/empathicVoice/types/SessionSettings.d.ts CHANGED
@@ -22,5 +22,6 @@ export declare namespace SessionSettings {
          builtin_tools?: BuiltinToolConfig.Raw[] | null;
          metadata?: Record<string, unknown> | null;
          variables?: Record<string, SessionSettingsVariablesValue.Raw> | null;
+         voice_id?: string | null;
      }
  }
package/dist/serialization/resources/empathicVoice/types/SessionSettings.js CHANGED
@@ -54,4 +54,5 @@ exports.SessionSettings = core.serialization.object({
      builtinTools: core.serialization.property("builtin_tools", core.serialization.list(BuiltinToolConfig_1.BuiltinToolConfig).optional()),
      metadata: core.serialization.record(core.serialization.string(), core.serialization.unknown()).optional(),
      variables: core.serialization.record(core.serialization.string(), SessionSettingsVariablesValue_1.SessionSettingsVariablesValue).optional(),
+     voiceId: core.serialization.property("voice_id", core.serialization.string().optional()),
  });
package/dist/serialization/resources/index.d.ts CHANGED
@@ -1,3 +1,3 @@
- export * as tts from "./tts";
  export * as empathicVoice from "./empathicVoice";
+ export * as tts from "./tts";
  export * as expressionMeasurement from "./expressionMeasurement";
package/dist/serialization/resources/index.js CHANGED
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
      };
  })();
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.expressionMeasurement = exports.empathicVoice = exports.tts = void 0;
- exports.tts = __importStar(require("./tts"));
+ exports.expressionMeasurement = exports.tts = exports.empathicVoice = void 0;
  exports.empathicVoice = __importStar(require("./empathicVoice"));
+ exports.tts = __importStar(require("./tts"));
  exports.expressionMeasurement = __importStar(require("./expressionMeasurement"));
package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts CHANGED
@@ -9,6 +9,7 @@ import { Snippet } from "./Snippet";
  export declare const SnippetAudioChunk: core.serialization.ObjectSchema<serializers.tts.SnippetAudioChunk.Raw, Hume.tts.SnippetAudioChunk>;
  export declare namespace SnippetAudioChunk {
      interface Raw {
+         request_id: string;
          generation_id: string;
          snippet_id: string;
          text: string;
package/dist/serialization/resources/tts/types/SnippetAudioChunk.js CHANGED
@@ -41,6 +41,7 @@ const core = __importStar(require("../../../../core"));
  const AudioFormatType_1 = require("./AudioFormatType");
  const Snippet_1 = require("./Snippet");
  exports.SnippetAudioChunk = core.serialization.object({
+     requestId: core.serialization.property("request_id", core.serialization.string()),
      generationId: core.serialization.property("generation_id", core.serialization.string()),
      snippetId: core.serialization.property("snippet_id", core.serialization.string()),
      text: core.serialization.string(),
package/dist/version.d.ts CHANGED
@@ -1 +1 @@
- export declare const SDK_VERSION = "0.13.4";
+ export declare const SDK_VERSION = "0.13.6";
package/dist/version.js CHANGED
@@ -1,4 +1,4 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.SDK_VERSION = void 0;
- exports.SDK_VERSION = "0.13.4";
+ exports.SDK_VERSION = "0.13.6";
package/dist/wrapper/SilenceFiller.d.ts ADDED
@@ -0,0 +1,85 @@
+ import { Readable } from "stream";
+ /**
+  * SilenceFiller is a Readable stream that intersperses incoming audio data
+  * with bytes of silence. This is important in some cases to keep an audio
+  * stream "alive". Audio players, such as ffmpeg, can interpret inactivity as
+  * meaning the stream has ended or disconnected.
+  *
+  * @example
+  * ```typescript
+  * import { SilenceFiller } from 'hume';
+  *
+  * const BYTES_PER_SAMPLE = 2; // 16-bit samples
+  * const SAMPLE_RATE = 48000;
+  * const BUFFER_SIZE = Math.floor(SAMPLE_RATE * 0.1 * BYTES_PER_SAMPLE); // 100ms buffer
+  * const silenceFiller = new SilenceFiller(10, SAMPLE_RATE, BYTES_PER_SAMPLE, BUFFER_SIZE);
+  *
+  * // Pipe silence filler output to audio player stdin
+  * silenceFiller.pipe(audioPlayer.stdin);
+  *
+  * // Handle pipe errors
+  * silenceFiller.on('error', (err) => {
+  *   console.error("SilenceFiller error:", err);
+  * });
+  *
+  * // Write audio data as it arrives
+  * silenceFiller.writeAudio(audioBuffer);
+  *
+  * // End the stream when done
+  * await silenceFiller.endStream();
+  * ```
+  */
+ export declare class SilenceFiller extends Readable {
+     private unclockedSilenceFiller;
+     private isStarted;
+     private pushInterval;
+     private bytesPerSample;
+     private pushIntervalMs;
+     /**
+      * Creates a new SilenceFiller instance.
+      *
+      * @param pushIntervalMs - The interval in milliseconds for pushing audio data (default: 5ms).
+      * @param sampleRate - The sample rate of the audio (e.g., 48000).
+      * @param bytesPerSample - The number of bytes per audio sample (e.g., 2 for 16-bit).
+      * @param bufferSize - How much to 'prebuffer'. If you set this too low there
+      * is a chance that playback will stutter, but if you set it too high
+      * playback will take longer to start.
+      */
+     constructor(pushIntervalMs?: number, sampleRate?: number, bytesPerSample?: number, bufferSize?: number);
+     /**
+      * Writes audio data to the silence filler.
+      *
+      * @param audioBuffer - The audio buffer to write.
+      */
+     writeAudio(audioBuffer: Buffer): void;
+     private startPushInterval;
+     private pushData;
+     _read(): void;
+     _destroy(error: Error | null, callback: (error?: Error | null) => void): void;
+     /**
+      * Ends the stream and drains all remaining audio data.
+      *
+      * @returns A promise that resolves when the stream has ended.
+      */
+     endStream(): Promise<void>;
+ }
+ /**
+  * Does the actual calculation of how to intersperse audio with silence. It
+  * is "pure" in the sense that it does not rely on the system clock: it's up
+  * to the caller to provide timestamps.
+  *
+  * @internal
+  */
+ export declare class UnclockedSilenceFiller {
+     private audioQueue;
+     private totalBufferedBytes;
+     private startTimestamp;
+     private totalBytesSent;
+     donePrebuffering: boolean;
+     private bufferSize;
+     private sampleRate;
+     private bytesPerSample;
+     constructor(bufferSize: number, sampleRate: number, bytesPerSample: number);
+     writeAudio(audioBuffer: Buffer, timestamp: number): void;
+     readAudio(timestamp: number): Buffer | null;
+ }
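
The class doc mentions ffmpeg; one way the pieces could fit together is piping the filler into `ffplay` for raw 16-bit mono PCM at 48 kHz. The player command and flags below are an illustration, not part of the SDK:

```typescript
import { spawn } from "node:child_process";
import { SilenceFiller } from "hume";

// ffplay reading signed 16-bit little-endian mono PCM at 48 kHz from stdin.
const player = spawn(
    "ffplay",
    ["-f", "s16le", "-ar", "48000", "-ac", "1", "-nodisp", "-i", "-"],
    { stdio: ["pipe", "ignore", "ignore"] },
);

const filler = new SilenceFiller(); // defaults: 5 ms interval, 48 kHz, 16-bit, 9600-byte prebuffer
filler.pipe(player.stdin!);
filler.on("error", (err) => console.error("SilenceFiller error:", err));

// As decoded PCM arrives (e.g., from SnippetAudioChunk payloads):
//   filler.writeAudio(pcmBuffer);
// ...and once the stream is finished:
//   await filler.endStream();
```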
package/dist/wrapper/SilenceFiller.js ADDED
@@ -0,0 +1,203 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.UnclockedSilenceFiller = exports.SilenceFiller = void 0;
+ const stream_1 = require("stream");
+ /**
+  * SilenceFiller is a Readable stream that intersperses incoming audio data
+  * with bytes of silence. This is important in some cases to keep an audio
+  * stream "alive". Audio players, such as ffmpeg, can interpret inactivity as
+  * meaning the stream has ended or disconnected.
+  *
+  * @example
+  * ```typescript
+  * import { SilenceFiller } from 'hume';
+  *
+  * const BYTES_PER_SAMPLE = 2; // 16-bit samples
+  * const SAMPLE_RATE = 48000;
+  * const BUFFER_SIZE = Math.floor(SAMPLE_RATE * 0.1 * BYTES_PER_SAMPLE); // 100ms buffer
+  * const silenceFiller = new SilenceFiller(10, SAMPLE_RATE, BYTES_PER_SAMPLE, BUFFER_SIZE);
+  *
+  * // Pipe silence filler output to audio player stdin
+  * silenceFiller.pipe(audioPlayer.stdin);
+  *
+  * // Handle pipe errors
+  * silenceFiller.on('error', (err) => {
+  *   console.error("SilenceFiller error:", err);
+  * });
+  *
+  * // Write audio data as it arrives
+  * silenceFiller.writeAudio(audioBuffer);
+  *
+  * // End the stream when done
+  * await silenceFiller.endStream();
+  * ```
+  */
+ class SilenceFiller extends stream_1.Readable {
+     /**
+      * Creates a new SilenceFiller instance.
+      *
+      * @param pushIntervalMs - The interval in milliseconds for pushing audio data (default: 5ms).
+      * @param sampleRate - The sample rate of the audio (e.g., 48000).
+      * @param bytesPerSample - The number of bytes per audio sample (e.g., 2 for 16-bit).
+      * @param bufferSize - How much to 'prebuffer'. If you set this too low there
+      * is a chance that playback will stutter, but if you set it too high
+      * playback will take longer to start.
+      */
+     constructor(pushIntervalMs = 5, sampleRate = 48000, bytesPerSample = 2, bufferSize = 9600) {
+         super({ objectMode: false });
+         this.isStarted = false;
+         this.pushInterval = null;
+         this.unclockedSilenceFiller = new UnclockedSilenceFiller(bufferSize, sampleRate, bytesPerSample);
+         this.bytesPerSample = bytesPerSample;
+         this.pushIntervalMs = pushIntervalMs;
+     }
+     /**
+      * Writes audio data to the silence filler.
+      *
+      * @param audioBuffer - The audio buffer to write.
+      */
+     writeAudio(audioBuffer) {
+         const now = Date.now();
+         try {
+             this.unclockedSilenceFiller.writeAudio(audioBuffer, now);
+             if (!this.isStarted && this.unclockedSilenceFiller.donePrebuffering) {
+                 this.isStarted = true;
+                 this.startPushInterval();
+             }
+         }
+         catch (error) {
+             console.error(`[SilenceFiller] Error writing audio:`, error);
+             this.emit("error", error);
+         }
+     }
+     startPushInterval() {
+         this.pushInterval = setInterval(() => {
+             this.pushData();
+         }, this.pushIntervalMs);
+     }
+     pushData() {
+         if (!this.isStarted)
+             return;
+         try {
+             const now = Date.now();
+             const audioChunk = this.unclockedSilenceFiller.readAudio(now);
+             if (audioChunk && audioChunk.length > 0) {
+                 // Ensure chunk size is aligned to bytesPerSample
+                 const alignedChunkSize = Math.floor(audioChunk.length / this.bytesPerSample) * this.bytesPerSample;
+                 if (alignedChunkSize > 0) {
+                     const chunk = audioChunk.subarray(0, alignedChunkSize);
+                     this.push(chunk);
+                 }
+             }
+         }
+         catch (error) {
+             console.error(`[SilenceFiller] Error pushing data:`, error);
+             this.emit("error", error);
+         }
+     }
+     _read() { }
+     _destroy(error, callback) {
+         super._destroy(error, callback);
+     }
+     /**
+      * Ends the stream and drains all remaining audio data.
+      *
+      * @returns A promise that resolves when the stream has ended.
+      */
+     endStream() {
+         return new Promise((resolve) => {
+             // Stop pushing data
+             if (this.pushInterval) {
+                 clearInterval(this.pushInterval);
+                 this.pushInterval = null;
+             }
+             // Drain all remaining audio from SilenceFiller
+             const now = Date.now();
+             // Keep reading until no more audio is available
+             while (true) {
+                 const remainingChunk = this.unclockedSilenceFiller.readAudio(now);
+                 if (!remainingChunk || remainingChunk.length === 0) {
+                     break;
+                 }
+                 const alignedChunkSize = Math.floor(remainingChunk.length / this.bytesPerSample) * this.bytesPerSample;
+                 if (alignedChunkSize > 0) {
+                     const chunk = remainingChunk.subarray(0, alignedChunkSize);
+                     this.push(chunk);
+                 }
+             }
+             this.push(null); // Signal end of stream
+             this.once("end", () => {
+                 resolve();
+             });
+         });
+     }
+ }
+ exports.SilenceFiller = SilenceFiller;
+ /**
+  * Does the actual calculation of how to intersperse audio with silence. It
+  * is "pure" in the sense that it does not rely on the system clock: it's up
+  * to the caller to provide timestamps.
+  *
+  * @internal
+  */
+ class UnclockedSilenceFiller {
+     constructor(bufferSize, sampleRate, bytesPerSample) {
+         this.audioQueue = [];
+         this.totalBufferedBytes = 0;
+         this.startTimestamp = null;
+         this.totalBytesSent = 0;
+         this.donePrebuffering = false;
+         this.bufferSize = bufferSize;
+         this.sampleRate = sampleRate;
+         this.bytesPerSample = bytesPerSample;
+     }
+     writeAudio(audioBuffer, timestamp) {
+         this.audioQueue.push(audioBuffer);
+         this.totalBufferedBytes += audioBuffer.length;
+         if (this.startTimestamp === null) {
+             this.startTimestamp = timestamp;
+         }
+         if (!this.donePrebuffering && this.totalBufferedBytes >= this.bufferSize) {
+             this.donePrebuffering = true;
+         }
+     }
+     readAudio(timestamp) {
+         if (this.startTimestamp === null || !this.donePrebuffering) {
+             return null;
+         }
+         const elapsedMs = timestamp - this.startTimestamp;
+         const targetBytesSent = Math.floor(((this.sampleRate * elapsedMs) / 1000) * this.bytesPerSample);
+         const bytesNeeded = targetBytesSent - this.totalBytesSent;
+         if (bytesNeeded <= 0) {
+             return null;
+         }
+         // Ensure bytesNeeded is a multiple of bytesPerSample
+         const alignedBytesNeeded = Math.floor(bytesNeeded / this.bytesPerSample) * this.bytesPerSample;
+         if (alignedBytesNeeded <= 0) {
+             return null;
+         }
+         let chunk = Buffer.alloc(0);
+         // Drain from queue until we have enough bytes
+         while (chunk.length < alignedBytesNeeded && this.audioQueue.length > 0) {
+             const nextBuffer = this.audioQueue.shift();
+             chunk = Buffer.concat([chunk, nextBuffer]);
+             this.totalBufferedBytes -= nextBuffer.length;
+         }
+         // If we have more than needed, put the excess back
+         if (chunk.length > alignedBytesNeeded) {
+             const excess = chunk.subarray(alignedBytesNeeded);
+             this.audioQueue.unshift(excess);
+             this.totalBufferedBytes += excess.length;
+             chunk = chunk.subarray(0, alignedBytesNeeded);
+         }
+         // Fill remaining with silence if needed
+         if (chunk.length < alignedBytesNeeded) {
+             const silenceNeeded = Buffer.alloc(alignedBytesNeeded - chunk.length, 0);
+             chunk = Buffer.concat([chunk, silenceNeeded]);
+         }
+         // Update total bytes sent
+         this.totalBytesSent += chunk.length;
+         return chunk;
+     }
+ }
+ exports.UnclockedSilenceFiller = UnclockedSilenceFiller;
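
Because `UnclockedSilenceFiller` takes explicit timestamps, the pacing math is easy to check by hand: after `t` ms it targets `floor(sampleRate * t / 1000 * bytesPerSample)` bytes sent, padding with zeros when the queue runs dry. A worked sketch at 48 kHz, 16-bit mono (the class is marked `@internal`, and importing it from the package root is an assumption):

```typescript
import { UnclockedSilenceFiller } from "hume"; // root re-export assumed; defined in wrapper/SilenceFiller

// 100 ms prebuffer at 48 kHz with 2 bytes per sample = 9600 bytes.
const filler = new UnclockedSilenceFiller(9600, 48000, 2);

filler.writeAudio(Buffer.alloc(9600), 0);   // 100 ms of audio; prebuffering completes
console.log(filler.readAudio(100)?.length); // 9600 -> exactly the buffered audio
console.log(filler.readAudio(150)?.length); // 4800 -> queue is empty, so pure silence
```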