hume 0.13.4 → 0.13.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mock/definition/empathic-voice/__package__.yml +8 -3
- package/.mock/definition/tts/__package__.yml +4 -0
- package/.mock/definition/tts/streamInput.yml +30 -2
- package/.mock/fern.config.json +1 -1
- package/Client.js +10 -3
- package/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -2
- package/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -0
- package/api/resources/index.d.ts +1 -1
- package/api/resources/index.js +2 -2
- package/api/resources/tts/types/SnippetAudioChunk.d.ts +5 -0
- package/dist/Client.js +10 -3
- package/dist/api/resources/empathicVoice/types/ReturnConfig.d.ts +2 -2
- package/dist/api/resources/empathicVoice/types/SessionSettings.d.ts +2 -0
- package/dist/api/resources/index.d.ts +1 -1
- package/dist/api/resources/index.js +2 -2
- package/dist/api/resources/tts/types/SnippetAudioChunk.d.ts +5 -0
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +1 -1
- package/dist/serialization/resources/empathicVoice/types/ReturnConfig.js +1 -1
- package/dist/serialization/resources/empathicVoice/types/SessionSettings.d.ts +1 -0
- package/dist/serialization/resources/empathicVoice/types/SessionSettings.js +1 -0
- package/dist/serialization/resources/index.d.ts +1 -1
- package/dist/serialization/resources/index.js +2 -2
- package/dist/serialization/resources/tts/types/SnippetAudioChunk.d.ts +1 -0
- package/dist/serialization/resources/tts/types/SnippetAudioChunk.js +1 -0
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/dist/wrapper/SilenceFiller.d.ts +85 -0
- package/dist/wrapper/SilenceFiller.js +203 -0
- package/dist/wrapper/collate.d.ts +36 -0
- package/dist/wrapper/collate.js +126 -0
- package/dist/wrapper/index.d.ts +2 -0
- package/dist/wrapper/index.js +5 -1
- package/package.json +1 -1
- package/reference.md +702 -702
- package/serialization/resources/empathicVoice/types/ReturnConfig.d.ts +1 -1
- package/serialization/resources/empathicVoice/types/ReturnConfig.js +1 -1
- package/serialization/resources/empathicVoice/types/SessionSettings.d.ts +1 -0
- package/serialization/resources/empathicVoice/types/SessionSettings.js +1 -0
- package/serialization/resources/index.d.ts +1 -1
- package/serialization/resources/index.js +2 -2
- package/serialization/resources/tts/types/SnippetAudioChunk.d.ts +1 -0
- package/serialization/resources/tts/types/SnippetAudioChunk.js +1 -0
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/wrapper/SilenceFiller.d.ts +85 -0
- package/wrapper/SilenceFiller.js +203 -0
- package/wrapper/collate.d.ts +36 -0
- package/wrapper/collate.js +126 -0
- package/wrapper/index.d.ts +2 -0
- package/wrapper/index.js +5 -1
|
@@ -351,6 +351,11 @@ types:
|
|
|
351
351
|
session-specific details. For more guidance, see our [guide on using
|
|
352
352
|
dynamic
|
|
353
353
|
variables](/docs/speech-to-speech-evi/features/dynamic-variables).
|
|
354
|
+
voice_id:
|
|
355
|
+
type: optional<string>
|
|
356
|
+
docs: >-
|
|
357
|
+
Allows you to change the voice during an active chat. Updating the
|
|
358
|
+
voice does not affect chat context or conversation history.
|
|
354
359
|
source:
|
|
355
360
|
openapi: evi-asyncapi.json
|
|
356
361
|
Tool:
|
|
@@ -1526,12 +1531,12 @@ types:
|
|
|
1526
1531
|
Version numbers are integer values representing different iterations
|
|
1527
1532
|
of the Config. Each update to the Config increments its version
|
|
1528
1533
|
number.
|
|
1529
|
-
tools:
|
|
1530
|
-
type: optional<list<optional<ReturnUserDefinedTool>>>
|
|
1531
|
-
docs: List of user-defined tools associated with this Config.
|
|
1532
1534
|
version_description:
|
|
1533
1535
|
type: optional<string>
|
|
1534
1536
|
docs: An optional description of the Config version.
|
|
1537
|
+
tools:
|
|
1538
|
+
type: optional<list<optional<ReturnUserDefinedTool>>>
|
|
1539
|
+
docs: List of user-defined tools associated with this Config.
|
|
1535
1540
|
language_model:
|
|
1536
1541
|
type: optional<ReturnLanguageModel>
|
|
1537
1542
|
docs: >-
|
|
@@ -317,7 +317,11 @@ types:
|
|
|
317
317
|
source:
|
|
318
318
|
openapi: tts-openapi.json
|
|
319
319
|
SnippetAudioChunk:
|
|
320
|
+
docs: Metadata for a chunk of generated audio.
|
|
320
321
|
properties:
|
|
322
|
+
request_id:
|
|
323
|
+
type: string
|
|
324
|
+
docs: ID of the initiating request.
|
|
321
325
|
generation_id:
|
|
322
326
|
type: string
|
|
323
327
|
docs: >-
|
|
@@ -25,12 +25,40 @@ channel:
|
|
|
25
25
|
type: optional<boolean>
|
|
26
26
|
default: true
|
|
27
27
|
docs: >-
|
|
28
|
-
|
|
29
|
-
|
|
28
|
+
Enables ultra-low latency streaming, significantly reducing the time
|
|
29
|
+
until the first audio chunk is received. Recommended for real-time
|
|
30
|
+
applications requiring immediate audio playback. For further details,
|
|
31
|
+
see our documentation on [instant
|
|
32
|
+
mode](/docs/text-to-speech-tts/overview#ultra-low-latency-streaming-instant-mode).
|
|
30
33
|
no_binary:
|
|
31
34
|
type: optional<boolean>
|
|
32
35
|
default: false
|
|
33
36
|
docs: If enabled, no binary websocket messages will be sent to the client.
|
|
37
|
+
access_token:
|
|
38
|
+
type: optional<string>
|
|
39
|
+
default: ''
|
|
40
|
+
docs: >-
|
|
41
|
+
Access token used for authenticating the client. If not provided, an
|
|
42
|
+
`api_key` must be provided to authenticate.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
The access token is generated using both an API key and a Secret key,
|
|
46
|
+
which provides an additional layer of security compared to using just an
|
|
47
|
+
API key.
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
For more details, refer to the [Authentication Strategies
|
|
51
|
+
Guide](/docs/introduction/api-key#authentication-strategies).
|
|
52
|
+
api_key:
|
|
53
|
+
type: optional<string>
|
|
54
|
+
default: ''
|
|
55
|
+
docs: >-
|
|
56
|
+
API key used for authenticating the client. If not provided, an
|
|
57
|
+
`access_token` must be provided to authenticate.
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
For more details, refer to the [Authentication Strategies
|
|
61
|
+
Guide](/docs/introduction/api-key#authentication-strategies).
|
|
34
62
|
messages:
|
|
35
63
|
publish:
|
|
36
64
|
origin: client
|
package/.mock/fern.config.json
CHANGED
package/Client.js
CHANGED
|
@@ -40,13 +40,20 @@ const Client_1 = require("./api/resources/tts/client/Client");
|
|
|
40
40
|
const Client_2 = require("./api/resources/empathicVoice/client/Client");
|
|
41
41
|
const Client_3 = require("./api/resources/expressionMeasurement/client/Client");
|
|
42
42
|
const version_1 = require("./version");
|
|
43
|
-
const
|
|
43
|
+
const customFetcher = (fetcherToWrap, accessToken) => {
|
|
44
44
|
return (args) => {
|
|
45
|
-
var _a;
|
|
45
|
+
var _a, _b;
|
|
46
46
|
const newArgs = Object.assign({}, args);
|
|
47
47
|
newArgs.headers = (_a = newArgs.headers) !== null && _a !== void 0 ? _a : {};
|
|
48
48
|
((newArgs.headers["X-Hume-Client-Name"] = "typescript_sdk"),
|
|
49
49
|
(newArgs.headers["X-Hume-Client-Version"] = version_1.SDK_VERSION));
|
|
50
|
+
if (accessToken) {
|
|
51
|
+
const supplied = core.Supplier.get(accessToken);
|
|
52
|
+
if (supplied) {
|
|
53
|
+
newArgs.headers = (_b = newArgs.headers) !== null && _b !== void 0 ? _b : {};
|
|
54
|
+
newArgs.headers["Authorization"] = `Bearer ${supplied}`;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
50
57
|
return fetcherToWrap(args);
|
|
51
58
|
};
|
|
52
59
|
};
|
|
@@ -55,7 +62,7 @@ class HumeClient {
|
|
|
55
62
|
var _a;
|
|
56
63
|
this._options = _options;
|
|
57
64
|
const defaultFetcher = (_a = _options.fetcher) !== null && _a !== void 0 ? _a : core.fetcher;
|
|
58
|
-
this._options.fetcher =
|
|
65
|
+
this._options.fetcher = customFetcher(defaultFetcher, _options.accessToken);
|
|
59
66
|
}
|
|
60
67
|
get tts() {
|
|
61
68
|
var _a;
|
|
@@ -18,10 +18,10 @@ export interface ReturnConfig {
|
|
|
18
18
|
* Version numbers are integer values representing different iterations of the Config. Each update to the Config increments its version number.
|
|
19
19
|
*/
|
|
20
20
|
version?: number;
|
|
21
|
-
/** List of user-defined tools associated with this Config. */
|
|
22
|
-
tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
|
|
23
21
|
/** An optional description of the Config version. */
|
|
24
22
|
versionDescription?: string;
|
|
23
|
+
/** List of user-defined tools associated with this Config. */
|
|
24
|
+
tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
|
|
25
25
|
/**
|
|
26
26
|
* The supplemental language model associated with this Config.
|
|
27
27
|
*
|
|
@@ -75,4 +75,6 @@ export interface SessionSettings {
|
|
|
75
75
|
* Using this field, you can personalize responses based on session-specific details. For more guidance, see our [guide on using dynamic variables](/docs/speech-to-speech-evi/features/dynamic-variables).
|
|
76
76
|
*/
|
|
77
77
|
variables?: Record<string, Hume.empathicVoice.SessionSettingsVariablesValue>;
|
|
78
|
+
/** Allows you to change the voice during an active chat. Updating the voice does not affect chat context or conversation history. */
|
|
79
|
+
voiceId?: string;
|
|
78
80
|
}
|
package/api/resources/index.d.ts
CHANGED
package/api/resources/index.js
CHANGED
|
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.expressionMeasurement = exports.empathicVoice = exports.tts = void 0;
|
|
37
|
-
exports.tts = __importStar(require("./tts"));
|
|
36
|
+
exports.expressionMeasurement = exports.tts = exports.empathicVoice = void 0;
|
|
38
37
|
exports.empathicVoice = __importStar(require("./empathicVoice"));
|
|
38
|
+
exports.tts = __importStar(require("./tts"));
|
|
39
39
|
exports.expressionMeasurement = __importStar(require("./expressionMeasurement"));
|
|
@@ -2,7 +2,12 @@
|
|
|
2
2
|
* This file was auto-generated by Fern from our API Definition.
|
|
3
3
|
*/
|
|
4
4
|
import * as Hume from "../../../index";
|
|
5
|
+
/**
|
|
6
|
+
* Metadata for a chunk of generated audio.
|
|
7
|
+
*/
|
|
5
8
|
export interface SnippetAudioChunk {
|
|
9
|
+
/** ID of the initiating request. */
|
|
10
|
+
requestId: string;
|
|
6
11
|
/** The generation ID of the parent snippet that this chunk corresponds to. */
|
|
7
12
|
generationId: string;
|
|
8
13
|
/** The ID of the parent snippet that this chunk corresponds to. */
|
package/dist/Client.js
CHANGED
|
@@ -40,13 +40,20 @@ const Client_1 = require("./api/resources/tts/client/Client");
|
|
|
40
40
|
const Client_2 = require("./api/resources/empathicVoice/client/Client");
|
|
41
41
|
const Client_3 = require("./api/resources/expressionMeasurement/client/Client");
|
|
42
42
|
const version_1 = require("./version");
|
|
43
|
-
const
|
|
43
|
+
const customFetcher = (fetcherToWrap, accessToken) => {
|
|
44
44
|
return (args) => {
|
|
45
|
-
var _a;
|
|
45
|
+
var _a, _b;
|
|
46
46
|
const newArgs = Object.assign({}, args);
|
|
47
47
|
newArgs.headers = (_a = newArgs.headers) !== null && _a !== void 0 ? _a : {};
|
|
48
48
|
((newArgs.headers["X-Hume-Client-Name"] = "typescript_sdk"),
|
|
49
49
|
(newArgs.headers["X-Hume-Client-Version"] = version_1.SDK_VERSION));
|
|
50
|
+
if (accessToken) {
|
|
51
|
+
const supplied = core.Supplier.get(accessToken);
|
|
52
|
+
if (supplied) {
|
|
53
|
+
newArgs.headers = (_b = newArgs.headers) !== null && _b !== void 0 ? _b : {};
|
|
54
|
+
newArgs.headers["Authorization"] = `Bearer ${supplied}`;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
50
57
|
return fetcherToWrap(args);
|
|
51
58
|
};
|
|
52
59
|
};
|
|
@@ -55,7 +62,7 @@ class HumeClient {
|
|
|
55
62
|
var _a;
|
|
56
63
|
this._options = _options;
|
|
57
64
|
const defaultFetcher = (_a = _options.fetcher) !== null && _a !== void 0 ? _a : core.fetcher;
|
|
58
|
-
this._options.fetcher =
|
|
65
|
+
this._options.fetcher = customFetcher(defaultFetcher, _options.accessToken);
|
|
59
66
|
}
|
|
60
67
|
get tts() {
|
|
61
68
|
var _a;
|
|
@@ -18,10 +18,10 @@ export interface ReturnConfig {
|
|
|
18
18
|
* Version numbers are integer values representing different iterations of the Config. Each update to the Config increments its version number.
|
|
19
19
|
*/
|
|
20
20
|
version?: number;
|
|
21
|
-
/** List of user-defined tools associated with this Config. */
|
|
22
|
-
tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
|
|
23
21
|
/** An optional description of the Config version. */
|
|
24
22
|
versionDescription?: string;
|
|
23
|
+
/** List of user-defined tools associated with this Config. */
|
|
24
|
+
tools?: (Hume.empathicVoice.ReturnUserDefinedTool | undefined)[];
|
|
25
25
|
/**
|
|
26
26
|
* The supplemental language model associated with this Config.
|
|
27
27
|
*
|
|
@@ -75,4 +75,6 @@ export interface SessionSettings {
|
|
|
75
75
|
* Using this field, you can personalize responses based on session-specific details. For more guidance, see our [guide on using dynamic variables](/docs/speech-to-speech-evi/features/dynamic-variables).
|
|
76
76
|
*/
|
|
77
77
|
variables?: Record<string, Hume.empathicVoice.SessionSettingsVariablesValue>;
|
|
78
|
+
/** Allows you to change the voice during an active chat. Updating the voice does not affect chat context or conversation history. */
|
|
79
|
+
voiceId?: string;
|
|
78
80
|
}
|
|
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.expressionMeasurement = exports.empathicVoice = exports.tts = void 0;
|
|
37
|
-
exports.tts = __importStar(require("./tts"));
|
|
36
|
+
exports.expressionMeasurement = exports.tts = exports.empathicVoice = void 0;
|
|
38
37
|
exports.empathicVoice = __importStar(require("./empathicVoice"));
|
|
38
|
+
exports.tts = __importStar(require("./tts"));
|
|
39
39
|
exports.expressionMeasurement = __importStar(require("./expressionMeasurement"));
|
|
@@ -2,7 +2,12 @@
|
|
|
2
2
|
* This file was auto-generated by Fern from our API Definition.
|
|
3
3
|
*/
|
|
4
4
|
import * as Hume from "../../../index";
|
|
5
|
+
/**
|
|
6
|
+
* Metadata for a chunk of generated audio.
|
|
7
|
+
*/
|
|
5
8
|
export interface SnippetAudioChunk {
|
|
9
|
+
/** ID of the initiating request. */
|
|
10
|
+
requestId: string;
|
|
6
11
|
/** The generation ID of the parent snippet that this chunk corresponds to. */
|
|
7
12
|
generationId: string;
|
|
8
13
|
/** The ID of the parent snippet that this chunk corresponds to. */
|
|
@@ -20,8 +20,8 @@ export declare namespace ReturnConfig {
|
|
|
20
20
|
name?: string | null;
|
|
21
21
|
id?: string | null;
|
|
22
22
|
version?: number | null;
|
|
23
|
-
tools?: (ReturnUserDefinedTool.Raw | null | undefined)[] | null;
|
|
24
23
|
version_description?: string | null;
|
|
24
|
+
tools?: (ReturnUserDefinedTool.Raw | null | undefined)[] | null;
|
|
25
25
|
language_model?: ReturnLanguageModel.Raw | null;
|
|
26
26
|
builtin_tools?: (ReturnBuiltinTool.Raw | null | undefined)[] | null;
|
|
27
27
|
evi_version?: string | null;
|
|
@@ -52,8 +52,8 @@ exports.ReturnConfig = core.serialization.object({
|
|
|
52
52
|
name: core.serialization.string().optional(),
|
|
53
53
|
id: core.serialization.string().optional(),
|
|
54
54
|
version: core.serialization.number().optional(),
|
|
55
|
-
tools: core.serialization.list(ReturnUserDefinedTool_1.ReturnUserDefinedTool.optional()).optional(),
|
|
56
55
|
versionDescription: core.serialization.property("version_description", core.serialization.string().optional()),
|
|
56
|
+
tools: core.serialization.list(ReturnUserDefinedTool_1.ReturnUserDefinedTool.optional()).optional(),
|
|
57
57
|
languageModel: core.serialization.property("language_model", ReturnLanguageModel_1.ReturnLanguageModel.optional()),
|
|
58
58
|
builtinTools: core.serialization.property("builtin_tools", core.serialization.list(ReturnBuiltinTool_1.ReturnBuiltinTool.optional()).optional()),
|
|
59
59
|
eviVersion: core.serialization.property("evi_version", core.serialization.string().optional()),
|
|
@@ -54,4 +54,5 @@ exports.SessionSettings = core.serialization.object({
|
|
|
54
54
|
builtinTools: core.serialization.property("builtin_tools", core.serialization.list(BuiltinToolConfig_1.BuiltinToolConfig).optional()),
|
|
55
55
|
metadata: core.serialization.record(core.serialization.string(), core.serialization.unknown()).optional(),
|
|
56
56
|
variables: core.serialization.record(core.serialization.string(), SessionSettingsVariablesValue_1.SessionSettingsVariablesValue).optional(),
|
|
57
|
+
voiceId: core.serialization.property("voice_id", core.serialization.string().optional()),
|
|
57
58
|
});
|
|
@@ -33,7 +33,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
33
33
|
};
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.expressionMeasurement = exports.empathicVoice = exports.tts = void 0;
|
|
37
|
-
exports.tts = __importStar(require("./tts"));
|
|
36
|
+
exports.expressionMeasurement = exports.tts = exports.empathicVoice = void 0;
|
|
38
37
|
exports.empathicVoice = __importStar(require("./empathicVoice"));
|
|
38
|
+
exports.tts = __importStar(require("./tts"));
|
|
39
39
|
exports.expressionMeasurement = __importStar(require("./expressionMeasurement"));
|
|
@@ -9,6 +9,7 @@ import { Snippet } from "./Snippet";
|
|
|
9
9
|
export declare const SnippetAudioChunk: core.serialization.ObjectSchema<serializers.tts.SnippetAudioChunk.Raw, Hume.tts.SnippetAudioChunk>;
|
|
10
10
|
export declare namespace SnippetAudioChunk {
|
|
11
11
|
interface Raw {
|
|
12
|
+
request_id: string;
|
|
12
13
|
generation_id: string;
|
|
13
14
|
snippet_id: string;
|
|
14
15
|
text: string;
|
|
@@ -41,6 +41,7 @@ const core = __importStar(require("../../../../core"));
|
|
|
41
41
|
const AudioFormatType_1 = require("./AudioFormatType");
|
|
42
42
|
const Snippet_1 = require("./Snippet");
|
|
43
43
|
exports.SnippetAudioChunk = core.serialization.object({
|
|
44
|
+
requestId: core.serialization.property("request_id", core.serialization.string()),
|
|
44
45
|
generationId: core.serialization.property("generation_id", core.serialization.string()),
|
|
45
46
|
snippetId: core.serialization.property("snippet_id", core.serialization.string()),
|
|
46
47
|
text: core.serialization.string(),
|
package/dist/version.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
export declare const SDK_VERSION = "0.13.4";
|
|
1
|
+
export declare const SDK_VERSION = "0.13.6";
|
package/dist/version.js
CHANGED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { Readable } from "stream";
|
|
2
|
+
/**
|
|
3
|
+
* SilenceFiller is a Readable stream that intersperses incoming audio data
|
|
4
|
+
* with bytes of silence. This is important in some cases to keep an audio
|
|
5
|
+
* stream "alive". Audio players, such as ffmpeg, can interpret inactivity as
|
|
6
|
+
* meaning the stream is ended, or disconnected.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { SilenceFiller } from 'hume';
|
|
11
|
+
*
|
|
12
|
+
* const BYTES_PER_SAMPLE = 2; // 16-bit samples
|
|
13
|
+
* const SAMPLE_RATE = 48000;
|
|
14
|
+
* const BUFFER_SIZE = Math.floor(SAMPLE_RATE * 0.1 * BYTES_PER_SAMPLE); // 100ms buffer
|
|
15
|
+
* const silenceFiller = new SilenceFiller(10, SAMPLE_RATE, BYTES_PER_SAMPLE, BUFFER_SIZE);
|
|
16
|
+
*
|
|
17
|
+
* // Pipe silence filler output to audio player stdin
|
|
18
|
+
* silenceFiller.pipe(audioPlayer.stdin);
|
|
19
|
+
*
|
|
20
|
+
* // Handle pipe errors
|
|
21
|
+
* silenceFiller.on('error', (err) => {
|
|
22
|
+
* console.error("SilenceFiller error:", err);
|
|
23
|
+
* });
|
|
24
|
+
*
|
|
25
|
+
* // Write audio data as it arrives
|
|
26
|
+
* silenceFiller.writeAudio(audioBuffer);
|
|
27
|
+
*
|
|
28
|
+
* // End the stream when done
|
|
29
|
+
* await silenceFiller.endStream();
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
export declare class SilenceFiller extends Readable {
|
|
33
|
+
private unclockedSilenceFiller;
|
|
34
|
+
private isStarted;
|
|
35
|
+
private pushInterval;
|
|
36
|
+
private bytesPerSample;
|
|
37
|
+
private pushIntervalMs;
|
|
38
|
+
/**
|
|
39
|
+
* Creates a new SilenceFiller instance.
|
|
40
|
+
*
|
|
41
|
+
* @param pushIntervalMs - The interval in milliseconds for pushing audio data (default: 5ms).
|
|
42
|
+
* @param sampleRate - The sample rate of the audio (e.g., 48000).
|
|
43
|
+
* @param bytesPerSample - The number of bytes per audio sample (e.g., 2 for 16-bit).
|
|
44
|
+
* @param bufferSize - How many bytes to 'prebuffer' (default: 9600). If you set this too low there
|
|
45
|
+
* is a chance that playback will stutter, but if you set it too high
|
|
46
|
+
* playback will take longer to start.
|
|
47
|
+
*/
|
|
48
|
+
constructor(pushIntervalMs?: number, sampleRate?: number, bytesPerSample?: number, bufferSize?: number);
|
|
49
|
+
/**
|
|
50
|
+
* Writes audio data to the silence filler.
|
|
51
|
+
*
|
|
52
|
+
* @param audioBuffer - The audio buffer to write.
|
|
53
|
+
*/
|
|
54
|
+
writeAudio(audioBuffer: Buffer): void;
|
|
55
|
+
private startPushInterval;
|
|
56
|
+
private pushData;
|
|
57
|
+
_read(): void;
|
|
58
|
+
_destroy(error: Error | null, callback: (error?: Error | null) => void): void;
|
|
59
|
+
/**
|
|
60
|
+
* Ends the stream and drains all remaining audio data.
|
|
61
|
+
*
|
|
62
|
+
* @returns A promise that resolves when the stream has ended.
|
|
63
|
+
*/
|
|
64
|
+
endStream(): Promise<void>;
|
|
65
|
+
}
|
|
66
|
+
/**
|
|
67
|
+
* Does the actual calculation for interspersing audio with silence. It
|
|
68
|
+
* is "pure" in the sense that it does not rely on the system clock.
|
|
69
|
+
* It's up to the caller to provide timestamps.
|
|
70
|
+
*
|
|
71
|
+
* @internal
|
|
72
|
+
*/
|
|
73
|
+
export declare class UnclockedSilenceFiller {
|
|
74
|
+
private audioQueue;
|
|
75
|
+
private totalBufferedBytes;
|
|
76
|
+
private startTimestamp;
|
|
77
|
+
private totalBytesSent;
|
|
78
|
+
donePrebuffering: boolean;
|
|
79
|
+
private bufferSize;
|
|
80
|
+
private sampleRate;
|
|
81
|
+
private bytesPerSample;
|
|
82
|
+
constructor(bufferSize: number, sampleRate: number, bytesPerSample: number);
|
|
83
|
+
writeAudio(audioBuffer: Buffer, timestamp: number): void;
|
|
84
|
+
readAudio(timestamp: number): Buffer | null;
|
|
85
|
+
}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.UnclockedSilenceFiller = exports.SilenceFiller = void 0;
|
|
4
|
+
const stream_1 = require("stream");
|
|
5
|
+
/**
|
|
6
|
+
* SilenceFiller is a Readable stream that intersperses incoming audio data
|
|
7
|
+
* with bytes of silence. This is important in some cases to keep an audio
|
|
8
|
+
* stream "alive". Audio players, such as ffmpeg, can interpret inactivity as
|
|
9
|
+
* meaning the stream is ended, or disconnected.
|
|
10
|
+
*
|
|
11
|
+
* @example
|
|
12
|
+
* ```typescript
|
|
13
|
+
* import { SilenceFiller } from 'hume';
|
|
14
|
+
*
|
|
15
|
+
* const BYTES_PER_SAMPLE = 2; // 16-bit samples
|
|
16
|
+
* const SAMPLE_RATE = 48000;
|
|
17
|
+
* const BUFFER_SIZE = Math.floor(SAMPLE_RATE * 0.1 * BYTES_PER_SAMPLE); // 100ms buffer
|
|
18
|
+
* const silenceFiller = new SilenceFiller(10, SAMPLE_RATE, BYTES_PER_SAMPLE, BUFFER_SIZE);
|
|
19
|
+
*
|
|
20
|
+
* // Pipe silence filler output to audio player stdin
|
|
21
|
+
* silenceFiller.pipe(audioPlayer.stdin);
|
|
22
|
+
*
|
|
23
|
+
* // Handle pipe errors
|
|
24
|
+
* silenceFiller.on('error', (err) => {
|
|
25
|
+
* console.error("SilenceFiller error:", err);
|
|
26
|
+
* });
|
|
27
|
+
*
|
|
28
|
+
* // Write audio data as it arrives
|
|
29
|
+
* silenceFiller.writeAudio(audioBuffer);
|
|
30
|
+
*
|
|
31
|
+
* // End the stream when done
|
|
32
|
+
* await silenceFiller.endStream();
|
|
33
|
+
* ```
|
|
34
|
+
*/
|
|
35
|
+
class SilenceFiller extends stream_1.Readable {
|
|
36
|
+
/**
|
|
37
|
+
* Creates a new SilenceFiller instance.
|
|
38
|
+
*
|
|
39
|
+
* @param pushIntervalMs - The interval in milliseconds for pushing audio data (default: 5ms).
|
|
40
|
+
* @param sampleRate - The sample rate of the audio (e.g., 48000).
|
|
41
|
+
* @param bytesPerSample - The number of bytes per audio sample (e.g., 2 for 16-bit).
|
|
42
|
+
* @param bufferSize - How many bytes to 'prebuffer' (default: 9600). If you set this too low there
|
|
43
|
+
* is a chance that playback will stutter, but if you set it too high
|
|
44
|
+
* playback will take longer to start.
|
|
45
|
+
*/
|
|
46
|
+
constructor(pushIntervalMs = 5, sampleRate = 48000, bytesPerSample = 2, bufferSize = 9600) {
|
|
47
|
+
super({ objectMode: false });
|
|
48
|
+
this.isStarted = false;
|
|
49
|
+
this.pushInterval = null;
|
|
50
|
+
this.unclockedSilenceFiller = new UnclockedSilenceFiller(bufferSize, sampleRate, bytesPerSample);
|
|
51
|
+
this.bytesPerSample = bytesPerSample;
|
|
52
|
+
this.pushIntervalMs = pushIntervalMs;
|
|
53
|
+
}
|
|
54
|
+
/**
|
|
55
|
+
* Writes audio data to the silence filler.
|
|
56
|
+
*
|
|
57
|
+
* @param audioBuffer - The audio buffer to write.
|
|
58
|
+
*/
|
|
59
|
+
writeAudio(audioBuffer) {
|
|
60
|
+
const now = Date.now();
|
|
61
|
+
try {
|
|
62
|
+
this.unclockedSilenceFiller.writeAudio(audioBuffer, now);
|
|
63
|
+
if (!this.isStarted && this.unclockedSilenceFiller.donePrebuffering) {
|
|
64
|
+
this.isStarted = true;
|
|
65
|
+
this.startPushInterval();
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
catch (error) {
|
|
69
|
+
console.error(`[SilenceFiller] Error writing audio:`, error);
|
|
70
|
+
this.emit("error", error);
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
startPushInterval() {
|
|
74
|
+
this.pushInterval = setInterval(() => {
|
|
75
|
+
this.pushData();
|
|
76
|
+
}, this.pushIntervalMs);
|
|
77
|
+
}
|
|
78
|
+
pushData() {
|
|
79
|
+
if (!this.isStarted)
|
|
80
|
+
return;
|
|
81
|
+
try {
|
|
82
|
+
const now = Date.now();
|
|
83
|
+
const audioChunk = this.unclockedSilenceFiller.readAudio(now);
|
|
84
|
+
if (audioChunk && audioChunk.length > 0) {
|
|
85
|
+
// Ensure chunk size is aligned to bytesPerSample
|
|
86
|
+
const alignedChunkSize = Math.floor(audioChunk.length / this.bytesPerSample) * this.bytesPerSample;
|
|
87
|
+
if (alignedChunkSize > 0) {
|
|
88
|
+
const chunk = audioChunk.subarray(0, alignedChunkSize);
|
|
89
|
+
this.push(chunk);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
catch (error) {
|
|
94
|
+
console.error(`[SilenceFiller] Error pushing data:`, error);
|
|
95
|
+
this.emit("error", error);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
_read() { }
|
|
99
|
+
_destroy(error, callback) {
|
|
100
|
+
super._destroy(error, callback);
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Ends the stream and drains all remaining audio data.
|
|
104
|
+
*
|
|
105
|
+
* @returns A promise that resolves when the stream has ended.
|
|
106
|
+
*/
|
|
107
|
+
endStream() {
|
|
108
|
+
return new Promise((resolve) => {
|
|
109
|
+
// Stop pushing data
|
|
110
|
+
if (this.pushInterval) {
|
|
111
|
+
clearInterval(this.pushInterval);
|
|
112
|
+
this.pushInterval = null;
|
|
113
|
+
}
|
|
114
|
+
// Drain all remaining audio from SilenceFiller
|
|
115
|
+
const now = Date.now();
|
|
116
|
+
// Keep reading until no more audio is available
|
|
117
|
+
while (true) {
|
|
118
|
+
const remainingChunk = this.unclockedSilenceFiller.readAudio(now);
|
|
119
|
+
if (!remainingChunk || remainingChunk.length === 0) {
|
|
120
|
+
break;
|
|
121
|
+
}
|
|
122
|
+
const alignedChunkSize = Math.floor(remainingChunk.length / this.bytesPerSample) * this.bytesPerSample;
|
|
123
|
+
if (alignedChunkSize > 0) {
|
|
124
|
+
const chunk = remainingChunk.subarray(0, alignedChunkSize);
|
|
125
|
+
this.push(chunk);
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
this.push(null); // Signal end of stream
|
|
129
|
+
this.once("end", () => {
|
|
130
|
+
resolve();
|
|
131
|
+
});
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
exports.SilenceFiller = SilenceFiller;
|
|
136
|
+
/**
|
|
137
|
+
* Does the actual calculation for interspersing audio with silence. It
|
|
138
|
+
* is "pure" in the sense that it does not rely on the system clock.
|
|
139
|
+
* It's up to the caller to provide timestamps.
|
|
140
|
+
*
|
|
141
|
+
* @internal
|
|
142
|
+
*/
|
|
143
|
+
class UnclockedSilenceFiller {
|
|
144
|
+
constructor(bufferSize, sampleRate, bytesPerSample) {
|
|
145
|
+
this.audioQueue = [];
|
|
146
|
+
this.totalBufferedBytes = 0;
|
|
147
|
+
this.startTimestamp = null;
|
|
148
|
+
this.totalBytesSent = 0;
|
|
149
|
+
this.donePrebuffering = false;
|
|
150
|
+
this.bufferSize = bufferSize;
|
|
151
|
+
this.sampleRate = sampleRate;
|
|
152
|
+
this.bytesPerSample = bytesPerSample;
|
|
153
|
+
}
|
|
154
|
+
writeAudio(audioBuffer, timestamp) {
|
|
155
|
+
this.audioQueue.push(audioBuffer);
|
|
156
|
+
this.totalBufferedBytes += audioBuffer.length;
|
|
157
|
+
if (this.startTimestamp === null) {
|
|
158
|
+
this.startTimestamp = timestamp;
|
|
159
|
+
}
|
|
160
|
+
if (!this.donePrebuffering && this.totalBufferedBytes >= this.bufferSize) {
|
|
161
|
+
this.donePrebuffering = true;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
readAudio(timestamp) {
|
|
165
|
+
if (this.startTimestamp === null || !this.donePrebuffering) {
|
|
166
|
+
return null;
|
|
167
|
+
}
|
|
168
|
+
const elapsedMs = timestamp - this.startTimestamp;
|
|
169
|
+
const targetBytesSent = Math.floor(((this.sampleRate * elapsedMs) / 1000) * this.bytesPerSample);
|
|
170
|
+
const bytesNeeded = targetBytesSent - this.totalBytesSent;
|
|
171
|
+
if (bytesNeeded <= 0) {
|
|
172
|
+
return null;
|
|
173
|
+
}
|
|
174
|
+
// Ensure bytesNeeded is a multiple of bytesPerSample
|
|
175
|
+
const alignedBytesNeeded = Math.floor(bytesNeeded / this.bytesPerSample) * this.bytesPerSample;
|
|
176
|
+
if (alignedBytesNeeded <= 0) {
|
|
177
|
+
return null;
|
|
178
|
+
}
|
|
179
|
+
let chunk = Buffer.alloc(0);
|
|
180
|
+
// Drain from queue until we have enough bytes
|
|
181
|
+
while (chunk.length < alignedBytesNeeded && this.audioQueue.length > 0) {
|
|
182
|
+
const nextBuffer = this.audioQueue.shift();
|
|
183
|
+
chunk = Buffer.concat([chunk, nextBuffer]);
|
|
184
|
+
this.totalBufferedBytes -= nextBuffer.length;
|
|
185
|
+
}
|
|
186
|
+
// If we have more than needed, put the excess back
|
|
187
|
+
if (chunk.length > alignedBytesNeeded) {
|
|
188
|
+
const excess = chunk.subarray(alignedBytesNeeded);
|
|
189
|
+
this.audioQueue.unshift(excess);
|
|
190
|
+
this.totalBufferedBytes += excess.length;
|
|
191
|
+
chunk = chunk.subarray(0, alignedBytesNeeded);
|
|
192
|
+
}
|
|
193
|
+
// Fill remaining with silence if needed
|
|
194
|
+
if (chunk.length < alignedBytesNeeded) {
|
|
195
|
+
const silenceNeeded = Buffer.alloc(alignedBytesNeeded - chunk.length, 0);
|
|
196
|
+
chunk = Buffer.concat([chunk, silenceNeeded]);
|
|
197
|
+
}
|
|
198
|
+
// Update total bytes sent
|
|
199
|
+
this.totalBytesSent += chunk.length;
|
|
200
|
+
return chunk;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
exports.UnclockedSilenceFiller = UnclockedSilenceFiller;
|