@speech-sdk/core 0.7.0 → 0.8.1-alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -108
- package/dist/__tests__/e2e/_save-audio.d.ts +0 -42
- package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
- package/dist/__tests__/e2e/_save-audio.js +0 -59
- package/dist/__tests__/e2e/_save-audio.js.map +1 -1
- package/dist/audio-duration.d.ts +0 -5
- package/dist/audio-duration.d.ts.map +1 -1
- package/dist/audio-duration.js +3 -10
- package/dist/audio-duration.js.map +1 -1
- package/dist/audio-utils.d.ts +0 -10
- package/dist/audio-utils.d.ts.map +1 -1
- package/dist/audio-utils.js +2 -14
- package/dist/audio-utils.js.map +1 -1
- package/dist/captions.d.ts +0 -108
- package/dist/captions.d.ts.map +1 -1
- package/dist/captions.js +8 -98
- package/dist/captions.js.map +1 -1
- package/dist/conversation/attribute-timestamps.d.ts +26 -0
- package/dist/conversation/attribute-timestamps.d.ts.map +1 -0
- package/dist/conversation/attribute-timestamps.js +276 -0
- package/dist/conversation/attribute-timestamps.js.map +1 -0
- package/dist/conversation/dispatch.d.ts +5 -5
- package/dist/conversation/dispatch.d.ts.map +1 -1
- package/dist/conversation/dispatch.js +18 -8
- package/dist/conversation/dispatch.js.map +1 -1
- package/dist/conversation/errors.d.ts +3 -0
- package/dist/conversation/errors.d.ts.map +1 -1
- package/dist/conversation/errors.js +6 -0
- package/dist/conversation/errors.js.map +1 -1
- package/dist/conversation/pcm-concat.d.ts +0 -23
- package/dist/conversation/pcm-concat.d.ts.map +1 -1
- package/dist/conversation/pcm-concat.js +5 -43
- package/dist/conversation/pcm-concat.js.map +1 -1
- package/dist/conversation/proportional-fill.d.ts +10 -0
- package/dist/conversation/proportional-fill.d.ts.map +1 -0
- package/dist/conversation/proportional-fill.js +64 -0
- package/dist/conversation/proportional-fill.js.map +1 -0
- package/dist/conversation/silence-detection.d.ts +14 -0
- package/dist/conversation/silence-detection.d.ts.map +1 -0
- package/dist/conversation/silence-detection.js +52 -0
- package/dist/conversation/silence-detection.js.map +1 -0
- package/dist/conversation/stitch.d.ts +5 -6
- package/dist/conversation/stitch.d.ts.map +1 -1
- package/dist/conversation/stitch.js +42 -36
- package/dist/conversation/stitch.js.map +1 -1
- package/dist/conversation/types.d.ts +1 -35
- package/dist/conversation/types.d.ts.map +1 -1
- package/dist/conversation/validate.d.ts +1 -16
- package/dist/conversation/validate.d.ts.map +1 -1
- package/dist/conversation/validate.js +29 -29
- package/dist/conversation/validate.js.map +1 -1
- package/dist/default-stt-fallback.d.ts +3 -0
- package/dist/default-stt-fallback.d.ts.map +1 -0
- package/dist/default-stt-fallback.js +11 -0
- package/dist/default-stt-fallback.js.map +1 -0
- package/dist/derive-timestamps.d.ts +1 -5
- package/dist/derive-timestamps.d.ts.map +1 -1
- package/dist/derive-timestamps.js +1 -15
- package/dist/derive-timestamps.js.map +1 -1
- package/dist/errors.d.ts +5 -12
- package/dist/errors.d.ts.map +1 -1
- package/dist/errors.js +12 -14
- package/dist/errors.js.map +1 -1
- package/dist/generate-conversation.d.ts +4 -3
- package/dist/generate-conversation.d.ts.map +1 -1
- package/dist/generate-conversation.js +162 -67
- package/dist/generate-conversation.js.map +1 -1
- package/dist/generate-speech.d.ts +1 -26
- package/dist/generate-speech.d.ts.map +1 -1
- package/dist/generate-speech.js +85 -64
- package/dist/generate-speech.js.map +1 -1
- package/dist/index.d.ts +4 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +2 -13
- package/dist/logger.js.map +1 -1
- package/dist/metadata.d.ts +0 -22
- package/dist/metadata.d.ts.map +1 -1
- package/dist/provider-utils.d.ts +3 -9
- package/dist/provider-utils.d.ts.map +1 -1
- package/dist/provider-utils.js +34 -51
- package/dist/provider-utils.js.map +1 -1
- package/dist/providers/cartesia/alignment.d.ts +0 -16
- package/dist/providers/cartesia/alignment.d.ts.map +1 -1
- package/dist/providers/cartesia/alignment.js +1 -6
- package/dist/providers/cartesia/alignment.js.map +1 -1
- package/dist/providers/cartesia/index.d.ts +7 -19
- package/dist/providers/cartesia/index.d.ts.map +1 -1
- package/dist/providers/cartesia/index.js +68 -80
- package/dist/providers/cartesia/index.js.map +1 -1
- package/dist/providers/deepgram/index.d.ts +7 -8
- package/dist/providers/deepgram/index.d.ts.map +1 -1
- package/dist/providers/deepgram/index.js +17 -18
- package/dist/providers/deepgram/index.js.map +1 -1
- package/dist/providers/elevenlabs/alignment.d.ts +7 -21
- package/dist/providers/elevenlabs/alignment.d.ts.map +1 -1
- package/dist/providers/elevenlabs/alignment.js +8 -9
- package/dist/providers/elevenlabs/alignment.js.map +1 -1
- package/dist/providers/elevenlabs/index.d.ts +7 -38
- package/dist/providers/elevenlabs/index.d.ts.map +1 -1
- package/dist/providers/elevenlabs/index.js +161 -169
- package/dist/providers/elevenlabs/index.js.map +1 -1
- package/dist/providers/fal/index.d.ts +7 -18
- package/dist/providers/fal/index.d.ts.map +1 -1
- package/dist/providers/fal/index.js +37 -31
- package/dist/providers/fal/index.js.map +1 -1
- package/dist/providers/fish-audio/index.d.ts +7 -8
- package/dist/providers/fish-audio/index.d.ts.map +1 -1
- package/dist/providers/fish-audio/index.js +23 -19
- package/dist/providers/fish-audio/index.js.map +1 -1
- package/dist/providers/gateway/index.d.ts +68 -0
- package/dist/providers/gateway/index.d.ts.map +1 -0
- package/dist/providers/gateway/index.js +236 -0
- package/dist/providers/gateway/index.js.map +1 -0
- package/dist/providers/google/index.d.ts +7 -20
- package/dist/providers/google/index.d.ts.map +1 -1
- package/dist/providers/google/index.js +161 -151
- package/dist/providers/google/index.js.map +1 -1
- package/dist/providers/hume/alignment.d.ts +30 -35
- package/dist/providers/hume/alignment.d.ts.map +1 -1
- package/dist/providers/hume/alignment.js +14 -8
- package/dist/providers/hume/alignment.js.map +1 -1
- package/dist/providers/hume/index.d.ts +7 -16
- package/dist/providers/hume/index.d.ts.map +1 -1
- package/dist/providers/hume/index.js +55 -65
- package/dist/providers/hume/index.js.map +1 -1
- package/dist/providers/inworld/alignment.d.ts +8 -22
- package/dist/providers/inworld/alignment.d.ts.map +1 -1
- package/dist/providers/inworld/alignment.js +9 -8
- package/dist/providers/inworld/alignment.js.map +1 -1
- package/dist/providers/inworld/index.d.ts +7 -20
- package/dist/providers/inworld/index.d.ts.map +1 -1
- package/dist/providers/inworld/index.js +47 -39
- package/dist/providers/inworld/index.js.map +1 -1
- package/dist/providers/mistral/index.d.ts +7 -8
- package/dist/providers/mistral/index.d.ts.map +1 -1
- package/dist/providers/mistral/index.js +39 -38
- package/dist/providers/mistral/index.js.map +1 -1
- package/dist/providers/murf/alignment.d.ts +10 -19
- package/dist/providers/murf/alignment.d.ts.map +1 -1
- package/dist/providers/murf/alignment.js +10 -5
- package/dist/providers/murf/alignment.js.map +1 -1
- package/dist/providers/murf/index.d.ts +7 -16
- package/dist/providers/murf/index.d.ts.map +1 -1
- package/dist/providers/murf/index.js +65 -57
- package/dist/providers/murf/index.js.map +1 -1
- package/dist/providers/openai/index.d.ts +36 -29
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +270 -106
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/resemble/alignment.d.ts +8 -29
- package/dist/providers/resemble/alignment.d.ts.map +1 -1
- package/dist/providers/resemble/alignment.js +9 -12
- package/dist/providers/resemble/alignment.js.map +1 -1
- package/dist/providers/resemble/index.d.ts +7 -11
- package/dist/providers/resemble/index.d.ts.map +1 -1
- package/dist/providers/resemble/index.js +54 -48
- package/dist/providers/resemble/index.js.map +1 -1
- package/dist/providers/xai/index.d.ts +7 -9
- package/dist/providers/xai/index.d.ts.map +1 -1
- package/dist/providers/xai/index.js +37 -40
- package/dist/providers/xai/index.js.map +1 -1
- package/dist/providers.d.ts +29 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +15 -0
- package/dist/providers.js.map +1 -0
- package/dist/resolve-provider.d.ts.map +1 -1
- package/dist/resolve-provider.js +8 -51
- package/dist/resolve-provider.js.map +1 -1
- package/dist/speech-provider.d.ts +13 -53
- package/dist/speech-provider.d.ts.map +1 -1
- package/dist/speech-provider.js +5 -26
- package/dist/speech-provider.js.map +1 -1
- package/dist/speech-result.d.ts +8 -9
- package/dist/speech-result.d.ts.map +1 -1
- package/dist/speech-result.js.map +1 -1
- package/dist/speech-to-text-provider.d.ts +0 -12
- package/dist/speech-to-text-provider.d.ts.map +1 -1
- package/dist/stream-speech.d.ts.map +1 -1
- package/dist/stream-speech.js +2 -3
- package/dist/stream-speech.js.map +1 -1
- package/dist/timestamps.d.ts +3 -17
- package/dist/timestamps.d.ts.map +1 -1
- package/dist/turns.d.ts +9 -0
- package/dist/turns.d.ts.map +1 -0
- package/dist/turns.js +21 -0
- package/dist/turns.js.map +1 -0
- package/dist/types.d.ts +25 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/volume-adjust.d.ts +0 -6
- package/dist/volume-adjust.d.ts.map +1 -1
- package/dist/volume-adjust.js +0 -6
- package/dist/volume-adjust.js.map +1 -1
- package/package.json +11 -66
- package/dist/stt-providers/openai/index.d.ts +0 -42
- package/dist/stt-providers/openai/index.d.ts.map +0 -1
- package/dist/stt-providers/openai/index.js +0 -184
- package/dist/stt-providers/openai/index.js.map +0 -1
package/dist/audio-duration.js
CHANGED
|
@@ -1,15 +1,8 @@
|
|
|
1
1
|
import { ALL_FORMATS, BlobSource, Input } from "mediabunny";
|
|
2
|
-
/**
|
|
3
|
-
* Compute audio duration in milliseconds from raw audio bytes.
|
|
4
|
-
* Uses mediabunny to parse the audio container (MP3, WAV, Ogg, FLAC, etc.)
|
|
5
|
-
* and extract duration. Returns undefined if parsing fails.
|
|
6
|
-
*/
|
|
7
2
|
export async function computeAudioDuration(data, mediaType) {
|
|
8
3
|
try {
|
|
9
4
|
const bytes = data instanceof Uint8Array ? data : base64ToUint8Array(data);
|
|
10
|
-
// Copy to a fresh ArrayBuffer
|
|
11
|
-
// bytes — not the entire underlying buffer (which may be larger when
|
|
12
|
-
// bytes is a subarray) and satisfies Blob's BlobPart type.
|
|
5
|
+
// Copy to a fresh ArrayBuffer — bytes may be a subarray of a larger buffer.
|
|
13
6
|
const ab = new ArrayBuffer(bytes.byteLength);
|
|
14
7
|
new Uint8Array(ab).set(bytes);
|
|
15
8
|
const blob = new Blob([ab], { type: mediaType });
|
|
@@ -19,13 +12,13 @@ export async function computeAudioDuration(data, mediaType) {
|
|
|
19
12
|
});
|
|
20
13
|
const track = await input.getPrimaryAudioTrack();
|
|
21
14
|
if (!track) {
|
|
22
|
-
return
|
|
15
|
+
return;
|
|
23
16
|
}
|
|
24
17
|
const durationSeconds = await track.computeDuration();
|
|
25
18
|
return Math.round(durationSeconds * 1000);
|
|
26
19
|
}
|
|
27
20
|
catch {
|
|
28
|
-
return
|
|
21
|
+
return;
|
|
29
22
|
}
|
|
30
23
|
}
|
|
31
24
|
function base64ToUint8Array(b64) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio-duration.js","sourceRoot":"","sources":["../src/audio-duration.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAE5D
|
|
1
|
+
{"version":3,"file":"audio-duration.js","sourceRoot":"","sources":["../src/audio-duration.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AAE5D,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,IAAyB,EACzB,SAAiB;IAEjB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAI,YAAY,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAC3E,4EAA4E;QAC5E,MAAM,EAAE,GAAG,IAAI,WAAW,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAC7C,IAAI,UAAU,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;QACjD,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC;YACtB,MAAM,EAAE,IAAI,UAAU,CAAC,IAAI,CAAC;YAC5B,OAAO,EAAE,WAAW;SACrB,CAAC,CAAC;QACH,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,oBAAoB,EAAE,CAAC;QACjD,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,OAAO;QACT,CAAC;QACD,MAAM,eAAe,GAAG,MAAM,KAAK,CAAC,eAAe,EAAE,CAAC;QACtD,OAAO,IAAI,CAAC,KAAK,CAAC,eAAe,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;IACT,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,KAAK,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/audio-utils.d.ts
CHANGED
|
@@ -1,14 +1,4 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Parse a numeric parameter from a mediaType string (e.g. "audio/pcm;rate=24000").
|
|
3
|
-
* Returns undefined if missing or non-positive.
|
|
4
|
-
*/
|
|
5
1
|
export declare function parseMediaTypeParam(mediaType: string, name: string): number | undefined;
|
|
6
|
-
/**
|
|
7
|
-
* Wrap raw 16-bit little-endian mono PCM bytes in a WAV container.
|
|
8
|
-
* Cross-platform (browser, Node, edge) via mediabunny's container ops —
|
|
9
|
-
* does not require the WebCodecs encoder.
|
|
10
|
-
*/
|
|
11
2
|
export declare function wrapPcm16Mono(pcm: Uint8Array, sampleRate: number): Promise<Uint8Array>;
|
|
12
|
-
/** Decode a base64 string into raw bytes. */
|
|
13
3
|
export declare function base64ToUint8Array(b64: string): Uint8Array;
|
|
14
4
|
//# sourceMappingURL=audio-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio-utils.d.ts","sourceRoot":"","sources":["../src/audio-utils.ts"],"names":[],"mappings":"AAUA
|
|
1
|
+
{"version":3,"file":"audio-utils.d.ts","sourceRoot":"","sources":["../src/audio-utils.ts"],"names":[],"mappings":"AAUA,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,GACX,MAAM,GAAG,SAAS,CAapB;AAED,wBAAsB,aAAa,CACjC,GAAG,EAAE,UAAU,EACf,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,UAAU,CAAC,CA0BrB;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,UAAU,CAO1D"}
|
package/dist/audio-utils.js
CHANGED
|
@@ -1,29 +1,19 @@
|
|
|
1
1
|
import { BufferTarget, EncodedAudioPacketSource, EncodedPacket, Output, WavOutputFormat, } from "mediabunny";
|
|
2
2
|
const PARAM_REGEX_CACHE = new Map();
|
|
3
|
-
/**
|
|
4
|
-
* Parse a numeric parameter from a mediaType string (e.g. "audio/pcm;rate=24000").
|
|
5
|
-
* Returns undefined if missing or non-positive.
|
|
6
|
-
*/
|
|
7
3
|
export function parseMediaTypeParam(mediaType, name) {
|
|
8
4
|
let re = PARAM_REGEX_CACHE.get(name);
|
|
9
5
|
if (!re) {
|
|
10
|
-
// End boundary
|
|
11
|
-
// end-of-string. Rejects values like "rate=24000x".
|
|
6
|
+
// End boundary rejects values like "rate=24000x".
|
|
12
7
|
re = new RegExp(`(?:^|;)\\s*${name}=(\\d+)(?=$|;|\\s)`, "i");
|
|
13
8
|
PARAM_REGEX_CACHE.set(name, re);
|
|
14
9
|
}
|
|
15
10
|
const match = mediaType.match(re);
|
|
16
11
|
if (!match) {
|
|
17
|
-
return
|
|
12
|
+
return;
|
|
18
13
|
}
|
|
19
14
|
const value = Number(match[1]);
|
|
20
15
|
return Number.isFinite(value) && value > 0 ? value : undefined;
|
|
21
16
|
}
|
|
22
|
-
/**
|
|
23
|
-
* Wrap raw 16-bit little-endian mono PCM bytes in a WAV container.
|
|
24
|
-
* Cross-platform (browser, Node, edge) via mediabunny's container ops —
|
|
25
|
-
* does not require the WebCodecs encoder.
|
|
26
|
-
*/
|
|
27
17
|
export async function wrapPcm16Mono(pcm, sampleRate) {
|
|
28
18
|
const output = new Output({
|
|
29
19
|
format: new WavOutputFormat(),
|
|
@@ -32,7 +22,6 @@ export async function wrapPcm16Mono(pcm, sampleRate) {
|
|
|
32
22
|
const source = new EncodedAudioPacketSource("pcm-s16");
|
|
33
23
|
output.addAudioTrack(source);
|
|
34
24
|
await output.start();
|
|
35
|
-
// 2 bytes per sample, mono.
|
|
36
25
|
const numSamples = pcm.length / 2;
|
|
37
26
|
const durationSeconds = numSamples / sampleRate;
|
|
38
27
|
const packet = new EncodedPacket(pcm, "key", 0, durationSeconds, 0);
|
|
@@ -50,7 +39,6 @@ export async function wrapPcm16Mono(pcm, sampleRate) {
|
|
|
50
39
|
}
|
|
51
40
|
return new Uint8Array(buffer);
|
|
52
41
|
}
|
|
53
|
-
/** Decode a base64 string into raw bytes. */
|
|
54
42
|
export function base64ToUint8Array(b64) {
|
|
55
43
|
const binary = atob(b64);
|
|
56
44
|
const bytes = new Uint8Array(binary.length);
|
package/dist/audio-utils.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audio-utils.js","sourceRoot":"","sources":["../src/audio-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,YAAY,EACZ,wBAAwB,EACxB,aAAa,EACb,MAAM,EACN,eAAe,GAChB,MAAM,YAAY,CAAC;AAEpB,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;AAEpD
|
|
1
|
+
{"version":3,"file":"audio-utils.js","sourceRoot":"","sources":["../src/audio-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,YAAY,EACZ,wBAAwB,EACxB,aAAa,EACb,MAAM,EACN,eAAe,GAChB,MAAM,YAAY,CAAC;AAEpB,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAkB,CAAC;AAEpD,MAAM,UAAU,mBAAmB,CACjC,SAAiB,EACjB,IAAY;IAEZ,IAAI,EAAE,GAAG,iBAAiB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACrC,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,kDAAkD;QAClD,EAAE,GAAG,IAAI,MAAM,CAAC,cAAc,IAAI,oBAAoB,EAAE,GAAG,CAAC,CAAC;QAC7D,iBAAiB,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAClC,CAAC;IACD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAClC,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO;IACT,CAAC;IACD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/B,OAAO,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;AACjE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,GAAe,EACf,UAAkB;IAElB,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC;QACxB,MAAM,EAAE,IAAI,eAAe,EAAE;QAC7B,MAAM,EAAE,IAAI,YAAY,EAAE;KAC3B,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,IAAI,wBAAwB,CAAC,SAAS,CAAC,CAAC;IACvD,MAAM,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;IAC7B,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IAErB,MAAM,UAAU,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IAClC,MAAM,eAAe,GAAG,UAAU,GAAG,UAAU,CAAC;IAChD,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,eAAe,EAAE,CAAC,CAAC,CAAC;IACpE,MAAM,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE;QACvB,aAAa,EAAE;YACb,KAAK,EAAE,SAAS;YAChB,gBAAgB,EAAE,CAAC;YACnB,UAAU;SACX;KACF,CAAC,CAAC;IAEH,MAAM,MAAM,CAAC,QAAQ,EAAE,CAAC;IACxB,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;IACpC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;IACrE,CAAC;IACD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IACzB,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,KAAK,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/captions.d.ts
CHANGED
|
@@ -1,137 +1,29 @@
|
|
|
1
1
|
import type { WordTimestamp } from "./timestamps.js";
|
|
2
|
-
/**
|
|
3
|
-
* Sanitizes caption-body text: strips C0 control characters (U+0000–U+001F
|
|
4
|
-
* minus whitespace, plus U+007F DEL), folds non-ASCII typography (curly
|
|
5
|
-
* quotes, en/em dashes, ellipsis) to ASCII equivalents, and collapses
|
|
6
|
-
* whitespace runs to a single space. Exported for testing.
|
|
7
|
-
*/
|
|
8
2
|
export declare function normalizeTypography(text: string): string;
|
|
9
|
-
/**
|
|
10
|
-
* Escapes characters that would otherwise be interpreted as inline WebVTT
|
|
11
|
-
* markup. Applied only to the VTT render path; SRT passes raw text through.
|
|
12
|
-
* Exported for testing; not part of the public API.
|
|
13
|
-
*/
|
|
14
3
|
export declare function escapeVttText(text: string): string;
|
|
15
|
-
/**
|
|
16
|
-
* Formats a number of seconds as an SRT timestamp: `HH:MM:SS,mmm`.
|
|
17
|
-
* Negative inputs are clamped to zero. Milliseconds are rounded.
|
|
18
|
-
* Exported for testing; not part of the public API.
|
|
19
|
-
*/
|
|
20
4
|
export declare function formatSrtTime(seconds: number): string;
|
|
21
|
-
/**
|
|
22
|
-
* Formats a number of seconds as a WebVTT timestamp: `HH:MM:SS.mmm`.
|
|
23
|
-
* Negative inputs are clamped to zero. Milliseconds are rounded.
|
|
24
|
-
* Exported for testing; not part of the public API.
|
|
25
|
-
*/
|
|
26
5
|
export declare function formatVttTime(seconds: number): string;
|
|
27
|
-
/**
|
|
28
|
-
* Groups a flat list of word timestamps into sentences using terminator
|
|
29
|
-
* punctuation attached to the trailing word. Supported terminators:
|
|
30
|
-
*
|
|
31
|
-
* - ASCII: `.`, `!`, `?`
|
|
32
|
-
* - CJK: `。`, `！`, `？`
|
|
33
|
-
* - Devanagari (Hindi, Sanskrit, Marathi): `।`, `॥`
|
|
34
|
-
* - Arabic: `؟`, `۔`
|
|
35
|
-
*
|
|
36
|
-
* A trailing closing quote (`"`, `'`, curly variants, or CJK corner
|
|
37
|
-
* bracket `」` / `』`) attached to the terminator is tolerated.
|
|
38
|
-
*
|
|
39
|
-
* Known limitations:
|
|
40
|
-
* - Abbreviations like "Dr." or "e.g." are treated as sentence ends.
|
|
41
|
-
* - Thai and other scripts without word-level whitespace or inline
|
|
42
|
-
* terminators fall through to char/duration-based hard breaks.
|
|
43
|
-
*
|
|
44
|
-
* Exported for testing; not part of the public API.
|
|
45
|
-
*/
|
|
46
6
|
export declare function groupIntoSentences(words: readonly WordTimestamp[]): WordTimestamp[][];
|
|
47
7
|
interface CueSplitOptions {
|
|
48
8
|
readonly longPhraseCommaBreakChars: number;
|
|
49
9
|
readonly maxCharsPerCue: number;
|
|
50
10
|
readonly maxCueDurationMs: number;
|
|
51
11
|
}
|
|
52
|
-
/**
|
|
53
|
-
* Subdivides a sentence (an ordered list of words) into one or more cues.
|
|
54
|
-
* Breaks are chosen in this priority order:
|
|
55
|
-
* 1. Hard: character budget exceeded → break before the offending word.
|
|
56
|
-
* 2. Hard: duration exceeded → break before the offending word.
|
|
57
|
-
* 3. Soft: comma in a word that leaves the current cue above
|
|
58
|
-
* `longPhraseCommaBreakChars` → break after that word.
|
|
59
|
-
*
|
|
60
|
-
* Exported for testing; not part of the public API.
|
|
61
|
-
*/
|
|
62
12
|
export declare function splitSentenceIntoCues(sentence: readonly WordTimestamp[], options: CueSplitOptions): WordTimestamp[][];
|
|
63
13
|
interface WrapOptions {
|
|
64
14
|
readonly maxLineLength: number;
|
|
65
15
|
readonly maxLines: number;
|
|
66
16
|
}
|
|
67
|
-
/**
|
|
68
|
-
* Wraps a sequence of words into up to `maxLines` lines, trying to keep
|
|
69
|
-
* each line at or below `maxLineLength` characters. A word longer than
|
|
70
|
-
* `maxLineLength` is placed on its own line rather than split. If words
|
|
71
|
-
* remain after the final line is full, they are appended to that final
|
|
72
|
-
* line (accept overflow; cue splitter is expected to have prevented this
|
|
73
|
-
* in normal flow).
|
|
74
|
-
*
|
|
75
|
-
* Exported for testing; not part of the public API.
|
|
76
|
-
*/
|
|
77
17
|
export declare function wrapCueText(words: readonly string[], options: WrapOptions): string;
|
|
78
|
-
/**
|
|
79
|
-
* Supported caption output formats.
|
|
80
|
-
*
|
|
81
|
-
* - `"srt"` — SubRip (`.srt`). Comma-decimal timestamps, numeric cue IDs,
|
|
82
|
-
* plain text bodies. Widely supported by media players and upload tools.
|
|
83
|
-
* - `"vtt"` — WebVTT (`.vtt`). Period-decimal timestamps, `WEBVTT` header,
|
|
84
|
-
* HTML-escaped bodies (`&`, `<`, `>`). Required for HTML `<track>`.
|
|
85
|
-
*/
|
|
86
18
|
export type CaptionFormat = "srt" | "vtt";
|
|
87
|
-
/**
|
|
88
|
-
* Options for {@link timestampsToCaptions}.
|
|
89
|
-
*/
|
|
90
19
|
export interface CaptionsOptions {
|
|
91
|
-
/** Output format. Default `"srt"`. */
|
|
92
20
|
readonly format?: CaptionFormat;
|
|
93
|
-
/**
|
|
94
|
-
* Minimum cue-char-count at which a trailing comma triggers a soft cue
|
|
95
|
-
* break. Prevents tiny fragments after every comma. Default `60`.
|
|
96
|
-
*/
|
|
97
21
|
readonly longPhraseCommaBreakChars?: number;
|
|
98
|
-
/** Max total chars per cue. Default `maxLineLength * maxLinesPerCue`. */
|
|
99
22
|
readonly maxCharsPerCue?: number;
|
|
100
|
-
/** Max cue duration in milliseconds. Default `7000`. */
|
|
101
23
|
readonly maxCueDurationMs?: number;
|
|
102
|
-
/**
|
|
103
|
-
* Max chars per line (word-boundary wrap). Default `42` — the common
|
|
104
|
-
* broadcast convention for Latin-alphabet subtitles.
|
|
105
|
-
*
|
|
106
|
-
* Character counts use JavaScript `string.length` (UTF-16 code units).
|
|
107
|
-
* For CJK (Japanese, Chinese, Korean) content, each character is roughly
|
|
108
|
-
* twice the visual width of an ASCII character in monospaced players;
|
|
109
|
-
* pass a smaller value (e.g. `16`) to match Japanese broadcast norms.
|
|
110
|
-
*/
|
|
111
24
|
readonly maxLineLength?: number;
|
|
112
|
-
/** Max lines per cue. Default `2`. */
|
|
113
25
|
readonly maxLinesPerCue?: number;
|
|
114
26
|
}
|
|
115
|
-
/**
|
|
116
|
-
* Converts word-level timestamps into a caption string in SRT or WebVTT
|
|
117
|
-
* format.
|
|
118
|
-
*
|
|
119
|
-
* Sentence boundaries (`.`, `!`, `?` in word text, optionally followed
|
|
120
|
-
* by a closing quote) create cue breaks; long sentences are subdivided
|
|
121
|
-
* by character count, duration, and soft comma breaks. Each cue is
|
|
122
|
-
* greedily wrapped into up to `maxLinesPerCue` lines of `maxLineLength`
|
|
123
|
-
* characters.
|
|
124
|
-
*
|
|
125
|
-
* Returns the empty string for empty input.
|
|
126
|
-
*
|
|
127
|
-
* @example
|
|
128
|
-
* ```ts
|
|
129
|
-
* const { timestamps } = await generateSpeech({ ... });
|
|
130
|
-
*
|
|
131
|
-
* const srt = timestampsToCaptions(timestamps ?? []);
|
|
132
|
-
* const vtt = timestampsToCaptions(timestamps ?? [], { format: "vtt" });
|
|
133
|
-
* ```
|
|
134
|
-
*/
|
|
135
27
|
export declare function timestampsToCaptions(timestamps: readonly WordTimestamp[], options?: CaptionsOptions): string;
|
|
136
28
|
export {};
|
|
137
29
|
//# sourceMappingURL=captions.d.ts.map
|
package/dist/captions.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"captions.d.ts","sourceRoot":"","sources":["../src/captions.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;
|
|
1
|
+
{"version":3,"file":"captions.d.ts","sourceRoot":"","sources":["../src/captions.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAsBrD,wBAAgB,mBAAmB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMxD;AAQD,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAMlD;AAeD,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAErD;AAED,wBAAgB,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAErD;AAOD,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,SAAS,aAAa,EAAE,GAC9B,aAAa,EAAE,EAAE,CAcnB;AAMD,UAAU,eAAe;IACvB,QAAQ,CAAC,yBAAyB,EAAE,MAAM,CAAC;IAC3C,QAAQ,CAAC,cAAc,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;CACnC;AA0BD,wBAAgB,qBAAqB,CACnC,QAAQ,EAAE,SAAS,aAAa,EAAE,EAClC,OAAO,EAAE,eAAe,GACvB,aAAa,EAAE,EAAE,CAgCnB;AAED,UAAU,WAAW;IACnB,QAAQ,CAAC,aAAa,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC;CAC3B;AAGD,wBAAgB,WAAW,CACzB,KAAK,EAAE,SAAS,MAAM,EAAE,EACxB,OAAO,EAAE,WAAW,GACnB,MAAM,CAmBR;AAED,MAAM,MAAM,aAAa,GAAG,KAAK,GAAG,KAAK,CAAC;AAE1C,MAAM,WAAW,eAAe;IAC9B,QAAQ,CAAC,MAAM,CAAC,EAAE,aAAa,CAAC;IAChC,QAAQ,CAAC,yBAAyB,CAAC,EAAE,MAAM,CAAC;IAC5C,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;IACjC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAEnC,QAAQ,CAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAChC,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM,CAAC;CAClC;AAWD,wBAAgB,oBAAoB,CAClC,UAAU,EAAE,SAAS,aAAa,EAAE,EACpC,OAAO,GAAE,eAAoB,GAC5B,MAAM,CA8DR"}
|
package/dist/captions.js
CHANGED
|
@@ -10,18 +10,10 @@ const TYPOGRAPHY_MAP = [
|
|
|
10
10
|
[/\u2014/g, "-"],
|
|
11
11
|
[/\u2026/g, "..."],
|
|
12
12
|
];
|
|
13
|
-
// C0
|
|
14
|
-
// and DEL. Providers should never emit these in text, but a stray NUL or ESC
|
|
15
|
-
// would silently corrupt SRT/VTT output — some parsers truncate on NUL.
|
|
13
|
+
// Strip C0 controls (excluding whitespace) + DEL — some parsers truncate on NUL.
|
|
16
14
|
// biome-ignore lint/suspicious/noControlCharactersInRegex: intentional — this regex exists to strip control characters
|
|
17
15
|
const CONTROL_CHARS = /[\u0000-\u0008\u000E-\u001F\u007F]/g;
|
|
18
16
|
const WHITESPACE_RUN = /\s+/g;
|
|
19
|
-
/**
|
|
20
|
-
* Sanitizes caption-body text: strips C0 control characters (U+0000–U+001F
|
|
21
|
-
* minus whitespace, plus U+007F DEL), folds non-ASCII typography (curly
|
|
22
|
-
* quotes, en/em dashes, ellipsis) to ASCII equivalents, and collapses
|
|
23
|
-
* whitespace runs to a single space. Exported for testing.
|
|
24
|
-
*/
|
|
25
17
|
export function normalizeTypography(text) {
|
|
26
18
|
let out = text.replace(CONTROL_CHARS, "");
|
|
27
19
|
for (const [pattern, replacement] of TYPOGRAPHY_MAP) {
|
|
@@ -34,11 +26,6 @@ const VTT_ESCAPE_MAP = [
|
|
|
34
26
|
[/</g, "&lt;"],
|
|
35
27
|
[/>/g, "&gt;"],
|
|
36
28
|
];
|
|
37
|
-
/**
|
|
38
|
-
* Escapes characters that would otherwise be interpreted as inline WebVTT
|
|
39
|
-
* markup. Applied only to the VTT render path; SRT passes raw text through.
|
|
40
|
-
* Exported for testing; not part of the public API.
|
|
41
|
-
*/
|
|
42
29
|
export function escapeVttText(text) {
|
|
43
30
|
let out = text;
|
|
44
31
|
for (const [pattern, replacement] of VTT_ESCAPE_MAP) {
|
|
@@ -56,48 +43,15 @@ function formatTimestamp(seconds, separator) {
|
|
|
56
43
|
const secs = totalSeconds % SECONDS_PER_MINUTE;
|
|
57
44
|
return `${String(hours).padStart(2, "0")}:${String(minutes).padStart(2, "0")}:${String(secs).padStart(2, "0")}${separator}${String(ms).padStart(3, "0")}`;
|
|
58
45
|
}
|
|
59
|
-
/**
|
|
60
|
-
* Formats a number of seconds as an SRT timestamp: `HH:MM:SS,mmm`.
|
|
61
|
-
* Negative inputs are clamped to zero. Milliseconds are rounded.
|
|
62
|
-
* Exported for testing; not part of the public API.
|
|
63
|
-
*/
|
|
64
46
|
export function formatSrtTime(seconds) {
|
|
65
47
|
return formatTimestamp(seconds, ",");
|
|
66
48
|
}
|
|
67
|
-
/**
|
|
68
|
-
* Formats a number of seconds as a WebVTT timestamp: `HH:MM:SS.mmm`.
|
|
69
|
-
* Negative inputs are clamped to zero. Milliseconds are rounded.
|
|
70
|
-
* Exported for testing; not part of the public API.
|
|
71
|
-
*/
|
|
72
49
|
export function formatVttTime(seconds) {
|
|
73
50
|
return formatTimestamp(seconds, ".");
|
|
74
51
|
}
|
|
75
|
-
// Sentence
|
|
76
|
-
// ASCII: . ! ?
|
|
77
|
-
// CJK: 。 ！ ？ (U+3002, U+FF01, U+FF1F)
|
|
78
|
-
// Devanagari: । ॥ (U+0964 danda, U+0965 double danda)
|
|
79
|
-
// Arabic: ؟ ۔ (U+061F question, U+06D4 full stop)
|
|
80
|
-
// Optionally followed by a closing quote: ASCII, curly, or CJK corner bracket.
|
|
52
|
+
// Sentence terminators: ASCII + CJK + Devanagari + Arabic, optional trailing quote.
|
|
81
53
|
const SENTENCE_TERMINATOR = /[.!?\u3002\uFF01\uFF1F\u0964\u0965\u061F\u06D4]["'\u2018\u2019\u201C\u201D\u300D\u300F]?$/;
|
|
82
|
-
|
|
83
|
-
* Groups a flat list of word timestamps into sentences using terminator
|
|
84
|
-
* punctuation attached to the trailing word. Supported terminators:
|
|
85
|
-
*
|
|
86
|
-
* - ASCII: `.`, `!`, `?`
|
|
87
|
-
* - CJK: `。`, `！`, `？`
|
|
88
|
-
* - Devanagari (Hindi, Sanskrit, Marathi): `।`, `॥`
|
|
89
|
-
* - Arabic: `؟`, `۔`
|
|
90
|
-
*
|
|
91
|
-
* A trailing closing quote (`"`, `'`, curly variants, or CJK corner
|
|
92
|
-
* bracket `」` / `』`) attached to the terminator is tolerated.
|
|
93
|
-
*
|
|
94
|
-
* Known limitations:
|
|
95
|
-
* - Abbreviations like "Dr." or "e.g." are treated as sentence ends.
|
|
96
|
-
* - Thai and other scripts without word-level whitespace or inline
|
|
97
|
-
* terminators fall through to char/duration-based hard breaks.
|
|
98
|
-
*
|
|
99
|
-
* Exported for testing; not part of the public API.
|
|
100
|
-
*/
|
|
54
|
+
// Limitations: "Dr."/"e.g." are treated as sentence ends; Thai etc. fall through to char/duration breaks.
|
|
101
55
|
export function groupIntoSentences(words) {
|
|
102
56
|
const sentences = [];
|
|
103
57
|
let current = [];
|
|
@@ -113,11 +67,9 @@ export function groupIntoSentences(words) {
|
|
|
113
67
|
}
|
|
114
68
|
return sentences;
|
|
115
69
|
}
|
|
116
|
-
// Comma-equivalent soft
|
|
117
|
-
// fullwidth (`,`), and Arabic (`،`).
|
|
70
|
+
// Comma-equivalent soft breaks: ASCII + CJK + Arabic.
|
|
118
71
|
const COMMA_TERMINATOR = /[,\u3001\uFF0C\u060C]["'\u2018\u2019\u201C\u201D\u300D\u300F]?$/;
|
|
119
72
|
function cueCharLength(cue) {
|
|
120
|
-
// Sum word lengths + (n-1) spaces between words.
|
|
121
73
|
let chars = 0;
|
|
122
74
|
for (const word of cue) {
|
|
123
75
|
chars += word.text.length;
|
|
@@ -138,16 +90,7 @@ function cueDurationMs(cue) {
|
|
|
138
90
|
}
|
|
139
91
|
return (last.end - first.start) * 1000;
|
|
140
92
|
}
|
|
141
|
-
|
|
142
|
-
* Subdivides a sentence (an ordered list of words) into one or more cues.
|
|
143
|
-
* Breaks are chosen in this priority order:
|
|
144
|
-
* 1. Hard: character budget exceeded → break before the offending word.
|
|
145
|
-
* 2. Hard: duration exceeded → break before the offending word.
|
|
146
|
-
* 3. Soft: comma in a word that leaves the current cue above
|
|
147
|
-
* `longPhraseCommaBreakChars` → break after that word.
|
|
148
|
-
*
|
|
149
|
-
* Exported for testing; not part of the public API.
|
|
150
|
-
*/
|
|
93
|
+
// Break priority: char budget → duration → comma above longPhraseCommaBreakChars.
|
|
151
94
|
export function splitSentenceIntoCues(sentence, options) {
|
|
152
95
|
const cues = [];
|
|
153
96
|
let current = [];
|
|
@@ -173,16 +116,7 @@ export function splitSentenceIntoCues(sentence, options) {
|
|
|
173
116
|
}
|
|
174
117
|
return cues;
|
|
175
118
|
}
|
|
176
|
-
|
|
177
|
-
* Wraps a sequence of words into up to `maxLines` lines, trying to keep
|
|
178
|
-
* each line at or below `maxLineLength` characters. A word longer than
|
|
179
|
-
* `maxLineLength` is placed on its own line rather than split. If words
|
|
180
|
-
* remain after the final line is full, they are appended to that final
|
|
181
|
-
* line (accept overflow; cue splitter is expected to have prevented this
|
|
182
|
-
* in normal flow).
|
|
183
|
-
*
|
|
184
|
-
* Exported for testing; not part of the public API.
|
|
185
|
-
*/
|
|
119
|
+
// Words longer than maxLineLength go on their own line; overflow appends to the last line.
|
|
186
120
|
export function wrapCueText(words, options) {
|
|
187
121
|
if (words.length === 0) {
|
|
188
122
|
return "";
|
|
@@ -209,32 +143,10 @@ const DEFAULT_LONG_PHRASE_COMMA_BREAK_CHARS = 60;
|
|
|
209
143
|
function identity(text) {
|
|
210
144
|
return text;
|
|
211
145
|
}
|
|
212
|
-
/**
|
|
213
|
-
* Converts word-level timestamps into a caption string in SRT or WebVTT
|
|
214
|
-
* format.
|
|
215
|
-
*
|
|
216
|
-
* Sentence boundaries (`.`, `!`, `?` in word text, optionally followed
|
|
217
|
-
* by a closing quote) create cue breaks; long sentences are subdivided
|
|
218
|
-
* by character count, duration, and soft comma breaks. Each cue is
|
|
219
|
-
* greedily wrapped into up to `maxLinesPerCue` lines of `maxLineLength`
|
|
220
|
-
* characters.
|
|
221
|
-
*
|
|
222
|
-
* Returns the empty string for empty input.
|
|
223
|
-
*
|
|
224
|
-
* @example
|
|
225
|
-
* ```ts
|
|
226
|
-
* const { timestamps } = await generateSpeech({ ... });
|
|
227
|
-
*
|
|
228
|
-
* const srt = timestampsToCaptions(timestamps ?? []);
|
|
229
|
-
* const vtt = timestampsToCaptions(timestamps ?? [], { format: "vtt" });
|
|
230
|
-
* ```
|
|
231
|
-
*/
|
|
232
146
|
export function timestampsToCaptions(timestamps, options = {}) {
|
|
233
147
|
const format = options.format ?? "srt";
|
|
234
148
|
if (timestamps.length === 0) {
|
|
235
|
-
//
|
|
236
|
-
// requires the `WEBVTT` header per W3C §3.1; emit the minimal valid
|
|
237
|
-
// zero-cue file so callers can still write the output as `.vtt`.
|
|
149
|
+
// WebVTT requires the WEBVTT header per W3C §3.1, even with zero cues.
|
|
238
150
|
return format === "vtt" ? "WEBVTT\n\n" : "";
|
|
239
151
|
}
|
|
240
152
|
const maxLineLength = options.maxLineLength ?? DEFAULT_MAX_LINE_LENGTH;
|
|
@@ -275,9 +187,7 @@ export function timestampsToCaptions(timestamps, options = {}) {
|
|
|
275
187
|
blocks.push(`${index}\n${formatTime(first.start)} --> ${formatTime(last.end)}\n${body}\n`);
|
|
276
188
|
index++;
|
|
277
189
|
}
|
|
278
|
-
//
|
|
279
|
-
// last cue — required by WebVTT's empty-line-separator rule and the SRT
|
|
280
|
-
// convention that strict parsers (e.g. ffmpeg, browser <track>) expect.
|
|
190
|
+
// Trailing blank line required by WebVTT and expected by strict SRT parsers.
|
|
281
191
|
return `${blocks.join("\n")}\n`;
|
|
282
192
|
}
|
|
283
193
|
//# sourceMappingURL=captions.js.map
|
package/dist/captions.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"captions.js","sourceRoot":"","sources":["../src/captions.ts"],"names":[],"mappings":"AAEA,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAC9B,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAC9B,MAAM,aAAa,GAAG,IAAI,CAAC;AAE3B,MAAM,cAAc,GAA6C;IAC/D,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,KAAK,CAAC;CACnB,CAAC;AAEF,
|
|
1
|
+
{"version":3,"file":"captions.js","sourceRoot":"","sources":["../src/captions.ts"],"names":[],"mappings":"AAEA,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAC9B,MAAM,kBAAkB,GAAG,EAAE,CAAC;AAC9B,MAAM,aAAa,GAAG,IAAI,CAAC;AAE3B,MAAM,cAAc,GAA6C;IAC/D,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,GAAG,CAAC;IAChB,CAAC,SAAS,EAAE,KAAK,CAAC;CACnB,CAAC;AAEF,iFAAiF;AACjF,uHAAuH;AACvH,MAAM,aAAa,GAAG,qCAAqC,CAAC;AAE5D,MAAM,cAAc,GAAG,MAAM,CAAC;AAE9B,MAAM,UAAU,mBAAmB,CAAC,IAAY;IAC9C,IAAI,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAC1C,KAAK,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,cAAc,EAAE,CAAC;QACpD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,cAAc,GAA6C;IAC/D,CAAC,IAAI,EAAE,OAAO,CAAC;IACf,CAAC,IAAI,EAAE,MAAM,CAAC;IACd,CAAC,IAAI,EAAE,MAAM,CAAC;CACf,CAAC;AAEF,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,IAAI,GAAG,GAAG,IAAI,CAAC;IACf,KAAK,MAAM,CAAC,OAAO,EAAE,WAAW,CAAC,IAAI,cAAc,EAAE,CAAC;QACpD,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,eAAe,CAAC,OAAe,EAAE,SAAoB;IAC5D,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IACrC,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,aAAa,CAAC,CAAC;IACpD,MAAM,EAAE,GAAG,OAAO,GAAG,aAAa,CAAC;IACnC,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,aAAa,CAAC,CAAC;IACzD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,GAAG,gBAAgB,CAAC,CAAC;IAC1D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CACxB,CAAC,YAAY,GAAG,gBAAgB,CAAC,GAAG,kBAAkB,CACvD,CAAC;IACF,MAAM,IAAI,GAAG,YAAY,GAAG,kBAAkB,CAAC;IAC/C,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,SAAS,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;AAC5J,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO,eAAe,CAAC,OAAO,E
AAE,GAAG,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,OAAe;IAC3C,OAAO,eAAe,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;AACvC,CAAC;AAED,oFAAoF;AACpF,MAAM,mBAAmB,GACvB,2FAA2F,CAAC;AAE9F,0GAA0G;AAC1G,MAAM,UAAU,kBAAkB,CAChC,KAA+B;IAE/B,MAAM,SAAS,GAAsB,EAAE,CAAC;IACxC,IAAI,OAAO,GAAoB,EAAE,CAAC;IAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,IAAI,mBAAmB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YAC/C,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACxB,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,sDAAsD;AACtD,MAAM,gBAAgB,GACpB,iEAAiE,CAAC;AAQpE,SAAS,aAAa,CAAC,GAA6B;IAClD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,IAAI,IAAI,GAAG,EAAE,CAAC;QACvB,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;IAC5B,CAAC;IACD,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnB,KAAK,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,aAAa,CAAC,GAA6B;IAClD,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,CAAC,CAAC;IACX,CAAC;IACD,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACrB,MAAM,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;AACzC,CAAC;AAED,kFAAkF;AAClF,MAAM,UAAU,qBAAqB,CACnC,QAAkC,EAClC,OAAwB;IAExB,MAAM,IAAI,GAAsB,EAAE,CAAC;IACnC,IAAI,OAAO,GAAoB,EAAE,CAAC;IAElC,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,SAAS,GAAG,CAAC,GAAG,OAAO,EAAE,IAAI,CAAC,CAAC;QACrC,MAAM,YAAY,GAAG,aAAa,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,cAAc,CAAC;QACvE,MAAM,eAAe,GAAG,aAAa,CAAC,SAAS,CAAC,GAAG,OAAO,CAAC,gBAAgB,CAAC;QAE5E,IAAI,CAAC,YAAY,IAAI,eAAe,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5D,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACnB,OAAO,GAAG,CAAC,IAAI,CAAC,CAAC;YACjB,SAAS;QACX,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEnB,MAAM,aAAa,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9D,IACE,
aAAa;YACb,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,yBAAyB,EAC/D,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACnB,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAOD,2FAA2F;AAC3F,MAAM,UAAU,WAAW,CACzB,KAAwB,EACxB,OAAoB;IAEpB,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACZ,CAAC;IACD,MAAM,KAAK,GAAa,CAAC,EAAE,CAAC,CAAC;IAC7B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAChC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,IAAI,EAAE,CAAC;QAC/D,IACE,SAAS,CAAC,MAAM,IAAI,OAAO,CAAC,aAAa;YACzC,IAAI,CAAC,MAAM,KAAK,CAAC;YACjB,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,QAAQ,EAChC,CAAC;YACD,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACnB,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAcD,MAAM,uBAAuB,GAAG,EAAE,CAAC;AACnC,MAAM,yBAAyB,GAAG,CAAC,CAAC;AACpC,MAAM,2BAA2B,GAAG,IAAI,CAAC;AACzC,MAAM,qCAAqC,GAAG,EAAE,CAAC;AAEjD,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,UAAoC,EACpC,UAA2B,EAAE;IAE7B,MAAM,MAAM,GAAkB,OAAO,CAAC,MAAM,IAAI,KAAK,CAAC;IAEtD,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC5B,uEAAuE;QACvE,OAAO,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9C,CAAC;IAED,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,uBAAuB,CAAC;IACvE,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,yBAAyB,CAAC;IAC3E,MAAM,cAAc,GAClB,OAAO,CAAC,cAAc,IAAI,aAAa,GAAG,cAAc,CAAC;IAC3D,MAAM,gBAAgB,GACpB,OAAO,CAAC,gBAAgB,IAAI,2BAA2B,CAAC;IAC1D,MAAM,yBAAyB,GAC7B,OAAO,CAAC,yBAAyB,IAAI,qCAAqC,CAAC;IAE7E,MAAM,SAAS,GAAG,kBAAkB,CAAC,UAAU,CAAC,CAAC;IACjD,MAAM,IAAI,GAAsB,EAAE,CAAC;IACnC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,CAAC,IAAI,CACP,GAAG,qBAAqB,CAAC,QAAQ,EAAE;YACjC,cAAc;YACd,gBAAgB;YAChB,yBAAyB;SAC1B,CAAC,CACH,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,KAAK,KAAK
,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,aAAa,CAAC;IACpE,MAAM,UAAU,GAAG,MAAM,KAAK,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE/D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;QACrB,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrB,SAAS;QACX,CAAC;QACD,MAAM,eAAe,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CACrC,UAAU,CAAC,mBAAmB,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CACzC,CAAC;QACF,MAAM,IAAI,GAAG,WAAW,CAAC,eAAe,EAAE;YACxC,aAAa;YACb,QAAQ,EAAE,cAAc;SACzB,CAAC,CAAC;QACH,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QACD,MAAM,CAAC,IAAI,CACT,GAAG,KAAK,KAAK,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,IAAI,IAAI,CAC9E,CAAC;QACF,KAAK,EAAE,CAAC;IACV,CAAC;IAED,6EAA6E;IAC7E,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { ConversationWordTimestamp, WordTimestamp } from "../timestamps.js";
|
|
2
|
+
import type { SilenceGap } from "./silence-detection.js";
|
|
3
|
+
export interface Tier2Result {
|
|
4
|
+
readonly budgetExceeded: boolean;
|
|
5
|
+
readonly mismatches: number;
|
|
6
|
+
readonly timestamps: readonly ConversationWordTimestamp[];
|
|
7
|
+
}
|
|
8
|
+
export declare function tier2TextMatch(args: {
|
|
9
|
+
timestamps: readonly WordTimestamp[];
|
|
10
|
+
turnTexts: readonly string[];
|
|
11
|
+
}): Tier2Result;
|
|
12
|
+
export declare function tier1SilenceAnchored(args: {
|
|
13
|
+
timestamps: readonly WordTimestamp[];
|
|
14
|
+
gaps: readonly SilenceGap[];
|
|
15
|
+
turnTexts: readonly string[];
|
|
16
|
+
}): readonly ConversationWordTimestamp[] | undefined;
|
|
17
|
+
export interface AttributeTimestampsResult {
|
|
18
|
+
readonly timestamps?: readonly ConversationWordTimestamp[];
|
|
19
|
+
readonly warnings: readonly string[];
|
|
20
|
+
}
|
|
21
|
+
export declare function attributeTimestamps(args: {
|
|
22
|
+
timestamps: readonly WordTimestamp[];
|
|
23
|
+
turnTexts: readonly string[];
|
|
24
|
+
silenceGaps: readonly SilenceGap[];
|
|
25
|
+
}): AttributeTimestampsResult;
|
|
26
|
+
//# sourceMappingURL=attribute-timestamps.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"attribute-timestamps.d.ts","sourceRoot":"","sources":["../../src/conversation/attribute-timestamps.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,yBAAyB,EACzB,aAAa,EACd,MAAM,kBAAkB,CAAC;AAE1B,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AA6DzD,MAAM,WAAW,WAAW;IAC1B,QAAQ,CAAC,cAAc,EAAE,OAAO,CAAC;IACjC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,UAAU,EAAE,SAAS,yBAAyB,EAAE,CAAC;CAC3D;AAGD,wBAAgB,cAAc,CAAC,IAAI,EAAE;IACnC,UAAU,EAAE,SAAS,aAAa,EAAE,CAAC;IACrC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;CAC9B,GAAG,WAAW,CA4Id;AAYD,wBAAgB,oBAAoB,CAAC,IAAI,EAAE;IACzC,UAAU,EAAE,SAAS,aAAa,EAAE,CAAC;IACrC,IAAI,EAAE,SAAS,UAAU,EAAE,CAAC;IAC5B,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;CAC9B,GAAG,SAAS,yBAAyB,EAAE,GAAG,SAAS,CAqEnD;AAED,MAAM,WAAW,yBAAyB;IACxC,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,yBAAyB,EAAE,CAAC;IAC3D,QAAQ,CAAC,QAAQ,EAAE,SAAS,MAAM,EAAE,CAAC;CACtC;AAED,wBAAgB,mBAAmB,CAAC,IAAI,EAAE;IACxC,UAAU,EAAE,SAAS,aAAa,EAAE,CAAC;IACrC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IAC7B,WAAW,EAAE,SAAS,UAAU,EAAE,CAAC;CACpC,GAAG,yBAAyB,CAwC5B"}
|