@remotion/media 4.0.355 → 4.0.356
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio/audio-for-rendering.js +37 -3
- package/dist/audio/audio.js +1 -1
- package/dist/audio/props.d.ts +15 -0
- package/dist/audio-extraction/audio-iterator.d.ts +3 -2
- package/dist/audio-extraction/audio-iterator.js +13 -2
- package/dist/audio-extraction/audio-manager.d.ts +6 -5
- package/dist/audio-extraction/audio-manager.js +5 -3
- package/dist/audio-extraction/extract-audio.d.ts +3 -2
- package/dist/audio-extraction/extract-audio.js +11 -4
- package/dist/caches.d.ts +6 -5
- package/dist/convert-audiodata/apply-tonefrequency.d.ts +2 -0
- package/dist/convert-audiodata/apply-tonefrequency.js +44 -0
- package/dist/convert-audiodata/wsola.d.ts +13 -0
- package/dist/convert-audiodata/wsola.js +197 -0
- package/dist/esm/index.mjs +1297 -140
- package/dist/extract-frame-and-audio.d.ts +3 -2
- package/dist/extract-frame-and-audio.js +60 -26
- package/dist/get-sink-weak.d.ts +2 -7
- package/dist/index.d.ts +12 -3
- package/dist/index.js +11 -2
- package/dist/video/media-player.d.ts +70 -0
- package/dist/video/media-player.js +419 -0
- package/dist/video/props.d.ts +36 -18
- package/dist/video/timeout-utils.d.ts +2 -0
- package/dist/video/timeout-utils.js +18 -0
- package/dist/video/video-for-preview.d.ts +17 -0
- package/dist/video/video-for-preview.js +218 -0
- package/dist/video/video-for-rendering.d.ts +23 -2
- package/dist/video/video-for-rendering.js +47 -4
- package/dist/video/video.js +13 -14
- package/dist/video-extraction/extract-frame-via-broadcast-channel.d.ts +3 -2
- package/dist/video-extraction/extract-frame-via-broadcast-channel.js +53 -4
- package/dist/video-extraction/extract-frame.d.ts +2 -1
- package/dist/video-extraction/extract-frame.js +9 -3
- package/dist/video-extraction/get-frames-since-keyframe.d.ts +12 -7
- package/dist/video-extraction/get-frames-since-keyframe.js +70 -17
- package/package.json +3 -3
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
import { jsx as _jsx } from "react/jsx-runtime";
|
|
1
2
|
import { useContext, useLayoutEffect, useState } from 'react';
|
|
2
|
-
import { cancelRender, Internals, useCurrentFrame, useDelayRender, useRemotionEnvironment, } from 'remotion';
|
|
3
|
+
import { Audio, cancelRender, Internals, useCurrentFrame, useDelayRender, useRemotionEnvironment, } from 'remotion';
|
|
3
4
|
import { applyVolume } from '../convert-audiodata/apply-volume';
|
|
4
5
|
import { frameForVolumeProp } from '../looped-frame';
|
|
5
6
|
import { extractFrameViaBroadcastChannel } from '../video-extraction/extract-frame-via-broadcast-channel';
|
|
6
|
-
export const AudioForRendering = ({ volume: volumeProp, playbackRate, src, muted, loopVolumeCurveBehavior, delayRenderRetries, delayRenderTimeoutInMilliseconds, logLevel = window.remotion_logLevel, loop, }) => {
|
|
7
|
+
export const AudioForRendering = ({ volume: volumeProp, playbackRate, src, muted, loopVolumeCurveBehavior, delayRenderRetries, delayRenderTimeoutInMilliseconds, logLevel = window.remotion_logLevel, loop, fallbackHtml5AudioProps, audioStreamIndex, showInTimeline, style, name, disallowFallbackToHtml5Audio, }) => {
|
|
7
8
|
const frame = useCurrentFrame();
|
|
8
9
|
const absoluteFrame = Internals.useTimelinePosition();
|
|
9
10
|
const videoConfig = Internals.useUnsafeVideoConfig();
|
|
@@ -19,6 +20,7 @@ export const AudioForRendering = ({ volume: volumeProp, playbackRate, src, muted
|
|
|
19
20
|
}
|
|
20
21
|
const { fps } = videoConfig;
|
|
21
22
|
const { delayRender, continueRender } = useDelayRender();
|
|
23
|
+
const [replaceWithHtml5Audio, setReplaceWithHtml5Audio] = useState(false);
|
|
22
24
|
useLayoutEffect(() => {
|
|
23
25
|
const actualFps = playbackRate ? fps / playbackRate : fps;
|
|
24
26
|
const timestamp = frame / actualFps;
|
|
@@ -46,8 +48,34 @@ export const AudioForRendering = ({ volume: volumeProp, playbackRate, src, muted
|
|
|
46
48
|
includeVideo: false,
|
|
47
49
|
isClientSideRendering: environment.isClientSideRendering,
|
|
48
50
|
loop: loop ?? false,
|
|
51
|
+
audioStreamIndex: audioStreamIndex ?? 0,
|
|
49
52
|
})
|
|
50
|
-
.then((
|
|
53
|
+
.then((result) => {
|
|
54
|
+
if (result === 'unknown-container-format') {
|
|
55
|
+
if (disallowFallbackToHtml5Audio) {
|
|
56
|
+
cancelRender(new Error(`Unknown container format ${src}, and 'disallowFallbackToHtml5Audio' was set. Failing the render.`));
|
|
57
|
+
}
|
|
58
|
+
Internals.Log.warn({ logLevel, tag: '@remotion/media' }, `Unknown container format for ${src} (Supported formats: https://www.remotion.dev/docs/mediabunny/formats), falling back to <Audio>`);
|
|
59
|
+
setReplaceWithHtml5Audio(true);
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
if (result === 'cannot-decode') {
|
|
63
|
+
if (disallowFallbackToHtml5Audio) {
|
|
64
|
+
cancelRender(new Error(`Cannot decode ${src}, and 'disallowFallbackToHtml5Audio' was set. Failing the render.`));
|
|
65
|
+
}
|
|
66
|
+
Internals.Log.warn({ logLevel, tag: '@remotion/media' }, `Cannot decode ${src}, falling back to <Audio>`);
|
|
67
|
+
setReplaceWithHtml5Audio(true);
|
|
68
|
+
return;
|
|
69
|
+
}
|
|
70
|
+
if (result === 'network-error') {
|
|
71
|
+
if (disallowFallbackToHtml5Audio) {
|
|
72
|
+
cancelRender(new Error(`Cannot decode ${src}, and 'disallowFallbackToHtml5Audio' was set. Failing the render.`));
|
|
73
|
+
}
|
|
74
|
+
Internals.Log.warn({ logLevel, tag: '@remotion/media' }, `Network error fetching ${src}, falling back to <Audio>`);
|
|
75
|
+
setReplaceWithHtml5Audio(true);
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
const { audio, durationInSeconds: assetDurationInSeconds } = result;
|
|
51
79
|
const volumePropsFrame = frameForVolumeProp({
|
|
52
80
|
behavior: loopVolumeCurveBehavior ?? 'repeat',
|
|
53
81
|
loop: loop ?? false,
|
|
@@ -90,6 +118,7 @@ export const AudioForRendering = ({ volume: volumeProp, playbackRate, src, muted
|
|
|
90
118
|
delayRender,
|
|
91
119
|
delayRenderRetries,
|
|
92
120
|
delayRenderTimeoutInMilliseconds,
|
|
121
|
+
disallowFallbackToHtml5Audio,
|
|
93
122
|
environment.isClientSideRendering,
|
|
94
123
|
fps,
|
|
95
124
|
frame,
|
|
@@ -104,6 +133,11 @@ export const AudioForRendering = ({ volume: volumeProp, playbackRate, src, muted
|
|
|
104
133
|
startsAt,
|
|
105
134
|
unregisterRenderAsset,
|
|
106
135
|
volumeProp,
|
|
136
|
+
audioStreamIndex,
|
|
107
137
|
]);
|
|
138
|
+
if (replaceWithHtml5Audio) {
|
|
139
|
+
// TODO: Loop and other props
|
|
140
|
+
return (_jsx(Audio, { src: src, playbackRate: playbackRate, muted: muted, loop: loop, volume: volumeProp, delayRenderRetries: delayRenderRetries, delayRenderTimeoutInMilliseconds: delayRenderTimeoutInMilliseconds, style: style, loopVolumeCurveBehavior: loopVolumeCurveBehavior, audioStreamIndex: audioStreamIndex, useWebAudioApi: fallbackHtml5AudioProps?.useWebAudioApi, onError: fallbackHtml5AudioProps?.onError, toneFrequency: fallbackHtml5AudioProps?.toneFrequency, acceptableTimeShiftInSeconds: fallbackHtml5AudioProps?.acceptableTimeShiftInSeconds, name: name, showInTimeline: showInTimeline }));
|
|
141
|
+
}
|
|
108
142
|
return null;
|
|
109
143
|
};
|
package/dist/audio/audio.js
CHANGED
|
@@ -52,7 +52,7 @@ export const Audio = (props) => {
|
|
|
52
52
|
typeof trimAfterValue !== 'undefined') {
|
|
53
53
|
return (_jsx(Sequence, { layout: "none", from: 0 - (trimBeforeValue ?? 0), showInTimeline: false, durationInFrames: trimAfterValue, name: name, children: _jsx(Audio, { pauseWhenBuffering: pauseWhenBuffering ?? false, ...otherProps }) }));
|
|
54
54
|
}
|
|
55
|
-
validateMediaProps(props, '
|
|
55
|
+
validateMediaProps({ playbackRate: props.playbackRate, volume: props.volume }, 'Audio');
|
|
56
56
|
if (environment.isRendering) {
|
|
57
57
|
return _jsx(AudioForRendering, { ...otherProps });
|
|
58
58
|
}
|
package/dist/audio/props.d.ts
CHANGED
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import type { LogLevel, LoopVolumeCurveBehavior, VolumeProp } from 'remotion';
|
|
2
|
+
export type FallbackHtml5AudioProps = {
|
|
3
|
+
offthreadAudioProps: {
|
|
4
|
+
playbackRate?: number;
|
|
5
|
+
muted?: boolean;
|
|
6
|
+
loop?: boolean;
|
|
7
|
+
};
|
|
8
|
+
};
|
|
2
9
|
export type AudioProps = {
|
|
3
10
|
src: string;
|
|
4
11
|
trimBefore?: number;
|
|
@@ -19,5 +26,13 @@ export type AudioProps = {
|
|
|
19
26
|
stack?: string;
|
|
20
27
|
logLevel?: LogLevel;
|
|
21
28
|
loop?: boolean;
|
|
29
|
+
audioStreamIndex?: number;
|
|
22
30
|
_remotionInternalNativeLoopPassed?: boolean;
|
|
31
|
+
fallbackHtml5AudioProps?: {
|
|
32
|
+
onError?: (err: Error) => void;
|
|
33
|
+
useWebAudioApi?: boolean;
|
|
34
|
+
toneFrequency?: number;
|
|
35
|
+
acceptableTimeShiftInSeconds?: number;
|
|
36
|
+
};
|
|
37
|
+
disallowFallbackToHtml5Audio?: boolean;
|
|
23
38
|
};
|
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
import type { AudioSample, AudioSampleSink } from 'mediabunny';
|
|
2
2
|
import { type LogLevel } from 'remotion';
|
|
3
3
|
import type { RememberActualMatroskaTimestamps } from '../video-extraction/remember-actual-matroska-timestamps';
|
|
4
|
-
export declare const makeAudioIterator: ({ audioSampleSink, isMatroska, startTimestamp, src, actualMatroskaTimestamps, }: {
|
|
4
|
+
export declare const makeAudioIterator: ({ audioSampleSink, isMatroska, startTimestamp, src, actualMatroskaTimestamps, logLevel, }: {
|
|
5
5
|
audioSampleSink: AudioSampleSink;
|
|
6
6
|
isMatroska: boolean;
|
|
7
7
|
startTimestamp: number;
|
|
8
8
|
src: string;
|
|
9
9
|
actualMatroskaTimestamps: RememberActualMatroskaTimestamps;
|
|
10
|
+
logLevel: LogLevel;
|
|
10
11
|
}) => {
|
|
11
12
|
src: string;
|
|
12
13
|
getSamples: (ts: number, dur: number) => Promise<AudioSample[]>;
|
|
13
14
|
waitForCompletion: () => Promise<boolean>;
|
|
14
15
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
15
|
-
logOpenFrames: (
|
|
16
|
+
logOpenFrames: () => void;
|
|
16
17
|
getCacheStats: () => {
|
|
17
18
|
count: number;
|
|
18
19
|
size: number;
|
|
@@ -7,11 +7,22 @@ import { makeAudioCache } from './audio-cache';
|
|
|
7
7
|
// The worst case seems to be FLAC files with a 65'535 sample window, which would be 1486.0ms at 44.1Khz.
|
|
8
8
|
// So let's set a threshold of 1.5 seconds.
|
|
9
9
|
const extraThreshold = 1.5;
|
|
10
|
-
|
|
10
|
+
// Sources for which the Matroska warning has already been logged.
// A Set is used instead of a plain object so that an arbitrary `src` string
// (for example "constructor" or "toString") can never collide with a key
// inherited from Object.prototype and silently suppress its warning.
const warned = new Set();
/**
 * Logs the "audio must be read from the beginning" warning for a Matroska
 * source, at most once per distinct `src` for the lifetime of this module.
 *
 * @param {string} src Source identifier; used both as the dedupe key and in the message.
 * @param logLevel Log level forwarded unchanged to `Internals.Log.warn`.
 */
const warnAboutMatroskaOnce = (src, logLevel) => {
    if (warned.has(src)) {
        return;
    }
    warned.add(src);
    Internals.Log.warn({ logLevel, tag: '@remotion/media' }, `Audio from ${src} will need to be read from the beginning. https://www.remotion.dev/docs/media/support#matroska-limitation`);
};
|
|
18
|
+
export const makeAudioIterator = ({ audioSampleSink, isMatroska, startTimestamp, src, actualMatroskaTimestamps, logLevel, }) => {
|
|
11
19
|
// Matroska timestamps are not accurate unless we start from the beginning
|
|
12
20
|
// So for matroska, we need to decode all samples :(
|
|
13
21
|
// https://github.com/Vanilagy/mediabunny/issues/105
|
|
14
22
|
const sampleIterator = audioSampleSink.samples(isMatroska ? 0 : Math.max(0, startTimestamp - extraThreshold));
|
|
23
|
+
if (isMatroska) {
|
|
24
|
+
warnAboutMatroskaOnce(src, logLevel);
|
|
25
|
+
}
|
|
15
26
|
let fullDuration = null;
|
|
16
27
|
const cache = makeAudioCache();
|
|
17
28
|
let lastUsed = Date.now();
|
|
@@ -61,7 +72,7 @@ export const makeAudioIterator = ({ audioSampleSink, isMatroska, startTimestamp,
|
|
|
61
72
|
}
|
|
62
73
|
return samples;
|
|
63
74
|
};
|
|
64
|
-
const logOpenFrames = (
|
|
75
|
+
const logOpenFrames = () => {
|
|
65
76
|
Internals.Log.verbose({ logLevel, tag: '@remotion/media' }, 'Open audio samples for src', src, cache
|
|
66
77
|
.getOpenTimestamps()
|
|
67
78
|
.map((t) => t.toFixed(3))
|
|
@@ -2,18 +2,19 @@ import type { AudioSampleSink } from 'mediabunny';
|
|
|
2
2
|
import type { LogLevel } from 'remotion';
|
|
3
3
|
import type { RememberActualMatroskaTimestamps } from '../video-extraction/remember-actual-matroska-timestamps';
|
|
4
4
|
export declare const makeAudioManager: () => {
|
|
5
|
-
makeIterator: ({ timeInSeconds, src, audioSampleSink, isMatroska, actualMatroskaTimestamps, }: {
|
|
5
|
+
makeIterator: ({ timeInSeconds, src, audioSampleSink, isMatroska, actualMatroskaTimestamps, logLevel, }: {
|
|
6
6
|
timeInSeconds: number;
|
|
7
7
|
src: string;
|
|
8
8
|
audioSampleSink: AudioSampleSink;
|
|
9
9
|
isMatroska: boolean;
|
|
10
10
|
actualMatroskaTimestamps: RememberActualMatroskaTimestamps;
|
|
11
|
+
logLevel: LogLevel;
|
|
11
12
|
}) => {
|
|
12
13
|
src: string;
|
|
13
14
|
getSamples: (ts: number, dur: number) => Promise<import("mediabunny").AudioSample[]>;
|
|
14
15
|
waitForCompletion: () => Promise<boolean>;
|
|
15
16
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
16
|
-
logOpenFrames: (
|
|
17
|
+
logOpenFrames: () => void;
|
|
17
18
|
getCacheStats: () => {
|
|
18
19
|
count: number;
|
|
19
20
|
size: number;
|
|
@@ -34,7 +35,7 @@ export declare const makeAudioManager: () => {
|
|
|
34
35
|
getSamples: (ts: number, dur: number) => Promise<import("mediabunny").AudioSample[]>;
|
|
35
36
|
waitForCompletion: () => Promise<boolean>;
|
|
36
37
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
37
|
-
logOpenFrames: (
|
|
38
|
+
logOpenFrames: () => void;
|
|
38
39
|
getCacheStats: () => {
|
|
39
40
|
count: number;
|
|
40
41
|
size: number;
|
|
@@ -52,7 +53,7 @@ export declare const makeAudioManager: () => {
|
|
|
52
53
|
getSamples: (ts: number, dur: number) => Promise<import("mediabunny").AudioSample[]>;
|
|
53
54
|
waitForCompletion: () => Promise<boolean>;
|
|
54
55
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
55
|
-
logOpenFrames: (
|
|
56
|
+
logOpenFrames: () => void;
|
|
56
57
|
getCacheStats: () => {
|
|
57
58
|
count: number;
|
|
58
59
|
size: number;
|
|
@@ -61,5 +62,5 @@ export declare const makeAudioManager: () => {
|
|
|
61
62
|
prepareForDeletion: () => Promise<void>;
|
|
62
63
|
startTimestamp: number;
|
|
63
64
|
} | null;
|
|
64
|
-
logOpenFrames: (
|
|
65
|
+
logOpenFrames: () => void;
|
|
65
66
|
};
|
|
@@ -2,13 +2,14 @@ import { getMaxVideoCacheSize, getTotalCacheStats } from '../caches';
|
|
|
2
2
|
import { makeAudioIterator } from './audio-iterator';
|
|
3
3
|
export const makeAudioManager = () => {
|
|
4
4
|
const iterators = [];
|
|
5
|
-
const makeIterator = ({ timeInSeconds, src, audioSampleSink, isMatroska, actualMatroskaTimestamps, }) => {
|
|
5
|
+
const makeIterator = ({ timeInSeconds, src, audioSampleSink, isMatroska, actualMatroskaTimestamps, logLevel, }) => {
|
|
6
6
|
const iterator = makeAudioIterator({
|
|
7
7
|
audioSampleSink,
|
|
8
8
|
isMatroska,
|
|
9
9
|
startTimestamp: timeInSeconds,
|
|
10
10
|
src,
|
|
11
11
|
actualMatroskaTimestamps,
|
|
12
|
+
logLevel,
|
|
12
13
|
});
|
|
13
14
|
iterators.push(iterator);
|
|
14
15
|
return iterator;
|
|
@@ -57,6 +58,7 @@ export const makeAudioManager = () => {
|
|
|
57
58
|
audioSampleSink,
|
|
58
59
|
isMatroska,
|
|
59
60
|
actualMatroskaTimestamps,
|
|
61
|
+
logLevel,
|
|
60
62
|
});
|
|
61
63
|
};
|
|
62
64
|
const getCacheStats = () => {
|
|
@@ -69,9 +71,9 @@ export const makeAudioManager = () => {
|
|
|
69
71
|
}
|
|
70
72
|
return { count: totalCount, totalSize };
|
|
71
73
|
};
|
|
72
|
-
const logOpenFrames = (
|
|
74
|
+
const logOpenFrames = () => {
|
|
73
75
|
for (const iterator of iterators) {
|
|
74
|
-
iterator.logOpenFrames(
|
|
76
|
+
iterator.logOpenFrames();
|
|
75
77
|
}
|
|
76
78
|
};
|
|
77
79
|
return {
|
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
import { type LogLevel } from 'remotion';
|
|
2
2
|
import type { PcmS16AudioData } from '../convert-audiodata/convert-audiodata';
|
|
3
|
-
export declare const extractAudio: ({ src, timeInSeconds: unloopedTimeInSeconds, durationInSeconds, logLevel, loop, playbackRate, }: {
|
|
3
|
+
export declare const extractAudio: ({ src, timeInSeconds: unloopedTimeInSeconds, durationInSeconds, logLevel, loop, playbackRate, audioStreamIndex, }: {
|
|
4
4
|
src: string;
|
|
5
5
|
timeInSeconds: number;
|
|
6
6
|
durationInSeconds: number;
|
|
7
7
|
logLevel: LogLevel;
|
|
8
8
|
loop: boolean;
|
|
9
9
|
playbackRate: number;
|
|
10
|
+
audioStreamIndex: number;
|
|
10
11
|
}) => Promise<{
|
|
11
12
|
data: PcmS16AudioData | null;
|
|
12
13
|
durationInSeconds: number | null;
|
|
13
|
-
}>;
|
|
14
|
+
} | "cannot-decode" | "unknown-container-format">;
|
|
@@ -3,15 +3,22 @@ import { combineAudioDataAndClosePrevious } from '../convert-audiodata/combine-a
|
|
|
3
3
|
import { convertAudioData } from '../convert-audiodata/convert-audiodata';
|
|
4
4
|
import { TARGET_NUMBER_OF_CHANNELS, TARGET_SAMPLE_RATE, } from '../convert-audiodata/resample-audiodata';
|
|
5
5
|
import { getSinkWeak } from '../get-sink-weak';
|
|
6
|
-
export const extractAudio = async ({ src, timeInSeconds: unloopedTimeInSeconds, durationInSeconds, logLevel, loop, playbackRate, }) => {
|
|
7
|
-
const {
|
|
6
|
+
export const extractAudio = async ({ src, timeInSeconds: unloopedTimeInSeconds, durationInSeconds, logLevel, loop, playbackRate, audioStreamIndex, }) => {
|
|
7
|
+
const { getAudio, actualMatroskaTimestamps, isMatroska, getDuration } = await getSinkWeak(src, logLevel);
|
|
8
8
|
let duration = null;
|
|
9
9
|
if (loop) {
|
|
10
10
|
duration = await getDuration();
|
|
11
11
|
}
|
|
12
|
-
|
|
12
|
+
const audio = await getAudio(audioStreamIndex);
|
|
13
|
+
if (audio === 'no-audio-track') {
|
|
13
14
|
return { data: null, durationInSeconds: null };
|
|
14
15
|
}
|
|
16
|
+
if (audio === 'cannot-decode-audio') {
|
|
17
|
+
return 'cannot-decode';
|
|
18
|
+
}
|
|
19
|
+
if (audio === 'unknown-container-format') {
|
|
20
|
+
return 'unknown-container-format';
|
|
21
|
+
}
|
|
15
22
|
const timeInSeconds = loop
|
|
16
23
|
? unloopedTimeInSeconds % duration
|
|
17
24
|
: unloopedTimeInSeconds;
|
|
@@ -24,7 +31,7 @@ export const extractAudio = async ({ src, timeInSeconds: unloopedTimeInSeconds,
|
|
|
24
31
|
logLevel,
|
|
25
32
|
});
|
|
26
33
|
const samples = await sampleIterator.getSamples(timeInSeconds, durationInSeconds);
|
|
27
|
-
audioManager.logOpenFrames(
|
|
34
|
+
audioManager.logOpenFrames();
|
|
28
35
|
const audioDataArray = [];
|
|
29
36
|
for (let i = 0; i < samples.length; i++) {
|
|
30
37
|
const sample = samples[i];
|
package/dist/caches.d.ts
CHANGED
|
@@ -20,18 +20,19 @@ export declare const keyframeManager: {
|
|
|
20
20
|
clearAll: () => Promise<void>;
|
|
21
21
|
};
|
|
22
22
|
export declare const audioManager: {
|
|
23
|
-
makeIterator: ({ timeInSeconds, src, audioSampleSink, isMatroska, actualMatroskaTimestamps, }: {
|
|
23
|
+
makeIterator: ({ timeInSeconds, src, audioSampleSink, isMatroska, actualMatroskaTimestamps, logLevel, }: {
|
|
24
24
|
timeInSeconds: number;
|
|
25
25
|
src: string;
|
|
26
26
|
audioSampleSink: import("mediabunny").AudioSampleSink;
|
|
27
27
|
isMatroska: boolean;
|
|
28
28
|
actualMatroskaTimestamps: import("./video-extraction/remember-actual-matroska-timestamps").RememberActualMatroskaTimestamps;
|
|
29
|
+
logLevel: LogLevel;
|
|
29
30
|
}) => {
|
|
30
31
|
src: string;
|
|
31
32
|
getSamples: (ts: number, dur: number) => Promise<import("mediabunny").AudioSample[]>;
|
|
32
33
|
waitForCompletion: () => Promise<boolean>;
|
|
33
34
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
34
|
-
logOpenFrames: (
|
|
35
|
+
logOpenFrames: () => void;
|
|
35
36
|
getCacheStats: () => {
|
|
36
37
|
count: number;
|
|
37
38
|
size: number;
|
|
@@ -52,7 +53,7 @@ export declare const audioManager: {
|
|
|
52
53
|
getSamples: (ts: number, dur: number) => Promise<import("mediabunny").AudioSample[]>;
|
|
53
54
|
waitForCompletion: () => Promise<boolean>;
|
|
54
55
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
55
|
-
logOpenFrames: (
|
|
56
|
+
logOpenFrames: () => void;
|
|
56
57
|
getCacheStats: () => {
|
|
57
58
|
count: number;
|
|
58
59
|
size: number;
|
|
@@ -70,7 +71,7 @@ export declare const audioManager: {
|
|
|
70
71
|
getSamples: (ts: number, dur: number) => Promise<import("mediabunny").AudioSample[]>;
|
|
71
72
|
waitForCompletion: () => Promise<boolean>;
|
|
72
73
|
canSatisfyRequestedTime: (timestamp: number) => boolean;
|
|
73
|
-
logOpenFrames: (
|
|
74
|
+
logOpenFrames: () => void;
|
|
74
75
|
getCacheStats: () => {
|
|
75
76
|
count: number;
|
|
76
77
|
size: number;
|
|
@@ -79,7 +80,7 @@ export declare const audioManager: {
|
|
|
79
80
|
prepareForDeletion: () => Promise<void>;
|
|
80
81
|
startTimestamp: number;
|
|
81
82
|
} | null;
|
|
82
|
-
logOpenFrames: (
|
|
83
|
+
logOpenFrames: () => void;
|
|
83
84
|
};
|
|
84
85
|
export declare const getTotalCacheStats: () => Promise<{
|
|
85
86
|
count: number;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { FORMAT } from './convert-audiodata';
|
|
2
|
+
import { resampleAudioData, TARGET_SAMPLE_RATE } from './resample-audiodata';
|
|
3
|
+
import { wsolaInt16Interleaved } from './wsola';
|
|
4
|
+
/**
 * Applies a tone-frequency (pitch) factor to a chunk of interleaved 16-bit PCM audio.
 *
 * The procedure follows the FFmpeg filter chain
 * `asetrate=${DEFAULT_SAMPLE_RATE}*${toneFrequency},aresample=${DEFAULT_SAMPLE_RATE},atempo=1/${toneFrequency}`,
 * which consists of 3 steps:
 *   1. Change the assumed sample rate (reinterpret the samples, no data change).
 *   2. Resample to TARGET_SAMPLE_RATE.
 *   3. Time-stretch by `toneFrequency` so the duration changed by steps 1-2 is compensated.
 *
 * @param audioData Object with `data`, `numberOfChannels`, `numberOfFrames`, `sampleRate`
 *   and `timestamp`; `data` is handed to `resampleAudioData` as `sourceChannels`.
 * @param toneFrequency Pitch factor; must be a positive finite number
 *   (`wsolaInt16Interleaved` throws otherwise).
 * @returns A new audio data object at TARGET_SAMPLE_RATE whose `numberOfFrames`
 *   is the per-channel frame count of the returned `data`.
 */
export const applyToneFrequency = (audioData, toneFrequency) => {
    // Step 1: pretend the samples were recorded at a different rate.
    const step1 = {
        ...audioData,
        sampleRate: audioData.sampleRate * toneFrequency,
    };
    // Step 2: resample from the reinterpreted rate to the target rate.
    const newNumberOfFrames = Math.round(audioData.numberOfFrames * (TARGET_SAMPLE_RATE / step1.sampleRate));
    const step2Data = new Int16Array(newNumberOfFrames * audioData.numberOfChannels);
    // Ratio of source frames to destination frames, passed through to the resampler.
    const chunkSize = audioData.numberOfFrames / newNumberOfFrames;
    resampleAudioData({
        srcNumberOfChannels: step1.numberOfChannels,
        sourceChannels: step1.data,
        destination: step2Data,
        targetFrames: newNumberOfFrames,
        chunkSize,
    });
    const step2AudioData = {
        data: step2Data,
        format: FORMAT,
        numberOfChannels: step1.numberOfChannels,
        numberOfFrames: newNumberOfFrames,
        sampleRate: TARGET_SAMPLE_RATE,
        timestamp: audioData.timestamp,
    };
    // Step 3: time-stretch back by `toneFrequency` while keeping the new pitch.
    const step3Data = wsolaInt16Interleaved(step2AudioData.data, step2AudioData.numberOfChannels, toneFrequency);
    // `numberOfFrames` counts frames per channel (as in step 2, where the buffer
    // holds numberOfFrames * numberOfChannels samples), so derive it from the
    // stretched buffer instead of reporting the total interleaved sample count.
    const framesPerChannel = step3Data.length / step2AudioData.numberOfChannels;
    return {
        data: step3Data,
        numberOfChannels: step2AudioData.numberOfChannels,
        numberOfFrames: framesPerChannel,
        sampleRate: TARGET_SAMPLE_RATE,
        timestamp: audioData.timestamp,
    };
};
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WSOLA time-scale modification for interleaved Int16 PCM (multi-channel).
|
|
3
|
+
* - Preserves pitch approximately while changing tempo by factor f.
|
|
4
|
+
* - Works for N interleaved channels.
|
|
5
|
+
* - Mitigates head/tail fade-out via overlap-weight normalization and boundary reinforcement.
|
|
6
|
+
*
|
|
7
|
+
* @param input Interleaved Int16 PCM (e.g., LRLRLR... for stereo)
|
|
8
|
+
* @param channels Number of channels (>=1)
|
|
9
|
+
* @param f Time-stretch factor applied to the duration: >1 = longer/slower, <1 = shorter/faster
|
|
10
|
+
* (No tuning options are exposed; the function takes exactly the three parameters above.)
|
|
11
|
+
* @returns Interleaved Int16Array with length ≈ round(input.length * f)
|
|
12
|
+
*/
|
|
13
|
+
export declare function wsolaInt16Interleaved(input: Int16Array, channels: number, f: number): Int16Array;
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
 * Rounds a sample to the nearest integer and saturates it to the signed 16-bit range.
 */
function clamp16(x) {
    const y = Math.round(x);
    return y < -32768 ? -32768 : y > 32767 ? 32767 : y;
}
/**
 * WSOLA time-scale modification for interleaved Int16 PCM (multi-channel).
 * - Preserves pitch approximately while changing the duration by factor f.
 * - Works for N interleaved channels.
 * - Mitigates head/tail fade-out via overlap-weight normalization and boundary reinforcement.
 * - Frame, overlap and search sizes are fixed internally and derived from an
 *   assumed sample rate of 48000 Hz.
 *
 * @param input Interleaved Int16 PCM (e.g., LRLRLR... for stereo)
 * @param channels Number of channels (>=1)
 * @param f Time-stretch factor applied to the duration: >1 = longer/slower, <1 = shorter/faster
 * @returns Interleaved Int16Array with max(1, round(framesPerChannel * f)) frames per channel
 *   (an empty array for empty input)
 * @throws Error if f is not a positive finite number, channels is not a positive
 *   integer, or input.length is not a multiple of channels
 */
export function wsolaInt16Interleaved(input, channels, f) {
    if (!Number.isFinite(f) || f <= 0) {
        throw new Error('f must be a positive finite number');
    }
    if (!Number.isInteger(channels) || channels <= 0) {
        throw new Error('channels must be a positive integer');
    }
    const n = input.length;
    if (n === 0) {
        return new Int16Array(0);
    }
    if (n % channels !== 0) {
        throw new Error('input length must be a multiple of channels');
    }
    // Fixed parameters
    const sampleRate = 48000;
    const frameMs = 30; // 20–40 ms typical
    const overlapRatio = 0.5;
    const searchMs = 15; // +/- 15 ms local search
    const headReinf = 3;
    const tailReinf = 3;
    // Work per-channel
    const samplesPerChannel = (n / channels) | 0;
    // Frame and hop sizing
    const frameSize = Math.max(128, Math.floor((sampleRate * frameMs) / 1000));
    const overlap = Math.floor(frameSize * overlapRatio);
    const anaHop = Math.max(1, frameSize - overlap);
    const synHop = Math.max(1, Math.round(anaHop * f));
    // Search radius in samples
    const searchRadius = Math.max(0, Math.floor((sampleRate * searchMs) / 1000));
    // Hann window
    const win = new Float32Array(frameSize);
    for (let i = 0; i < frameSize; i++) {
        const x = (Math.PI * 2 * i) / (frameSize - 1);
        win[i] = 0.5 * (1 - Math.cos(x));
    }
    // Estimate output length per channel and allocate with extra headroom
    const estFrames = Math.max(1, Math.ceil(Math.max(0, samplesPerChannel - frameSize) / anaHop) + 1);
    const estLen = Math.max(0, frameSize + synHop * (estFrames - 1));
    const extraHead = frameSize * (headReinf + 1);
    const extraTail = frameSize * (tailReinf + 2);
    const outLenAlloc = estLen + searchRadius + extraHead + extraTail;
    const out = Array.from({ length: channels }, () => new Float32Array(outLenAlloc));
    const outWeight = new Float32Array(outLenAlloc);
    // Temporary buffers
    const chanFrames = Array.from({ length: channels }, () => new Float32Array(frameSize));
    const guideFrame = new Float32Array(frameSize);
    // Copies one frame of a single channel into dst, zero-padding outside the input
    function readChannelFrame(chan, start, dst) {
        let srcIndex = start * channels + chan;
        for (let i = 0; i < frameSize; i++) {
            const pos = start + i;
            dst[i] = pos >= 0 && pos < samplesPerChannel ? input[srcIndex] : 0;
            srcIndex += channels;
        }
    }
    // Fills guideFrame with the mean of all channels, zero-padding outside the input
    function readGuideFrame(start) {
        for (let i = 0; i < frameSize; i++) {
            const pos = start + i;
            if (pos >= 0 && pos < samplesPerChannel) {
                let sum = 0;
                const base = pos * channels;
                for (let c = 0; c < channels; c++) {
                    sum += input[base + c];
                }
                guideFrame[i] = sum / channels;
            }
            else {
                guideFrame[i] = 0;
            }
        }
    }
    // Find best local alignment around outPos using normalized cross-correlation
    function bestAlignment(outPoss) {
        let bestShift = 0;
        let bestScore = -Infinity;
        for (let shift = -searchRadius; shift <= searchRadius; shift++) {
            const pos = outPoss + shift - overlap;
            let score = 0;
            let normA = 0;
            let normB = 0;
            for (let i = 0; i < overlap; i++) {
                const idx = pos + i;
                const outVal = idx >= 0 && idx < outLenAlloc ? out[0][idx] : 0; // channel 0 proxy
                const frmVal = guideFrame[i];
                score += outVal * frmVal;
                normA += outVal * outVal;
                normB += frmVal * frmVal;
            }
            // Substitute a tiny energy for silent segments to avoid dividing by zero
            const denom = Math.sqrt((normA || 1e-9) * (normB || 1e-9));
            const corr = score / denom;
            if (corr > bestScore) {
                bestScore = corr;
                bestShift = shift;
            }
        }
        return bestShift;
    }
    // Overlap-add a frame for all channels at writeStart with windowing
    function olaAllChannels(writeStart) {
        for (let c = 0; c < channels; c++) {
            for (let i = 0; i < frameSize; i++) {
                const idx = writeStart + i;
                if (idx >= 0 && idx < outLenAlloc) {
                    const w = win[i];
                    out[c][idx] += chanFrames[c][i] * w;
                    if (c === 0) {
                        outWeight[idx] += w; // track weights once
                    }
                }
            }
        }
    }
    // 1) Seed: place the first frame at t=0
    readGuideFrame(0);
    for (let c = 0; c < channels; c++) {
        readChannelFrame(c, 0, chanFrames[c]);
    }
    olaAllChannels(0);
    // 2) Head reinforcement: place extra frames whose writeStart <= 0
    for (let h = 0; h < headReinf; h++) {
        // Use the first few analysis positions to strengthen the early region
        const headIn = Math.min(anaHop * h, Math.max(0, samplesPerChannel - frameSize));
        readGuideFrame(headIn);
        for (let c = 0; c < channels; c++) {
            readChannelFrame(c, headIn, chanFrames[c]);
        }
        // Align around outPos=0 so we bias writeStart near/before 0
        const shift = bestAlignment(0);
        const writeStart = shift - overlap; // likely negative; out-of-range samples are skipped on write
        olaAllChannels(writeStart);
    }
    // 3) Main WSOLA loop
    let inPos = anaHop; // next analysis position (we already seeded at 0)
    let outPos = synHop; // next synthesis position
    while (inPos < samplesPerChannel - 1) {
        readGuideFrame(inPos);
        for (let c = 0; c < channels; c++) {
            readChannelFrame(c, inPos, chanFrames[c]);
        }
        const shift = bestAlignment(outPos);
        const writeStart = outPos + shift - overlap;
        olaAllChannels(writeStart);
        inPos += anaHop;
        outPos += synHop;
        // Safety: if we're very close to capacity, break to handle tail separately
        if (outPos > outLenAlloc - (frameSize + searchRadius + 8)) {
            break;
        }
    }
    // 4) Tail reinforcement: ensure the end gets full coverage
    // Place a few extra frames around the last outPos using the last available input frames.
    for (let t = 0; t < tailReinf; t++) {
        const tailIn = Math.max(0, Math.min(samplesPerChannel - frameSize, inPos - anaHop * t));
        readGuideFrame(tailIn);
        for (let c = 0; c < channels; c++) {
            readChannelFrame(c, tailIn, chanFrames[c]);
        }
        const shift = bestAlignment(outPos);
        const writeStart = outPos + shift - overlap;
        olaAllChannels(writeStart);
        outPos += synHop;
    }
    // 5) Normalize by accumulated weights BEFORE trimming
    for (let i = 0; i < outLenAlloc; i++) {
        const w = outWeight[i];
        if (w > 1e-9) {
            const inv = 1 / w;
            for (let c = 0; c < channels; c++) {
                out[c][i] *= inv;
            }
        }
        else {
            for (let c = 0; c < channels; c++) {
                out[c][i] = 0;
            }
        }
    }
    // 6) Produce final interleaved Int16Array with round(samplesPerChannel * f) frames per channel
    const targetPerChan = Math.max(1, Math.round(samplesPerChannel * f));
    const targetTotal = targetPerChan * channels;
    const result = new Int16Array(targetTotal);
    // Interleave and clamp/round; positions beyond the work buffer are silence
    for (let i = 0; i < targetPerChan; i++) {
        for (let c = 0; c < channels; c++) {
            const v = i < out[c].length ? out[c][i] : 0;
            result[i * channels + c] = clamp16(v);
        }
    }
    return result;
}
|