@storyteller-platform/align 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.txt +21 -0
- package/README.md +3 -0
- package/dist/align/align.cjs +525 -0
- package/dist/align/align.d.cts +58 -0
- package/dist/align/align.d.ts +58 -0
- package/dist/align/align.js +458 -0
- package/dist/align/fuzzy.cjs +164 -0
- package/dist/align/fuzzy.d.cts +6 -0
- package/dist/align/fuzzy.d.ts +6 -0
- package/dist/align/fuzzy.js +141 -0
- package/dist/align/getSentenceRanges.cjs +304 -0
- package/dist/align/getSentenceRanges.d.cts +31 -0
- package/dist/align/getSentenceRanges.d.ts +31 -0
- package/dist/align/getSentenceRanges.js +277 -0
- package/dist/align/parse.cjs +63 -0
- package/dist/align/parse.d.cts +30 -0
- package/dist/align/parse.d.ts +30 -0
- package/dist/align/parse.js +51 -0
- package/dist/chunk-BIEQXUOY.js +50 -0
- package/dist/cli/bin.cjs +368 -0
- package/dist/cli/bin.d.cts +1 -0
- package/dist/cli/bin.d.ts +1 -0
- package/dist/cli/bin.js +319 -0
- package/dist/common/ffmpeg.cjs +232 -0
- package/dist/common/ffmpeg.d.cts +33 -0
- package/dist/common/ffmpeg.d.ts +33 -0
- package/dist/common/ffmpeg.js +196 -0
- package/dist/common/logging.cjs +45 -0
- package/dist/common/logging.d.cts +5 -0
- package/dist/common/logging.d.ts +5 -0
- package/dist/common/logging.js +12 -0
- package/dist/common/parse.cjs +73 -0
- package/dist/common/parse.d.cts +28 -0
- package/dist/common/parse.d.ts +28 -0
- package/dist/common/parse.js +56 -0
- package/dist/common/shell.cjs +30 -0
- package/dist/common/shell.d.cts +3 -0
- package/dist/common/shell.d.ts +3 -0
- package/dist/common/shell.js +7 -0
- package/dist/index.cjs +37 -0
- package/dist/index.d.cts +12 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +11 -0
- package/dist/markup/__tests__/markup.test.cjs +464 -0
- package/dist/markup/__tests__/markup.test.d.cts +2 -0
- package/dist/markup/__tests__/markup.test.d.ts +2 -0
- package/dist/markup/__tests__/markup.test.js +441 -0
- package/dist/markup/markup.cjs +316 -0
- package/dist/markup/markup.d.cts +24 -0
- package/dist/markup/markup.d.ts +24 -0
- package/dist/markup/markup.js +254 -0
- package/dist/markup/parse.cjs +55 -0
- package/dist/markup/parse.d.cts +17 -0
- package/dist/markup/parse.d.ts +17 -0
- package/dist/markup/parse.js +43 -0
- package/dist/markup/segmentation.cjs +87 -0
- package/dist/markup/segmentation.d.cts +8 -0
- package/dist/markup/segmentation.d.ts +8 -0
- package/dist/markup/segmentation.js +67 -0
- package/dist/markup/semantics.cjs +79 -0
- package/dist/markup/semantics.d.cts +6 -0
- package/dist/markup/semantics.d.ts +6 -0
- package/dist/markup/semantics.js +53 -0
- package/dist/process/AudioEncoding.cjs +16 -0
- package/dist/process/AudioEncoding.d.cts +8 -0
- package/dist/process/AudioEncoding.d.ts +8 -0
- package/dist/process/AudioEncoding.js +0 -0
- package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
- package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
- package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
- package/dist/process/__tests__/processAudiobook.test.js +209 -0
- package/dist/process/mime.cjs +43 -0
- package/dist/process/mime.d.cts +3 -0
- package/dist/process/mime.d.ts +3 -0
- package/dist/process/mime.js +24 -0
- package/dist/process/parse.cjs +84 -0
- package/dist/process/parse.d.cts +28 -0
- package/dist/process/parse.d.ts +28 -0
- package/dist/process/parse.js +73 -0
- package/dist/process/processAudiobook.cjs +220 -0
- package/dist/process/processAudiobook.d.cts +24 -0
- package/dist/process/processAudiobook.d.ts +24 -0
- package/dist/process/processAudiobook.js +166 -0
- package/dist/process/ranges.cjs +203 -0
- package/dist/process/ranges.d.cts +15 -0
- package/dist/process/ranges.d.ts +15 -0
- package/dist/process/ranges.js +137 -0
- package/dist/transcribe/parse.cjs +149 -0
- package/dist/transcribe/parse.d.cts +114 -0
- package/dist/transcribe/parse.d.ts +114 -0
- package/dist/transcribe/parse.js +143 -0
- package/dist/transcribe/transcribe.cjs +400 -0
- package/dist/transcribe/transcribe.d.cts +41 -0
- package/dist/transcribe/transcribe.d.ts +41 -0
- package/dist/transcribe/transcribe.js +330 -0
- package/package.json +96 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import "../chunk-BIEQXUOY.js";
|
|
2
|
+
/**
 * Find the fuzzy occurrence of `needle` in `haystack` with the smallest
 * Levenshtein distance, considering only candidates within `maxDist` edits.
 * Returns { match, index } for the best candidate, or null when none exists.
 */
function findNearestMatch(needle, haystack, maxDist) {
  let best = null;
  for (const candidate of levenshteinNgram(needle, haystack, maxDist)) {
    if (best === null || candidate.dist < best.dist) {
      best = candidate;
    }
  }
  if (best === null) {
    return null;
  }
  return {
    match: haystack.slice(best.start, best.end),
    index: best.start
  };
}
|
|
14
|
+
/**
 * Build the characters of `str` from index `from - 1` down to `to` as a new
 * string — i.e. reverse the slice str[to, from). With the default arguments
 * the whole string is reversed.
 */
function reverse(str, from = str.length, to = 0) {
  const chars = [];
  for (let i = from - 1; i >= to; i--) {
    chars.push(str[i]);
  }
  return chars.join("");
}
|
|
21
|
+
/**
 * Yield every index at which `subsequence` occurs verbatim in `sequence`,
 * restricted to matches that start at or after `startIndex` and end at or
 * before `endIndex`.
 *
 * Fix: the boundary check used `<`, which wrongly dropped a match whose last
 * character lands exactly on `endIndex` (in particular, a match at the very
 * end of `sequence` when `endIndex` defaults to `sequence.length`). A match
 * occupying [index, index + subsequence.length) fits iff
 * index + subsequence.length <= endIndex.
 */
function* searchExact(subsequence, sequence, startIndex = 0, endIndex = sequence.length) {
  let index = sequence.indexOf(subsequence, startIndex);
  // indexOf returns ascending positions, so the first out-of-bounds match
  // means every later one is out of bounds too — the loop can stop.
  while (index !== -1 && index + subsequence.length <= endIndex) {
    yield index;
    index = sequence.indexOf(subsequence, index + 1);
  }
}
|
|
28
|
+
// Banded Levenshtein "expansion": try to consume all of `subsequence` while
// walking along `sequence`, allowing at most `maxDist` edits. Returns
// { score, index } where `score` is the best (smallest) edit distance found
// with the whole subsequence matched and `index` is the number of sequence
// characters consumed to achieve it; returns null when no alignment stays
// within `maxDist`. Structure matches the `_expand` helper of the Python
// `fuzzysearch` library — presumably a direct port; TODO confirm.
function expand(subsequence, sequence, maxDist) {
  const subsequenceLength = subsequence.length;
  // An empty subsequence matches trivially: zero edits, zero chars consumed.
  if (subsequenceLength === 0) {
    return { index: 0, score: 0 };
  }
  // scores[i] = edit distance of subsequence[0..i) vs. the sequence prefix
  // processed so far; initialized to "all insertions" (0, 1, 2, ...).
  const scores = Array.from({ length: subsequenceLength + 1 }).map((_, i) => i);
  let minScore = subsequenceLength;
  let minScoreIndex = -1;
  // Tightest useful distance bound; shrinks once a better full match is seen.
  let maxGoodScore = maxDist;
  // Band of needle indices whose scores can still come in under maxGoodScore;
  // cells outside the band are never recomputed.
  let newNeedleIndexRangeStart = 0;
  let newNeedleIndexRangeEnd = subsequenceLength - 1;
  for (let sequenceIndex = 0; sequenceIndex < sequence.length; sequenceIndex++) {
    const char = sequence[sequenceIndex];
    const needleIndexRangeStart = newNeedleIndexRangeStart;
    const needleIndexRangeEnd = Math.min(
      subsequenceLength,
      newNeedleIndexRangeEnd + 1
    );
    // `a` and `c` play the roles of the diagonal and current DP cells;
    // the first column corresponds to deleting all sequence chars so far.
    let a = sequenceIndex;
    let c = a + 1;
    if (c <= maxGoodScore) {
      newNeedleIndexRangeStart = 0;
      newNeedleIndexRangeEnd = 0;
    } else {
      // null start marks "no cell in this row is within bound yet".
      newNeedleIndexRangeStart = null;
      newNeedleIndexRangeEnd = -1;
    }
    for (let subsequenceIndex = needleIndexRangeStart; subsequenceIndex < needleIndexRangeEnd; subsequenceIndex++) {
      const b = scores[subsequenceIndex];
      // Classic Levenshtein recurrence: substitute/match, delete, insert.
      c = scores[subsequenceIndex] = Math.min(
        a + (char === subsequence[subsequenceIndex] ? 0 : 1),
        b + 1,
        c + 1
      );
      a = b;
      if (c <= maxGoodScore) {
        if (newNeedleIndexRangeStart === null) {
          newNeedleIndexRangeStart = subsequenceIndex;
        }
        // Extend the band as far as the remaining edit budget could reach.
        newNeedleIndexRangeEnd = Math.max(
          newNeedleIndexRangeEnd,
          subsequenceIndex + 1 + (maxGoodScore - c)
        );
      }
    }
    // Entire row exceeded the bound: no longer sequence prefix can recover.
    if (newNeedleIndexRangeStart === null) {
      break;
    }
    // When the band reached the end of the needle, `c` holds the score for
    // the full subsequence against sequence[0..sequenceIndex]; record a new
    // best and tighten the bound.
    if (needleIndexRangeEnd === subsequenceLength && c <= minScore) {
      minScore = c;
      minScoreIndex = sequenceIndex;
      if (minScore < maxGoodScore) {
        maxGoodScore = minScore;
      }
    }
  }
  // index is "characters consumed" (last matched index + 1).
  return minScore <= maxDist ? { score: minScore, index: minScoreIndex + 1 } : null;
}
|
|
86
|
+
// Generate candidate fuzzy matches of `subsequence` in `sequence` with
// Levenshtein distance <= maxDist using the ngram method: split the needle
// into maxDist+1 pieces — by pigeonhole at least one piece must occur
// *exactly* in any match within maxDist edits — locate each piece exactly,
// then expand rightwards and leftwards around the hit to score the full
// match. Yields { start, end, dist } candidates (possibly overlapping).
function* levenshteinNgram(subsequence, sequence, maxDist) {
  const subsequenceLength = subsequence.length;
  const sequenceLength = sequence.length;
  const ngramLength = Math.round(subsequenceLength / (maxDist + 1));
  // A zero ngram length would loop forever / match everywhere.
  if (ngramLength === 0) {
    throw new Error("The subsequence length must be greater than maxDist");
  }
  for (let ngramStart = 0; ngramStart < subsequenceLength - ngramLength + 1; ngramStart += ngramLength) {
    const ngramEnd = ngramStart + ngramLength;
    // Left expansion runs backwards, so pre-reverse the needle prefix.
    const subsequenceBeforeReversed = reverse(subsequence, ngramStart);
    const subsequenceAfter = subsequence.slice(ngramEnd);
    // Restrict the exact search to positions where a full match within
    // maxDist edits could still fit around the ngram.
    const startIndex = Math.max(0, ngramStart - maxDist);
    const endIndex = Math.min(
      sequenceLength,
      sequenceLength - subsequenceLength + ngramEnd + maxDist
    );
    for (const index of searchExact(
      subsequence.slice(ngramStart, ngramEnd),
      sequence,
      startIndex,
      endIndex
    )) {
      // Expand to the right of the exact hit with the full budget.
      const rightMatch = expand(
        subsequenceAfter,
        sequence.slice(
          index + ngramLength,
          index - ngramStart + subsequenceLength + maxDist
        ),
        maxDist
      );
      if (rightMatch === null) continue;
      const { score: distRight, index: rightExpandSize } = rightMatch;
      // Expand to the left (on reversed text) with whatever budget remains.
      const leftMatch = expand(
        subsequenceBeforeReversed,
        reverse(
          sequence,
          index,
          Math.max(0, index - ngramStart - (maxDist - distRight))
        ),
        maxDist - distRight
      );
      if (leftMatch === null) continue;
      const { score: distLeft, index: leftExpandSize } = leftMatch;
      const start = index - leftExpandSize;
      yield {
        start,
        end: index + ngramLength + rightExpandSize,
        // dist: distLeft + distRight + (start / sequenceLength) * maxDist,
        dist: distLeft + distRight
      };
    }
  }
}
|
|
139
|
+
export {
|
|
140
|
+
findNearestMatch
|
|
141
|
+
};
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"use strict";
// Bundler-generated (esbuild-style) CommonJS interop helpers.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Define enumerable getters on `target` for every entry of `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copy own properties of `from` onto `to` as getters, skipping `except` and
// anything already defined, preserving enumerability.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Tag the exports object as an ES module for ESM/CJS interop.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// Public API of this module (registered lazily via getters).
var getSentenceRanges_exports = {};
__export(getSentenceRanges_exports, {
  expandEmptySentenceRanges: () => expandEmptySentenceRanges,
  findEndTimestamp: () => findEndTimestamp,
  getChapterDuration: () => getChapterDuration,
  getSentenceRanges: () => getSentenceRanges,
  interpolateSentenceRanges: () => interpolateSentenceRanges
});
module.exports = __toCommonJS(getSentenceRanges_exports);
|
|
28
|
+
var import_text_segmentation = require("@echogarden/text-segmentation");
|
|
29
|
+
var import_ffmpeg = require("../common/ffmpeg.cjs");
|
|
30
|
+
var import_fuzzy = require("./fuzzy.cjs");
|
|
31
|
+
/**
 * Segment `text` into sentences and return the pieces in document order,
 * interleaving any inter-sentence text (whitespace etc.) as its own piece so
 * that concatenating the returned array reproduces `text` exactly.
 */
async function getSentencesWithOffsets(text) {
  const segmented = await (0, import_text_segmentation.segmentText)(text);
  const sentenceTexts = segmented.sentences.map((s) => s.text);
  const pieces = [];
  let cursor = 0;
  for (const sentenceText of sentenceTexts) {
    const foundAt = text.indexOf(sentenceText, cursor);
    // Preserve any gap between the previous sentence and this one.
    if (foundAt > cursor) {
      pieces.push(text.slice(cursor, foundAt));
    }
    pieces.push(sentenceText);
    cursor = foundAt + sentenceText.length;
  }
  // Keep any trailing text after the final sentence.
  if (cursor < text.length) {
    pieces.push(text.slice(cursor));
  }
  return pieces;
}
|
|
50
|
+
/**
 * Find the first timeline entry extending past `matchStartIndex` (an offset
 * into the transcript) and return its { start, end, audiofile }, or null
 * when the offset lies beyond the whole timeline.
 */
function findStartTimestamp(matchStartIndex, transcription) {
  for (const timelineEntry of transcription.timeline) {
    if ((timelineEntry.endOffsetUtf16 ?? 0) > matchStartIndex) {
      return {
        start: timelineEntry.startTime,
        end: timelineEntry.endTime,
        audiofile: timelineEntry.audiofile
      };
    }
  }
  return null;
}
|
|
61
|
+
/**
 * Find the last timeline entry starting before `matchEndIndex` (an offset
 * into the transcript) and return its end time, or null when no entry does
 * (or the entry carries no end time).
 */
function findEndTimestamp(matchEndIndex, transcription) {
  const { timeline } = transcription;
  for (let i = timeline.length - 1; i >= 0; i--) {
    if ((timeline[i].startOffsetUtf16 ?? 0) < matchEndIndex) {
      return timeline[i].endTime ?? null;
    }
  }
  return null;
}
|
|
67
|
+
/**
 * Translate a character `offset` into the concatenation of the strings in
 * `window` back to a (piece index, offset-within-piece) pair. Offsets past
 * the end clamp to the last piece, with the leftover carried in `offset`.
 */
function getWindowIndexFromOffset(window, offset) {
  let remaining = offset;
  let index = 0;
  for (; index < window.length - 1; index++) {
    const pieceLength = window[index].length;
    if (remaining < pieceLength) break;
    remaining -= pieceLength;
  }
  return { index, offset: remaining };
}
|
|
75
|
+
/** Collapse every run of whitespace in `input` to a single space. */
function collapseWhitespace(input) {
  return input.replace(/\s+/g, " ");
}
|
|
78
|
+
/**
 * Fuzzily align book `sentences` (from `startSentence` onward) against the
 * transcription text starting at `chapterOffset`, producing one time range
 * per matched sentence plus the transcript offset where matching stopped.
 *
 * The search slides a 10-transcription-sentence window forward; when a book
 * sentence fails to match repeatedly, the window is advanced (and rewound to
 * the last good position after 30 fruitless advances). Consecutive matches
 * are stitched together: a sentence's start also closes the previous
 * sentence's range (or the previous chapter's last range), extending across
 * audio-file boundaries via the previous track's full duration.
 *
 * NOTE: mutates `lastSentenceRange` (its `end`) when the first match of this
 * chapter abuts the previous chapter's final sentence.
 *
 * Fix: the punctuation-stripping regex used to filter out "empty" sentences
 * contained the unescaped range `.-_` inside a character class — that spans
 * code points 0x2E-0x5F and therefore also stripped digits and UPPERCASE
 * letters, wrongly discarding sentences like "CHAPTER 12". The dash is now
 * escaped so only the listed punctuation is removed (the duplicated `^^` is
 * also collapsed; both were literal `^`s, so matching is unchanged by that).
 */
async function getSentenceRanges(startSentence, transcription, sentences, chapterOffset, lastSentenceRange) {
  const sentenceRanges = [];
  const fullTranscriptionText = transcription.transcript;
  const transcriptionText = fullTranscriptionText.slice(chapterOffset);
  const transcriptionSentences = await getSentencesWithOffsets(
    transcriptionText
  ).then((s) => s.map((sentence) => sentence.toLowerCase()));
  let startSentenceEntry = startSentence;
  // Drop sentences that are (almost) nothing but punctuation — they are too
  // short to fuzzy-match reliably. Keep `startSentenceEntry` pointing at the
  // same logical sentence after earlier entries are removed.
  const sentenceEntries = sentences.map((sentence, index) => [index, sentence]).filter(([index, sentence]) => {
    if (sentence.replaceAll(/[.\-_()[\],/?!@#$%^&*`~;:='"<>+ˌˈ]/g, "").length <= 3) {
      if (index < startSentence) startSentenceEntry--;
      return false;
    }
    return true;
  });
  let transcriptionWindowIndex = 0;
  let transcriptionWindowOffset = 0;
  let lastGoodTranscriptionWindow = 0;
  let notFound = 0;
  let sentenceIndex = startSentenceEntry;
  let lastMatchEnd = chapterOffset;
  while (sentenceIndex < sentenceEntries.length) {
    const [sentenceId, sentence] = sentenceEntries[sentenceIndex];
    // Search within a sliding window of 10 transcription sentences.
    const transcriptionWindowList = transcriptionSentences.slice(
      transcriptionWindowIndex,
      transcriptionWindowIndex + 10
    );
    const transcriptionWindow = transcriptionWindowList.join("").slice(transcriptionWindowOffset);
    const query = collapseWhitespace(sentence.trim()).toLowerCase();
    // Allow up to 25% of the query length in edits (at least 1).
    const firstMatch = (0, import_fuzzy.findNearestMatch)(
      query,
      transcriptionWindow,
      Math.max(Math.floor(0.25 * query.length), 1)
    );
    if (!firstMatch) {
      sentenceIndex += 1;
      notFound += 1;
      // After three consecutive misses (or running off the end), advance the
      // window and retry the missed sentences; after 30 advances with no
      // progress, rewind to the last good window and move on.
      if (notFound === 3 || sentenceIndex === sentenceEntries.length) {
        transcriptionWindowIndex += 1;
        if (transcriptionWindowIndex === lastGoodTranscriptionWindow + 30) {
          transcriptionWindowIndex = lastGoodTranscriptionWindow;
          notFound = 0;
          continue;
        }
        sentenceIndex -= notFound;
        notFound = 0;
      }
      continue;
    }
    // Convert the window-relative match position back to a transcript offset.
    const transcriptionOffset = transcriptionSentences.slice(0, transcriptionWindowIndex).join("").length;
    const startResult = findStartTimestamp(
      firstMatch.index + transcriptionOffset + transcriptionWindowOffset + chapterOffset,
      transcription
    );
    if (!startResult) {
      sentenceIndex += 1;
      continue;
    }
    let start = startResult.start;
    const audiofile = startResult.audiofile;
    const end = findEndTimestamp(
      firstMatch.index + firstMatch.match.length + transcriptionOffset + transcriptionWindowOffset + chapterOffset,
      transcription
    ) ?? startResult.end;
    if (sentenceRanges.length > 0) {
      // Stitch consecutive sentences: the current start closes the previous
      // range; across an audio-file boundary the previous range runs to the
      // end of its track and the current one restarts at 0.
      const previousSentenceRange = sentenceRanges[sentenceRanges.length - 1];
      const previousAudiofile = previousSentenceRange.audiofile;
      if (audiofile === previousAudiofile) {
        if (previousSentenceRange.id === sentenceId - 1) {
          previousSentenceRange.end = start;
        }
      } else {
        if (previousSentenceRange.id === sentenceId - 1) {
          const lastTrackDuration = await (0, import_ffmpeg.getTrackDuration)(previousAudiofile);
          previousSentenceRange.end = lastTrackDuration;
          start = 0;
        }
      }
    } else if (lastSentenceRange !== null) {
      // First match of this chapter: close out the previous chapter's final
      // sentence the same way.
      if (audiofile === lastSentenceRange.audiofile) {
        if (sentenceId === 0) {
          lastSentenceRange.end = start;
        }
      } else {
        const lastTrackDuration = await (0, import_ffmpeg.getTrackDuration)(
          lastSentenceRange.audiofile
        );
        lastSentenceRange.end = lastTrackDuration;
        if (sentenceId === 0) {
          start = 0;
        }
      }
    } else if (sentenceId === 0) {
      // Very first sentence of the book starts at the top of its track.
      start = 0;
    }
    sentenceRanges.push({
      id: sentenceId,
      start,
      end,
      audiofile
    });
    notFound = 0;
    lastMatchEnd = firstMatch.index + firstMatch.match.length + transcriptionOffset + transcriptionWindowOffset + chapterOffset;
    // Slide the window up to where this match ended.
    const windowIndexResult = getWindowIndexFromOffset(
      transcriptionWindowList,
      firstMatch.index + firstMatch.match.length + transcriptionWindowOffset
    );
    transcriptionWindowIndex += windowIndexResult.index;
    transcriptionWindowOffset = windowIndexResult.offset;
    lastGoodTranscriptionWindow = transcriptionWindowIndex;
    sentenceIndex += 1;
  }
  return {
    sentenceRanges,
    transcriptionOffset: lastMatchEnd
  };
}
|
|
195
|
+
/**
 * Given a range at the tail of one audio file (`trailing`) and a range at
 * the head of the next (`leading`), return the larger of the two unclaimed
 * gaps as [gapDuration, audiofile]: either the audio after `trailing` ends,
 * or the audio before `leading` starts.
 */
async function getLargestGap(trailing, leading) {
  const trailingDuration = await (0, import_ffmpeg.getTrackDuration)(trailing.audiofile);
  const trailingGap = trailingDuration - trailing.end;
  const leadingGap = leading.start;
  return trailingGap > leadingGap
    ? [trailingGap, trailing.audiofile]
    : [leadingGap, leading.audiofile];
}
|
|
201
|
+
// Fill in time ranges for sentences that failed to fuzzy-match: any gap in
// sentence ids between two matched ranges is split evenly across the
// unclaimed audio between them. Handles three cases: missing sentences
// before the first match (using `lastSentenceRange`, the final range of the
// previous chapter, which may be mutated), gaps within one audio file, and
// gaps that cross an audio-file boundary (where the larger silent gap on
// either side of the boundary is used).
// NOTE(review): mutates its inputs (`lastSentenceRange.end`, and
// `sentenceRange.start`/`.end` of elements of `sentenceRanges`) — callers
// appear to rely on this; confirm before restructuring.
async function interpolateSentenceRanges(sentenceRanges, lastSentenceRange) {
  const interpolated = [];
  const [first, ...rest] = sentenceRanges;
  // Nothing matched: nothing to interpolate.
  if (!first) return interpolated;
  if (first.id !== 0) {
    // Sentences 0..first.id-1 are missing; spread them over the audio before
    // `first` (or after the previous chapter's last range, if same file).
    const count = first.id;
    const crossesAudioBoundary = !lastSentenceRange || first.audiofile !== lastSentenceRange.audiofile;
    let diff = crossesAudioBoundary ? first.start : first.start - lastSentenceRange.end;
    if (!crossesAudioBoundary && diff <= 0) {
      // Overlapping/contiguous ranges leave no gap: carve out a fixed 0.25
      // by pulling the previous chapter's end back.
      diff = 0.25;
      lastSentenceRange.end = first.start - diff;
    }
    const interpolatedLength = diff / count;
    const start = crossesAudioBoundary ? 0 : lastSentenceRange.end;
    for (let i = 0; i < count; i++) {
      interpolated.push({
        id: i,
        start: start + interpolatedLength * i,
        end: start + interpolatedLength * (i + 1),
        audiofile: first.audiofile
      });
    }
    interpolated.push(first);
  } else {
    rest.unshift(first);
  }
  for (const sentenceRange of rest) {
    if (interpolated.length === 0) {
      interpolated.push(sentenceRange);
      continue;
    }
    const lastSentenceRange2 = interpolated[interpolated.length - 1];
    // Number of sentence ids missing between the previous range and this one.
    const count = sentenceRange.id - lastSentenceRange2.id - 1;
    if (count === 0) {
      interpolated.push(sentenceRange);
      continue;
    }
    const crossesAudioBoundary = sentenceRange.audiofile !== lastSentenceRange2.audiofile;
    let [diff, audiofile] = crossesAudioBoundary ? await getLargestGap(lastSentenceRange2, sentenceRange) : [sentenceRange.start - lastSentenceRange2.end, sentenceRange.audiofile];
    if (diff <= 0) {
      if (crossesAudioBoundary) {
        // No usable gap on either side: steal the front of this range
        // (half of it if it is short, else 0.25) for the missing sentences.
        // NOTE(review): `sentenceRange.start = diff` sets the start to the
        // stolen amount (interpolated ranges occupy [0, diff)) — looks
        // intentional but verify against the matching case above.
        const rangeLength = sentenceRange.end - sentenceRange.start;
        diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
        sentenceRange.start = diff;
      } else {
        diff = 0.25;
        lastSentenceRange2.end = sentenceRange.start - diff;
      }
    }
    const interpolatedLength = diff / count;
    const start = crossesAudioBoundary ? 0 : lastSentenceRange2.end;
    for (let i = 0; i < count; i++) {
      interpolated.push({
        id: lastSentenceRange2.id + i + 1,
        start: start + interpolatedLength * i,
        end: start + interpolatedLength * (i + 1),
        audiofile
      });
    }
    interpolated.push(sentenceRange);
  }
  return interpolated;
}
|
|
264
|
+
/**
 * Whisper sometimes yields words with no time information, or with equal
 * start and end timestamps; EpubCheck rejects such zero-length clips. This
 * nudges each range so it starts no earlier than its predecessor's end
 * (when both are in the same audio file) and is at least 1ms long. The first
 * range is passed through unchanged; untouched ranges keep their identity.
 */
function expandEmptySentenceRanges(sentenceRanges) {
  const result = [];
  for (const range of sentenceRanges) {
    const previous = result[result.length - 1];
    if (previous === undefined) {
      result.push(range);
      continue;
    }
    let current = range;
    // Push the start forward past an overlapping predecessor in the same file.
    if (previous.audiofile === current.audiofile && previous.end > current.start) {
      current = { ...current, start: previous.end };
    }
    // Guarantee a strictly positive duration.
    if (current.end <= current.start) {
      current = { ...current, end: current.start + 1e-3 };
    }
    result.push(current);
  }
  return result;
}
|
|
278
|
+
/**
 * Total duration covered by `sentenceRanges`: for each contiguous run of
 * ranges in the same audio file, add (last range's end - first range's
 * start), assuming ranges are in playback order.
 */
function getChapterDuration(sentenceRanges) {
  let total = 0;
  let currentFile = null;
  let spanStart = 0;
  let spanEnd = 0;
  for (const range of sentenceRanges) {
    if (range.audiofile !== currentFile) {
      // Close out the previous file's span (contributes 0 before the first).
      total += spanEnd - spanStart;
      spanStart = range.start;
      currentFile = range.audiofile;
    }
    spanEnd = range.end;
  }
  return total + (spanEnd - spanStart);
}
|
|
297
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
298
|
+
0 && (module.exports = {
|
|
299
|
+
expandEmptySentenceRanges,
|
|
300
|
+
findEndTimestamp,
|
|
301
|
+
getChapterDuration,
|
|
302
|
+
getSentenceRanges,
|
|
303
|
+
interpolateSentenceRanges
|
|
304
|
+
});
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { TimelineEntry } from '@storyteller-platform/ghost-story';
|
|
2
|
+
|
|
3
|
+
/** A transcription timeline entry annotated with the audio file it was transcribed from. */
type StorytellerTimelineEntry = TimelineEntry & {
    audiofile: string;
};
/** A complete transcription: the flat transcript text plus its timed entries. */
type StorytellerTranscription = {
    transcript: string;
    timeline: StorytellerTimelineEntry[];
};
/** The span of audio (start/end offsets within `audiofile`) matched to sentence `id`. */
type SentenceRange = {
    id: number;
    start: number;
    end: number;
    audiofile: string;
};
/**
 * Return the end time of the last timeline entry that starts before
 * `matchEndIndex` (an offset into the transcript), or null when none does.
 */
declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): number | null;
/**
 * Fuzzily align `sentences` (beginning at `startSentence`) against the
 * transcription starting at `chapterOffset`, producing a time range per
 * matched sentence and the transcript offset where matching stopped.
 * `lastSentenceRange` — the previous chapter's final range — may be mutated
 * to stitch the chapters together.
 */
declare function getSentenceRanges(startSentence: number, transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, lastSentenceRange: SentenceRange | null): Promise<{
    sentenceRanges: SentenceRange[];
    transcriptionOffset: number;
}>;
/**
 * Fill gaps in the matched ranges: sentence ids missing between matches are
 * assigned evenly divided slices of the surrounding unclaimed audio.
 */
declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
/**
 * Whisper sometimes provides words with no time information,
 * or start and end timestamps that are equal. EpubCheck complains
 * about these, so we nudge them out a bit to make sure that they're
 * not truly equal.
 */
declare function expandEmptySentenceRanges(sentenceRanges: SentenceRange[]): SentenceRange[];
/** Total duration covered by the ranges, summed per contiguous same-audio-file span. */
declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;

export { type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges };
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { TimelineEntry } from '@storyteller-platform/ghost-story';
|
|
2
|
+
|
|
3
|
+
/** A transcription timeline entry annotated with the audio file it was transcribed from. */
type StorytellerTimelineEntry = TimelineEntry & {
    audiofile: string;
};
/** A complete transcription: the flat transcript text plus its timed entries. */
type StorytellerTranscription = {
    transcript: string;
    timeline: StorytellerTimelineEntry[];
};
/** The span of audio (start/end offsets within `audiofile`) matched to sentence `id`. */
type SentenceRange = {
    id: number;
    start: number;
    end: number;
    audiofile: string;
};
/**
 * Return the end time of the last timeline entry that starts before
 * `matchEndIndex` (an offset into the transcript), or null when none does.
 */
declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): number | null;
/**
 * Fuzzily align `sentences` (beginning at `startSentence`) against the
 * transcription starting at `chapterOffset`, producing a time range per
 * matched sentence and the transcript offset where matching stopped.
 * `lastSentenceRange` — the previous chapter's final range — may be mutated
 * to stitch the chapters together.
 */
declare function getSentenceRanges(startSentence: number, transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, lastSentenceRange: SentenceRange | null): Promise<{
    sentenceRanges: SentenceRange[];
    transcriptionOffset: number;
}>;
/**
 * Fill gaps in the matched ranges: sentence ids missing between matches are
 * assigned evenly divided slices of the surrounding unclaimed audio.
 */
declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
/**
 * Whisper sometimes provides words with no time information,
 * or start and end timestamps that are equal. EpubCheck complains
 * about these, so we nudge them out a bit to make sure that they're
 * not truly equal.
 */
declare function expandEmptySentenceRanges(sentenceRanges: SentenceRange[]): SentenceRange[];
/** Total duration covered by the ranges, summed per contiguous same-audio-file span. */
declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;

export { type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges };
|