@storyteller-platform/align 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/LICENSE.txt +21 -0
  2. package/README.md +3 -0
  3. package/dist/align/align.cjs +525 -0
  4. package/dist/align/align.d.cts +58 -0
  5. package/dist/align/align.d.ts +58 -0
  6. package/dist/align/align.js +458 -0
  7. package/dist/align/fuzzy.cjs +164 -0
  8. package/dist/align/fuzzy.d.cts +6 -0
  9. package/dist/align/fuzzy.d.ts +6 -0
  10. package/dist/align/fuzzy.js +141 -0
  11. package/dist/align/getSentenceRanges.cjs +304 -0
  12. package/dist/align/getSentenceRanges.d.cts +31 -0
  13. package/dist/align/getSentenceRanges.d.ts +31 -0
  14. package/dist/align/getSentenceRanges.js +277 -0
  15. package/dist/align/parse.cjs +63 -0
  16. package/dist/align/parse.d.cts +30 -0
  17. package/dist/align/parse.d.ts +30 -0
  18. package/dist/align/parse.js +51 -0
  19. package/dist/chunk-BIEQXUOY.js +50 -0
  20. package/dist/cli/bin.cjs +368 -0
  21. package/dist/cli/bin.d.cts +1 -0
  22. package/dist/cli/bin.d.ts +1 -0
  23. package/dist/cli/bin.js +319 -0
  24. package/dist/common/ffmpeg.cjs +232 -0
  25. package/dist/common/ffmpeg.d.cts +33 -0
  26. package/dist/common/ffmpeg.d.ts +33 -0
  27. package/dist/common/ffmpeg.js +196 -0
  28. package/dist/common/logging.cjs +45 -0
  29. package/dist/common/logging.d.cts +5 -0
  30. package/dist/common/logging.d.ts +5 -0
  31. package/dist/common/logging.js +12 -0
  32. package/dist/common/parse.cjs +73 -0
  33. package/dist/common/parse.d.cts +28 -0
  34. package/dist/common/parse.d.ts +28 -0
  35. package/dist/common/parse.js +56 -0
  36. package/dist/common/shell.cjs +30 -0
  37. package/dist/common/shell.d.cts +3 -0
  38. package/dist/common/shell.d.ts +3 -0
  39. package/dist/common/shell.js +7 -0
  40. package/dist/index.cjs +37 -0
  41. package/dist/index.d.cts +12 -0
  42. package/dist/index.d.ts +12 -0
  43. package/dist/index.js +11 -0
  44. package/dist/markup/__tests__/markup.test.cjs +464 -0
  45. package/dist/markup/__tests__/markup.test.d.cts +2 -0
  46. package/dist/markup/__tests__/markup.test.d.ts +2 -0
  47. package/dist/markup/__tests__/markup.test.js +441 -0
  48. package/dist/markup/markup.cjs +316 -0
  49. package/dist/markup/markup.d.cts +24 -0
  50. package/dist/markup/markup.d.ts +24 -0
  51. package/dist/markup/markup.js +254 -0
  52. package/dist/markup/parse.cjs +55 -0
  53. package/dist/markup/parse.d.cts +17 -0
  54. package/dist/markup/parse.d.ts +17 -0
  55. package/dist/markup/parse.js +43 -0
  56. package/dist/markup/segmentation.cjs +87 -0
  57. package/dist/markup/segmentation.d.cts +8 -0
  58. package/dist/markup/segmentation.d.ts +8 -0
  59. package/dist/markup/segmentation.js +67 -0
  60. package/dist/markup/semantics.cjs +79 -0
  61. package/dist/markup/semantics.d.cts +6 -0
  62. package/dist/markup/semantics.d.ts +6 -0
  63. package/dist/markup/semantics.js +53 -0
  64. package/dist/process/AudioEncoding.cjs +16 -0
  65. package/dist/process/AudioEncoding.d.cts +8 -0
  66. package/dist/process/AudioEncoding.d.ts +8 -0
  67. package/dist/process/AudioEncoding.js +0 -0
  68. package/dist/process/__tests__/processAudiobook.test.cjs +232 -0
  69. package/dist/process/__tests__/processAudiobook.test.d.cts +2 -0
  70. package/dist/process/__tests__/processAudiobook.test.d.ts +2 -0
  71. package/dist/process/__tests__/processAudiobook.test.js +209 -0
  72. package/dist/process/mime.cjs +43 -0
  73. package/dist/process/mime.d.cts +3 -0
  74. package/dist/process/mime.d.ts +3 -0
  75. package/dist/process/mime.js +24 -0
  76. package/dist/process/parse.cjs +84 -0
  77. package/dist/process/parse.d.cts +28 -0
  78. package/dist/process/parse.d.ts +28 -0
  79. package/dist/process/parse.js +73 -0
  80. package/dist/process/processAudiobook.cjs +220 -0
  81. package/dist/process/processAudiobook.d.cts +24 -0
  82. package/dist/process/processAudiobook.d.ts +24 -0
  83. package/dist/process/processAudiobook.js +166 -0
  84. package/dist/process/ranges.cjs +203 -0
  85. package/dist/process/ranges.d.cts +15 -0
  86. package/dist/process/ranges.d.ts +15 -0
  87. package/dist/process/ranges.js +137 -0
  88. package/dist/transcribe/parse.cjs +149 -0
  89. package/dist/transcribe/parse.d.cts +114 -0
  90. package/dist/transcribe/parse.d.ts +114 -0
  91. package/dist/transcribe/parse.js +143 -0
  92. package/dist/transcribe/transcribe.cjs +400 -0
  93. package/dist/transcribe/transcribe.d.cts +41 -0
  94. package/dist/transcribe/transcribe.d.ts +41 -0
  95. package/dist/transcribe/transcribe.js +330 -0
  96. package/package.json +96 -0
@@ -0,0 +1,141 @@
1
+ import "../chunk-BIEQXUOY.js";
2
/**
 * Returns the approximate occurrence of `needle` inside `haystack` that has
 * the smallest edit distance among the candidates produced by
 * `levenshteinNgram`. When several candidates share the smallest distance,
 * the first one produced wins.
 *
 * @param needle   Text to look for.
 * @param haystack Text to search in.
 * @param maxDist  Maximum edit distance passed through to `levenshteinNgram`.
 * @returns `{ match, index }` where `match` is the matched slice of
 *          `haystack` and `index` is its start offset, or `null` when no
 *          candidate was produced.
 * @throws Error propagated from `levenshteinNgram` when the needle is too
 *         short for the requested `maxDist`.
 */
function findNearestMatch(needle, haystack, maxDist) {
  let best = null;
  for (const candidate of levenshteinNgram(needle, haystack, maxDist)) {
    // Strict comparison: an equally good later candidate never replaces an earlier one.
    if (best === null || candidate.dist < best.dist) {
      best = candidate;
    }
  }
  if (best === null) {
    return null;
  }
  return {
    match: haystack.slice(best.start, best.end),
    index: best.start
  };
}
14
/**
 * Returns the UTF-16 code units of `str` in the half-open range `[to, from)`
 * in reverse order.
 *
 * Bounds are clamped to the string: a `from` beyond `str.length` or a
 * negative `to` used to index outside the string and append the literal text
 * "undefined" for every out-of-range position.
 *
 * @param str  String to read from.
 * @param from Exclusive upper bound; reversal starts at `from - 1`. Defaults to `str.length`.
 * @param to   Inclusive lower bound; reversal stops at `to`. Defaults to `0`.
 * @returns The reversed slice (empty when the clamped range is empty).
 */
function reverse(str, from = str.length, to = 0) {
  const upper = Math.min(from, str.length);
  const lower = Math.max(to, 0);
  let reversed = "";
  for (let i = upper - 1; i >= lower; i--) {
    reversed += str[i];
  }
  return reversed;
}
21
/**
 * Lazily yields every start index at which `subsequence` occurs verbatim in
 * `sequence`, restricted to occurrences that begin at or after `startIndex`
 * and end at or before `endIndex`.
 *
 * The end bound is inclusive of a match that finishes exactly at `endIndex`.
 * The previous strict `<` comparison dropped such matches, so an occurrence at
 * the very end of `sequence` was never reported.
 *
 * @param subsequence Exact text to look for.
 * @param sequence    Text to search in.
 * @param startIndex  First index at which a match may begin. Defaults to `0`.
 * @param endIndex    Exclusive end of the searchable region. Defaults to `sequence.length`.
 */
function* searchExact(subsequence, sequence, startIndex = 0, endIndex = sequence.length) {
  // Largest start index whose match still fits inside the searchable region.
  const lastValidStart = Math.min(endIndex, sequence.length) - subsequence.length;
  let index = sequence.indexOf(subsequence, startIndex);
  while (index !== -1 && index <= lastValidStart) {
    yield index;
    const next = sequence.indexOf(subsequence, index + 1);
    // `indexOf` returns -1 when exhausted; for an empty needle it can also
    // return the same position again, which must not loop forever.
    if (next <= index) return;
    index = next;
  }
}
28
/**
 * Finds the prefix of `sequence` with the smallest edit distance to the whole
 * of `subsequence`, giving up on distances above `maxDist`.
 *
 * The expansion is anchored at index 0 of `sequence`. One score row is kept
 * and updated per sequence character; only the part of the row that can still
 * yield a score within the current bound is revisited.
 *
 * Fix: the score row is now initialised to `1..n` (the cost of skipping the
 * first `i + 1` characters of `subsequence` before any sequence character is
 * consumed). It was initialised to `0..n`, which let the first character be
 * skipped for free and under-reported distances.
 *
 * @param subsequence Text that must be matched in full.
 * @param sequence    Text whose prefixes are candidates.
 * @param maxDist     Largest acceptable edit distance.
 * @returns `{ score, index }` where `score` is the best distance found and
 *          `index` is the length of the matching prefix of `sequence`, or
 *          `null` when no prefix is within `maxDist`.
 */
function expand(subsequence, sequence, maxDist) {
  const subsequenceLength = subsequence.length;
  if (subsequenceLength === 0) {
    return { index: 0, score: 0 };
  }
  // scores[i]: distance between subsequence[0..i] and the sequence prefix
  // consumed so far (initially the empty prefix, hence i + 1 deletions).
  const scores = Array.from({ length: subsequenceLength }, (_, i) => i + 1);
  let minScore = subsequenceLength;
  let minScoreIndex = -1;
  let maxGoodScore = maxDist;
  // Range of subsequence indices worth recomputing on the next row;
  // a start of -1 means "no cell is within the bound".
  let newNeedleIndexRangeStart = 0;
  let newNeedleIndexRangeEnd = subsequenceLength - 1;
  for (let sequenceIndex = 0; sequenceIndex < sequence.length; sequenceIndex++) {
    const char = sequence[sequenceIndex];
    const needleIndexRangeStart = newNeedleIndexRangeStart;
    const needleIndexRangeEnd = Math.min(
      subsequenceLength,
      newNeedleIndexRangeEnd + 1
    );
    // a: diagonal neighbour, c: left neighbour, b (below): upper neighbour.
    let a = sequenceIndex;
    let c = a + 1;
    if (c <= maxGoodScore) {
      newNeedleIndexRangeStart = 0;
      newNeedleIndexRangeEnd = 0;
    } else {
      newNeedleIndexRangeStart = -1;
      newNeedleIndexRangeEnd = -1;
    }
    for (let subsequenceIndex = needleIndexRangeStart; subsequenceIndex < needleIndexRangeEnd; subsequenceIndex++) {
      const b = scores[subsequenceIndex];
      c = scores[subsequenceIndex] = Math.min(
        a + (char === subsequence[subsequenceIndex] ? 0 : 1),
        b + 1,
        c + 1
      );
      a = b;
      if (c <= maxGoodScore) {
        if (newNeedleIndexRangeStart === -1) {
          newNeedleIndexRangeStart = subsequenceIndex;
        }
        newNeedleIndexRangeEnd = Math.max(
          newNeedleIndexRangeEnd,
          subsequenceIndex + 1 + (maxGoodScore - c)
        );
      }
    }
    // No cell is within the bound any more: a better expansion is impossible.
    if (newNeedleIndexRangeStart === -1) {
      break;
    }
    // Only rows that reached the end of the subsequence describe a full match.
    if (needleIndexRangeEnd === subsequenceLength && c <= minScore) {
      minScore = c;
      minScoreIndex = sequenceIndex;
      if (minScore < maxGoodScore) {
        maxGoodScore = minScore;
      }
    }
  }
  return minScore <= maxDist ? { score: minScore, index: minScoreIndex + 1 } : null;
}
86
/**
 * Lazily yields approximate occurrences of `subsequence` in `sequence`.
 *
 * The needle is cut into consecutive n-grams. Every exact occurrence of an
 * n-gram (found with `searchExact`) is grown to the right and then to the left
 * with `expand`; the left expansion only gets the distance budget the right
 * expansion left over.
 *
 * NOTE(review): the n-gram length is computed with `Math.round`, which can
 * leave fewer than `maxDist + 1` n-grams in the needle; confirm that rounding
 * rather than flooring is intended.
 *
 * @param subsequence Text to look for.
 * @param sequence    Text to search in.
 * @param maxDist     Largest acceptable edit distance.
 * @yields `{ start, end, dist }` describing `sequence.slice(start, end)` and
 *         its distance to `subsequence`.
 * @throws Error when the computed n-gram length is 0.
 */
function* levenshteinNgram(subsequence, sequence, maxDist) {
  const needleLength = subsequence.length;
  const haystackLength = sequence.length;
  const ngramLength = Math.round(needleLength / (maxDist + 1));
  if (ngramLength === 0) {
    throw new Error("The subsequence length must be greater than maxDist");
  }
  const lastNgramStart = needleLength - ngramLength;
  for (let ngramStart = 0; ngramStart <= lastNgramStart; ngramStart += ngramLength) {
    const ngramEnd = ngramStart + ngramLength;
    const ngram = subsequence.slice(ngramStart, ngramEnd);
    // Needle text on either side of the n-gram; the left side is reversed so
    // that `expand` can walk away from the n-gram in both directions.
    const prefixReversed = reverse(subsequence, ngramStart);
    const suffix = subsequence.slice(ngramEnd);
    const searchFrom = Math.max(0, ngramStart - maxDist);
    const searchUntil = Math.min(
      haystackLength,
      haystackLength - needleLength + ngramEnd + maxDist
    );
    for (const index of searchExact(ngram, sequence, searchFrom, searchUntil)) {
      const rightMatch = expand(
        suffix,
        sequence.slice(
          index + ngramLength,
          index - ngramStart + needleLength + maxDist
        ),
        maxDist
      );
      if (rightMatch === null) continue;
      const remainingBudget = maxDist - rightMatch.score;
      const leftMatch = expand(
        prefixReversed,
        reverse(
          sequence,
          index,
          Math.max(0, index - ngramStart - remainingBudget)
        ),
        remainingBudget
      );
      if (leftMatch === null) continue;
      yield {
        start: index - leftMatch.index,
        end: index + ngramLength + rightMatch.index,
        dist: leftMatch.score + rightMatch.score
      };
    }
  }
}
139
+ export {
140
+ findNearestMatch
141
+ };
@@ -0,0 +1,304 @@
1
+ "use strict";
2
// CommonJS interop helpers. NOTE(review): these look bundler-generated; edit
// the original source rather than this file if that is the case.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every key of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own properties of `from` onto `to` as getters, skipping keys that
// `to` already owns and the key named by `except`. Enumerability follows the
// source property's descriptor.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Builds a fresh object flagged with `__esModule: true` that mirrors `mod`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// Export table of this module; each entry is a getter so the function
// declarations further down are resolved lazily.
var getSentenceRanges_exports = {};
__export(getSentenceRanges_exports, {
  expandEmptySentenceRanges: () => expandEmptySentenceRanges,
  findEndTimestamp: () => findEndTimestamp,
  getChapterDuration: () => getChapterDuration,
  getSentenceRanges: () => getSentenceRanges,
  interpolateSentenceRanges: () => interpolateSentenceRanges
});
module.exports = __toCommonJS(getSentenceRanges_exports);
28
+ var import_text_segmentation = require("@echogarden/text-segmentation");
29
+ var import_ffmpeg = require("../common/ffmpeg.cjs");
30
+ var import_fuzzy = require("./fuzzy.cjs");
31
/**
 * Splits `text` into sentences with `segmentText` and returns the pieces in
 * order, including the text found between or after the reported sentences,
 * so that the pieces cover `text` from the first piece to the end.
 *
 * @param text Text to segment.
 * @returns Promise of the ordered list of sentence and in-between pieces.
 */
async function getSentencesWithOffsets(text) {
  const segmentation = await (0, import_text_segmentation.segmentText)(text);
  const sentences = segmentation.sentences.map((s) => s.text);
  const pieces = [];
  let cursor = 0;
  for (const sentence of sentences) {
    const sentenceStart = text.indexOf(sentence, cursor);
    // Keep whatever sits between the previous sentence and this one.
    if (sentenceStart > cursor) {
      pieces.push(text.slice(cursor, sentenceStart));
    }
    pieces.push(sentence);
    cursor = sentenceStart + sentence.length;
  }
  // Keep trailing text that follows the last sentence.
  if (cursor < text.length) {
    pieces.push(text.slice(cursor));
  }
  return pieces;
}
50
/**
 * Looks up the first timeline entry whose `endOffsetUtf16` (treated as 0 when
 * absent) lies beyond `matchStartIndex`.
 *
 * @param matchStartIndex Offset of the match start in the transcript.
 * @param transcription   Object whose `timeline` array is scanned in order.
 * @returns `{ start, end, audiofile }` copied from the entry's `startTime`,
 *          `endTime` and `audiofile`, or `null` when no entry qualifies.
 */
function findStartTimestamp(matchStartIndex, transcription) {
  for (const entry of transcription.timeline) {
    const entryEnd = entry.endOffsetUtf16 ?? 0;
    if (entryEnd > matchStartIndex) {
      return {
        start: entry.startTime,
        end: entry.endTime,
        audiofile: entry.audiofile
      };
    }
  }
  return null;
}
61
/**
 * Looks up the last timeline entry whose `startOffsetUtf16` (treated as 0
 * when absent) lies before `matchEndIndex` and returns its `endTime`.
 *
 * @param matchEndIndex Offset of the match end in the transcript.
 * @param transcription Object whose `timeline` array is scanned from the back.
 * @returns The entry's `endTime`, or `null` when no entry qualifies or the
 *          entry has no `endTime`.
 */
function findEndTimestamp(matchEndIndex, transcription) {
  const timeline = transcription.timeline;
  for (let i = timeline.length - 1; i >= 0; i--) {
    const entry = timeline[i];
    if ((entry.startOffsetUtf16 ?? 0) < matchEndIndex) {
      return entry.endTime ?? null;
    }
  }
  return null;
}
67
/**
 * Converts an offset into the concatenation of the strings in `window` into
 * the index of the string that contains it plus the offset inside that
 * string. The walk never moves past the last string, so an offset beyond the
 * total length is reported relative to the last string.
 *
 * @param window List of strings that were concatenated.
 * @param offset Offset into the concatenation.
 * @returns `{ index, offset }` for the containing string.
 */
function getWindowIndexFromOffset(window, offset) {
  const lastIndex = window.length - 1;
  let remaining = offset;
  let index = 0;
  for (; index < lastIndex && remaining >= window[index].length; index++) {
    remaining -= window[index].length;
  }
  return { index, offset: remaining };
}
75
/**
 * Replaces every run of whitespace characters in `input` with a single space.
 *
 * @param input Text to normalise.
 * @returns The text with whitespace runs collapsed.
 */
function collapseWhitespace(input) {
  const whitespaceRun = /\s+/g;
  return input.replace(whitespaceRun, " ");
}
78
/**
 * Aligns book sentences with a transcription and returns the audio range of
 * every sentence that could be located.
 *
 * Sentences are searched in order, starting at `startSentence`, with
 * `findNearestMatch` inside a sliding window of up to 10 transcription
 * sentences. The allowed edit distance is 25% of the query length (at least
 * 1). After three consecutive misses the window moves forward by one
 * transcription sentence and the missed sentences are retried; once the window
 * has moved 30 sentences past the last successful match it is reset to that
 * match and the missed sentences are skipped.
 *
 * Sentences with at most three characters left after removing punctuation are
 * ignored. Fix: the punctuation class used to contain the accidental range
 * `.-_` (U+002E to U+005F), which also removed digits and capital letters and
 * did not remove `-`; the hyphen is now a literal member of the class.
 *
 * Side effects: `end` of the previously found range (or of
 * `lastSentenceRange`) is adjusted so that adjacent sentences meet.
 *
 * @param startSentence     Index into `sentences` at which to start.
 * @param transcription     Object with `transcript` text and a `timeline`.
 * @param sentences         Book sentences to locate.
 * @param chapterOffset     Offset into `transcription.transcript` where the search starts.
 * @param lastSentenceRange Last range of the preceding chapter, or `null`.
 * @returns Promise of `{ sentenceRanges, transcriptionOffset }`, where
 *          `transcriptionOffset` is the transcript offset at which the last
 *          match ended (`chapterOffset` when nothing matched).
 */
async function getSentenceRanges(startSentence, transcription, sentences, chapterOffset, lastSentenceRange) {
  const sentenceRanges = [];
  const fullTranscriptionText = transcription.transcript;
  const transcriptionText = fullTranscriptionText.slice(chapterOffset);
  const transcriptionPieces = await getSentencesWithOffsets(transcriptionText);
  const transcriptionSentences = transcriptionPieces.map((sentence) => sentence.toLowerCase());
  // The leading "-" is a literal hyphen, not a range operator.
  const ignoredCharacters = /[-._()[\],/?!@#$%^&*`~;:='"<>+ˌˈ]/g;
  let startSentenceEntry = startSentence;
  const sentenceEntries = sentences.map((sentence, index) => [index, sentence]).filter(([index, sentence]) => {
    if (sentence.replaceAll(ignoredCharacters, "").length <= 3) {
      // Keep the start position pointing at the same sentence after filtering.
      if (index < startSentence) startSentenceEntry--;
      return false;
    }
    return true;
  });
  let transcriptionWindowIndex = 0;
  let transcriptionWindowOffset = 0;
  let lastGoodTranscriptionWindow = 0;
  let notFound = 0;
  let sentenceIndex = startSentenceEntry;
  let lastMatchEnd = chapterOffset;
  while (sentenceIndex < sentenceEntries.length) {
    const [sentenceId, sentence] = sentenceEntries[sentenceIndex];
    const transcriptionWindowList = transcriptionSentences.slice(
      transcriptionWindowIndex,
      transcriptionWindowIndex + 10
    );
    const transcriptionWindow = transcriptionWindowList.join("").slice(transcriptionWindowOffset);
    const query = collapseWhitespace(sentence.trim()).toLowerCase();
    const firstMatch = (0, import_fuzzy.findNearestMatch)(
      query,
      transcriptionWindow,
      Math.max(Math.floor(0.25 * query.length), 1)
    );
    if (!firstMatch) {
      sentenceIndex += 1;
      notFound += 1;
      if (notFound === 3 || sentenceIndex === sentenceEntries.length) {
        transcriptionWindowIndex += 1;
        if (transcriptionWindowIndex === lastGoodTranscriptionWindow + 30) {
          // Searched far enough ahead: give up on the missed sentences and
          // resume from the last window that produced a match.
          transcriptionWindowIndex = lastGoodTranscriptionWindow;
          notFound = 0;
          continue;
        }
        // Retry the missed sentences against the advanced window.
        sentenceIndex -= notFound;
        notFound = 0;
      }
      continue;
    }
    const transcriptionOffset = transcriptionSentences.slice(0, transcriptionWindowIndex).join("").length;
    const startResult = findStartTimestamp(
      firstMatch.index + transcriptionOffset + transcriptionWindowOffset + chapterOffset,
      transcription
    );
    if (!startResult) {
      sentenceIndex += 1;
      continue;
    }
    let start = startResult.start;
    const audiofile = startResult.audiofile;
    const end = findEndTimestamp(
      firstMatch.index + firstMatch.match.length + transcriptionOffset + transcriptionWindowOffset + chapterOffset,
      transcription
    ) ?? startResult.end;
    if (sentenceRanges.length > 0) {
      const previousSentenceRange = sentenceRanges[sentenceRanges.length - 1];
      const previousAudiofile = previousSentenceRange.audiofile;
      if (audiofile === previousAudiofile) {
        // Directly adjacent sentences in the same file: close the gap.
        if (previousSentenceRange.id === sentenceId - 1) {
          previousSentenceRange.end = start;
        }
      } else {
        // Directly adjacent sentences in different files: the previous one
        // runs to the end of its track and this one starts its own track.
        if (previousSentenceRange.id === sentenceId - 1) {
          const lastTrackDuration = await (0, import_ffmpeg.getTrackDuration)(previousAudiofile);
          previousSentenceRange.end = lastTrackDuration;
          start = 0;
        }
      }
    } else if (lastSentenceRange !== null) {
      if (audiofile === lastSentenceRange.audiofile) {
        if (sentenceId === 0) {
          lastSentenceRange.end = start;
        }
      } else {
        const lastTrackDuration = await (0, import_ffmpeg.getTrackDuration)(
          lastSentenceRange.audiofile
        );
        lastSentenceRange.end = lastTrackDuration;
        if (sentenceId === 0) {
          start = 0;
        }
      }
    } else if (sentenceId === 0) {
      start = 0;
    }
    sentenceRanges.push({
      id: sentenceId,
      start,
      end,
      audiofile
    });
    notFound = 0;
    lastMatchEnd = firstMatch.index + firstMatch.match.length + transcriptionOffset + transcriptionWindowOffset + chapterOffset;
    // Move the window so that the next search starts right after this match.
    const windowIndexResult = getWindowIndexFromOffset(
      transcriptionWindowList,
      firstMatch.index + firstMatch.match.length + transcriptionWindowOffset
    );
    transcriptionWindowIndex += windowIndexResult.index;
    transcriptionWindowOffset = windowIndexResult.offset;
    lastGoodTranscriptionWindow = transcriptionWindowIndex;
    sentenceIndex += 1;
  }
  return {
    sentenceRanges,
    transcriptionOffset: lastMatchEnd
  };
}
195
/**
 * Compares the unused time after `trailing` in its audio file (track duration
 * from `getTrackDuration` minus `trailing.end`) with the unused time before
 * `leading` in its audio file (`leading.start`).
 *
 * @param trailing Range that ends one audio file.
 * @param leading  Range that starts the next audio file.
 * @returns Promise of `[gap, audiofile]` for the larger gap; the leading gap
 *          wins ties.
 */
async function getLargestGap(trailing, leading) {
  const gapBeforeLeading = leading.start;
  const trailingTrackDuration = await (0, import_ffmpeg.getTrackDuration)(trailing.audiofile);
  const gapAfterTrailing = trailingTrackDuration - trailing.end;
  return gapAfterTrailing > gapBeforeLeading
    ? [gapAfterTrailing, trailing.audiofile]
    : [gapBeforeLeading, leading.audiofile];
}
201
/**
 * Fills the holes in `sentenceRanges` so that every sentence id from 0 up to
 * the last known id has a range. Missing sentences share the time gap between
 * their known neighbours in equal parts.
 *
 * Fix: when the neighbours are in different audio files and the larger gap is
 * the one at the end of the previous file, the interpolated ranges now start
 * at the previous range's `end`. They used to start at 0 of that file, i.e.
 * before the sentence they follow.
 *
 * Side effects: when there is no room between two neighbours, a quarter of a
 * second (or half of the next range when crossing files and it is shorter
 * than 0.5) is taken from one of them by mutating its `start` or `end`.
 *
 * @param sentenceRanges    Known ranges, ordered by id.
 * @param lastSentenceRange Last range of the preceding chapter, or `null`.
 * @returns Promise of the ranges including the interpolated ones.
 */
async function interpolateSentenceRanges(sentenceRanges, lastSentenceRange) {
  const interpolated = [];
  const [first, ...rest] = sentenceRanges;
  if (!first) return interpolated;
  if (first.id !== 0) {
    // Sentences before the first known one are missing.
    const count = first.id;
    const crossesAudioBoundary = !lastSentenceRange || first.audiofile !== lastSentenceRange.audiofile;
    let diff = crossesAudioBoundary ? first.start : first.start - lastSentenceRange.end;
    if (!crossesAudioBoundary && diff <= 0) {
      diff = 0.25;
      lastSentenceRange.end = first.start - diff;
    }
    const interpolatedLength = diff / count;
    const start = crossesAudioBoundary ? 0 : lastSentenceRange.end;
    for (let i = 0; i < count; i++) {
      interpolated.push({
        id: i,
        start: start + interpolatedLength * i,
        end: start + interpolatedLength * (i + 1),
        audiofile: first.audiofile
      });
    }
    interpolated.push(first);
  } else {
    rest.unshift(first);
  }
  for (const sentenceRange of rest) {
    if (interpolated.length === 0) {
      interpolated.push(sentenceRange);
      continue;
    }
    const previousRange = interpolated[interpolated.length - 1];
    const count = sentenceRange.id - previousRange.id - 1;
    if (count === 0) {
      interpolated.push(sentenceRange);
      continue;
    }
    const crossesAudioBoundary = sentenceRange.audiofile !== previousRange.audiofile;
    let [diff, audiofile] = crossesAudioBoundary
      ? await getLargestGap(previousRange, sentenceRange)
      : [sentenceRange.start - previousRange.end, sentenceRange.audiofile];
    if (diff <= 0) {
      if (crossesAudioBoundary) {
        // Carve room out of the start of the next file's first range.
        const rangeLength = sentenceRange.end - sentenceRange.start;
        diff = rangeLength < 0.5 ? rangeLength / 2 : 0.25;
        sentenceRange.start = diff;
        audiofile = sentenceRange.audiofile;
      } else {
        diff = 0.25;
        previousRange.end = sentenceRange.start - diff;
      }
    }
    const interpolatedLength = diff / count;
    // A gap in the previous range's file begins where that range ends; a gap
    // in the next file begins at the start of the file.
    const start = audiofile === previousRange.audiofile ? previousRange.end : 0;
    for (let i = 0; i < count; i++) {
      interpolated.push({
        id: previousRange.id + i + 1,
        start: start + interpolatedLength * i,
        end: start + interpolatedLength * (i + 1),
        audiofile
      });
    }
    interpolated.push(sentenceRange);
  }
  return interpolated;
}
264
/**
 * Returns a copy of `sentenceRanges` in which no range overlaps its
 * predecessor in the same audio file and no range is empty.
 *
 * A range that starts before the previous (already adjusted) range ends in
 * the same file is moved to start at that end. A range whose `end` is not
 * after its `start` gets `end = start + 0.001`.
 *
 * Fix: the first range is now checked for emptiness as well; it used to be
 * copied through unchanged, so a leading range with `start === end` survived.
 *
 * Input objects are not mutated; adjusted ranges are new objects.
 *
 * @param sentenceRanges Ranges in playback order.
 * @returns The adjusted ranges.
 */
function expandEmptySentenceRanges(sentenceRanges) {
  const expandedRanges = [];
  for (const sentenceRange of sentenceRanges) {
    const previousSentenceRange = expandedRanges[expandedRanges.length - 1];
    const overlapsPrevious =
      previousSentenceRange !== undefined &&
      previousSentenceRange.audiofile === sentenceRange.audiofile &&
      previousSentenceRange.end > sentenceRange.start;
    const nudged = overlapsPrevious
      ? { ...sentenceRange, start: previousSentenceRange.end }
      : sentenceRange;
    const expanded = nudged.end <= nudged.start
      ? { ...nudged, end: nudged.start + 1e-3 }
      : nudged;
    expandedRanges.push(expanded);
  }
  return expandedRanges;
}
278
/**
 * Adds up the audio time covered by `sentenceRanges`. Consecutive ranges in
 * the same audio file count as one stretch from the first range's `start` to
 * the last range's `end`; the stretches of all files are summed.
 *
 * @param sentenceRanges Ranges in playback order.
 * @returns The summed duration, `0` for an empty list.
 */
function getChapterDuration(sentenceRanges) {
  let total = 0;
  let currentFile = null;
  let stretchStart = 0;
  let stretchEnd = 0;
  for (const range of sentenceRanges) {
    if (range.audiofile !== currentFile) {
      // Close the stretch of the previous file and open a new one.
      total += stretchEnd - stretchStart;
      stretchStart = range.start;
      currentFile = range.audiofile;
    }
    stretchEnd = range.end;
  }
  total += stretchEnd - stretchStart;
  return total;
}
297
// Annotate the CommonJS export names for ESM import in node:
// The assignment below never runs (`0 &&` short-circuits); it only spells out
// the export names as a plain `module.exports = { ... }` literal.
0 && (module.exports = {
  expandEmptySentenceRanges,
  findEndTimestamp,
  getChapterDuration,
  getSentenceRanges,
  interpolateSentenceRanges
});
@@ -0,0 +1,31 @@
1
+ import { TimelineEntry } from '@storyteller-platform/ghost-story';
2
+
3
/** A timeline entry that also records which audio file it belongs to. */
type StorytellerTimelineEntry = TimelineEntry & {
    /** Identifier of the audio file; passed to `getTrackDuration` by the implementation (presumably a file path, confirm with callers). */
    audiofile: string;
};
/** A transcription: the full transcript text plus its timeline entries. */
type StorytellerTranscription = {
    /** Full transcript text; match offsets are indices into this string. */
    transcript: string;
    /** Timeline entries, scanned in array order by the timestamp lookups. */
    timeline: StorytellerTimelineEntry[];
};
/** The stretch of audio assigned to one sentence. */
type SentenceRange = {
    /** Index of the sentence in the `sentences` array given to `getSentenceRanges`. */
    id: number;
    /** Start time within `audiofile`, taken from a timeline entry's `startTime` (presumably seconds, confirm). */
    start: number;
    /** End time within `audiofile`, in the same unit as `start`. */
    end: number;
    /** Audio file the range lies in. */
    audiofile: string;
};
16
/**
 * Returns the `endTime` of the last timeline entry whose `startOffsetUtf16`
 * (treated as 0 when absent) is less than `matchEndIndex`, or `null` when no
 * entry qualifies.
 */
declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): number | null;
/**
 * Locates `sentences` (starting at index `startSentence`) in the transcript
 * from `chapterOffset` onwards using fuzzy matching.
 *
 * Resolves to the ranges of the sentences that were found and, as
 * `transcriptionOffset`, the transcript offset at which the last match ended.
 * May adjust `end` of `lastSentenceRange`.
 */
declare function getSentenceRanges(startSentence: number, transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, lastSentenceRange: SentenceRange | null): Promise<{
    sentenceRanges: SentenceRange[];
    transcriptionOffset: number;
}>;
/**
 * Adds evenly spaced ranges for the sentence ids missing from
 * `sentenceRanges`, using the time between the known neighbours. May adjust
 * `start`/`end` of the given ranges and `end` of `lastSentenceRange` to make
 * room.
 */
declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
/**
 * Whisper sometimes provides words with no time information,
 * or start and end timestamps that are equal. EpubCheck complains
 * about these, so we nudge them out a bit to make sure that they're
 * not truly equal.
 */
declare function expandEmptySentenceRanges(sentenceRanges: SentenceRange[]): SentenceRange[];
/**
 * Sums the audio time covered by `sentenceRanges`, counting each run of
 * consecutive ranges in the same audio file from its first `start` to its
 * last `end`.
 */
declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
30
+
31
+ export { type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges };
@@ -0,0 +1,31 @@
1
+ import { TimelineEntry } from '@storyteller-platform/ghost-story';
2
+
3
/** A timeline entry that also records which audio file it belongs to. */
type StorytellerTimelineEntry = TimelineEntry & {
    /** Identifier of the audio file; passed to `getTrackDuration` by the implementation (presumably a file path, confirm with callers). */
    audiofile: string;
};
/** A transcription: the full transcript text plus its timeline entries. */
type StorytellerTranscription = {
    /** Full transcript text; match offsets are indices into this string. */
    transcript: string;
    /** Timeline entries, scanned in array order by the timestamp lookups. */
    timeline: StorytellerTimelineEntry[];
};
/** The stretch of audio assigned to one sentence. */
type SentenceRange = {
    /** Index of the sentence in the `sentences` array given to `getSentenceRanges`. */
    id: number;
    /** Start time within `audiofile`, taken from a timeline entry's `startTime` (presumably seconds, confirm). */
    start: number;
    /** End time within `audiofile`, in the same unit as `start`. */
    end: number;
    /** Audio file the range lies in. */
    audiofile: string;
};
16
/**
 * Returns the `endTime` of the last timeline entry whose `startOffsetUtf16`
 * (treated as 0 when absent) is less than `matchEndIndex`, or `null` when no
 * entry qualifies.
 */
declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): number | null;
/**
 * Locates `sentences` (starting at index `startSentence`) in the transcript
 * from `chapterOffset` onwards using fuzzy matching.
 *
 * Resolves to the ranges of the sentences that were found and, as
 * `transcriptionOffset`, the transcript offset at which the last match ended.
 * May adjust `end` of `lastSentenceRange`.
 */
declare function getSentenceRanges(startSentence: number, transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, lastSentenceRange: SentenceRange | null): Promise<{
    sentenceRanges: SentenceRange[];
    transcriptionOffset: number;
}>;
/**
 * Adds evenly spaced ranges for the sentence ids missing from
 * `sentenceRanges`, using the time between the known neighbours. May adjust
 * `start`/`end` of the given ranges and `end` of `lastSentenceRange` to make
 * room.
 */
declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
/**
 * Whisper sometimes provides words with no time information,
 * or start and end timestamps that are equal. EpubCheck complains
 * about these, so we nudge them out a bit to make sure that they're
 * not truly equal.
 */
declare function expandEmptySentenceRanges(sentenceRanges: SentenceRange[]): SentenceRange[];
/**
 * Sums the audio time covered by `sentenceRanges`, counting each run of
 * consecutive ranges in the same audio file from its first `start` to its
 * last `end`.
 */
declare function getChapterDuration(sentenceRanges: SentenceRange[]): number;
30
+
31
+ export { type SentenceRange, type StorytellerTimelineEntry, type StorytellerTranscription, expandEmptySentenceRanges, findEndTimestamp, getChapterDuration, getSentenceRanges, interpolateSentenceRanges };