@storyteller-platform/align 0.1.12 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,8 +26,10 @@ __export(getSentenceRanges_exports, {
26
26
  });
27
27
  module.exports = __toCommonJS(getSentenceRanges_exports);
28
28
  var import_itertools = require("itertools");
29
+ var import_runes2 = require("runes2");
29
30
  var import_ffmpeg = require("../common/ffmpeg.cjs");
30
31
  var import_errorAlign = require("../errorAlign/errorAlign.cjs");
32
+ var import_utils = require("../errorAlign/utils.cjs");
31
33
  var import_slugify = require("./slugify.cjs");
32
34
  function findStartTimestamp(matchStartIndex, transcription) {
33
35
  const entry = transcription.timeline.find(
@@ -44,21 +46,96 @@ function findEndTimestamp(matchEndIndex, transcription) {
44
46
  const entry = transcription.timeline.findLast(
45
47
  (entry2) => (entry2.startOffsetUtf16 ?? 0) < matchEndIndex
46
48
  );
47
- return (entry == null ? void 0 : entry.endTime) ?? null;
49
+ if (!entry) return null;
50
+ return {
51
+ start: entry.startTime,
52
+ end: entry.endTime,
53
+ audiofile: entry.audiofile
54
+ };
48
55
  }
49
56
  function getAlignmentsForSentence(sentence, alignments) {
50
57
  const result = [];
58
+ let score = Math.floor(sentence.length / 2);
51
59
  let sentenceIndex = 0;
52
60
  for (const alignment of alignments) {
53
61
  if (sentenceIndex === sentence.length) break;
54
62
  if (alignment.opType !== "INSERT") {
55
63
  sentenceIndex += alignment.ref.length + (sentenceIndex === 0 ? 0 : 1);
56
64
  }
65
+ if (alignment.opType === "DELETE" || alignment.opType === "INSERT" && sentenceIndex > 0) {
66
+ score -= (alignment.ref ?? alignment.hyp).length + 1;
67
+ }
57
68
  result.push(alignment);
58
69
  }
59
- return result;
70
+ return {
71
+ alignments: result,
72
+ score: result.some((a) => a.opType === "MATCH") ? score : -1
73
+ };
60
74
  }
61
- async function getSentenceRanges(startSentence, endSentence, transcription, sentences, chapterOffset, chapterEndOffset, locale) {
75
+ function errorAlignWithNarrowing(refSentences, hyp, narrowStart, narrowEnd) {
76
+ const firstAttempt = (0, import_errorAlign.errorAlign)(refSentences.join("-"), hyp);
77
+ let alignmentIndex = 0;
78
+ let firstGood = 0;
79
+ if (narrowStart) {
80
+ for (const sentence of refSentences) {
81
+ const { alignments: sentenceAlignments, score } = getAlignmentsForSentence(sentence, firstAttempt.slice(alignmentIndex));
82
+ alignmentIndex += sentenceAlignments.length;
83
+ if (sentence === "" || score <= 0) {
84
+ firstGood++;
85
+ } else {
86
+ break;
87
+ }
88
+ }
89
+ }
90
+ const reversedFirstAttempt = firstAttempt.toReversed().map((a) => {
91
+ if (!a.ref) return a;
92
+ return new import_utils.Alignment(
93
+ a.opType,
94
+ a.refSlice,
95
+ a.hypSlice,
96
+ (0, import_runes2.runes)(a.ref).toReversed().join(""),
97
+ a.hyp,
98
+ a.leftCompound,
99
+ a.rightCompound
100
+ );
101
+ });
102
+ let lastGood = 0;
103
+ alignmentIndex = 0;
104
+ if (narrowEnd) {
105
+ for (const sentence of (0, import_utils.reversed)(refSentences)) {
106
+ const reversedSentence = (0, import_runes2.runes)(sentence).toReversed().join("");
107
+ const { alignments: sentenceAlignments, score } = getAlignmentsForSentence(
108
+ reversedSentence,
109
+ reversedFirstAttempt.slice(alignmentIndex)
110
+ );
111
+ alignmentIndex += sentenceAlignments.length;
112
+ if (sentence === "" || score <= 0) {
113
+ lastGood++;
114
+ } else {
115
+ break;
116
+ }
117
+ }
118
+ }
119
+ lastGood = refSentences.length - lastGood;
120
+ if (firstGood <= 1 && lastGood >= refSentences.length - 2) {
121
+ return {
122
+ alignments: firstAttempt,
123
+ slice: [0, refSentences.length]
124
+ };
125
+ }
126
+ const slice = [
127
+ Math.max(firstGood - 1, 0),
128
+ Math.min(refSentences.length, lastGood + 1)
129
+ ];
130
+ const { alignments, slice: narrowed } = errorAlignWithNarrowing(
131
+ refSentences.slice(...slice),
132
+ hyp,
133
+ narrowStart,
134
+ narrowEnd
135
+ );
136
+ return { alignments, slice: [slice[0] + narrowed[0], slice[0] + narrowed[1]] };
137
+ }
138
+ async function getSentenceRanges(transcription, sentences, chapterOffset, chapterEndOffset, locale) {
62
139
  const sentenceRanges = [];
63
140
  const fullTranscript = transcription.transcript;
64
141
  const chapterTranscript = fullTranscript.slice(
@@ -66,58 +143,105 @@ async function getSentenceRanges(startSentence, endSentence, transcription, sent
66
143
  chapterEndOffset
67
144
  );
68
145
  const { result: slugifiedChapterTranscript, mapping: transcriptMapping } = await (0, import_slugify.slugify)(chapterTranscript, locale);
146
+ const slugifiedChapterSentences = [];
147
+ for (const s of sentences) {
148
+ const { result } = await (0, import_slugify.slugify)(s, locale);
149
+ slugifiedChapterSentences.push(result);
150
+ }
151
+ let firstFoundSentence = 0;
152
+ let lastFoundSentence = sentences.length - 1;
69
153
  let chapterTranscriptEndIndex = chapterOffset;
70
- let chapterSentenceIndex = startSentence;
154
+ let chapterSentenceIndex = 0;
71
155
  let slugifiedChapterTranscriptWindowStartIndex = 0;
72
- while (chapterSentenceIndex < endSentence) {
73
- const slugifiedChapterSentenceWindowList = [];
156
+ while (chapterSentenceIndex < slugifiedChapterSentences.length) {
157
+ let slugifiedChapterSentenceWindowList = [];
74
158
  let sentenceWindowLength = 0;
75
159
  let i = chapterSentenceIndex;
76
- while (sentenceWindowLength < 5e3 && i < sentences.length) {
77
- const { result: sentence } = await (0, import_slugify.slugify)(sentences[i], locale);
160
+ while (sentenceWindowLength < 5e3 && i < slugifiedChapterSentences.length) {
161
+ const sentence = slugifiedChapterSentences[i];
78
162
  slugifiedChapterSentenceWindowList.push(sentence);
79
163
  sentenceWindowLength += sentence.length;
80
164
  i++;
81
165
  }
82
- const slugifiedChapterSentenceWindow = slugifiedChapterSentenceWindowList.join("-");
166
+ const remainingSlugifiedSentences = slugifiedChapterSentences.slice(i);
167
+ const remainingSlugifiedSentenceLength = remainingSlugifiedSentences.reduce(
168
+ (acc, s) => acc + s.length,
169
+ 0
170
+ );
171
+ if (remainingSlugifiedSentenceLength < 5e3) {
172
+ slugifiedChapterSentenceWindowList.push(...remainingSlugifiedSentences);
173
+ sentenceWindowLength += remainingSlugifiedSentenceLength;
174
+ i = slugifiedChapterSentences.length;
175
+ }
83
176
  const slugifiedChapterTranscriptWindow = slugifiedChapterTranscript.slice(
84
177
  slugifiedChapterTranscriptWindowStartIndex,
85
178
  slugifiedChapterTranscriptWindowStartIndex + sentenceWindowLength * 1.2
86
179
  );
87
- const alignments = (0, import_errorAlign.errorAlign)(
88
- slugifiedChapterSentenceWindow,
89
- slugifiedChapterTranscriptWindow
90
- );
180
+ let alignments;
181
+ let slice = [0, slugifiedChapterSentenceWindowList.length - 1];
182
+ if (chapterSentenceIndex === 0 || i === sentences.length) {
183
+ const result = errorAlignWithNarrowing(
184
+ slugifiedChapterSentenceWindowList,
185
+ slugifiedChapterTranscriptWindow,
186
+ chapterSentenceIndex === 0,
187
+ i === sentences.length
188
+ );
189
+ alignments = result.alignments;
190
+ slice = result.slice;
191
+ if (chapterSentenceIndex === 0) {
192
+ firstFoundSentence = chapterSentenceIndex + slice[0];
193
+ }
194
+ if (i === sentences.length) {
195
+ lastFoundSentence = chapterSentenceIndex + slice[0] + slice[1] - 1;
196
+ }
197
+ slugifiedChapterSentenceWindowList = slugifiedChapterSentenceWindowList.slice(...slice);
198
+ } else {
199
+ alignments = (0, import_errorAlign.errorAlign)(
200
+ slugifiedChapterSentenceWindowList.join("-"),
201
+ slugifiedChapterTranscriptWindow
202
+ );
203
+ }
91
204
  let alignmentIndex = 0;
92
205
  let currentTranscriptWindowIndex = 0;
93
- for (const [i2, slugifiedSentence] of (0, import_itertools.enumerate)(
206
+ for (const [j, slugifiedSentence] of (0, import_itertools.enumerate)(
94
207
  slugifiedChapterSentenceWindowList
95
208
  )) {
96
209
  if (!slugifiedSentence) continue;
97
- const sentenceAlignments = getAlignmentsForSentence(
210
+ const { alignments: sentenceAlignments, score } = getAlignmentsForSentence(
98
211
  slugifiedSentence,
99
212
  alignments.slice(alignmentIndex)
100
213
  );
101
214
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
102
- const start = findStartTimestamp(
103
- chapterOffset + transcriptMapping.invert().map(
104
- slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
105
- 1
106
- ),
107
- transcription
108
- );
109
- chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
110
- slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
111
- -1
112
- );
113
- const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
114
- if (start && end !== null) {
115
- sentenceRanges.push({
116
- id: i2 + chapterSentenceIndex,
117
- start: start.start,
118
- audiofile: start.audiofile,
119
- end
120
- });
215
+ if (score > 0) {
216
+ const start = findStartTimestamp(
217
+ chapterOffset + transcriptMapping.invert().map(
218
+ slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
219
+ 1
220
+ ),
221
+ transcription
222
+ );
223
+ chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
224
+ slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
225
+ -1
226
+ );
227
+ const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
228
+ if (start && end) {
229
+ if (start.audiofile !== end.audiofile) {
230
+ sentenceRanges.push({
231
+ id: j + chapterSentenceIndex + slice[0],
232
+ start: 0,
233
+ audiofile: end.audiofile,
234
+ end: end.end
235
+ });
236
+ } else {
237
+ sentenceRanges.push({
238
+ id: j + chapterSentenceIndex + slice[0],
239
+ start: start.start,
240
+ audiofile: start.audiofile,
241
+ end: end.end
242
+ });
243
+ }
244
+ }
121
245
  }
122
246
  alignmentIndex += sentenceAlignments.length;
123
247
  currentTranscriptWindowIndex += sentenceLengthInSlugifiedTranscript;
@@ -125,13 +249,18 @@ async function getSentenceRanges(startSentence, endSentence, transcription, sent
125
249
  currentTranscriptWindowIndex++;
126
250
  }
127
251
  }
128
- chapterSentenceIndex += slugifiedChapterSentenceWindowList.length;
252
+ chapterSentenceIndex = i;
129
253
  slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
130
254
  if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
131
255
  slugifiedChapterTranscriptWindowStartIndex++;
132
256
  }
133
257
  }
134
- return { sentenceRanges, transcriptionOffset: chapterTranscriptEndIndex };
258
+ return {
259
+ sentenceRanges,
260
+ transcriptionOffset: chapterTranscriptEndIndex,
261
+ firstFoundSentence,
262
+ lastFoundSentence
263
+ };
135
264
  }
136
265
  async function getLargestGap(trailing, leading) {
137
266
  const leadingGap = leading.start;
@@ -13,10 +13,16 @@ type SentenceRange = {
13
13
  end: number;
14
14
  audiofile: string;
15
15
  };
16
- declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): number | null;
17
- declare function getSentenceRanges(startSentence: number, endSentence: number, transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, chapterEndOffset: number, locale: Intl.Locale): Promise<{
16
+ declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): {
17
+ start: number;
18
+ end: number;
19
+ audiofile: string;
20
+ } | null;
21
+ declare function getSentenceRanges(transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, chapterEndOffset: number, locale: Intl.Locale): Promise<{
18
22
  sentenceRanges: SentenceRange[];
19
23
  transcriptionOffset: number;
24
+ firstFoundSentence: number;
25
+ lastFoundSentence: number;
20
26
  }>;
21
27
  declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
22
28
  /**
@@ -13,10 +13,16 @@ type SentenceRange = {
13
13
  end: number;
14
14
  audiofile: string;
15
15
  };
16
- declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): number | null;
17
- declare function getSentenceRanges(startSentence: number, endSentence: number, transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, chapterEndOffset: number, locale: Intl.Locale): Promise<{
16
+ declare function findEndTimestamp(matchEndIndex: number, transcription: StorytellerTranscription): {
17
+ start: number;
18
+ end: number;
19
+ audiofile: string;
20
+ } | null;
21
+ declare function getSentenceRanges(transcription: StorytellerTranscription, sentences: string[], chapterOffset: number, chapterEndOffset: number, locale: Intl.Locale): Promise<{
18
22
  sentenceRanges: SentenceRange[];
19
23
  transcriptionOffset: number;
24
+ firstFoundSentence: number;
25
+ lastFoundSentence: number;
20
26
  }>;
21
27
  declare function interpolateSentenceRanges(sentenceRanges: SentenceRange[], lastSentenceRange: SentenceRange | null): Promise<SentenceRange[]>;
22
28
  /**
@@ -1,7 +1,9 @@
1
1
  import "../chunk-BIEQXUOY.js";
2
2
  import { enumerate } from "itertools";
3
+ import { runes } from "runes2";
3
4
  import { getTrackDuration } from "../common/ffmpeg.js";
4
5
  import { errorAlign } from "../errorAlign/errorAlign.js";
6
+ import { Alignment, reversed } from "../errorAlign/utils.js";
5
7
  import { slugify } from "./slugify.js";
6
8
  function findStartTimestamp(matchStartIndex, transcription) {
7
9
  const entry = transcription.timeline.find(
@@ -18,21 +20,96 @@ function findEndTimestamp(matchEndIndex, transcription) {
18
20
  const entry = transcription.timeline.findLast(
19
21
  (entry2) => (entry2.startOffsetUtf16 ?? 0) < matchEndIndex
20
22
  );
21
- return (entry == null ? void 0 : entry.endTime) ?? null;
23
+ if (!entry) return null;
24
+ return {
25
+ start: entry.startTime,
26
+ end: entry.endTime,
27
+ audiofile: entry.audiofile
28
+ };
22
29
  }
23
30
  function getAlignmentsForSentence(sentence, alignments) {
24
31
  const result = [];
32
+ let score = Math.floor(sentence.length / 2);
25
33
  let sentenceIndex = 0;
26
34
  for (const alignment of alignments) {
27
35
  if (sentenceIndex === sentence.length) break;
28
36
  if (alignment.opType !== "INSERT") {
29
37
  sentenceIndex += alignment.ref.length + (sentenceIndex === 0 ? 0 : 1);
30
38
  }
39
+ if (alignment.opType === "DELETE" || alignment.opType === "INSERT" && sentenceIndex > 0) {
40
+ score -= (alignment.ref ?? alignment.hyp).length + 1;
41
+ }
31
42
  result.push(alignment);
32
43
  }
33
- return result;
44
+ return {
45
+ alignments: result,
46
+ score: result.some((a) => a.opType === "MATCH") ? score : -1
47
+ };
34
48
  }
35
- async function getSentenceRanges(startSentence, endSentence, transcription, sentences, chapterOffset, chapterEndOffset, locale) {
49
+ function errorAlignWithNarrowing(refSentences, hyp, narrowStart, narrowEnd) {
50
+ const firstAttempt = errorAlign(refSentences.join("-"), hyp);
51
+ let alignmentIndex = 0;
52
+ let firstGood = 0;
53
+ if (narrowStart) {
54
+ for (const sentence of refSentences) {
55
+ const { alignments: sentenceAlignments, score } = getAlignmentsForSentence(sentence, firstAttempt.slice(alignmentIndex));
56
+ alignmentIndex += sentenceAlignments.length;
57
+ if (sentence === "" || score <= 0) {
58
+ firstGood++;
59
+ } else {
60
+ break;
61
+ }
62
+ }
63
+ }
64
+ const reversedFirstAttempt = firstAttempt.toReversed().map((a) => {
65
+ if (!a.ref) return a;
66
+ return new Alignment(
67
+ a.opType,
68
+ a.refSlice,
69
+ a.hypSlice,
70
+ runes(a.ref).toReversed().join(""),
71
+ a.hyp,
72
+ a.leftCompound,
73
+ a.rightCompound
74
+ );
75
+ });
76
+ let lastGood = 0;
77
+ alignmentIndex = 0;
78
+ if (narrowEnd) {
79
+ for (const sentence of reversed(refSentences)) {
80
+ const reversedSentence = runes(sentence).toReversed().join("");
81
+ const { alignments: sentenceAlignments, score } = getAlignmentsForSentence(
82
+ reversedSentence,
83
+ reversedFirstAttempt.slice(alignmentIndex)
84
+ );
85
+ alignmentIndex += sentenceAlignments.length;
86
+ if (sentence === "" || score <= 0) {
87
+ lastGood++;
88
+ } else {
89
+ break;
90
+ }
91
+ }
92
+ }
93
+ lastGood = refSentences.length - lastGood;
94
+ if (firstGood <= 1 && lastGood >= refSentences.length - 2) {
95
+ return {
96
+ alignments: firstAttempt,
97
+ slice: [0, refSentences.length]
98
+ };
99
+ }
100
+ const slice = [
101
+ Math.max(firstGood - 1, 0),
102
+ Math.min(refSentences.length, lastGood + 1)
103
+ ];
104
+ const { alignments, slice: narrowed } = errorAlignWithNarrowing(
105
+ refSentences.slice(...slice),
106
+ hyp,
107
+ narrowStart,
108
+ narrowEnd
109
+ );
110
+ return { alignments, slice: [slice[0] + narrowed[0], slice[0] + narrowed[1]] };
111
+ }
112
+ async function getSentenceRanges(transcription, sentences, chapterOffset, chapterEndOffset, locale) {
36
113
  const sentenceRanges = [];
37
114
  const fullTranscript = transcription.transcript;
38
115
  const chapterTranscript = fullTranscript.slice(
@@ -40,58 +117,105 @@ async function getSentenceRanges(startSentence, endSentence, transcription, sent
40
117
  chapterEndOffset
41
118
  );
42
119
  const { result: slugifiedChapterTranscript, mapping: transcriptMapping } = await slugify(chapterTranscript, locale);
120
+ const slugifiedChapterSentences = [];
121
+ for (const s of sentences) {
122
+ const { result } = await slugify(s, locale);
123
+ slugifiedChapterSentences.push(result);
124
+ }
125
+ let firstFoundSentence = 0;
126
+ let lastFoundSentence = sentences.length - 1;
43
127
  let chapterTranscriptEndIndex = chapterOffset;
44
- let chapterSentenceIndex = startSentence;
128
+ let chapterSentenceIndex = 0;
45
129
  let slugifiedChapterTranscriptWindowStartIndex = 0;
46
- while (chapterSentenceIndex < endSentence) {
47
- const slugifiedChapterSentenceWindowList = [];
130
+ while (chapterSentenceIndex < slugifiedChapterSentences.length) {
131
+ let slugifiedChapterSentenceWindowList = [];
48
132
  let sentenceWindowLength = 0;
49
133
  let i = chapterSentenceIndex;
50
- while (sentenceWindowLength < 5e3 && i < sentences.length) {
51
- const { result: sentence } = await slugify(sentences[i], locale);
134
+ while (sentenceWindowLength < 5e3 && i < slugifiedChapterSentences.length) {
135
+ const sentence = slugifiedChapterSentences[i];
52
136
  slugifiedChapterSentenceWindowList.push(sentence);
53
137
  sentenceWindowLength += sentence.length;
54
138
  i++;
55
139
  }
56
- const slugifiedChapterSentenceWindow = slugifiedChapterSentenceWindowList.join("-");
140
+ const remainingSlugifiedSentences = slugifiedChapterSentences.slice(i);
141
+ const remainingSlugifiedSentenceLength = remainingSlugifiedSentences.reduce(
142
+ (acc, s) => acc + s.length,
143
+ 0
144
+ );
145
+ if (remainingSlugifiedSentenceLength < 5e3) {
146
+ slugifiedChapterSentenceWindowList.push(...remainingSlugifiedSentences);
147
+ sentenceWindowLength += remainingSlugifiedSentenceLength;
148
+ i = slugifiedChapterSentences.length;
149
+ }
57
150
  const slugifiedChapterTranscriptWindow = slugifiedChapterTranscript.slice(
58
151
  slugifiedChapterTranscriptWindowStartIndex,
59
152
  slugifiedChapterTranscriptWindowStartIndex + sentenceWindowLength * 1.2
60
153
  );
61
- const alignments = errorAlign(
62
- slugifiedChapterSentenceWindow,
63
- slugifiedChapterTranscriptWindow
64
- );
154
+ let alignments;
155
+ let slice = [0, slugifiedChapterSentenceWindowList.length - 1];
156
+ if (chapterSentenceIndex === 0 || i === sentences.length) {
157
+ const result = errorAlignWithNarrowing(
158
+ slugifiedChapterSentenceWindowList,
159
+ slugifiedChapterTranscriptWindow,
160
+ chapterSentenceIndex === 0,
161
+ i === sentences.length
162
+ );
163
+ alignments = result.alignments;
164
+ slice = result.slice;
165
+ if (chapterSentenceIndex === 0) {
166
+ firstFoundSentence = chapterSentenceIndex + slice[0];
167
+ }
168
+ if (i === sentences.length) {
169
+ lastFoundSentence = chapterSentenceIndex + slice[0] + slice[1] - 1;
170
+ }
171
+ slugifiedChapterSentenceWindowList = slugifiedChapterSentenceWindowList.slice(...slice);
172
+ } else {
173
+ alignments = errorAlign(
174
+ slugifiedChapterSentenceWindowList.join("-"),
175
+ slugifiedChapterTranscriptWindow
176
+ );
177
+ }
65
178
  let alignmentIndex = 0;
66
179
  let currentTranscriptWindowIndex = 0;
67
- for (const [i2, slugifiedSentence] of enumerate(
180
+ for (const [j, slugifiedSentence] of enumerate(
68
181
  slugifiedChapterSentenceWindowList
69
182
  )) {
70
183
  if (!slugifiedSentence) continue;
71
- const sentenceAlignments = getAlignmentsForSentence(
184
+ const { alignments: sentenceAlignments, score } = getAlignmentsForSentence(
72
185
  slugifiedSentence,
73
186
  alignments.slice(alignmentIndex)
74
187
  );
75
188
  const sentenceLengthInSlugifiedTranscript = sentenceAlignments.filter((a) => a.opType !== "DELETE").map((a) => a.hyp).join("-").length;
76
- const start = findStartTimestamp(
77
- chapterOffset + transcriptMapping.invert().map(
78
- slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
79
- 1
80
- ),
81
- transcription
82
- );
83
- chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
84
- slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
85
- -1
86
- );
87
- const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
88
- if (start && end !== null) {
89
- sentenceRanges.push({
90
- id: i2 + chapterSentenceIndex,
91
- start: start.start,
92
- audiofile: start.audiofile,
93
- end
94
- });
189
+ if (score > 0) {
190
+ const start = findStartTimestamp(
191
+ chapterOffset + transcriptMapping.invert().map(
192
+ slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex,
193
+ 1
194
+ ),
195
+ transcription
196
+ );
197
+ chapterTranscriptEndIndex = chapterOffset + transcriptMapping.invert().map(
198
+ slugifiedChapterTranscriptWindowStartIndex + currentTranscriptWindowIndex + sentenceLengthInSlugifiedTranscript,
199
+ -1
200
+ );
201
+ const end = findEndTimestamp(chapterTranscriptEndIndex, transcription);
202
+ if (start && end) {
203
+ if (start.audiofile !== end.audiofile) {
204
+ sentenceRanges.push({
205
+ id: j + chapterSentenceIndex + slice[0],
206
+ start: 0,
207
+ audiofile: end.audiofile,
208
+ end: end.end
209
+ });
210
+ } else {
211
+ sentenceRanges.push({
212
+ id: j + chapterSentenceIndex + slice[0],
213
+ start: start.start,
214
+ audiofile: start.audiofile,
215
+ end: end.end
216
+ });
217
+ }
218
+ }
95
219
  }
96
220
  alignmentIndex += sentenceAlignments.length;
97
221
  currentTranscriptWindowIndex += sentenceLengthInSlugifiedTranscript;
@@ -99,13 +223,18 @@ async function getSentenceRanges(startSentence, endSentence, transcription, sent
99
223
  currentTranscriptWindowIndex++;
100
224
  }
101
225
  }
102
- chapterSentenceIndex += slugifiedChapterSentenceWindowList.length;
226
+ chapterSentenceIndex = i;
103
227
  slugifiedChapterTranscriptWindowStartIndex += currentTranscriptWindowIndex;
104
228
  if (slugifiedChapterTranscript[slugifiedChapterTranscriptWindowStartIndex] === "-") {
105
229
  slugifiedChapterTranscriptWindowStartIndex++;
106
230
  }
107
231
  }
108
- return { sentenceRanges, transcriptionOffset: chapterTranscriptEndIndex };
232
+ return {
233
+ sentenceRanges,
234
+ transcriptionOffset: chapterTranscriptEndIndex,
235
+ firstFoundSentence,
236
+ lastFoundSentence
237
+ };
109
238
  }
110
239
  async function getLargestGap(trailing, leading) {
111
240
  const leadingGap = leading.start;