Anchor-annotator 0.1.0-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.1.dist-info}/METADATA +1 -1
- Anchor_annotator-0.2.1.dist-info/RECORD +21 -0
- anchor/_version.py +2 -2
- anchor/main.py +44 -22
- anchor/models.py +826 -481
- anchor/plot.py +428 -399
- anchor/undo.py +103 -134
- anchor/widgets.py +36 -45
- anchor/workers.py +43 -17
- Anchor_annotator-0.1.0.dist-info/RECORD +0 -21
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.1.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.1.dist-info}/WHEEL +0 -0
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.1.dist-info}/top_level.txt +0 -0
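The bulk of the `anchor/models.py` diff below introduces two new classes, `FileUtterancesModel` and `FileSelectionModel`: per-file utterance and audio state moves out of `CorpusSelectionModel`, and waveform, spectrogram, and pitch-track loading is handed off to background workers from the newly imported `anchor.workers` module. Those workers are only exercised here through a `set_params()`/`start()`/`stop()` interface plus a `signals.result` callback, so a minimal sketch of that shape might look like the following (an assumption for illustration only; the real classes live in `anchor/workers.py`, which this page summarizes but does not expand):

```python
from PySide6 import QtCore


class WorkerSignals(QtCore.QObject):
    # Payload mirrors what FileUtterancesModel.finalize_loading_wave_form unpacks:
    # a (samples, sound_file_path) tuple.
    result = QtCore.Signal(object)


class WaveformWorker(QtCore.QRunnable):
    """Illustrative stand-in for anchor.workers.WaveformWorker, not the real class."""

    def __init__(self):
        super().__init__()
        self.setAutoDelete(False)  # the model reuses one instance across files
        self.signals = WorkerSignals()
        self._stopped = False
        self._path = None

    def set_params(self, sound_file_path):
        self._path = sound_file_path

    def stop(self):
        self._stopped = True

    def start(self):
        self._stopped = False
        QtCore.QThreadPool.globalInstance().start(self)

    def run(self):
        import soundfile  # hypothetical reader; any (samples, rate) loader would do

        y, _ = soundfile.read(self._path)
        if not self._stopped:  # the model also discards results for a stale file
            self.signals.result.emit((y, self._path))
```

Keeping the result a plain tuple lets the receiving slot compare it against the currently loaded file and drop late arrivals, which is exactly what `finalize_loading_wave_form` does below.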
anchor/models.py
CHANGED
```diff
@@ -31,7 +31,7 @@ from montreal_forced_aligner.utils import mfa_open
 from PySide6 import QtCore
 from sqlalchemy.orm import joinedload
 
-from anchor import undo
+from anchor import undo, workers
 from anchor.settings import AnchorSettings
 
 
@@ -148,38 +148,480 @@ class TableModel(QtCore.QAbstractTableModel):
         return len(self._header_data)
 
 
-class
+class FileUtterancesModel(QtCore.QAbstractListModel):
+    addCommand = QtCore.Signal(object)
+    selectionRequested = QtCore.Signal(object)
+
+    waveformReady = QtCore.Signal()
+    utterancesReady = QtCore.Signal()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.utterances = []
+        self.file = None
+        self.y = None
+        self.speakers = []
+        self._indices = []
+        self._speaker_indices = []
+        self.reversed_indices = {}
+        self.speaker_channel_mapping = {}
+        self.corpus_model: CorpusModel = None
+        self.waveform_worker = workers.WaveformWorker()
+        self.speaker_tier_worker = workers.SpeakerTierWorker()
+        self.speaker_tier_worker.signals.result.connect(self.finalize_loading_utterances)
+        self.waveform_worker.signals.result.connect(self.finalize_loading_wave_form)
+
+    def get_utterance(self, utterance_id: int) -> Utterance:
+        try:
+            return self.utterances[self.reversed_indices[utterance_id]]
+        except KeyError:
+            return None
+
+    def set_corpus_model(self, corpus_model: CorpusModel):
+        self.corpus_model = corpus_model
+
+    def clean_up_for_close(self):
+        self.waveform_worker.stop()
+        self.speaker_tier_worker.stop()
+
+    def set_file(self, file_id):
+        self.file = (
+            self.corpus_model.session.query(File).options(joinedload(File.sound_file)).get(file_id)
+        )
+        self.y = None
+        self.get_utterances()
+        self.waveform_worker.stop()
+        self.waveform_worker.set_params(self.file.sound_file.sound_file_path)
+        self.waveform_worker.start()
+
+    def finalize_loading_utterances(self, results):
+        utterances, file_id = results
+        if file_id != self.file.id:
+            return
+        self.utterances = utterances
+        for i, u in enumerate(utterances):
+            if u.speaker_id not in self.speakers:
+                self.speakers.append(u.speaker_id)
+            self._speaker_indices.append(u.speaker_id)
+            self.reversed_indices[u.id] = i
+            self._indices.append(u.id)
+            if self.file.num_channels > 1 and u.speaker_id not in self.speaker_channel_mapping:
+                self.speaker_channel_mapping[u.speaker_id] = u.channel
+        self.utterancesReady.emit()
+
+    def finalize_loading_wave_form(self, results):
+        y, file_path = results
+        if self.file is None or file_path != self.file.sound_file.sound_file_path:
+            return
+        self.y = y
+        self.waveformReady.emit()
+
+    def get_utterances(self):
+        parent_index = self.index(0, 0)
+        self.beginRemoveRows(parent_index, 0, len(self.utterances))
+        self.utterances = []
+        self.speakers = []
+        self._indices = []
+        self._speaker_indices = []
+        self.speaker_channel_mapping = {}
+        self.reversed_indices = {}
+        self.endRemoveRows()
+        if self.file is None:
+            return
+        self.speaker_tier_worker.stop()
+        self.speaker_tier_worker.query_alignment = (
+            self.corpus_model.has_alignments
+            or self.corpus_model.has_reference_alignments
+            or self.corpus_model.has_transcribed_alignments
+        )
+        self.speaker_tier_worker.session = self.corpus_model.session
+        self.speaker_tier_worker.set_params(self.file.id)
+        self.speaker_tier_worker.start()
+
+    def create_utterance(self, speaker_id: Optional[int], begin: float, end: float):
+        if not self.corpus_model.editable:
+            return
+        channel = 0
+        if speaker_id is None:
+            speaker_id = self.corpus_model.corpus.add_speaker(
+                "speech", session=self.corpus_model.session
+            ).id
+        if self.file.num_channels > 1:
+            if speaker_id not in self.speaker_channel_mapping:
+                self.speaker_channel_mapping[speaker_id] = 0
+            channel = self.speaker_channel_mapping[speaker_id]
+        begin = round(begin, 4)
+        end = round(end, 4)
+        text = ""
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        new_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            file=self.file,
+            begin=begin,
+            end=end,
+            channel=channel,
+            text=text,
+            normalized_text=text,
+            oovs=text,
+        )
+        print(new_utt.id, new_utt.speaker_id, new_utt.file_id, new_utt.begin, new_utt.end)
+        self.addCommand.emit(undo.CreateUtteranceCommand(new_utt, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+
+    def add_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            if len(self.utterances) > 0:
+                for i, u in enumerate(self.utterances):
+                    if u.begin < utterance.begin:
+                        continue
+                    break
+                else:
+                    i = len(self.utterances) - 1
+            else:
+                i = 0
+            parent_index = self.index(i, 0)
+            self.beginInsertRows(parent_index, i, i + 1)
+            self.utterances.insert(i, utterance)
+            self._indices.insert(i, utterance.id)
+            self._speaker_indices.insert(i, utterance.speaker_id)
+            self.endInsertRows()
+        self.reversed_indices = {u: j for j, u in enumerate(self._indices)}
+        self.selectionRequested.emit(utterances)
+
+    def delete_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            try:
+                index = self.reversed_indices.pop(utterance.id)
+            except KeyError:
+                continue
+            parent_index = self.index(index, 0)
+            self.beginRemoveRows(parent_index, index, index + 1)
+            _ = self.utterances.pop(index)
+            _ = self._indices.pop(index)
+            _ = self._speaker_indices.pop(index)
+            self.reversed_indices = {u: j for j, u in enumerate(self._indices)}
+            self.endRemoveRows()
+        self.selectionRequested.emit(None)
+
+    def change_speaker_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            try:
+                index = self.reversed_indices[utterance.id]
+            except KeyError:
+                continue
+            if utterance.speaker_id not in self.speakers:
+                self.speakers.append(utterance.speaker_id)
+                self.speaker_channel_mapping[utterance.speaker_id] = utterance.channel
+            self._speaker_indices[index] = utterance.speaker_id
+
+    def merge_table_utterances(
+        self, merged_utterance: Utterance, split_utterances: typing.List[Utterance]
+    ):
+        self.delete_table_utterances(split_utterances)
+        self.add_table_utterances([merged_utterance])
+
+    def split_table_utterances(
+        self, merged_utterance: Utterance, split_utterances: typing.List[Utterance]
+    ):
+        self.delete_table_utterances([merged_utterance])
+        self.add_table_utterances(split_utterances)
+
+    def update_utterance_text(self, utterance: Utterance, text):
+        if not self.corpus_model.editable:
+            return
+        if text != utterance.text:
+            self.addCommand.emit(undo.UpdateUtteranceTextCommand(utterance, text, self))
+            self.corpus_model.set_file_modified(self.file.id)
+
+    def refresh_utterances(self):
+        for utterance in self.utterances:
+            self.corpus_model.session.refresh(utterance)
+
+    def update_utterance_speaker(self, utterance: Utterance, speaker_id: int):
+        if not self.corpus_model.editable:
+            return
+        old_speaker_id = utterance.speaker_id
+        if old_speaker_id == speaker_id:
+            return
+        self.addCommand.emit(undo.UpdateUtteranceSpeakerCommand(utterance, speaker_id, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.corpus_model.set_speaker_modified(old_speaker_id)
+
+    def update_utterance_times(
+        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
+    ):
+        if not self.corpus_model.editable:
+            return
+        if utterance.begin == begin and utterance.end == end:
+            return
+        self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
+        self.corpus_model.set_file_modified(self.file.id)
+
+    def split_vad_utterance(
+        self, original_utterance_id, replacement_utterance_data: typing.List[KalpyUtterance]
+    ):
+        if not replacement_utterance_data:
+            return
+        utt = self.utterances[self.reversed_indices[original_utterance_id]]
+        replacement_utterances = []
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        speaker_id = utt.speaker_id
+        for new_utt in replacement_utterance_data:
+            replacement_utterances.append(
+                Utterance(
+                    id=next_pk,
+                    begin=new_utt.segment.begin,
+                    end=new_utt.segment.end,
+                    speaker_id=speaker_id,
+                    file_id=self.file.id,
+                    text=new_utt.transcript,
+                    normalized_text=new_utt.transcript,
+                    features="",
+                    in_subset=False,
+                    ignored=False,
+                    channel=new_utt.segment.channel,
+                )
+            )
+            next_pk += 1
+        self.addCommand.emit(
+            undo.SplitUtteranceCommand(utt, replacement_utterances, self, update_table=False)
+        )
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+
+    def split_utterances(self, utterance: Utterance):
+        if not self.corpus_model.editable:
+            return
+        beg = utterance.begin
+        end = utterance.end
+        duration = end - beg
+        first_text = []
+        second_text = []
+        speaker_id = utterance.speaker_id
+        if (
+            utterance.text
+            and utterance.normalized_text
+            and " " not in utterance.text
+            and " " in utterance.normalized_text
+        ):
+            t = utterance.normalized_text.split()
+            mid_ind = int(len(t) / 2)
+            first_text = t[:mid_ind]
+            second_text = t[mid_ind:]
+        elif utterance.text:
+            t = utterance.text.split()
+            mid_ind = int(len(t) / 2)
+            first_text = t[:mid_ind]
+            second_text = t[mid_ind:]
+        split_time = beg + (duration / 2)
+        oovs = set()
+        for w in first_text:
+            if not self.corpus_model.dictionary_model.check_word(w, speaker_id):
+                oovs.add(w)
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        first_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=beg,
+            end=split_time,
+            channel=utterance.channel,
+            text=" ".join(first_text),
+            normalized_text=" ".join(first_text),
+            oovs=" ".join(oovs),
+        )
+        next_pk += 1
+        oovs = set()
+        for w in second_text:
+            if not self.corpus_model.dictionary_model.check_word(w, utterance.speaker_id):
+                oovs.add(w)
+        second_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=split_time,
+            end=end,
+            channel=utterance.channel,
+            text=" ".join(second_text),
+            normalized_text=" ".join(second_text),
+            oovs=" ".join(oovs),
+        )
+        self.addCommand.emit(undo.SplitUtteranceCommand(utterance, [first_utt, second_utt], self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.selectionRequested.emit([first_utt, second_utt])
+
+    def merge_utterances(self, utterances: list[Utterance]):
+        if not self.corpus_model.editable:
+            return
+        if not utterances:
+            return
+        min_begin = 1000000000
+        max_end = 0
+        text = ""
+        normalized_text = ""
+        speaker_id = None
+        channel = None
+        for old_utt in sorted(utterances, key=lambda x: x.begin):
+            if speaker_id is None:
+                speaker_id = old_utt.speaker_id
+            if channel is None:
+                channel = old_utt.channel
+            if old_utt.begin < min_begin:
+                min_begin = old_utt.begin
+            if old_utt.end > max_end:
+                max_end = old_utt.end
+            utt_text = old_utt.text
+            if utt_text == "speech" and text.strip() == "speech":
+                continue
+            text += utt_text + " "
+            normalized_text += old_utt.normalized_text + " "
+        text = text[:-1]
+        normalized_text = normalized_text[:-1]
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        oovs = set()
+        for w in text.split():
+            if not self.corpus_model.dictionary_model.check_word(w, speaker_id):
+                oovs.add(w)
+        new_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=min_begin,
+            end=max_end,
+            channel=channel,
+            text=text,
+            normalized_text=normalized_text,
+            oovs=" ".join(oovs),
+        )
+        self.addCommand.emit(undo.MergeUtteranceCommand(utterances, new_utt, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.selectionRequested.emit([new_utt])
+
+    def delete_utterances(self, utterances: typing.List[Utterance]):
+        if not self.corpus_model.editable:
+            return
+        if not utterances:
+            return
+        speaker_ids = set(x.speaker_id for x in utterances)
+        self.addCommand.emit(undo.DeleteUtteranceCommand(utterances, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        for speaker_id in speaker_ids:
+            self.corpus_model.set_speaker_modified(speaker_id)
+
+    def rowCount(self, parent=None):
+        return len(self.utterances)
+
+    def data(self, index, role=QtCore.Qt.ItemDataRole.DisplayRole):
+        if role == QtCore.Qt.ItemDataRole.DisplayRole:
+            return self.utterances[index.row()]
+
+
+class FileSelectionModel(QtCore.QItemSelectionModel):
+    fileAboutToChange = QtCore.Signal()
     fileChanged = QtCore.Signal()
     channelChanged = QtCore.Signal()
     resetView = QtCore.Signal()
-    fileAboutToChange = QtCore.Signal()
     viewChanged = QtCore.Signal(object, object)
     selectionAudioChanged = QtCore.Signal()
     currentTimeChanged = QtCore.Signal(object)
     currentUtteranceChanged = QtCore.Signal()
+    speakerRequested = QtCore.Signal(object)
+
+    spectrogramReady = QtCore.Signal()
+    waveformReady = QtCore.Signal()
+    pitchTrackReady = QtCore.Signal()
 
     def __init__(self, *args, **kwargs):
-        super(
+        super().__init__(*args, **kwargs)
         self.settings = AnchorSettings()
         self.min_time = 0
         self.max_time = 10
         self.selected_min_time = None
         self.selected_max_time = None
-        self.current_file: Optional[File] = None
         self.x = None
         self.y = None
-        self.
+        self.top_point = 2
+        self.bottom_point = 0
+        self.separator_point = 1
         self.selected_channel = 0
-
-
-        self.
-
-
-
-        self.
-        self.
-        self.
-        self.
+        self.spectrogram = None
+        self.min_db = None
+        self.max_db = None
+        self.pitch_track_x = None
+        self.pitch_track_y = None
+        self.waveform_x = None
+        self.waveform_y = None
+        self.requested_utterance_id = None
+        self.auto_waveform_worker = workers.AutoWaveformWorker()
+        self.spectrogram_worker = workers.SpectrogramWorker()
+        self.pitch_track_worker = workers.PitchWorker()
+        self.auto_waveform_worker.signals.result.connect(self.finalize_loading_auto_wave_form)
+        self.spectrogram_worker.signals.result.connect(self.finalize_loading_spectrogram)
+        self.pitch_track_worker.signals.result.connect(self.finalize_loading_pitch_track)
+        self.model().waveformReady.connect(self.load_audio_selection)
+        self.model().utterancesReady.connect(self.finalize_set_new_file)
+        self.viewChanged.connect(self.load_audio_selection)
+        self.model().selectionRequested.connect(self.update_selected_utterances)
+
+    def selected_utterances(self):
+        utts = []
+        m = self.model()
+        for index in self.selectedRows(0):
+            utt = m.utterances[index.row()]
+            utts.append(utt)
+        return utts
+
+    def load_audio_selection(self):
+        if self.model().y is None:
+            return
+        begin_samp = int(self.min_time * self.model().file.sample_rate)
+        end_samp = int(self.max_time * self.model().file.sample_rate)
+        if len(self.model().y.shape) > 1:
+            y = self.model().y[begin_samp:end_samp, self.selected_channel]
+        else:
+            y = self.model().y[begin_samp:end_samp]
+        self.spectrogram_worker.stop()
+        self.spectrogram_worker.set_params(
+            y,
+            self.model().file.sound_file.sample_rate,
+            self.min_time,
+            self.max_time,
+            self.selected_channel,
+        )
+        self.spectrogram_worker.start()
+        if self.max_time - self.min_time <= 10:
+            self.pitch_track_worker.stop()
+            self.pitch_track_worker.set_params(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+                self.bottom_point,
+                self.separator_point,
+            )
+            self.pitch_track_worker.start()
+        self.auto_waveform_worker.stop()
+        self.auto_waveform_worker.set_params(
+            y,
+            self.separator_point,
+            self.top_point,
+            self.min_time,
+            self.max_time,
+            self.selected_channel,
+        )
+        self.auto_waveform_worker.start()
+
+    def clean_up_for_close(self):
+        self.spectrogram_worker.stop()
+        self.pitch_track_worker.stop()
+        self.auto_waveform_worker.stop()
 
     @property
     def plot_min(self):
```
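Note that every mutating method in the new `FileUtterancesModel` above (create, split, merge, delete, text and speaker updates) emits a command object through `addCommand` rather than editing state in place, which is what keeps each edit undoable. A minimal sketch of that command pattern, assuming the standard Qt undo framework (the real commands live in `anchor/undo.py`, which this page only summarizes as `+103 -134`, so this body is illustrative rather than the package's actual implementation):

```python
from PySide6 import QtGui


class UpdateUtteranceTextCommand(QtGui.QUndoCommand):
    """Hypothetical stand-in for anchor.undo.UpdateUtteranceTextCommand."""

    def __init__(self, utterance, new_text, model):
        super().__init__("update utterance text")
        self.utterance = utterance
        self.model = model
        self.old_text = utterance.text  # captured so undo() can restore it
        self.new_text = new_text

    def redo(self):  # runs when first pushed onto the stack, and again on redo
        self.utterance.text = self.new_text
        self.model.corpus_model.update_utterance_table_row(self.utterance)

    def undo(self):
        self.utterance.text = self.old_text
        self.model.corpus_model.update_utterance_table_row(self.utterance)
```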
```diff
@@ -193,6 +635,292 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             return -self.min_time
         return self.max_time
 
+    def finalize_loading_spectrogram(self, results):
+        stft, channel, begin, end, min_db, max_db = results
+        if self.settings.right_to_left:
+            stft = np.flip(stft, 1)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        self.spectrogram = stft
+        self.min_db = self.min_db
+        self.max_db = self.max_db
+        self.spectrogramReady.emit()
+
+    def finalize_loading_pitch_track(self, results):
+        pitch_track, voicing_track, channel, begin, end, min_f0, max_f0 = results
+        if self.settings.right_to_left:
+            pitch_track = np.flip(pitch_track, 0)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        self.pitch_track_y = pitch_track
+        if pitch_track is None:
+            return
+        x = np.linspace(
+            start=self.plot_min,
+            stop=self.plot_max,
+            num=pitch_track.shape[0],
+        )
+        self.pitch_track_x = x
+        self.pitchTrackReady.emit()
+
+    def finalize_loading_auto_wave_form(self, results):
+        y, begin, end, channel = results
+        if self.settings.right_to_left:
+            y = np.flip(y, 0)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        x = np.linspace(start=self.plot_min, stop=self.plot_max, num=y.shape[0])
+        self.waveform_x = x
+        self.waveform_y = y
+        self.waveformReady.emit()
+
+    def select_audio(self, begin, end):
+        if end is not None and end - begin < 0.025:
+            end = None
+        self.selected_min_time = begin
+        self.selected_max_time = end
+        self.selectionAudioChanged.emit()
+
+    def request_start_time(self, start_time):
+        if start_time >= self.max_time:
+            return
+        if start_time < self.min_time:
+            return
+        self.selected_min_time = start_time
+        self.selected_max_time = None
+        self.selectionAudioChanged.emit()
+
+    def set_current_channel(self, channel):
+        if channel == self.selected_channel:
+            return
+        self.selected_channel = channel
+        self.load_audio_selection()
+
+    def get_selected_wave_form(self):
+        if self.y is None:
+            return None, None
+        if len(self.y.shape) > 1 and self.y.shape[0] == 2:
+            return self.x, self.y[self.selected_channel, :]
+        return self.x, self.y
+
+    def zoom(self, factor, mid_point=None):
+        if factor == 0 or self.min_time is None:
+            return
+        cur_duration = self.max_time - self.min_time
+        if mid_point is None:
+            mid_point = self.min_time + (cur_duration / 2)
+        new_duration = cur_duration / factor
+        new_begin = mid_point - (mid_point - self.min_time) / factor
+        new_begin = max(new_begin, 0)
+        new_end = min(new_begin + new_duration, self.model().file.duration)
+        if new_end - new_begin <= 0.025:
+            return
+        self.set_view_times(new_begin, new_end)
+
+    def pan(self, factor):
+        if self.min_time is None:
+            return
+        if factor < 1:
+            factor = 1 - factor
+            right = True
+        else:
+            right = False
+            factor = factor - 1
+        if right and self.max_time == self.model().file.duration:
+            return
+        if not right and self.min_time == 0:
+            return
+        cur_duration = self.max_time - self.min_time
+        shift = factor * cur_duration
+        if right:
+            new_begin = self.min_time + shift
+            new_end = self.max_time + shift
+        else:
+            new_begin = self.min_time - shift
+            new_end = self.max_time - shift
+        if new_begin < 0:
+            new_end = new_end + abs(new_begin)
+            new_begin = 0
+        if new_end > self.model().file.duration:
+            new_begin -= self.model().file.duration - new_end
+            new_end = self.model().file.duration
+        self.set_view_times(new_begin, new_end)
+
+    def zoom_in(self):
+        if self.model().file is None:
+            return
+        self.zoom(1.5)
+
+    def zoom_out(self):
+        if self.model().file is None:
+            return
+        self.zoom(0.5)
+
+    def zoom_to_selection(self):
+        if self.selected_min_time is not None and self.selected_max_time is not None:
+            self.set_view_times(self.selected_min_time, self.selected_max_time)
+
+    def update_from_slider(self, value):
+        if not self.max_time:
+            return
+        cur_window = self.max_time - self.min_time
+        self.set_view_times(value, value + cur_window)
+
+    def update_selection_audio(self, begin, end):
+        if begin < self.min_time:
+            begin = self.min_time
+        if end > self.max_time:
+            end = self.max_time
+        self.selected_min_time = begin
+        self.selected_max_time = end
+        self.selectionAudioChanged.emit()
+
+    def visible_utterances(self) -> typing.List[Utterance]:
+        file_utts = []
+        if not self.model().file:
+            return file_utts
+        if self.model().rowCount() > 1:
+            for u in self.model().utterances:
+                if u.begin >= self.max_time:
+                    break
+                if u.end <= self.min_time:
+                    continue
+                file_utts.append(u)
+        else:
+            file_utts.extend(self.model().utterances)
+        return file_utts
+
+    def model(self) -> FileUtterancesModel:
+        return super().model()
+
+    def set_view_times(self, begin, end):
+        begin = max(begin, 0)
+        end = min(end, self.model().file.duration)
+        if (begin, end) == (self.min_time, self.max_time):
+            return
+        self.min_time = begin
+        self.max_time = end
+        if (
+            self.selected_max_time is not None
+            and not self.min_time <= self.selected_min_time <= self.max_time
+        ):
+            self.selected_min_time = self.min_time
+        if (
+            self.selected_max_time is not None
+            and not self.min_time <= self.selected_max_time <= self.max_time
+        ):
+            self.selected_max_time = None
+        self.viewChanged.emit(self.min_time, self.max_time)
+
+    def set_current_file(self, info, force_update=False):
+        file_id, begin, end, utterance_id, speaker_id = info
+        try:
+            new_file = self.model().file is None or self.model().file.id != file_id
+        except sqlalchemy.orm.exc.DetachedInstanceError:
+            new_file = True
+        self.requested_utterance_id = utterance_id
+        if new_file:
+            self.fileAboutToChange.emit()
+            self.model().set_file(file_id)
+            self.speakerRequested.emit(speaker_id)
+        else:
+            self.finalize_set_new_file()
+            self.speakerRequested.emit(speaker_id)
+        self.set_view_times(begin, end)
+
+    def finalize_set_new_file(self):
+        if self.requested_utterance_id is None:
+            return
+        utterance = self.model().get_utterance(self.requested_utterance_id)
+        if utterance is None:
+            return
+        self.update_select(self.requested_utterance_id, reset=True)
+        self.selected_channel = 0
+        if utterance is not None and utterance.channel is not None:
+            self.selected_channel = utterance.channel
+        self.fileChanged.emit()
+
+    def checkSelected(self, utterance_id: int):
+        m = self.model()
+        for index in self.selectedRows(0):
+            if utterance_id == m._indices[index.row()]:
+                return True
+        return False
+
+    def update_selected_utterances(self, utterances):
+        super().clearSelection()
+        super().clearCurrentIndex()
+        if not utterances:
+            return
+        flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+        flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        for u in utterances:
+            if u.id not in self.model().reversed_indices:
+                continue
+            row = self.model().reversed_indices[u.id]
+
+            index = self.model().index(row, 0)
+            if not index.isValid():
+                return
+            self.select(index, flags)
+        self.currentUtteranceChanged.emit()
+
+    def update_select(self, utterance_id: int, deselect=False, reset=False):
+        if reset and [x.id for x in self.selected_utterances()] == [utterance_id]:
+            return
+        flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+        if reset:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.ClearAndSelect
+        elif deselect:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.Deselect
+        else:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        if utterance_id not in self.model().reversed_indices:
+            return
+        row = self.model().reversed_indices[utterance_id]
+
+        index = self.model().index(row, 0)
+        if not index.isValid():
+            return
+        self.select(index, flags)
+        if not deselect:
+            self.select_audio(self.model().utterances[row].begin, self.model().utterances[row].end)
+        self.currentUtteranceChanged.emit()
+
+
+class CorpusSelectionModel(QtCore.QItemSelectionModel):
+    fileChanged = QtCore.Signal()
+    channelChanged = QtCore.Signal()
+    resetView = QtCore.Signal()
+    fileAboutToChange = QtCore.Signal()
+    fileViewRequested = QtCore.Signal(object)
+    selectionAudioChanged = QtCore.Signal()
+    currentTimeChanged = QtCore.Signal(object)
+    currentUtteranceChanged = QtCore.Signal()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.settings = AnchorSettings()
+        self.min_time = 0
+        self.max_time = 10
+        self.selected_min_time = None
+        self.selected_max_time = None
+        self.x = None
+        self.y = None
+        self.current_utterance_id = None
+        self.selected_channel = 0
+        # self.viewChanged.connect(self.update_selected_waveform)
+        # self.fileChanged.connect(self.update_selected_waveform)
+        self.currentRowChanged.connect(self.switch_utterance)
+        # self.selectionChanged.connect(self.update_selection_audio)
+        # self.selectionChanged.connect(self.update_selection_audio)
+        # self.model().changeCommandFired.connect(self.expire_current)
+        self.model().layoutChanged.connect(self.check_selection)
+        self.model().unlockCorpus.connect(self.fileChanged.emit)
+
     def set_current_utterance(self, utterance_id):
         self.current_utterance_id = utterance_id
         self.currentUtteranceChanged.emit()
@@ -203,13 +931,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         elif self.model().rowCount() == 0:
             self.clearSelection()
 
-    def set_current_channel(self, channel):
-        self.selected_channel = channel
-        self.channelChanged.emit()
-
     def clearSelection(self) -> None:
         self.fileAboutToChange.emit()
-        self.current_file = None
         self.current_utterance_id = None
         self.min_time = None
         self.max_time = None
@@ -219,22 +942,6 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         super(CorpusSelectionModel, self).clearSelection()
         self.fileChanged.emit()
 
-    def update_selected_wavform(self, *args):
-        if self.min_time is None or self.current_file is None:
-            self.x = None
-            self.y = None
-        else:
-            self.x, self.y = self.current_file.sound_file.normalized_waveform(
-                self.min_time, self.max_time
-            )
-
-    def get_selected_wave_form(self):
-        if self.y is None:
-            return None, None
-        if len(self.y.shape) > 1 and self.y.shape[0] == 2:
-            return self.x, self.y[self.selected_channel, :]
-        return self.x, self.y
-
     def update_select_rows(self, rows: list[int]):
         super(CorpusSelectionModel, self).clearCurrentIndex()
         super(CorpusSelectionModel, self).clearSelection()
@@ -250,8 +957,29 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
                 | QtCore.QItemSelectionModel.SelectionFlag.Rows,
             )
 
+    def update_selected_utterances(self, utterances):
+        if not utterances:
+            return
+        first = True
+        for u in utterances:
+            if u.id not in self.model().reversed_indices:
+                continue
+            row = self.model().reversed_indices[u.id]
+
+            index = self.model().index(row, 0)
+            if not index.isValid():
+                return
+            if not first:
+                flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+                flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+            else:
+                flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+                flags |= QtCore.QItemSelectionModel.SelectionFlag.ClearAndSelect
+            first = False
+            self.select(index, flags)
+
     def update_select(self, utterance_id: int, deselect=False, reset=False, focus=False):
-        if reset and
+        if reset and self.selected_utterances() == [utterance_id]:
             return
         flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
         if reset:
@@ -266,58 +994,13 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         if focus:
             flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
             if row == self.currentIndex().row():
-                self.
+                self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
         index = self.model().index(row, 0)
         if not index.isValid():
             return
         self.select(index, flags)
 
-    def select_audio(self, begin, end):
-        if end is not None and end - begin < 0.025:
-            end = None
-        self.selected_min_time = begin
-        self.selected_max_time = end
-        self.selectionAudioChanged.emit()
-
-    def request_start_time(self, start_time):
-        if start_time >= self.max_time:
-            return
-        if start_time < self.min_time:
-            return
-        self.selected_min_time = start_time
-        self.selected_max_time = None
-        self.selectionAudioChanged.emit()
-
-    def visible_utts(self) -> typing.List[Utterance]:
-        file_utts = []
-        if not self.current_file:
-            return file_utts
-        if self.current_file.num_utterances > 1:
-            for u in sorted(self.current_file.utterances, key=lambda x: x.begin):
-                if u.begin >= self.max_time:
-                    break
-                if u.end <= self.min_time:
-                    continue
-                file_utts.append(u)
-        else:
-            file_utts.extend(self.current_file.utterances)
-        return file_utts
-
-    def currentUtterance(self) -> Optional[Utterance]:
-        if self.current_utterance_id is None:
-            return
-        m = self.model()
-        utterance = (
-            m.session.query(Utterance)
-            .options(
-                joinedload(Utterance.file).joinedload(File.sound_file),
-                joinedload(Utterance.file).subqueryload(File.speakers),
-            )
-            .get(self.current_utterance_id)
-        )
-        return utterance
-
     def _update_selection(self):
         index = self.currentIndex()
         if not index.isValid():
@@ -326,22 +1009,20 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         self.current_utterance_id = m._indices[index.row()]
         self.currentUtteranceChanged.emit()
 
-    def
-        current_utterance = self.
+    def selected_utterances(self):
+        current_utterance = self.current_utterance_id
         if current_utterance is None:
             return []
         utts = [current_utterance]
         m = self.model()
         for index in self.selectedRows(1):
-            if current_utterance is not None and m._indices[index.row()] == current_utterance
+            if current_utterance is not None and m._indices[index.row()] == current_utterance:
                 continue
-            utt = m.
+            utt = m.utterance_id_at(index)
             if utt is None:
                 continue
            if current_utterance is None:
                current_utterance = utt
-            if utt.file_id != current_utterance.file_id:
-                continue
             utts.append(utt)
         return utts
 
@@ -356,206 +1037,40 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         text = m.data(m.index(index.row(), m.text_column), QtCore.Qt.ItemDataRole.DisplayRole)
         return text
 
-    def zoom(self, factor, mid_point=None):
-        if factor == 0 or self.min_time is None:
-            return
-        cur_duration = self.max_time - self.min_time
-        if mid_point is None:
-            mid_point = self.min_time + (cur_duration / 2)
-        new_duration = cur_duration / factor
-        new_begin = mid_point - (mid_point - self.min_time) / factor
-        new_begin = max(new_begin, 0)
-        new_end = min(new_begin + new_duration, self.current_file.duration)
-        if new_end - new_begin <= 0.025:
-            return
-        self.set_view_times(new_begin, new_end)
-
-    def pan(self, factor):
-        if self.min_time is None:
-            return
-        if factor < 1:
-            factor = 1 - factor
-            right = True
-        else:
-            right = False
-            factor = factor - 1
-        if right and self.max_time == self.current_file.duration:
-            return
-        if not right and self.min_time == 0:
-            return
-        cur_duration = self.max_time - self.min_time
-        shift = factor * cur_duration
-        if right:
-            new_begin = self.min_time + shift
-            new_end = self.max_time + shift
-        else:
-            new_begin = self.min_time - shift
-            new_end = self.max_time - shift
-        if new_begin < 0:
-            new_end = new_end + abs(new_begin)
-            new_begin = 0
-        if new_end > self.current_file.duration:
-            new_begin -= self.current_file.duration - new_end
-            new_end = self.current_file.duration
-        self.set_view_times(new_begin, new_end)
-
-    def zoom_in(self):
-        if self.current_file is None:
-            return
-        self.zoom(1.5)
-
-    def zoom_out(self):
-        if self.current_file is None:
-            return
-        self.zoom(0.5)
-
-    def zoom_to_selection(self):
-        if self.selected_min_time is None or self.selected_max_time is None:
-            rows = self.selectedRows(1)
-            if not rows:
-                return
-            begin = None
-            end = None
-            for r in rows:
-                u = self.model().utteranceAt(r)
-                if u is None:
-                    continue
-                if u.file_id != self.current_file.id:
-                    continue
-                if begin is None or begin > u.begin:
-                    begin = u.begin
-                if end is None or end < u.end:
-                    end = u.end
-            self.set_view_times(begin, end)
-        else:
-            self.set_view_times(self.selected_min_time, self.selected_max_time)
-
-    def update_from_slider(self, value):
-        if not self.max_time:
-            return
-        cur_window = self.max_time - self.min_time
-        self.set_view_times(value, value + cur_window)
-
-    def update_selection_audio(self):
-        begins = self.selectedRows(self.model().begin_column)
-        ends = self.selectedRows(self.model().end_column)
-        begin = None
-        end = None
-        if len(begins) > 0:
-            for i, b in enumerate(begins):
-                b = self.model().data(b, QtCore.Qt.ItemDataRole.DisplayRole)
-                e = self.model().data(ends[i], QtCore.Qt.ItemDataRole.DisplayRole)
-                if begin is None or begin > b:
-                    begin = b
-                if end is None or end < e:
-                    end = e
-            if self.current_file is None or begin > self.current_file.duration:
-                begin = None
-                end = None
-            elif end > self.current_file.duration:
-                end = self.current_file.duration
-        self.selected_min_time = begin
-        self.selected_max_time = end
-        self.selectionAudioChanged.emit()
-
     def switch_utterance(self, new_index, old_index):
+        if not self.model().fully_loaded:
+            return
         if not isinstance(new_index, QtCore.QModelIndex):
             row = 0
         else:
             if not new_index.isValid():
                 return
             row = new_index.row()
-        utt = self.model().
+        utt = self.model().utterance_id_at(row)
         if utt is None:
             return
-        if utt
+        if utt == self.current_utterance_id:
             return
-        self.current_utterance_id = utt
+        self.current_utterance_id = utt
         self.currentUtteranceChanged.emit()
-        self.
-            utt.file_id, utt.begin, utt.end, channel=utt.channel, force_update=True
-        )
-
-    def update_view_times(self, *args, force_update=False):
-        utts = self.selectedUtterances()
-        if len(utts) == 0:
-            self.resetView.emit()
-            return
-        if len(utts) == 1:
-            force_update = True
-        begin = utts[0].begin
-        f_id = utts[0].file_id
-        end_ind = -1
-        while True:
-            if utts[end_ind].file_id == f_id:
-                end = utts[end_ind].end
-                break
-        self.set_current_file(f_id, begin, end, channel=utts[0].channel, force_update=force_update)
-        self.selected_min_time = self.min_time
+        self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
     def model(self) -> CorpusModel:
         return super(CorpusSelectionModel, self).model()
 
-    def
-        m = self.model()
-        for index in self.selectedRows(1):
-            if utterance.id == m._indices[index.row()]:
-                return True
-        return False
-
-    def set_current_file(self, file_id, begin=None, end=None, channel=None, force_update=False):
-        try:
-            new_file = self.current_file is None or self.current_file.id != file_id
-        except sqlalchemy.orm.exc.DetachedInstanceError:
-            new_file = True
-        if new_file:
-            self.selected_min_time = None
-            self.selected_max_time = None
-            self.fileAboutToChange.emit()
-            self.selected_channel = 0 if channel is None else channel
-            self.current_file = (
-                self.model().session.query(File).options(joinedload(File.sound_file)).get(file_id)
-            )
-            self.min_time = begin
-            self.max_time = end
-            self.fileChanged.emit()
-        elif (
-            self.current_file is not None
-            and begin is not None
-            and end is not None
-            and force_update
-        ):
-            self.selected_channel = channel
-            self.set_view_times(begin, end)
-
-    def set_view_times(self, begin, end):
-        begin = max(begin, 0)
-        end = min(end, self.current_file.duration)
-        if (begin, end) == (self.min_time, self.max_time):
-            return
-        self.min_time = begin
-        self.max_time = end
-        self.selected_min_time = self.min_time
-        if self.selected_max_time is not None and self.selected_max_time > self.max_time:
-            self.selected_max_time = None
-        self.viewChanged.emit(self.min_time, self.max_time)
-
-    def focusUtterance(self, index):
+    def focus_utterance(self, index):
         m = self.model()
-
-
+        row = index.row()
+        utt_id = m.utterance_id_at(row)
+        if utt_id is None:
             self.min_time = 0
             self.max_time = 1
             self.fileAboutToChange()
-            self.current_file = None
             self.fileChanged.emit()
             return
-        self.
-
-
-        padding = 1
-        self.set_view_times(begin - padding, end + padding)
-        self.selectionAudioChanged.emit()
+        self.current_utterance_id = utt_id
+        self.currentUtteranceChanged.emit()
+        self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
 
 class OovModel(TableModel):
```
```diff
@@ -1450,6 +1965,7 @@ class CorpusModel(TableModel):
         self.speakers = {}
         self.speaker_id_mapping = {}
         self.utterances = None
+        self.session: sqlalchemy.orm.scoped_session = None
         self.utterance_count = 0
         self.speaker_count = 0
         self.file_count = 0
@@ -1494,29 +2010,46 @@ class CorpusModel(TableModel):
             return True
         return False
 
-    def update_utterance_table_row(self,
-        if
-
-
+    def update_utterance_table_row(self, utterance: typing.Union[int, Utterance]):
+        if isinstance(utterance, int):
+            utterance_id = utterance
+            if utterance_id not in self.reversed_indices:
+                return
+            utterance = self.session.query(Utterance).get(utterance_id)
+        else:
+            utterance_id = utterance.id
+            if utterance_id not in self.reversed_indices:
+                return
         index = self.reversed_indices[utterance_id]
         self.layoutAboutToBeChanged.emit()
         self._data[index][self.text_column] = utterance.text
         self._data[index][self.begin_column] = utterance.begin
         self._data[index][self.end_column] = utterance.end
-        self._data[index][self.duration_column] = utterance.
+        self._data[index][self.duration_column] = utterance.end - utterance.begin
+        self.layoutChanged.emit()
+
+    def change_speaker_table_utterances(self, utterances: typing.List[Utterance]):
+        self.layoutAboutToBeChanged.emit()
+        for u in utterances:
+            if u.id not in self.reversed_indices:
+                continue
+            index = self.reversed_indices[u.id]
+            self._speaker_indices[index] = u.speaker_id
+            self._data[index][self.speaker_column] = self.get_speaker_name(u.speaker_id)
         self.layoutChanged.emit()
 
     def add_table_utterances(self, utterances: typing.List[Utterance]):
         self.layoutAboutToBeChanged.emit()
         rows = []
         for utterance in utterances:
+            speaker_name = self.get_speaker_name(utterance.speaker_id)
             row_data = [
                 utterance.oovs,
                 utterance.file_name,
-
+                speaker_name,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.end - utterance.begin,
                 utterance.text,
             ]
             self._data.append(row_data)
@@ -1531,7 +2064,10 @@ class CorpusModel(TableModel):
     def delete_table_utterances(self, utterances: typing.List[Utterance]):
         self.layoutAboutToBeChanged.emit()
         for utterance in utterances:
-
+            try:
+                index = self.reversed_indices.pop(utterance.id)
+            except KeyError:
+                continue
             _ = self._data.pop(index)
             _ = self._indices.pop(index)
             _ = self._file_indices.pop(index)
@@ -1552,7 +2088,6 @@ class CorpusModel(TableModel):
 
         self.layoutAboutToBeChanged.emit()
         first = split_utterances[0]
-        self.session.merge(first)
         file_name = self._data[index][1]
         speaker_name = self._data[index][2]
         row_data = [
@@ -1561,7 +2096,7 @@ class CorpusModel(TableModel):
             speaker_name,
             first.begin,
             first.end,
-            first.
+            first.end - first.begin,
             first.text,
         ]
         self._data[index] = row_data
@@ -1571,7 +2106,6 @@ class CorpusModel(TableModel):
         self.reversed_indices[first.id] = index
         rows = [index]
         for utterance in split_utterances[1:]:
-            self.session.merge(utterance)
             index += 1
             rows.append(index)
             self.reversed_indices = {
@@ -1584,7 +2118,7 @@ class CorpusModel(TableModel):
                 speaker_name,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.end - utterance.begin,
                 utterance.text,
             ]
             self.reversed_indices[utterance.id] = index
@@ -1603,14 +2137,13 @@ class CorpusModel(TableModel):
         except KeyError:
             return
         self.layoutAboutToBeChanged.emit()
-        self.session.merge(merged_utterance)
         row_data = [
             merged_utterance.oovs,
             merged_utterance.file_name,
             merged_utterance.speaker_name,
             merged_utterance.begin,
             merged_utterance.end,
-            merged_utterance.
+            merged_utterance.end - merged_utterance.begin,
            merged_utterance.text,
         ]
         first = split_utterances[0]
@@ -1659,32 +2192,6 @@ class CorpusModel(TableModel):
         self.language_model = language_model
         self.languageModelChanged.emit()
 
-    def create_utterance(self, file: File, speaker: Optional[Speaker], begin: float, end: float):
-        if not self.editable:
-            return
-        channel = 0
-        if file.num_channels > 1:
-            ind = file.speaker_ordering.index(speaker)
-            if ind >= len(file.speaker_ordering) / 2:
-                channel = 1
-        if speaker is None:
-            speaker = self.corpus.add_speaker("speech", session=self.session)
-        begin = round(begin, 4)
-        end = round(end, 4)
-        text = ""
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        new_utt = Utterance(
-            id=next_pk,
-            speaker_id=speaker.id,
-            file_id=file.id,
-            begin=begin,
-            end=end,
-            channel=channel,
-            text=text,
-        )
-        self.addCommand.emit(undo.CreateUtteranceCommand(new_utt, self))
-        self.unsaved_files.add(file.id)
-
     def set_file_modified(self, file_id: typing.Union[int, typing.List[int]]):
         if isinstance(file_id, int):
             file_id = [file_id]
@@ -1699,32 +2206,6 @@ class CorpusModel(TableModel):
         )
         self.session.commit()
 
-    def update_utterance_text(self, utterance: Utterance, text):
-        if text != utterance.text:
-            self.addCommand.emit(undo.UpdateUtteranceTextCommand(utterance, text, self))
-            self.set_file_modified(utterance.file_id)
-
-    def update_utterance_times(
-        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
-    ):
-        if not self.editable:
-            return
-        self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
-        self.set_file_modified(utterance.file_id)
-
-    def update_utterance_speaker(self, utterance: Utterance, speaker: Speaker):
-        if not self.editable:
-            return
-        self.addCommand.emit(undo.UpdateUtteranceSpeakerCommand(utterance, speaker, self))
-
-    def delete_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        for u in utterances:
-            self.set_file_modified(u.file_id)
-            self.set_speaker_modified(u.speaker_id)
-        self.addCommand.emit(undo.DeleteUtteranceCommand(utterances, self))
-
     def check_align_lexicon_compiler(self):
         if self.acoustic_model is None:
             return
@@ -1743,150 +2224,13 @@ class CorpusModel(TableModel):
             dictionary_id, self.acoustic_model, disambiguation=True
         )
 
-    def split_vad_utterance(
-        self, original_utterance_id, replacement_utterance_data: typing.List[KalpyUtterance]
-    ):
-        utt = self.session.get(Utterance, original_utterance_id)
-        replacement_utterances = []
-        speaker_id = utt.speaker_id
-        file_id = utt.file_id
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        for new_utt in replacement_utterance_data:
-            replacement_utterances.append(
-                Utterance(
-                    id=next_pk,
-                    begin=new_utt.segment.begin,
-                    end=new_utt.segment.end,
-                    speaker_id=speaker_id,
-                    file_id=file_id,
-                    text=new_utt.transcript,
-                    normalized_text=new_utt.transcript,
-                    features="",
-                    in_subset=False,
-                    ignored=False,
-                    channel=new_utt.segment.channel,
-                )
-            )
-            next_pk += 1
-        splitting_utterances = [[utt, *replacement_utterances]]
-        self.addCommand.emit(
-            undo.SplitUtteranceCommand(splitting_utterances, self, update_table=False)
-        )
-        self.requestFileView.emit(utt.file_name)
-        self.set_file_modified(file_id)
-        self.set_speaker_modified(speaker_id)
-
-    def split_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        splitting_utterances = []
-        for utt in utterances:
-            duration = utt.duration
-            beg = utt.begin
-            end = utt.end
-            first_text = ""
-            second_text = ""
-            if " " not in utt.text and " " in utt.normalized_text:
-                t = utt.normalized_text.split()
-                mid_ind = int(len(t) / 2)
-                first_text = t[:mid_ind]
-                second_text = t[mid_ind:]
-            elif utt.text:
-                t = utt.text.split()
-                mid_ind = int(len(t) / 2)
-                first_text = t[:mid_ind]
-                second_text = t[mid_ind:]
-            split_time = beg + (duration / 2)
-            oovs = set()
-            for w in first_text:
-                if not self.dictionary_model.check_word(w, utt.speaker_id):
-                    oovs.add(w)
-            next_pk = self.corpus.get_next_primary_key(Utterance)
-            first_utt = Utterance(
-                id=next_pk,
-                speaker_id=utt.speaker_id,
-                file_id=utt.file_id,
-                begin=beg,
-                end=split_time,
-                channel=utt.channel,
-                text=" ".join(first_text),
-                normalized_text=" ".join(first_text),
-                oovs=" ".join(oovs),
-            )
-            next_pk += 1
-            oovs = set()
-            for w in second_text:
-                if not self.dictionary_model.check_word(w, utt.speaker_id):
-                    oovs.add(w)
-            second_utt = Utterance(
-                id=next_pk,
-                speaker_id=utt.speaker_id,
-                file_id=utt.file_id,
-                begin=split_time,
-                end=end,
-                channel=utt.channel,
-                text=" ".join(second_text),
-                normalized_text=" ".join(second_text),
-                oovs=" ".join(oovs),
-            )
-            splitting_utterances.append([utt, first_utt, second_utt])
-        self.addCommand.emit(undo.SplitUtteranceCommand(splitting_utterances, self))
-        self.set_file_modified([utt[0].file_id for utt in splitting_utterances])
-
     def merge_speakers(self, speakers: list[int]):
         self.addCommand.emit(undo.MergeSpeakersCommand(speakers, self))
 
-    def merge_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        min_begin = 1000000000
-        max_end = 0
-        text = ""
-        normalized_text = ""
-        speaker = None
-        file = None
-        channel = None
-        for old_utt in sorted(utterances, key=lambda x: x.begin):
-            if speaker is None:
-                speaker = old_utt.speaker
-            if file is None:
-                file = old_utt.file
-            if channel is None:
-                channel = old_utt.channel
-            if old_utt.begin < min_begin:
-                min_begin = old_utt.begin
-            if old_utt.end > max_end:
-                max_end = old_utt.end
-            utt_text = old_utt.text
-            if utt_text == "speech" and text.strip() == "speech":
-                continue
-            text += utt_text + " "
-            normalized_text += old_utt.normalized_text + " "
-        text = text[:-1]
-        normalized_text = normalized_text[:-1]
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        oovs = set()
-        for w in text.split():
-            if not self.dictionary_model.check_word(w, speaker.id):
-                oovs.add(w)
-        new_utt = Utterance(
-            id=next_pk,
-            speaker=speaker,
-            file=file,
-            begin=min_begin,
-            end=max_end,
-            channel=channel,
-            text=text,
-            normalized_text=normalized_text,
-            oovs=" ".join(oovs),
-        )
-        self.set_file_modified(file.id)
-        self.addCommand.emit(undo.MergeUtteranceCommand(utterances, new_utt, self))
-
     def replace_all(self, search_query: TextFilterQuery, replacement: str):
         self.addCommand.emit(undo.ReplaceAllCommand(search_query, replacement, self))
 
-    def
+    def utterance_id_at(self, index) -> Optional[Utterance]:
         if not isinstance(index, int):
             if not index.isValid():
                 return None
@@ -1895,15 +2239,16 @@ class CorpusModel(TableModel):
                 return None
         if len(self._indices) == 0:
             return None
-
-
-
-
-
-
-
+        return self._indices[index]
+
+    def audio_info_for_utterance(self, row: int):
+        return (
+            self._file_indices[row],
+            self._data[row][self.begin_column],
+            self._data[row][self.end_column],
+            self._indices[row],
+            self._speaker_indices[row],
         )
-        return utterance
 
     def fileAt(self, index) -> int:
         if not isinstance(index, int):
```