Anchor-annotator 0.0.11__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {Anchor_annotator-0.0.11.dist-info → Anchor_annotator-0.2.0.dist-info}/METADATA +2 -2
- Anchor_annotator-0.2.0.dist-info/RECORD +21 -0
- {Anchor_annotator-0.0.11.dist-info → Anchor_annotator-0.2.0.dist-info}/WHEEL +1 -1
- anchor/_version.py +2 -2
- anchor/main.py +77 -50
- anchor/models.py +827 -455
- anchor/plot.py +471 -412
- anchor/settings.py +9 -1
- anchor/ui_preferences.py +78 -54
- anchor/undo.py +173 -128
- anchor/widgets.py +56 -48
- anchor/workers.py +61 -36
- Anchor_annotator-0.0.11.dist-info/RECORD +0 -21
- {Anchor_annotator-0.0.11.dist-info → Anchor_annotator-0.2.0.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.0.11.dist-info → Anchor_annotator-0.2.0.dist-info}/top_level.txt +0 -0
anchor/models.py
CHANGED
@@ -31,7 +31,7 @@ from montreal_forced_aligner.utils import mfa_open
 from PySide6 import QtCore
 from sqlalchemy.orm import joinedload
 
-from anchor import undo
+from anchor import undo, workers
 from anchor.settings import AnchorSettings
 
 
@@ -148,23 +148,766 @@ class TableModel(QtCore.QAbstractTableModel):
         return len(self._header_data)
 
 
+class FileUtterancesModel(QtCore.QAbstractListModel):
+    addCommand = QtCore.Signal(object)
+    selectionRequested = QtCore.Signal(object)
+
+    waveformReady = QtCore.Signal()
+    utterancesReady = QtCore.Signal()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.utterances = []
+        self.file = None
+        self.y = None
+        self.speakers = []
+        self._indices = []
+        self._speaker_indices = []
+        self.reversed_indices = {}
+        self.speaker_channel_mapping = {}
+        self.corpus_model: CorpusModel = None
+        self.waveform_worker = workers.WaveformWorker()
+        self.speaker_tier_worker = workers.SpeakerTierWorker()
+        self.speaker_tier_worker.signals.result.connect(self.finalize_loading_utterances)
+        self.waveform_worker.signals.result.connect(self.finalize_loading_wave_form)
+
+    def get_utterance(self, utterance_id: int) -> Utterance:
+        try:
+            return self.utterances[self.reversed_indices[utterance_id]]
+        except KeyError:
+            return None
+
+    def set_corpus_model(self, corpus_model: CorpusModel):
+        self.corpus_model = corpus_model
+
+    def clean_up_for_close(self):
+        self.waveform_worker.stop()
+        self.speaker_tier_worker.stop()
+
+    def set_file(self, file_id):
+        self.file = (
+            self.corpus_model.session.query(File).options(joinedload(File.sound_file)).get(file_id)
+        )
+        self.y = None
+        self.get_utterances()
+        self.waveform_worker.stop()
+        self.waveform_worker.set_params(self.file.sound_file.sound_file_path)
+        self.waveform_worker.start()
+
+    def finalize_loading_utterances(self, results):
+        utterances, file_id = results
+        if file_id != self.file.id:
+            return
+        self.utterances = utterances
+        for i, u in enumerate(utterances):
+            if u.speaker_id not in self.speakers:
+                self.speakers.append(u.speaker_id)
+            self._speaker_indices.append(u.speaker_id)
+            self.reversed_indices[u.id] = i
+            self._indices.append(u.id)
+            if self.file.num_channels > 1 and u.speaker_id not in self.speaker_channel_mapping:
+                self.speaker_channel_mapping[u.speaker_id] = u.channel
+        self.utterancesReady.emit()
+
+    def finalize_loading_wave_form(self, results):
+        y, file_path = results
+        if self.file is None or file_path != self.file.sound_file.sound_file_path:
+            return
+        self.y = y
+        self.waveformReady.emit()
+
+    def get_utterances(self):
+        parent_index = self.index(0, 0)
+        self.beginRemoveRows(parent_index, 0, len(self.utterances))
+        self.utterances = []
+        self.speakers = []
+        self._indices = []
+        self._speaker_indices = []
+        self.speaker_channel_mapping = {}
+        self.reversed_indices = {}
+        self.endRemoveRows()
+        if self.file is None:
+            return
+        self.speaker_tier_worker.stop()
+        self.speaker_tier_worker.query_alignment = (
+            self.corpus_model.has_alignments
+            or self.corpus_model.has_reference_alignments
+            or self.corpus_model.has_transcribed_alignments
+        )
+        self.speaker_tier_worker.session = self.corpus_model.session
+        self.speaker_tier_worker.set_params(self.file.id)
+        self.speaker_tier_worker.start()
+
+    def create_utterance(self, speaker_id: Optional[int], begin: float, end: float):
+        if not self.corpus_model.editable:
+            return
+        channel = 0
+        if speaker_id is None:
+            speaker_id = self.corpus_model.corpus.add_speaker(
+                "speech", session=self.corpus_model.session
+            ).id
+        if self.file.num_channels > 1:
+            if speaker_id not in self.speaker_channel_mapping:
+                self.speaker_channel_mapping[speaker_id] = 0
+            channel = self.speaker_channel_mapping[speaker_id]
+        begin = round(begin, 4)
+        end = round(end, 4)
+        text = ""
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        new_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            file=self.file,
+            begin=begin,
+            end=end,
+            channel=channel,
+            text=text,
+            normalized_text=text,
+            oovs=text,
+        )
+        print(new_utt.id, new_utt.speaker_id, new_utt.file_id, new_utt.begin, new_utt.end)
+        self.addCommand.emit(undo.CreateUtteranceCommand(new_utt, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+
+    def add_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            if len(self.utterances) > 0:
+                for i, u in enumerate(self.utterances):
+                    if u.begin < utterance.begin:
+                        continue
+                    break
+                else:
+                    i = len(self.utterances) - 1
+            else:
+                i = 0
+            parent_index = self.index(i, 0)
+            self.beginInsertRows(parent_index, i, i + 1)
+            self.utterances.insert(i, utterance)
+            self._indices.insert(i, utterance.id)
+            self._speaker_indices.insert(i, utterance.speaker_id)
+            self.endInsertRows()
+        self.reversed_indices = {u: j for j, u in enumerate(self._indices)}
+        self.selectionRequested.emit(utterances)
+
+    def delete_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            try:
+                index = self.reversed_indices.pop(utterance.id)
+            except KeyError:
+                continue
+            parent_index = self.index(index, 0)
+            self.beginRemoveRows(parent_index, index, index + 1)
+            _ = self.utterances.pop(index)
+            _ = self._indices.pop(index)
+            _ = self._speaker_indices.pop(index)
+            self.reversed_indices = {u: j for j, u in enumerate(self._indices)}
+            self.endRemoveRows()
+        self.selectionRequested.emit(None)
+
+    def change_speaker_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            try:
+                index = self.reversed_indices[utterance.id]
+            except KeyError:
+                continue
+            if utterance.speaker_id not in self.speakers:
+                self.speakers.append(utterance.speaker_id)
+                self.speaker_channel_mapping[utterance.speaker_id] = utterance.channel
+            self._speaker_indices[index] = utterance.speaker_id
+
+    def merge_table_utterances(
+        self, merged_utterance: Utterance, split_utterances: typing.List[Utterance]
+    ):
+        self.delete_table_utterances(split_utterances)
+        self.add_table_utterances([merged_utterance])
+
+    def split_table_utterances(
+        self, merged_utterance: Utterance, split_utterances: typing.List[Utterance]
+    ):
+        self.delete_table_utterances([merged_utterance])
+        self.add_table_utterances(split_utterances)
+
+    def update_utterance_text(self, utterance: Utterance, text):
+        if not self.corpus_model.editable:
+            return
+        if text != utterance.text:
+            self.addCommand.emit(undo.UpdateUtteranceTextCommand(utterance, text, self))
+            self.corpus_model.set_file_modified(self.file.id)
+
+    def refresh_utterances(self):
+        for utterance in self.utterances:
+            self.corpus_model.session.refresh(utterance)
+
+    def update_utterance_speaker(self, utterance: Utterance, speaker_id: int):
+        if not self.corpus_model.editable:
+            return
+        old_speaker_id = utterance.speaker_id
+        if old_speaker_id == speaker_id:
+            return
+        self.addCommand.emit(undo.UpdateUtteranceSpeakerCommand(utterance, speaker_id, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.corpus_model.set_speaker_modified(old_speaker_id)
+
+    def update_utterance_times(
+        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
+    ):
+        if not self.corpus_model.editable:
+            return
+        if utterance.begin == begin and utterance.end == end:
+            return
+        self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
+        self.corpus_model.set_file_modified(self.file.id)
+
+    def split_vad_utterance(
+        self, original_utterance_id, replacement_utterance_data: typing.List[KalpyUtterance]
+    ):
+        if not replacement_utterance_data:
+            return
+        utt = self.utterances[self.reversed_indices[original_utterance_id]]
+        replacement_utterances = []
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        speaker_id = utt.speaker_id
+        for new_utt in replacement_utterance_data:
+            replacement_utterances.append(
+                Utterance(
+                    id=next_pk,
+                    begin=new_utt.segment.begin,
+                    end=new_utt.segment.end,
+                    speaker_id=speaker_id,
+                    file_id=self.file.id,
+                    text=new_utt.transcript,
+                    normalized_text=new_utt.transcript,
+                    features="",
+                    in_subset=False,
+                    ignored=False,
+                    channel=new_utt.segment.channel,
+                )
+            )
+            next_pk += 1
+        self.addCommand.emit(
+            undo.SplitUtteranceCommand(utt, replacement_utterances, self, update_table=False)
+        )
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+
+    def split_utterances(self, utterance: Utterance):
+        if not self.corpus_model.editable:
+            return
+        beg = utterance.begin
+        end = utterance.end
+        duration = end - beg
+        first_text = []
+        second_text = []
+        speaker_id = utterance.speaker_id
+        if (
+            utterance.text
+            and utterance.normalized_text
+            and " " not in utterance.text
+            and " " in utterance.normalized_text
+        ):
+            t = utterance.normalized_text.split()
+            mid_ind = int(len(t) / 2)
+            first_text = t[:mid_ind]
+            second_text = t[mid_ind:]
+        elif utterance.text:
+            t = utterance.text.split()
+            mid_ind = int(len(t) / 2)
+            first_text = t[:mid_ind]
+            second_text = t[mid_ind:]
+        split_time = beg + (duration / 2)
+        oovs = set()
+        for w in first_text:
+            if not self.corpus_model.dictionary_model.check_word(w, speaker_id):
+                oovs.add(w)
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        first_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=beg,
+            end=split_time,
+            channel=utterance.channel,
+            text=" ".join(first_text),
+            normalized_text=" ".join(first_text),
+            oovs=" ".join(oovs),
+        )
+        next_pk += 1
+        oovs = set()
+        for w in second_text:
+            if not self.corpus_model.dictionary_model.check_word(w, utterance.speaker_id):
+                oovs.add(w)
+        second_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=split_time,
+            end=end,
+            channel=utterance.channel,
+            text=" ".join(second_text),
+            normalized_text=" ".join(second_text),
+            oovs=" ".join(oovs),
+        )
+        self.addCommand.emit(undo.SplitUtteranceCommand(utterance, [first_utt, second_utt], self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.selectionRequested.emit([first_utt, second_utt])
+
+    def merge_utterances(self, utterances: list[Utterance]):
+        if not self.corpus_model.editable:
+            return
+        if not utterances:
+            return
+        min_begin = 1000000000
+        max_end = 0
+        text = ""
+        normalized_text = ""
+        speaker_id = None
+        channel = None
+        for old_utt in sorted(utterances, key=lambda x: x.begin):
+            if speaker_id is None:
+                speaker_id = old_utt.speaker_id
+            if channel is None:
+                channel = old_utt.channel
+            if old_utt.begin < min_begin:
+                min_begin = old_utt.begin
+            if old_utt.end > max_end:
+                max_end = old_utt.end
+            utt_text = old_utt.text
+            if utt_text == "speech" and text.strip() == "speech":
+                continue
+            text += utt_text + " "
+            normalized_text += old_utt.normalized_text + " "
+        text = text[:-1]
+        normalized_text = normalized_text[:-1]
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        oovs = set()
+        for w in text.split():
+            if not self.corpus_model.dictionary_model.check_word(w, speaker_id):
+                oovs.add(w)
+        new_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=min_begin,
+            end=max_end,
+            channel=channel,
+            text=text,
+            normalized_text=normalized_text,
+            oovs=" ".join(oovs),
+        )
+        self.addCommand.emit(undo.MergeUtteranceCommand(utterances, new_utt, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.selectionRequested.emit([new_utt])
+
+    def delete_utterances(self, utterances: typing.List[Utterance]):
+        if not self.corpus_model.editable:
+            return
+        if not utterances:
+            return
+        speaker_ids = set(x.speaker_id for x in utterances)
+        self.addCommand.emit(undo.DeleteUtteranceCommand(utterances, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        for speaker_id in speaker_ids:
+            self.corpus_model.set_speaker_modified(speaker_id)
+
+    def rowCount(self, parent=None):
+        return len(self.utterances)
+
+    def data(self, index, role=QtCore.Qt.ItemDataRole.DisplayRole):
+        if role == QtCore.Qt.ItemDataRole.DisplayRole:
+            return self.utterances[index.row()]
+
+
+class FileSelectionModel(QtCore.QItemSelectionModel):
+    fileAboutToChange = QtCore.Signal()
+    fileChanged = QtCore.Signal()
+    channelChanged = QtCore.Signal()
+    resetView = QtCore.Signal()
+    viewChanged = QtCore.Signal(object, object)
+    selectionAudioChanged = QtCore.Signal()
+    currentTimeChanged = QtCore.Signal(object)
+    currentUtteranceChanged = QtCore.Signal()
+    speakerRequested = QtCore.Signal(object)
+
+    spectrogramReady = QtCore.Signal()
+    waveformReady = QtCore.Signal()
+    pitchTrackReady = QtCore.Signal()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.settings = AnchorSettings()
+        self.min_time = 0
+        self.max_time = 10
+        self.selected_min_time = None
+        self.selected_max_time = None
+        self.x = None
+        self.y = None
+        self.top_point = 2
+        self.bottom_point = 0
+        self.separator_point = 1
+        self.selected_channel = 0
+        self.spectrogram = None
+        self.min_db = None
+        self.max_db = None
+        self.pitch_track_x = None
+        self.pitch_track_y = None
+        self.waveform_x = None
+        self.waveform_y = None
+        self.requested_utterance_id = None
+        self.auto_waveform_worker = workers.AutoWaveformWorker()
+        self.spectrogram_worker = workers.SpectrogramWorker()
+        self.pitch_track_worker = workers.PitchWorker()
+        self.auto_waveform_worker.signals.result.connect(self.finalize_loading_auto_wave_form)
+        self.spectrogram_worker.signals.result.connect(self.finalize_loading_spectrogram)
+        self.pitch_track_worker.signals.result.connect(self.finalize_loading_pitch_track)
+        self.model().waveformReady.connect(self.load_audio_selection)
+        self.model().utterancesReady.connect(self.finalize_set_new_file)
+        self.viewChanged.connect(self.load_audio_selection)
+        self.model().selectionRequested.connect(self.update_selected_utterances)
+
+    def selected_utterances(self):
+        utts = []
+        m = self.model()
+        for index in self.selectedRows(0):
+            utt = m.utterances[index.row()]
+            utts.append(utt)
+        return utts
+
+    def load_audio_selection(self):
+        if self.model().y is None:
+            return
+        begin_samp = int(self.min_time * self.model().file.sample_rate)
+        end_samp = int(self.max_time * self.model().file.sample_rate)
+        if len(self.model().y.shape) > 1:
+            y = self.model().y[begin_samp:end_samp, self.selected_channel]
+        else:
+            y = self.model().y[begin_samp:end_samp]
+        self.spectrogram_worker.stop()
+        self.spectrogram_worker.set_params(
+            y,
+            self.model().file.sound_file.sample_rate,
+            self.min_time,
+            self.max_time,
+            self.selected_channel,
+        )
+        self.spectrogram_worker.start()
+        if self.max_time - self.min_time <= 10:
+            self.pitch_track_worker.stop()
+            self.pitch_track_worker.set_params(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+                self.bottom_point,
+                self.separator_point,
+            )
+            self.pitch_track_worker.start()
+        self.auto_waveform_worker.stop()
+        self.auto_waveform_worker.set_params(
+            y,
+            self.separator_point,
+            self.top_point,
+            self.min_time,
+            self.max_time,
+            self.selected_channel,
+        )
+        self.auto_waveform_worker.start()
+
+    def clean_up_for_close(self):
+        self.spectrogram_worker.stop()
+        self.pitch_track_worker.stop()
+        self.auto_waveform_worker.stop()
+
+    @property
+    def plot_min(self):
+        if self.settings.right_to_left:
+            return -self.max_time
+        return self.min_time
+
+    @property
+    def plot_max(self):
+        if self.settings.right_to_left:
+            return -self.min_time
+        return self.max_time
+
+    def finalize_loading_spectrogram(self, results):
+        stft, channel, begin, end, min_db, max_db = results
+        if self.settings.right_to_left:
+            stft = np.flip(stft, 1)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        self.spectrogram = stft
+        self.min_db = self.min_db
+        self.max_db = self.max_db
+        self.spectrogramReady.emit()
+
+    def finalize_loading_pitch_track(self, results):
+        pitch_track, voicing_track, channel, begin, end, min_f0, max_f0 = results
+        if self.settings.right_to_left:
+            pitch_track = np.flip(pitch_track, 0)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        self.pitch_track_y = pitch_track
+        if pitch_track is None:
+            return
+        x = np.linspace(
+            start=self.plot_min,
+            stop=self.plot_max,
+            num=pitch_track.shape[0],
+        )
+        self.pitch_track_x = x
+        self.pitchTrackReady.emit()
+
+    def finalize_loading_auto_wave_form(self, results):
+        y, begin, end, channel = results
+        if self.settings.right_to_left:
+            y = np.flip(y, 0)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        x = np.linspace(start=self.plot_min, stop=self.plot_max, num=y.shape[0])
+        self.waveform_x = x
+        self.waveform_y = y
+        self.waveformReady.emit()
+
+    def select_audio(self, begin, end):
+        if end is not None and end - begin < 0.025:
+            end = None
+        self.selected_min_time = begin
+        self.selected_max_time = end
+        self.selectionAudioChanged.emit()
+
+    def request_start_time(self, start_time):
+        if start_time >= self.max_time:
+            return
+        if start_time < self.min_time:
+            return
+        self.selected_min_time = start_time
+        self.selected_max_time = None
+        self.selectionAudioChanged.emit()
+
+    def set_current_channel(self, channel):
+        if channel == self.selected_channel:
+            return
+        self.selected_channel = channel
+        self.load_audio_selection()
+
+    def get_selected_wave_form(self):
+        if self.y is None:
+            return None, None
+        if len(self.y.shape) > 1 and self.y.shape[0] == 2:
+            return self.x, self.y[self.selected_channel, :]
+        return self.x, self.y
+
+    def zoom(self, factor, mid_point=None):
+        if factor == 0 or self.min_time is None:
+            return
+        cur_duration = self.max_time - self.min_time
+        if mid_point is None:
+            mid_point = self.min_time + (cur_duration / 2)
+        new_duration = cur_duration / factor
+        new_begin = mid_point - (mid_point - self.min_time) / factor
+        new_begin = max(new_begin, 0)
+        new_end = min(new_begin + new_duration, self.model().file.duration)
+        if new_end - new_begin <= 0.025:
+            return
+        self.set_view_times(new_begin, new_end)
+
+    def pan(self, factor):
+        if self.min_time is None:
+            return
+        if factor < 1:
+            factor = 1 - factor
+            right = True
+        else:
+            right = False
+            factor = factor - 1
+        if right and self.max_time == self.model().file.duration:
+            return
+        if not right and self.min_time == 0:
+            return
+        cur_duration = self.max_time - self.min_time
+        shift = factor * cur_duration
+        if right:
+            new_begin = self.min_time + shift
+            new_end = self.max_time + shift
+        else:
+            new_begin = self.min_time - shift
+            new_end = self.max_time - shift
+        if new_begin < 0:
+            new_end = new_end + abs(new_begin)
+            new_begin = 0
+        if new_end > self.model().file.duration:
+            new_begin -= self.model().file.duration - new_end
+            new_end = self.model().file.duration
+        self.set_view_times(new_begin, new_end)
+
+    def zoom_in(self):
+        if self.model().file is None:
+            return
+        self.zoom(1.5)
+
+    def zoom_out(self):
+        if self.model().file is None:
+            return
+        self.zoom(0.5)
+
+    def zoom_to_selection(self):
+        if self.selected_min_time is not None and self.selected_max_time is not None:
+            self.set_view_times(self.selected_min_time, self.selected_max_time)
+
+    def update_from_slider(self, value):
+        if not self.max_time:
+            return
+        cur_window = self.max_time - self.min_time
+        self.set_view_times(value, value + cur_window)
+
+    def update_selection_audio(self, begin, end):
+        if begin < self.min_time:
+            begin = self.min_time
+        if end > self.max_time:
+            end = self.max_time
+        self.selected_min_time = begin
+        self.selected_max_time = end
+        self.selectionAudioChanged.emit()
+
+    def visible_utterances(self) -> typing.List[Utterance]:
+        file_utts = []
+        if not self.model().file:
+            return file_utts
+        if self.model().rowCount() > 1:
+            for u in self.model().utterances:
+                if u.begin >= self.max_time:
+                    break
+                if u.end <= self.min_time:
+                    continue
+                file_utts.append(u)
+        else:
+            file_utts.extend(self.model().utterances)
+        return file_utts
+
+    def model(self) -> FileUtterancesModel:
+        return super().model()
+
+    def set_view_times(self, begin, end):
+        begin = max(begin, 0)
+        end = min(end, self.model().file.duration)
+        if (begin, end) == (self.min_time, self.max_time):
+            return
+        self.min_time = begin
+        self.max_time = end
+        if (
+            self.selected_max_time is not None
+            and not self.min_time <= self.selected_min_time <= self.max_time
+        ):
+            self.selected_min_time = self.min_time
+        if (
+            self.selected_max_time is not None
+            and not self.min_time <= self.selected_max_time <= self.max_time
+        ):
+            self.selected_max_time = None
+        self.viewChanged.emit(self.min_time, self.max_time)
+
+    def set_current_file(self, info, force_update=False):
+        file_id, begin, end, utterance_id, speaker_id = info
+        try:
+            new_file = self.model().file is None or self.model().file.id != file_id
+        except sqlalchemy.orm.exc.DetachedInstanceError:
+            new_file = True
+        self.requested_utterance_id = utterance_id
+        if new_file:
+            self.fileAboutToChange.emit()
+            self.model().set_file(file_id)
+            self.speakerRequested.emit(speaker_id)
+        else:
+            self.finalize_set_new_file()
+            self.speakerRequested.emit(speaker_id)
+        self.set_view_times(begin, end)
+
+    def finalize_set_new_file(self):
+        if self.requested_utterance_id is None:
+            return
+        utterance = self.model().get_utterance(self.requested_utterance_id)
+        if utterance is None:
+            return
+        self.update_select(self.requested_utterance_id, reset=True)
+        self.selected_channel = 0
+        if utterance is not None and utterance.channel is not None:
+            self.selected_channel = utterance.channel
+        self.fileChanged.emit()
+
+    def checkSelected(self, utterance_id: int):
+        m = self.model()
+        for index in self.selectedRows(0):
+            if utterance_id == m._indices[index.row()]:
+                return True
+        return False
+
+    def update_selected_utterances(self, utterances):
+        super().clearSelection()
+        super().clearCurrentIndex()
+        if not utterances:
+            return
+        flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+        flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        for u in utterances:
+            if u.id not in self.model().reversed_indices:
+                continue
+            row = self.model().reversed_indices[u.id]
+
+            index = self.model().index(row, 0)
+            if not index.isValid():
+                return
+            self.select(index, flags)
+        self.currentUtteranceChanged.emit()
+
+    def update_select(self, utterance_id: int, deselect=False, reset=False):
+        if reset and [x.id for x in self.selected_utterances()] == [utterance_id]:
+            return
+        flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+        if reset:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.ClearAndSelect
+        elif deselect:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.Deselect
+        else:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        if utterance_id not in self.model().reversed_indices:
+            return
+        row = self.model().reversed_indices[utterance_id]
+
+        index = self.model().index(row, 0)
+        if not index.isValid():
+            return
+        self.select(index, flags)
+        if not deselect:
+            self.select_audio(self.model().utterances[row].begin, self.model().utterances[row].end)
+        self.currentUtteranceChanged.emit()
+
+
 class CorpusSelectionModel(QtCore.QItemSelectionModel):
     fileChanged = QtCore.Signal()
     channelChanged = QtCore.Signal()
     resetView = QtCore.Signal()
     fileAboutToChange = QtCore.Signal()
-
+    fileViewRequested = QtCore.Signal(object)
     selectionAudioChanged = QtCore.Signal()
     currentTimeChanged = QtCore.Signal(object)
     currentUtteranceChanged = QtCore.Signal()
 
     def __init__(self, *args, **kwargs):
-        super(
+        super().__init__(*args, **kwargs)
+        self.settings = AnchorSettings()
         self.min_time = 0
         self.max_time = 10
         self.selected_min_time = None
         self.selected_max_time = None
-        self.current_file: Optional[File] = None
         self.x = None
         self.y = None
         self.current_utterance_id = None
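The hunk above replaces the old single-selection-model design with a dedicated per-file pair: FileUtterancesModel owns one file's utterances, waveform, and background workers, while FileSelectionModel layers view state (zoom, pan, channel, spectrogram/pitch workers) on top of it. A minimal wiring sketch, based only on the code added in this hunk; `corpus_model` (a CorpusModel with an open session) and `file_id` are hypothetical stand-ins:

    # Sketch only: how the two new classes appear intended to be wired together.
    model = FileUtterancesModel()
    model.set_corpus_model(corpus_model)   # model queries File/Utterance via corpus_model.session
    selection = FileSelectionModel(model)  # __init__ connects to model.waveformReady/utterancesReady
    model.utterancesReady.connect(lambda: print(model.rowCount(), "utterances loaded"))
    selection.spectrogramReady.connect(lambda: print("spectrogram ready for plotting"))
    model.set_file(file_id)                # kicks off WaveformWorker and SpeakerTierWorker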
@@ -175,10 +918,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         # self.selectionChanged.connect(self.update_selection_audio)
         # self.selectionChanged.connect(self.update_selection_audio)
         # self.model().changeCommandFired.connect(self.expire_current)
-        self.selectionChanged.connect(self._update_selection)
         self.model().layoutChanged.connect(self.check_selection)
         self.model().unlockCorpus.connect(self.fileChanged.emit)
-        self.model().selectionRequested.connect(self.update_select_rows)
 
     def set_current_utterance(self, utterance_id):
         self.current_utterance_id = utterance_id
@@ -190,13 +931,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         elif self.model().rowCount() == 0:
             self.clearSelection()
 
-    def set_current_channel(self, channel):
-        self.selected_channel = channel
-        self.channelChanged.emit()
-
     def clearSelection(self) -> None:
         self.fileAboutToChange.emit()
-        self.current_file = None
         self.current_utterance_id = None
         self.min_time = None
         self.max_time = None
@@ -206,22 +942,6 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         super(CorpusSelectionModel, self).clearSelection()
         self.fileChanged.emit()
 
-    def update_selected_wavform(self, *args):
-        if self.min_time is None or self.current_file is None:
-            self.x = None
-            self.y = None
-        else:
-            self.x, self.y = self.current_file.sound_file.normalized_waveform(
-                self.min_time, self.max_time
-            )
-
-    def get_selected_wave_form(self):
-        if self.y is None:
-            return None, None
-        if len(self.y.shape) > 1 and self.y.shape[0] == 2:
-            return self.x, self.y[self.selected_channel, :]
-        return self.x, self.y
-
     def update_select_rows(self, rows: list[int]):
         super(CorpusSelectionModel, self).clearCurrentIndex()
         super(CorpusSelectionModel, self).clearSelection()
@@ -237,8 +957,29 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             | QtCore.QItemSelectionModel.SelectionFlag.Rows,
         )
 
+    def update_selected_utterances(self, utterances):
+        if not utterances:
+            return
+        first = True
+        for u in utterances:
+            if u.id not in self.model().reversed_indices:
+                continue
+            row = self.model().reversed_indices[u.id]
+
+            index = self.model().index(row, 0)
+            if not index.isValid():
+                return
+            if not first:
+                flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+                flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+            else:
+                flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+                flags |= QtCore.QItemSelectionModel.SelectionFlag.ClearAndSelect
+            first = False
+            self.select(index, flags)
+
     def update_select(self, utterance_id: int, deselect=False, reset=False, focus=False):
-        if reset and
+        if reset and self.selected_utterances() == [utterance_id]:
             return
         flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
         if reset:
@@ -253,58 +994,13 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         if focus:
             flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
             if row == self.currentIndex().row():
-                self.
+                self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
         index = self.model().index(row, 0)
         if not index.isValid():
             return
         self.select(index, flags)
 
-    def select_audio(self, begin, end):
-        if end is not None and end - begin < 0.025:
-            end = None
-        self.selected_min_time = begin
-        self.selected_max_time = end
-        self.selectionAudioChanged.emit()
-
-    def request_start_time(self, start_time):
-        if start_time >= self.max_time:
-            return
-        if start_time < self.min_time:
-            return
-        self.selected_min_time = start_time
-        self.selected_max_time = None
-        self.selectionAudioChanged.emit()
-
-    def visible_utts(self) -> typing.List[Utterance]:
-        file_utts = []
-        if not self.current_file:
-            return file_utts
-        if self.current_file.num_utterances > 1:
-            for u in sorted(self.current_file.utterances, key=lambda x: x.begin):
-                if u.begin >= self.max_time:
-                    break
-                if u.end <= self.min_time:
-                    continue
-                file_utts.append(u)
-        else:
-            file_utts.extend(self.current_file.utterances)
-        return file_utts
-
-    def currentUtterance(self) -> Optional[Utterance]:
-        if self.current_utterance_id is not None:
-            return
-        m = self.model()
-        utterance = (
-            m.session.query(Utterance)
-            .options(
-                joinedload(Utterance.file).joinedload(File.sound_file),
-                joinedload(Utterance.file).subqueryload(File.speakers),
-            )
-            .get(self.current_utterance_id)
-        )
-        return utterance
-
     def _update_selection(self):
         index = self.currentIndex()
         if not index.isValid():
@@ -313,20 +1009,20 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         self.current_utterance_id = m._indices[index.row()]
         self.currentUtteranceChanged.emit()
 
-    def
-
+    def selected_utterances(self):
+        current_utterance = self.current_utterance_id
+        if current_utterance is None:
+            return []
+        utts = [current_utterance]
         m = self.model()
-        current_utterance = m.utteranceAt(self.currentIndex())
         for index in self.selectedRows(1):
-            if current_utterance is not None and m._indices[index.row()] == current_utterance
+            if current_utterance is not None and m._indices[index.row()] == current_utterance:
                 continue
-            utt = m.
+            utt = m.utterance_id_at(index)
             if utt is None:
                 continue
             if current_utterance is None:
                 current_utterance = utt
-            if utt.file_id != current_utterance.file_id:
-                continue
             utts.append(utt)
         return utts
 
@@ -341,140 +1037,23 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         text = m.data(m.index(index.row(), m.text_column), QtCore.Qt.ItemDataRole.DisplayRole)
         return text
 
-    def zoom(self, factor, mid_point=None):
-        if factor == 0:
-            return
-        cur_duration = self.max_time - self.min_time
-        if mid_point is None:
-            mid_point = self.min_time + (cur_duration / 2)
-        new_duration = cur_duration / factor
-        new_begin = mid_point - (mid_point - self.min_time) / factor
-        new_begin = max(new_begin, 0)
-        new_end = min(new_begin + new_duration, self.current_file.duration)
-        if new_end - new_begin <= 0.025:
-            return
-        self.set_view_times(new_begin, new_end)
-
-    def pan(self, factor):
-        if factor < 1:
-            factor = 1 - factor
-            right = True
-        else:
-            right = False
-            factor = factor - 1
-        if right and self.max_time == self.current_file.duration:
-            return
-        if not right and self.min_time == 0:
-            return
-        cur_duration = self.max_time - self.min_time
-        shift = factor * cur_duration
-        if right:
-            new_begin = self.min_time + shift
-            new_end = self.max_time + shift
-        else:
-            new_begin = self.min_time - shift
-            new_end = self.max_time - shift
-        if new_begin < 0:
-            new_end = new_end + abs(new_begin)
-            new_begin = 0
-        if new_end > self.current_file.duration:
-            new_begin -= self.current_file.duration - new_end
-            new_end = self.current_file.duration
-        self.set_view_times(new_begin, new_end)
-
-    def zoom_in(self):
-        if self.current_file is None:
-            return
-        self.zoom(1.5)
-
-    def zoom_out(self):
-        if self.current_file is None:
-            return
-        self.zoom(0.5)
-
-    def zoom_to_selection(self):
-        if self.selected_min_time is None or self.selected_max_time is None:
-            rows = self.selectedRows(1)
-            if not rows:
-                return
-            begin = None
-            end = None
-            for r in rows:
-                u = self.model().utteranceAt(r)
-                if u is None:
-                    continue
-                if u.file_id != self.current_file.id:
-                    continue
-                if begin is None or begin > u.begin:
-                    begin = u.begin
-                if end is None or end < u.end:
-                    end = u.end
-            self.set_view_times(begin, end)
-        else:
-            self.set_view_times(self.selected_min_time, self.selected_max_time)
-
-    def update_from_slider(self, value):
-        if not self.max_time:
-            return
-        cur_window = self.max_time - self.min_time
-        self.set_view_times(value, value + cur_window)
-
-    def update_selection_audio(self):
-        begins = self.selectedRows(self.model().begin_column)
-        ends = self.selectedRows(self.model().end_column)
-        begin = None
-        end = None
-        if len(begins) > 0:
-            for i, b in enumerate(begins):
-                b = self.model().data(b, QtCore.Qt.ItemDataRole.DisplayRole)
-                e = self.model().data(ends[i], QtCore.Qt.ItemDataRole.DisplayRole)
-                if begin is None or begin > b:
-                    begin = b
-                if end is None or end < e:
-                    end = e
-            if self.current_file is None or begin > self.current_file.duration:
-                begin = None
-                end = None
-            elif end > self.current_file.duration:
-                end = self.current_file.duration
-        self.selected_min_time = begin
-        self.selected_max_time = end
-        self.selectionAudioChanged.emit()
-
     def switch_utterance(self, new_index, old_index):
+        if not self.model().fully_loaded:
+            return
         if not isinstance(new_index, QtCore.QModelIndex):
             row = 0
         else:
             if not new_index.isValid():
                 return
             row = new_index.row()
-        utt = self.model().
+        utt = self.model().utterance_id_at(row)
         if utt is None:
            return
-        if utt
+        if utt == self.current_utterance_id:
             return
-        self.current_utterance_id = utt
+        self.current_utterance_id = utt
         self.currentUtteranceChanged.emit()
-        self.
-            utt.file_id, utt.begin, utt.end, channel=utt.channel, force_update=True
-        )
-
-    def update_view_times(self, *args, force_update=False):
-        utts = self.selectedUtterances()
-        if len(utts) == 0:
-            self.resetView.emit()
-            return
-        if len(utts) == 1:
-            force_update = True
-        begin = utts[0].begin
-        f_id = utts[0].file_id
-        end_ind = -1
-        while True:
-            if utts[end_ind].file_id == f_id:
-                end = utts[end_ind].end
-                break
-        self.set_current_file(f_id, begin, end, channel=utts[0].channel, force_update=force_update)
-        self.selected_min_time = self.min_time
+        self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
     def model(self) -> CorpusModel:
         return super(CorpusSelectionModel, self).model()
@@ -486,43 +1065,6 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             return True
         return False
 
-    def set_current_file(self, file_id, begin=None, end=None, channel=None, force_update=False):
-        try:
-            new_file = self.current_file is None or self.current_file.id != file_id
-        except sqlalchemy.orm.exc.DetachedInstanceError:
-            new_file = True
-        if new_file:
-            self.selected_min_time = None
-            self.selected_max_time = None
-            self.fileAboutToChange.emit()
-            self.selected_channel = 0 if channel is None else channel
-            self.current_file = (
-                self.model().session.query(File).options(joinedload(File.sound_file)).get(file_id)
-            )
-            self.min_time = begin
-            self.max_time = end
-            self.fileChanged.emit()
-        elif (
-            self.current_file is not None
-            and begin is not None
-            and end is not None
-            and force_update
-        ):
-            self.selected_channel = channel
-            self.set_view_times(begin, end)
-
-    def set_view_times(self, begin, end):
-        begin = max(begin, 0)
-        end = min(end, self.current_file.duration)
-        if (begin, end) == (self.min_time, self.max_time):
-            return
-        self.min_time = begin
-        self.max_time = end
-        self.selected_min_time = self.min_time
-        if self.selected_max_time is not None and self.selected_max_time > self.max_time:
-            self.selected_max_time = None
-        self.viewChanged.emit(self.min_time, self.max_time)
-
     def focusUtterance(self, index):
         m = self.model()
         u = m.utteranceAt(index)
@@ -530,10 +1072,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             self.min_time = 0
             self.max_time = 1
             self.fileAboutToChange()
-            self.current_file = None
             self.fileChanged.emit()
             return
-        self.current_file = u.file
         begin = u.begin
         end = u.end
         padding = 1
@@ -1043,6 +1583,8 @@ class SpeakerModel(TableModel):
         self.mds_speaker_utterances()
 
     def finish_load_ivectors(self, result, *args, **kwargs):
+        if result is None:
+            return
         speaker_ids, utterance_ids, utt2spk, ivectors = result
         if speaker_ids != self.current_speakers:
             return
@@ -1431,6 +1973,7 @@ class CorpusModel(TableModel):
         self.speakers = {}
         self.speaker_id_mapping = {}
         self.utterances = None
+        self.session: sqlalchemy.orm.scoped_session = None
         self.utterance_count = 0
         self.speaker_count = 0
         self.file_count = 0
@@ -1475,29 +2018,46 @@ class CorpusModel(TableModel):
             return True
         return False
 
-    def update_utterance_table_row(self,
-        if
-
-
+    def update_utterance_table_row(self, utterance: typing.Union[int, Utterance]):
+        if isinstance(utterance, int):
+            utterance_id = utterance
+            if utterance_id not in self.reversed_indices:
+                return
+            utterance = self.session.query(Utterance).get(utterance_id)
+        else:
+            utterance_id = utterance.id
+            if utterance_id not in self.reversed_indices:
+                return
         index = self.reversed_indices[utterance_id]
         self.layoutAboutToBeChanged.emit()
         self._data[index][self.text_column] = utterance.text
         self._data[index][self.begin_column] = utterance.begin
         self._data[index][self.end_column] = utterance.end
-        self._data[index][self.duration_column] = utterance.
+        self._data[index][self.duration_column] = utterance.end - utterance.begin
+        self.layoutChanged.emit()
+
+    def change_speaker_table_utterances(self, utterances: typing.List[Utterance]):
+        self.layoutAboutToBeChanged.emit()
+        for u in utterances:
+            if u.id not in self.reversed_indices:
+                continue
+            index = self.reversed_indices[u.id]
+            self._speaker_indices[index] = u.speaker_id
+            self._data[index][self.speaker_column] = self.get_speaker_name(u.speaker_id)
         self.layoutChanged.emit()
 
     def add_table_utterances(self, utterances: typing.List[Utterance]):
         self.layoutAboutToBeChanged.emit()
         rows = []
         for utterance in utterances:
+            speaker_name = self.get_speaker_name(utterance.speaker_id)
             row_data = [
                 utterance.oovs,
                 utterance.file_name,
-
+                speaker_name,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.end - utterance.begin,
                 utterance.text,
             ]
             self._data.append(row_data)
@@ -1512,7 +2072,10 @@ class CorpusModel(TableModel):
     def delete_table_utterances(self, utterances: typing.List[Utterance]):
         self.layoutAboutToBeChanged.emit()
         for utterance in utterances:
-
+            try:
+                index = self.reversed_indices.pop(utterance.id)
+            except KeyError:
+                continue
             _ = self._data.pop(index)
             _ = self._indices.pop(index)
             _ = self._file_indices.pop(index)
@@ -1533,7 +2096,6 @@ class CorpusModel(TableModel):
 
         self.layoutAboutToBeChanged.emit()
         first = split_utterances[0]
-        self.session.merge(first)
         file_name = self._data[index][1]
         speaker_name = self._data[index][2]
         row_data = [
@@ -1542,7 +2104,7 @@ class CorpusModel(TableModel):
             speaker_name,
             first.begin,
             first.end,
-            first.
+            first.end - first.begin,
             first.text,
         ]
         self._data[index] = row_data
@@ -1552,7 +2114,6 @@ class CorpusModel(TableModel):
         self.reversed_indices[first.id] = index
         rows = [index]
         for utterance in split_utterances[1:]:
-            self.session.merge(utterance)
             index += 1
             rows.append(index)
             self.reversed_indices = {
@@ -1565,7 +2126,7 @@ class CorpusModel(TableModel):
                 speaker_name,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.end - utterance.begin,
                 utterance.text,
             ]
             self.reversed_indices[utterance.id] = index
@@ -1584,14 +2145,13 @@ class CorpusModel(TableModel):
         except KeyError:
             return
         self.layoutAboutToBeChanged.emit()
-        self.session.merge(merged_utterance)
         row_data = [
             merged_utterance.oovs,
             merged_utterance.file_name,
             merged_utterance.speaker_name,
             merged_utterance.begin,
             merged_utterance.end,
-            merged_utterance.
+            merged_utterance.end - merged_utterance.begin,
             merged_utterance.text,
         ]
         first = split_utterances[0]
@@ -1640,32 +2200,6 @@ class CorpusModel(TableModel):
         self.language_model = language_model
         self.languageModelChanged.emit()
 
-    def create_utterance(self, file: File, speaker: Optional[Speaker], begin: float, end: float):
-        if not self.editable:
-            return
-        channel = 0
-        if file.num_channels > 1:
-            ind = file.speaker_ordering.index(speaker)
-            if ind >= len(file.speaker_ordering) / 2:
-                channel = 1
-        if speaker is None:
-            speaker = self.corpus.add_speaker("speech", session=self.session)
-        begin = round(begin, 4)
-        end = round(end, 4)
-        text = ""
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        new_utt = Utterance(
-            id=next_pk,
-            speaker_id=speaker.id,
-            file_id=file.id,
-            begin=begin,
-            end=end,
-            channel=channel,
-            text=text,
-        )
-        self.addCommand.emit(undo.CreateUtteranceCommand(new_utt, self))
-        self.unsaved_files.add(file.id)
-
     def set_file_modified(self, file_id: typing.Union[int, typing.List[int]]):
         if isinstance(file_id, int):
             file_id = [file_id]
@@ -1680,32 +2214,6 @@ class CorpusModel(TableModel):
         )
         self.session.commit()
 
-    def update_utterance_text(self, utterance: Utterance, text):
-        if text != utterance.text:
-            self.addCommand.emit(undo.UpdateUtteranceTextCommand(utterance, text, self))
-            self.set_file_modified(utterance.file_id)
-
-    def update_utterance_times(
-        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
-    ):
-        if not self.editable:
-            return
-        self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
-        self.set_file_modified(utterance.file_id)
-
-    def update_utterance_speaker(self, utterance: Utterance, speaker: Speaker):
-        if not self.editable:
-            return
-        self.addCommand.emit(undo.UpdateUtteranceSpeakerCommand(utterance, speaker, self))
-
-    def delete_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        for u in utterances:
-            self.set_file_modified(u.file_id)
-            self.set_speaker_modified(u.speaker_id)
-        self.addCommand.emit(undo.DeleteUtteranceCommand(utterances, self))
-
     def check_align_lexicon_compiler(self):
         if self.acoustic_model is None:
             return
@@ -1724,150 +2232,13 @@ class CorpusModel(TableModel):
             dictionary_id, self.acoustic_model, disambiguation=True
         )
 
-    def split_vad_utterance(
-        self, original_utterance_id, replacement_utterance_data: typing.List[KalpyUtterance]
-    ):
-        utt = self.session.get(Utterance, original_utterance_id)
-        replacement_utterances = []
-        speaker_id = utt.speaker_id
-        file_id = utt.file_id
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        for new_utt in replacement_utterance_data:
-            replacement_utterances.append(
-                Utterance(
-                    id=next_pk,
-                    begin=new_utt.segment.begin,
-                    end=new_utt.segment.end,
-                    speaker_id=speaker_id,
-                    file_id=file_id,
-                    text=new_utt.transcript,
-                    normalized_text=new_utt.transcript,
-                    features="",
-                    in_subset=False,
-                    ignored=False,
-                    channel=new_utt.segment.channel,
-                )
-            )
-            next_pk += 1
-        splitting_utterances = [[utt, *replacement_utterances]]
-        self.addCommand.emit(
-            undo.SplitUtteranceCommand(splitting_utterances, self, update_table=False)
-        )
-        self.requestFileView.emit(utt.file_name)
-        self.set_file_modified(file_id)
-        self.set_speaker_modified(speaker_id)
-
-    def split_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        splitting_utterances = []
-        for utt in utterances:
-            duration = utt.duration
-            beg = utt.begin
-            end = utt.end
-            first_text = ""
-            second_text = ""
-            if " " not in utt.text and " " in utt.normalized_text:
-                t = utt.normalized_text.split()
-                mid_ind = int(len(t) / 2)
-                first_text = t[:mid_ind]
-                second_text = t[mid_ind:]
-            elif utt.text:
-                t = utt.text.split()
-                mid_ind = int(len(t) / 2)
-                first_text = t[:mid_ind]
-                second_text = t[mid_ind:]
-            split_time = beg + (duration / 2)
-            oovs = set()
-            for w in first_text:
-                if not self.dictionary_model.check_word(w, utt.speaker_id):
-                    oovs.add(w)
-            next_pk = self.corpus.get_next_primary_key(Utterance)
-            first_utt = Utterance(
-                id=next_pk,
-                speaker_id=utt.speaker_id,
-                file_id=utt.file_id,
-                begin=beg,
-                end=split_time,
-                channel=utt.channel,
-                text=" ".join(first_text),
-                normalized_text=" ".join(first_text),
-                oovs=" ".join(oovs),
-            )
-            next_pk += 1
-            oovs = set()
-            for w in second_text:
-                if not self.dictionary_model.check_word(w, utt.speaker_id):
-                    oovs.add(w)
-            second_utt = Utterance(
-                id=next_pk,
-                speaker_id=utt.speaker_id,
-                file_id=utt.file_id,
-                begin=split_time,
-                end=end,
-                channel=utt.channel,
-                text=" ".join(second_text),
-                normalized_text=" ".join(second_text),
-                oovs=" ".join(oovs),
-            )
-            splitting_utterances.append([utt, first_utt, second_utt])
-        self.addCommand.emit(undo.SplitUtteranceCommand(splitting_utterances, self))
-        self.set_file_modified([utt[0].file_id for utt in splitting_utterances])
-
     def merge_speakers(self, speakers: list[int]):
         self.addCommand.emit(undo.MergeSpeakersCommand(speakers, self))
 
-    def merge_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        min_begin = 1000000000
-        max_end = 0
-        text = ""
-        normalized_text = ""
-        speaker = None
-        file = None
-        channel = None
-        for old_utt in sorted(utterances, key=lambda x: x.begin):
-            if speaker is None:
-                speaker = old_utt.speaker
-            if file is None:
-                file = old_utt.file
-            if channel is None:
-                channel = old_utt.channel
-            if old_utt.begin < min_begin:
-                min_begin = old_utt.begin
-            if old_utt.end > max_end:
-                max_end = old_utt.end
-            utt_text = old_utt.text
-            if utt_text == "speech" and text.strip() == "speech":
-                continue
-            text += utt_text + " "
-            normalized_text += old_utt.normalized_text + " "
-        text = text[:-1]
-        normalized_text = normalized_text[:-1]
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        oovs = set()
-        for w in text.split():
-            if not self.dictionary_model.check_word(w, speaker.id):
-                oovs.add(w)
-        new_utt = Utterance(
-            id=next_pk,
-            speaker=speaker,
-            file=file,
-            begin=min_begin,
-            end=max_end,
-            channel=channel,
-            text=text,
-            normalized_text=normalized_text,
-            oovs=" ".join(oovs),
-        )
-        self.set_file_modified(file.id)
-        self.addCommand.emit(undo.MergeUtteranceCommand(utterances, new_utt, self))
-
     def replace_all(self, search_query: TextFilterQuery, replacement: str):
         self.addCommand.emit(undo.ReplaceAllCommand(search_query, replacement, self))
 
-    def
+    def utterance_id_at(self, index) -> Optional[Utterance]:
         if not isinstance(index, int):
             if not index.isValid():
                 return None
@@ -1876,15 +2247,16 @@ class CorpusModel(TableModel):
             return None
         if len(self._indices) == 0:
             return None
-
-
-
-
-
-
-
+        return self._indices[index]
+
+    def audio_info_for_utterance(self, row: int):
+        return (
+            self._file_indices[row],
+            self._data[row][self.begin_column],
+            self._data[row][self.end_column],
+            self._indices[row],
+            self._speaker_indices[row],
         )
-        return utterance
 
     def fileAt(self, index) -> int:
         if not isinstance(index, int):