Anchor-annotator 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl
This diff compares publicly released package versions from one of the supported registries. It is provided for informational purposes only and reflects the packages' contents as they appear in their public registry.
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/METADATA +1 -1
- Anchor_annotator-0.2.0.dist-info/RECORD +21 -0
- anchor/_version.py +2 -2
- anchor/main.py +44 -22
- anchor/models.py +821 -468
- anchor/plot.py +428 -399
- anchor/undo.py +173 -128
- anchor/widgets.py +35 -44
- anchor/workers.py +43 -17
- Anchor_annotator-0.1.0.dist-info/RECORD +0 -21
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/WHEEL +0 -0
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/top_level.txt +0 -0
anchor/models.py
CHANGED
@@ -31,7 +31,7 @@ from montreal_forced_aligner.utils import mfa_open
 from PySide6 import QtCore
 from sqlalchemy.orm import joinedload
 
-from anchor import undo
+from anchor import undo, workers
 from anchor.settings import AnchorSettings
 
 
@@ -148,24 +148,766 @@ class TableModel(QtCore.QAbstractTableModel):
         return len(self._header_data)
 
 
+class FileUtterancesModel(QtCore.QAbstractListModel):
+    addCommand = QtCore.Signal(object)
+    selectionRequested = QtCore.Signal(object)
+
+    waveformReady = QtCore.Signal()
+    utterancesReady = QtCore.Signal()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.utterances = []
+        self.file = None
+        self.y = None
+        self.speakers = []
+        self._indices = []
+        self._speaker_indices = []
+        self.reversed_indices = {}
+        self.speaker_channel_mapping = {}
+        self.corpus_model: CorpusModel = None
+        self.waveform_worker = workers.WaveformWorker()
+        self.speaker_tier_worker = workers.SpeakerTierWorker()
+        self.speaker_tier_worker.signals.result.connect(self.finalize_loading_utterances)
+        self.waveform_worker.signals.result.connect(self.finalize_loading_wave_form)
+
+    def get_utterance(self, utterance_id: int) -> Utterance:
+        try:
+            return self.utterances[self.reversed_indices[utterance_id]]
+        except KeyError:
+            return None
+
+    def set_corpus_model(self, corpus_model: CorpusModel):
+        self.corpus_model = corpus_model
+
+    def clean_up_for_close(self):
+        self.waveform_worker.stop()
+        self.speaker_tier_worker.stop()
+
+    def set_file(self, file_id):
+        self.file = (
+            self.corpus_model.session.query(File).options(joinedload(File.sound_file)).get(file_id)
+        )
+        self.y = None
+        self.get_utterances()
+        self.waveform_worker.stop()
+        self.waveform_worker.set_params(self.file.sound_file.sound_file_path)
+        self.waveform_worker.start()
+
+    def finalize_loading_utterances(self, results):
+        utterances, file_id = results
+        if file_id != self.file.id:
+            return
+        self.utterances = utterances
+        for i, u in enumerate(utterances):
+            if u.speaker_id not in self.speakers:
+                self.speakers.append(u.speaker_id)
+                self._speaker_indices.append(u.speaker_id)
+            self.reversed_indices[u.id] = i
+            self._indices.append(u.id)
+            if self.file.num_channels > 1 and u.speaker_id not in self.speaker_channel_mapping:
+                self.speaker_channel_mapping[u.speaker_id] = u.channel
+        self.utterancesReady.emit()
+
+    def finalize_loading_wave_form(self, results):
+        y, file_path = results
+        if self.file is None or file_path != self.file.sound_file.sound_file_path:
+            return
+        self.y = y
+        self.waveformReady.emit()
+
+    def get_utterances(self):
+        parent_index = self.index(0, 0)
+        self.beginRemoveRows(parent_index, 0, len(self.utterances))
+        self.utterances = []
+        self.speakers = []
+        self._indices = []
+        self._speaker_indices = []
+        self.speaker_channel_mapping = {}
+        self.reversed_indices = {}
+        self.endRemoveRows()
+        if self.file is None:
+            return
+        self.speaker_tier_worker.stop()
+        self.speaker_tier_worker.query_alignment = (
+            self.corpus_model.has_alignments
+            or self.corpus_model.has_reference_alignments
+            or self.corpus_model.has_transcribed_alignments
+        )
+        self.speaker_tier_worker.session = self.corpus_model.session
+        self.speaker_tier_worker.set_params(self.file.id)
+        self.speaker_tier_worker.start()
+
+    def create_utterance(self, speaker_id: Optional[int], begin: float, end: float):
+        if not self.corpus_model.editable:
+            return
+        channel = 0
+        if speaker_id is None:
+            speaker_id = self.corpus_model.corpus.add_speaker(
+                "speech", session=self.corpus_model.session
+            ).id
+        if self.file.num_channels > 1:
+            if speaker_id not in self.speaker_channel_mapping:
+                self.speaker_channel_mapping[speaker_id] = 0
+            channel = self.speaker_channel_mapping[speaker_id]
+        begin = round(begin, 4)
+        end = round(end, 4)
+        text = ""
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        new_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            file=self.file,
+            begin=begin,
+            end=end,
+            channel=channel,
+            text=text,
+            normalized_text=text,
+            oovs=text,
+        )
+        print(new_utt.id, new_utt.speaker_id, new_utt.file_id, new_utt.begin, new_utt.end)
+        self.addCommand.emit(undo.CreateUtteranceCommand(new_utt, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+
+    def add_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            if len(self.utterances) > 0:
+                for i, u in enumerate(self.utterances):
+                    if u.begin < utterance.begin:
+                        continue
+                    break
+                else:
+                    i = len(self.utterances) - 1
+            else:
+                i = 0
+            parent_index = self.index(i, 0)
+            self.beginInsertRows(parent_index, i, i + 1)
+            self.utterances.insert(i, utterance)
+            self._indices.insert(i, utterance.id)
+            self._speaker_indices.insert(i, utterance.speaker_id)
+            self.endInsertRows()
+        self.reversed_indices = {u: j for j, u in enumerate(self._indices)}
+        self.selectionRequested.emit(utterances)
+
+    def delete_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            try:
+                index = self.reversed_indices.pop(utterance.id)
+            except KeyError:
+                continue
+            parent_index = self.index(index, 0)
+            self.beginRemoveRows(parent_index, index, index + 1)
+            _ = self.utterances.pop(index)
+            _ = self._indices.pop(index)
+            _ = self._speaker_indices.pop(index)
+            self.reversed_indices = {u: j for j, u in enumerate(self._indices)}
+            self.endRemoveRows()
+        self.selectionRequested.emit(None)
+
+    def change_speaker_table_utterances(self, utterances: typing.List[Utterance]):
+        for utterance in utterances:
+            try:
+                index = self.reversed_indices[utterance.id]
+            except KeyError:
+                continue
+            if utterance.speaker_id not in self.speakers:
+                self.speakers.append(utterance.speaker_id)
+                self.speaker_channel_mapping[utterance.speaker_id] = utterance.channel
+            self._speaker_indices[index] = utterance.speaker_id
+
+    def merge_table_utterances(
+        self, merged_utterance: Utterance, split_utterances: typing.List[Utterance]
+    ):
+        self.delete_table_utterances(split_utterances)
+        self.add_table_utterances([merged_utterance])
+
+    def split_table_utterances(
+        self, merged_utterance: Utterance, split_utterances: typing.List[Utterance]
+    ):
+        self.delete_table_utterances([merged_utterance])
+        self.add_table_utterances(split_utterances)
+
+    def update_utterance_text(self, utterance: Utterance, text):
+        if not self.corpus_model.editable:
+            return
+        if text != utterance.text:
+            self.addCommand.emit(undo.UpdateUtteranceTextCommand(utterance, text, self))
+            self.corpus_model.set_file_modified(self.file.id)
+
+    def refresh_utterances(self):
+        for utterance in self.utterances:
+            self.corpus_model.session.refresh(utterance)
+
+    def update_utterance_speaker(self, utterance: Utterance, speaker_id: int):
+        if not self.corpus_model.editable:
+            return
+        old_speaker_id = utterance.speaker_id
+        if old_speaker_id == speaker_id:
+            return
+        self.addCommand.emit(undo.UpdateUtteranceSpeakerCommand(utterance, speaker_id, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.corpus_model.set_speaker_modified(old_speaker_id)
+
+    def update_utterance_times(
+        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
+    ):
+        if not self.corpus_model.editable:
+            return
+        if utterance.begin == begin and utterance.end == end:
+            return
+        self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
+        self.corpus_model.set_file_modified(self.file.id)
+
+    def split_vad_utterance(
+        self, original_utterance_id, replacement_utterance_data: typing.List[KalpyUtterance]
+    ):
+        if not replacement_utterance_data:
+            return
+        utt = self.utterances[self.reversed_indices[original_utterance_id]]
+        replacement_utterances = []
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        speaker_id = utt.speaker_id
+        for new_utt in replacement_utterance_data:
+            replacement_utterances.append(
+                Utterance(
+                    id=next_pk,
+                    begin=new_utt.segment.begin,
+                    end=new_utt.segment.end,
+                    speaker_id=speaker_id,
+                    file_id=self.file.id,
+                    text=new_utt.transcript,
+                    normalized_text=new_utt.transcript,
+                    features="",
+                    in_subset=False,
+                    ignored=False,
+                    channel=new_utt.segment.channel,
+                )
+            )
+            next_pk += 1
+        self.addCommand.emit(
+            undo.SplitUtteranceCommand(utt, replacement_utterances, self, update_table=False)
+        )
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+
+    def split_utterances(self, utterance: Utterance):
+        if not self.corpus_model.editable:
+            return
+        beg = utterance.begin
+        end = utterance.end
+        duration = end - beg
+        first_text = []
+        second_text = []
+        speaker_id = utterance.speaker_id
+        if (
+            utterance.text
+            and utterance.normalized_text
+            and " " not in utterance.text
+            and " " in utterance.normalized_text
+        ):
+            t = utterance.normalized_text.split()
+            mid_ind = int(len(t) / 2)
+            first_text = t[:mid_ind]
+            second_text = t[mid_ind:]
+        elif utterance.text:
+            t = utterance.text.split()
+            mid_ind = int(len(t) / 2)
+            first_text = t[:mid_ind]
+            second_text = t[mid_ind:]
+        split_time = beg + (duration / 2)
+        oovs = set()
+        for w in first_text:
+            if not self.corpus_model.dictionary_model.check_word(w, speaker_id):
+                oovs.add(w)
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        first_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=beg,
+            end=split_time,
+            channel=utterance.channel,
+            text=" ".join(first_text),
+            normalized_text=" ".join(first_text),
+            oovs=" ".join(oovs),
+        )
+        next_pk += 1
+        oovs = set()
+        for w in second_text:
+            if not self.corpus_model.dictionary_model.check_word(w, utterance.speaker_id):
+                oovs.add(w)
+        second_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=split_time,
+            end=end,
+            channel=utterance.channel,
+            text=" ".join(second_text),
+            normalized_text=" ".join(second_text),
+            oovs=" ".join(oovs),
+        )
+        self.addCommand.emit(undo.SplitUtteranceCommand(utterance, [first_utt, second_utt], self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.selectionRequested.emit([first_utt, second_utt])
+
+    def merge_utterances(self, utterances: list[Utterance]):
+        if not self.corpus_model.editable:
+            return
+        if not utterances:
+            return
+        min_begin = 1000000000
+        max_end = 0
+        text = ""
+        normalized_text = ""
+        speaker_id = None
+        channel = None
+        for old_utt in sorted(utterances, key=lambda x: x.begin):
+            if speaker_id is None:
+                speaker_id = old_utt.speaker_id
+            if channel is None:
+                channel = old_utt.channel
+            if old_utt.begin < min_begin:
+                min_begin = old_utt.begin
+            if old_utt.end > max_end:
+                max_end = old_utt.end
+            utt_text = old_utt.text
+            if utt_text == "speech" and text.strip() == "speech":
+                continue
+            text += utt_text + " "
+            normalized_text += old_utt.normalized_text + " "
+        text = text[:-1]
+        normalized_text = normalized_text[:-1]
+        next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
+        oovs = set()
+        for w in text.split():
+            if not self.corpus_model.dictionary_model.check_word(w, speaker_id):
+                oovs.add(w)
+        new_utt = Utterance(
+            id=next_pk,
+            speaker_id=speaker_id,
+            file_id=self.file.id,
+            begin=min_begin,
+            end=max_end,
+            channel=channel,
+            text=text,
+            normalized_text=normalized_text,
+            oovs=" ".join(oovs),
+        )
+        self.addCommand.emit(undo.MergeUtteranceCommand(utterances, new_utt, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        self.corpus_model.set_speaker_modified(speaker_id)
+        self.selectionRequested.emit([new_utt])
+
+    def delete_utterances(self, utterances: typing.List[Utterance]):
+        if not self.corpus_model.editable:
+            return
+        if not utterances:
+            return
+        speaker_ids = set(x.speaker_id for x in utterances)
+        self.addCommand.emit(undo.DeleteUtteranceCommand(utterances, self))
+        self.corpus_model.set_file_modified(self.file.id)
+        for speaker_id in speaker_ids:
+            self.corpus_model.set_speaker_modified(speaker_id)
+
+    def rowCount(self, parent=None):
+        return len(self.utterances)
+
+    def data(self, index, role=QtCore.Qt.ItemDataRole.DisplayRole):
+        if role == QtCore.Qt.ItemDataRole.DisplayRole:
+            return self.utterances[index.row()]
+
+
+class FileSelectionModel(QtCore.QItemSelectionModel):
+    fileAboutToChange = QtCore.Signal()
+    fileChanged = QtCore.Signal()
+    channelChanged = QtCore.Signal()
+    resetView = QtCore.Signal()
+    viewChanged = QtCore.Signal(object, object)
+    selectionAudioChanged = QtCore.Signal()
+    currentTimeChanged = QtCore.Signal(object)
+    currentUtteranceChanged = QtCore.Signal()
+    speakerRequested = QtCore.Signal(object)
+
+    spectrogramReady = QtCore.Signal()
+    waveformReady = QtCore.Signal()
+    pitchTrackReady = QtCore.Signal()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.settings = AnchorSettings()
+        self.min_time = 0
+        self.max_time = 10
+        self.selected_min_time = None
+        self.selected_max_time = None
+        self.x = None
+        self.y = None
+        self.top_point = 2
+        self.bottom_point = 0
+        self.separator_point = 1
+        self.selected_channel = 0
+        self.spectrogram = None
+        self.min_db = None
+        self.max_db = None
+        self.pitch_track_x = None
+        self.pitch_track_y = None
+        self.waveform_x = None
+        self.waveform_y = None
+        self.requested_utterance_id = None
+        self.auto_waveform_worker = workers.AutoWaveformWorker()
+        self.spectrogram_worker = workers.SpectrogramWorker()
+        self.pitch_track_worker = workers.PitchWorker()
+        self.auto_waveform_worker.signals.result.connect(self.finalize_loading_auto_wave_form)
+        self.spectrogram_worker.signals.result.connect(self.finalize_loading_spectrogram)
+        self.pitch_track_worker.signals.result.connect(self.finalize_loading_pitch_track)
+        self.model().waveformReady.connect(self.load_audio_selection)
+        self.model().utterancesReady.connect(self.finalize_set_new_file)
+        self.viewChanged.connect(self.load_audio_selection)
+        self.model().selectionRequested.connect(self.update_selected_utterances)
+
+    def selected_utterances(self):
+        utts = []
+        m = self.model()
+        for index in self.selectedRows(0):
+            utt = m.utterances[index.row()]
+            utts.append(utt)
+        return utts
+
+    def load_audio_selection(self):
+        if self.model().y is None:
+            return
+        begin_samp = int(self.min_time * self.model().file.sample_rate)
+        end_samp = int(self.max_time * self.model().file.sample_rate)
+        if len(self.model().y.shape) > 1:
+            y = self.model().y[begin_samp:end_samp, self.selected_channel]
+        else:
+            y = self.model().y[begin_samp:end_samp]
+        self.spectrogram_worker.stop()
+        self.spectrogram_worker.set_params(
+            y,
+            self.model().file.sound_file.sample_rate,
+            self.min_time,
+            self.max_time,
+            self.selected_channel,
+        )
+        self.spectrogram_worker.start()
+        if self.max_time - self.min_time <= 10:
+            self.pitch_track_worker.stop()
+            self.pitch_track_worker.set_params(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+                self.bottom_point,
+                self.separator_point,
+            )
+            self.pitch_track_worker.start()
+        self.auto_waveform_worker.stop()
+        self.auto_waveform_worker.set_params(
+            y,
+            self.separator_point,
+            self.top_point,
+            self.min_time,
+            self.max_time,
+            self.selected_channel,
+        )
+        self.auto_waveform_worker.start()
+
+    def clean_up_for_close(self):
+        self.spectrogram_worker.stop()
+        self.pitch_track_worker.stop()
+        self.auto_waveform_worker.stop()
+
+    @property
+    def plot_min(self):
+        if self.settings.right_to_left:
+            return -self.max_time
+        return self.min_time
+
+    @property
+    def plot_max(self):
+        if self.settings.right_to_left:
+            return -self.min_time
+        return self.max_time
+
+    def finalize_loading_spectrogram(self, results):
+        stft, channel, begin, end, min_db, max_db = results
+        if self.settings.right_to_left:
+            stft = np.flip(stft, 1)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        self.spectrogram = stft
+        self.min_db = self.min_db
+        self.max_db = self.max_db
+        self.spectrogramReady.emit()
+
+    def finalize_loading_pitch_track(self, results):
+        pitch_track, voicing_track, channel, begin, end, min_f0, max_f0 = results
+        if self.settings.right_to_left:
+            pitch_track = np.flip(pitch_track, 0)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        self.pitch_track_y = pitch_track
+        if pitch_track is None:
+            return
+        x = np.linspace(
+            start=self.plot_min,
+            stop=self.plot_max,
+            num=pitch_track.shape[0],
+        )
+        self.pitch_track_x = x
+        self.pitchTrackReady.emit()
+
+    def finalize_loading_auto_wave_form(self, results):
+        y, begin, end, channel = results
+        if self.settings.right_to_left:
+            y = np.flip(y, 0)
+            begin, end = -end, -begin
+        if begin != self.plot_min or end != self.plot_max:
+            return
+        x = np.linspace(start=self.plot_min, stop=self.plot_max, num=y.shape[0])
+        self.waveform_x = x
+        self.waveform_y = y
+        self.waveformReady.emit()
+
+    def select_audio(self, begin, end):
+        if end is not None and end - begin < 0.025:
+            end = None
+        self.selected_min_time = begin
+        self.selected_max_time = end
+        self.selectionAudioChanged.emit()
+
+    def request_start_time(self, start_time):
+        if start_time >= self.max_time:
+            return
+        if start_time < self.min_time:
+            return
+        self.selected_min_time = start_time
+        self.selected_max_time = None
+        self.selectionAudioChanged.emit()
+
+    def set_current_channel(self, channel):
+        if channel == self.selected_channel:
+            return
+        self.selected_channel = channel
+        self.load_audio_selection()
+
+    def get_selected_wave_form(self):
+        if self.y is None:
+            return None, None
+        if len(self.y.shape) > 1 and self.y.shape[0] == 2:
+            return self.x, self.y[self.selected_channel, :]
+        return self.x, self.y
+
+    def zoom(self, factor, mid_point=None):
+        if factor == 0 or self.min_time is None:
+            return
+        cur_duration = self.max_time - self.min_time
+        if mid_point is None:
+            mid_point = self.min_time + (cur_duration / 2)
+        new_duration = cur_duration / factor
+        new_begin = mid_point - (mid_point - self.min_time) / factor
+        new_begin = max(new_begin, 0)
+        new_end = min(new_begin + new_duration, self.model().file.duration)
+        if new_end - new_begin <= 0.025:
+            return
+        self.set_view_times(new_begin, new_end)
+
+    def pan(self, factor):
+        if self.min_time is None:
+            return
+        if factor < 1:
+            factor = 1 - factor
+            right = True
+        else:
+            right = False
+            factor = factor - 1
+        if right and self.max_time == self.model().file.duration:
+            return
+        if not right and self.min_time == 0:
+            return
+        cur_duration = self.max_time - self.min_time
+        shift = factor * cur_duration
+        if right:
+            new_begin = self.min_time + shift
+            new_end = self.max_time + shift
+        else:
+            new_begin = self.min_time - shift
+            new_end = self.max_time - shift
+        if new_begin < 0:
+            new_end = new_end + abs(new_begin)
+            new_begin = 0
+        if new_end > self.model().file.duration:
+            new_begin -= self.model().file.duration - new_end
+            new_end = self.model().file.duration
+        self.set_view_times(new_begin, new_end)
+
+    def zoom_in(self):
+        if self.model().file is None:
+            return
+        self.zoom(1.5)
+
+    def zoom_out(self):
+        if self.model().file is None:
+            return
+        self.zoom(0.5)
+
+    def zoom_to_selection(self):
+        if self.selected_min_time is not None and self.selected_max_time is not None:
+            self.set_view_times(self.selected_min_time, self.selected_max_time)
+
+    def update_from_slider(self, value):
+        if not self.max_time:
+            return
+        cur_window = self.max_time - self.min_time
+        self.set_view_times(value, value + cur_window)
+
+    def update_selection_audio(self, begin, end):
+        if begin < self.min_time:
+            begin = self.min_time
+        if end > self.max_time:
+            end = self.max_time
+        self.selected_min_time = begin
+        self.selected_max_time = end
+        self.selectionAudioChanged.emit()
+
+    def visible_utterances(self) -> typing.List[Utterance]:
+        file_utts = []
+        if not self.model().file:
+            return file_utts
+        if self.model().rowCount() > 1:
+            for u in self.model().utterances:
+                if u.begin >= self.max_time:
+                    break
+                if u.end <= self.min_time:
+                    continue
+                file_utts.append(u)
+        else:
+            file_utts.extend(self.model().utterances)
+        return file_utts
+
+    def model(self) -> FileUtterancesModel:
+        return super().model()
+
+    def set_view_times(self, begin, end):
+        begin = max(begin, 0)
+        end = min(end, self.model().file.duration)
+        if (begin, end) == (self.min_time, self.max_time):
+            return
+        self.min_time = begin
+        self.max_time = end
+        if (
+            self.selected_max_time is not None
+            and not self.min_time <= self.selected_min_time <= self.max_time
+        ):
+            self.selected_min_time = self.min_time
+        if (
+            self.selected_max_time is not None
+            and not self.min_time <= self.selected_max_time <= self.max_time
+        ):
+            self.selected_max_time = None
+        self.viewChanged.emit(self.min_time, self.max_time)
+
+    def set_current_file(self, info, force_update=False):
+        file_id, begin, end, utterance_id, speaker_id = info
+        try:
+            new_file = self.model().file is None or self.model().file.id != file_id
+        except sqlalchemy.orm.exc.DetachedInstanceError:
+            new_file = True
+        self.requested_utterance_id = utterance_id
+        if new_file:
+            self.fileAboutToChange.emit()
+            self.model().set_file(file_id)
+            self.speakerRequested.emit(speaker_id)
+        else:
+            self.finalize_set_new_file()
+            self.speakerRequested.emit(speaker_id)
+            self.set_view_times(begin, end)
+
+    def finalize_set_new_file(self):
+        if self.requested_utterance_id is None:
+            return
+        utterance = self.model().get_utterance(self.requested_utterance_id)
+        if utterance is None:
+            return
+        self.update_select(self.requested_utterance_id, reset=True)
+        self.selected_channel = 0
+        if utterance is not None and utterance.channel is not None:
+            self.selected_channel = utterance.channel
+        self.fileChanged.emit()
+
+    def checkSelected(self, utterance_id: int):
+        m = self.model()
+        for index in self.selectedRows(0):
+            if utterance_id == m._indices[index.row()]:
+                return True
+        return False
+
+    def update_selected_utterances(self, utterances):
+        super().clearSelection()
+        super().clearCurrentIndex()
+        if not utterances:
+            return
+        flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+        flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        for u in utterances:
+            if u.id not in self.model().reversed_indices:
+                continue
+            row = self.model().reversed_indices[u.id]
+
+            index = self.model().index(row, 0)
+            if not index.isValid():
+                return
+            self.select(index, flags)
+        self.currentUtteranceChanged.emit()
+
+    def update_select(self, utterance_id: int, deselect=False, reset=False):
+        if reset and [x.id for x in self.selected_utterances()] == [utterance_id]:
+            return
+        flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+        if reset:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.ClearAndSelect
+        elif deselect:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.Deselect
+        else:
+            flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        if utterance_id not in self.model().reversed_indices:
+            return
+        row = self.model().reversed_indices[utterance_id]
+
+        index = self.model().index(row, 0)
+        if not index.isValid():
+            return
+        self.select(index, flags)
+        if not deselect:
+            self.select_audio(self.model().utterances[row].begin, self.model().utterances[row].end)
+        self.currentUtteranceChanged.emit()
+
+
 class CorpusSelectionModel(QtCore.QItemSelectionModel):
     fileChanged = QtCore.Signal()
     channelChanged = QtCore.Signal()
     resetView = QtCore.Signal()
     fileAboutToChange = QtCore.Signal()
-
+    fileViewRequested = QtCore.Signal(object)
     selectionAudioChanged = QtCore.Signal()
     currentTimeChanged = QtCore.Signal(object)
     currentUtteranceChanged = QtCore.Signal()
 
     def __init__(self, *args, **kwargs):
-        super(
+        super().__init__(*args, **kwargs)
         self.settings = AnchorSettings()
         self.min_time = 0
         self.max_time = 10
         self.selected_min_time = None
         self.selected_max_time = None
-        self.current_file: Optional[File] = None
         self.x = None
         self.y = None
         self.current_utterance_id = None
@@ -176,22 +918,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         # self.selectionChanged.connect(self.update_selection_audio)
         # self.selectionChanged.connect(self.update_selection_audio)
         # self.model().changeCommandFired.connect(self.expire_current)
-        self.selectionChanged.connect(self._update_selection)
         self.model().layoutChanged.connect(self.check_selection)
         self.model().unlockCorpus.connect(self.fileChanged.emit)
-        self.model().selectionRequested.connect(self.update_select_rows)
-
-    @property
-    def plot_min(self):
-        if self.settings.right_to_left:
-            return -self.max_time
-        return self.min_time
-
-    @property
-    def plot_max(self):
-        if self.settings.right_to_left:
-            return -self.min_time
-        return self.max_time
 
     def set_current_utterance(self, utterance_id):
         self.current_utterance_id = utterance_id
@@ -203,13 +931,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         elif self.model().rowCount() == 0:
             self.clearSelection()
 
-    def set_current_channel(self, channel):
-        self.selected_channel = channel
-        self.channelChanged.emit()
-
     def clearSelection(self) -> None:
         self.fileAboutToChange.emit()
-        self.current_file = None
         self.current_utterance_id = None
         self.min_time = None
         self.max_time = None
@@ -219,22 +942,6 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         super(CorpusSelectionModel, self).clearSelection()
         self.fileChanged.emit()
 
-    def update_selected_wavform(self, *args):
-        if self.min_time is None or self.current_file is None:
-            self.x = None
-            self.y = None
-        else:
-            self.x, self.y = self.current_file.sound_file.normalized_waveform(
-                self.min_time, self.max_time
-            )
-
-    def get_selected_wave_form(self):
-        if self.y is None:
-            return None, None
-        if len(self.y.shape) > 1 and self.y.shape[0] == 2:
-            return self.x, self.y[self.selected_channel, :]
-        return self.x, self.y
-
     def update_select_rows(self, rows: list[int]):
         super(CorpusSelectionModel, self).clearCurrentIndex()
         super(CorpusSelectionModel, self).clearSelection()
@@ -250,8 +957,29 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
                 | QtCore.QItemSelectionModel.SelectionFlag.Rows,
             )
 
+    def update_selected_utterances(self, utterances):
+        if not utterances:
+            return
+        first = True
+        for u in utterances:
+            if u.id not in self.model().reversed_indices:
+                continue
+            row = self.model().reversed_indices[u.id]
+
+            index = self.model().index(row, 0)
+            if not index.isValid():
+                return
+            if not first:
+                flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+                flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+            else:
+                flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
+                flags |= QtCore.QItemSelectionModel.SelectionFlag.ClearAndSelect
+            first = False
+            self.select(index, flags)
+
     def update_select(self, utterance_id: int, deselect=False, reset=False, focus=False):
-        if reset and
+        if reset and self.selected_utterances() == [utterance_id]:
             return
         flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
         if reset:
@@ -266,58 +994,13 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         if focus:
             flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
            if row == self.currentIndex().row():
-                self.
+                self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
         index = self.model().index(row, 0)
         if not index.isValid():
            return
         self.select(index, flags)
 
-    def select_audio(self, begin, end):
-        if end is not None and end - begin < 0.025:
-            end = None
-        self.selected_min_time = begin
-        self.selected_max_time = end
-        self.selectionAudioChanged.emit()
-
-    def request_start_time(self, start_time):
-        if start_time >= self.max_time:
-            return
-        if start_time < self.min_time:
-            return
-        self.selected_min_time = start_time
-        self.selected_max_time = None
-        self.selectionAudioChanged.emit()
-
-    def visible_utts(self) -> typing.List[Utterance]:
-        file_utts = []
-        if not self.current_file:
-            return file_utts
-        if self.current_file.num_utterances > 1:
-            for u in sorted(self.current_file.utterances, key=lambda x: x.begin):
-                if u.begin >= self.max_time:
-                    break
-                if u.end <= self.min_time:
-                    continue
-                file_utts.append(u)
-        else:
-            file_utts.extend(self.current_file.utterances)
-        return file_utts
-
-    def currentUtterance(self) -> Optional[Utterance]:
-        if self.current_utterance_id is None:
-            return
-        m = self.model()
-        utterance = (
-            m.session.query(Utterance)
-            .options(
-                joinedload(Utterance.file).joinedload(File.sound_file),
-                joinedload(Utterance.file).subqueryload(File.speakers),
-            )
-            .get(self.current_utterance_id)
-        )
-        return utterance
-
     def _update_selection(self):
         index = self.currentIndex()
         if not index.isValid():
@@ -326,22 +1009,20 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         self.current_utterance_id = m._indices[index.row()]
         self.currentUtteranceChanged.emit()
 
-    def
-        current_utterance = self.
+    def selected_utterances(self):
+        current_utterance = self.current_utterance_id
         if current_utterance is None:
             return []
         utts = [current_utterance]
         m = self.model()
         for index in self.selectedRows(1):
-            if current_utterance is not None and m._indices[index.row()] == current_utterance
+            if current_utterance is not None and m._indices[index.row()] == current_utterance:
                 continue
-            utt = m.
+            utt = m.utterance_id_at(index)
             if utt is None:
                 continue
            if current_utterance is None:
                 current_utterance = utt
-            if utt.file_id != current_utterance.file_id:
-                continue
             utts.append(utt)
         return utts
 
@@ -356,142 +1037,23 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         text = m.data(m.index(index.row(), m.text_column), QtCore.Qt.ItemDataRole.DisplayRole)
         return text
 
-    def zoom(self, factor, mid_point=None):
-        if factor == 0 or self.min_time is None:
-            return
-        cur_duration = self.max_time - self.min_time
-        if mid_point is None:
-            mid_point = self.min_time + (cur_duration / 2)
-        new_duration = cur_duration / factor
-        new_begin = mid_point - (mid_point - self.min_time) / factor
-        new_begin = max(new_begin, 0)
-        new_end = min(new_begin + new_duration, self.current_file.duration)
-        if new_end - new_begin <= 0.025:
-            return
-        self.set_view_times(new_begin, new_end)
-
-    def pan(self, factor):
-        if self.min_time is None:
-            return
-        if factor < 1:
-            factor = 1 - factor
-            right = True
-        else:
-            right = False
-            factor = factor - 1
-        if right and self.max_time == self.current_file.duration:
-            return
-        if not right and self.min_time == 0:
-            return
-        cur_duration = self.max_time - self.min_time
-        shift = factor * cur_duration
-        if right:
-            new_begin = self.min_time + shift
-            new_end = self.max_time + shift
-        else:
-            new_begin = self.min_time - shift
-            new_end = self.max_time - shift
-        if new_begin < 0:
-            new_end = new_end + abs(new_begin)
-            new_begin = 0
-        if new_end > self.current_file.duration:
-            new_begin -= self.current_file.duration - new_end
-            new_end = self.current_file.duration
-        self.set_view_times(new_begin, new_end)
-
-    def zoom_in(self):
-        if self.current_file is None:
-            return
-        self.zoom(1.5)
-
-    def zoom_out(self):
-        if self.current_file is None:
-            return
-        self.zoom(0.5)
-
-    def zoom_to_selection(self):
-        if self.selected_min_time is None or self.selected_max_time is None:
-            rows = self.selectedRows(1)
-            if not rows:
-                return
-            begin = None
-            end = None
-            for r in rows:
-                u = self.model().utteranceAt(r)
-                if u is None:
-                    continue
-                if u.file_id != self.current_file.id:
-                    continue
-                if begin is None or begin > u.begin:
-                    begin = u.begin
-                if end is None or end < u.end:
-                    end = u.end
-            self.set_view_times(begin, end)
-        else:
-            self.set_view_times(self.selected_min_time, self.selected_max_time)
-
-    def update_from_slider(self, value):
-        if not self.max_time:
-            return
-        cur_window = self.max_time - self.min_time
-        self.set_view_times(value, value + cur_window)
-
-    def update_selection_audio(self):
-        begins = self.selectedRows(self.model().begin_column)
-        ends = self.selectedRows(self.model().end_column)
-        begin = None
-        end = None
-        if len(begins) > 0:
-            for i, b in enumerate(begins):
-                b = self.model().data(b, QtCore.Qt.ItemDataRole.DisplayRole)
-                e = self.model().data(ends[i], QtCore.Qt.ItemDataRole.DisplayRole)
-                if begin is None or begin > b:
-                    begin = b
-                if end is None or end < e:
-                    end = e
-            if self.current_file is None or begin > self.current_file.duration:
-                begin = None
-                end = None
-            elif end > self.current_file.duration:
-                end = self.current_file.duration
-        self.selected_min_time = begin
-        self.selected_max_time = end
-        self.selectionAudioChanged.emit()
-
     def switch_utterance(self, new_index, old_index):
+        if not self.model().fully_loaded:
+            return
         if not isinstance(new_index, QtCore.QModelIndex):
             row = 0
         else:
             if not new_index.isValid():
                 return
             row = new_index.row()
-        utt = self.model().
+        utt = self.model().utterance_id_at(row)
         if utt is None:
             return
-        if utt
+        if utt == self.current_utterance_id:
             return
-        self.current_utterance_id = utt
+        self.current_utterance_id = utt
         self.currentUtteranceChanged.emit()
-        self.
-            utt.file_id, utt.begin, utt.end, channel=utt.channel, force_update=True
-        )
-
-    def update_view_times(self, *args, force_update=False):
-        utts = self.selectedUtterances()
-        if len(utts) == 0:
-            self.resetView.emit()
-            return
-        if len(utts) == 1:
-            force_update = True
-        begin = utts[0].begin
-        f_id = utts[0].file_id
-        end_ind = -1
-        while True:
-            if utts[end_ind].file_id == f_id:
-                end = utts[end_ind].end
-                break
-        self.set_current_file(f_id, begin, end, channel=utts[0].channel, force_update=force_update)
-        self.selected_min_time = self.min_time
+        self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
 
     def model(self) -> CorpusModel:
         return super(CorpusSelectionModel, self).model()
@@ -503,43 +1065,6 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             return True
         return False
 
-    def set_current_file(self, file_id, begin=None, end=None, channel=None, force_update=False):
-        try:
-            new_file = self.current_file is None or self.current_file.id != file_id
-        except sqlalchemy.orm.exc.DetachedInstanceError:
-            new_file = True
-        if new_file:
-            self.selected_min_time = None
-            self.selected_max_time = None
-            self.fileAboutToChange.emit()
-            self.selected_channel = 0 if channel is None else channel
-            self.current_file = (
-                self.model().session.query(File).options(joinedload(File.sound_file)).get(file_id)
-            )
-            self.min_time = begin
-            self.max_time = end
-            self.fileChanged.emit()
-        elif (
-            self.current_file is not None
-            and begin is not None
-            and end is not None
-            and force_update
-        ):
-            self.selected_channel = channel
-            self.set_view_times(begin, end)
-
-    def set_view_times(self, begin, end):
-        begin = max(begin, 0)
-        end = min(end, self.current_file.duration)
-        if (begin, end) == (self.min_time, self.max_time):
-            return
-        self.min_time = begin
-        self.max_time = end
-        self.selected_min_time = self.min_time
-        if self.selected_max_time is not None and self.selected_max_time > self.max_time:
-            self.selected_max_time = None
-        self.viewChanged.emit(self.min_time, self.max_time)
-
     def focusUtterance(self, index):
         m = self.model()
         u = m.utteranceAt(index)
@@ -547,10 +1072,8 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             self.min_time = 0
             self.max_time = 1
             self.fileAboutToChange()
-            self.current_file = None
             self.fileChanged.emit()
             return
-        self.current_file = u.file
         begin = u.begin
         end = u.end
         padding = 1
@@ -1450,6 +1973,7 @@ class CorpusModel(TableModel):
         self.speakers = {}
         self.speaker_id_mapping = {}
         self.utterances = None
+        self.session: sqlalchemy.orm.scoped_session = None
         self.utterance_count = 0
         self.speaker_count = 0
         self.file_count = 0
@@ -1494,29 +2018,46 @@ class CorpusModel(TableModel):
             return True
         return False
 
-    def update_utterance_table_row(self,
-        if
-
-
+    def update_utterance_table_row(self, utterance: typing.Union[int, Utterance]):
+        if isinstance(utterance, int):
+            utterance_id = utterance
+            if utterance_id not in self.reversed_indices:
+                return
+            utterance = self.session.query(Utterance).get(utterance_id)
+        else:
+            utterance_id = utterance.id
+            if utterance_id not in self.reversed_indices:
+                return
         index = self.reversed_indices[utterance_id]
         self.layoutAboutToBeChanged.emit()
         self._data[index][self.text_column] = utterance.text
         self._data[index][self.begin_column] = utterance.begin
         self._data[index][self.end_column] = utterance.end
-        self._data[index][self.duration_column] = utterance.
+        self._data[index][self.duration_column] = utterance.end - utterance.begin
+        self.layoutChanged.emit()
+
+    def change_speaker_table_utterances(self, utterances: typing.List[Utterance]):
+        self.layoutAboutToBeChanged.emit()
+        for u in utterances:
+            if u.id not in self.reversed_indices:
+                continue
+            index = self.reversed_indices[u.id]
+            self._speaker_indices[index] = u.speaker_id
+            self._data[index][self.speaker_column] = self.get_speaker_name(u.speaker_id)
         self.layoutChanged.emit()
 
     def add_table_utterances(self, utterances: typing.List[Utterance]):
         self.layoutAboutToBeChanged.emit()
         rows = []
         for utterance in utterances:
+            speaker_name = self.get_speaker_name(utterance.speaker_id)
             row_data = [
                 utterance.oovs,
                 utterance.file_name,
-
+                speaker_name,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.end - utterance.begin,
                 utterance.text,
             ]
             self._data.append(row_data)
@@ -1531,7 +2072,10 @@ class CorpusModel(TableModel):
     def delete_table_utterances(self, utterances: typing.List[Utterance]):
         self.layoutAboutToBeChanged.emit()
         for utterance in utterances:
-
+            try:
+                index = self.reversed_indices.pop(utterance.id)
+            except KeyError:
+                continue
             _ = self._data.pop(index)
             _ = self._indices.pop(index)
             _ = self._file_indices.pop(index)
@@ -1552,7 +2096,6 @@ class CorpusModel(TableModel):
 
         self.layoutAboutToBeChanged.emit()
         first = split_utterances[0]
-        self.session.merge(first)
        file_name = self._data[index][1]
         speaker_name = self._data[index][2]
         row_data = [
@@ -1561,7 +2104,7 @@ class CorpusModel(TableModel):
             speaker_name,
             first.begin,
             first.end,
-            first.
+            first.end - first.begin,
             first.text,
         ]
         self._data[index] = row_data
@@ -1571,7 +2114,6 @@ class CorpusModel(TableModel):
         self.reversed_indices[first.id] = index
         rows = [index]
         for utterance in split_utterances[1:]:
-            self.session.merge(utterance)
             index += 1
             rows.append(index)
             self.reversed_indices = {
@@ -1584,7 +2126,7 @@ class CorpusModel(TableModel):
                 speaker_name,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.end - utterance.begin,
                 utterance.text,
             ]
             self.reversed_indices[utterance.id] = index
@@ -1603,14 +2145,13 @@ class CorpusModel(TableModel):
         except KeyError:
             return
         self.layoutAboutToBeChanged.emit()
-        self.session.merge(merged_utterance)
         row_data = [
             merged_utterance.oovs,
             merged_utterance.file_name,
             merged_utterance.speaker_name,
             merged_utterance.begin,
             merged_utterance.end,
-            merged_utterance.
+            merged_utterance.end - merged_utterance.begin,
             merged_utterance.text,
         ]
         first = split_utterances[0]
@@ -1659,32 +2200,6 @@ class CorpusModel(TableModel):
         self.language_model = language_model
         self.languageModelChanged.emit()
 
-    def create_utterance(self, file: File, speaker: Optional[Speaker], begin: float, end: float):
-        if not self.editable:
-            return
-        channel = 0
-        if file.num_channels > 1:
-            ind = file.speaker_ordering.index(speaker)
-            if ind >= len(file.speaker_ordering) / 2:
-                channel = 1
-        if speaker is None:
-            speaker = self.corpus.add_speaker("speech", session=self.session)
-        begin = round(begin, 4)
-        end = round(end, 4)
-        text = ""
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        new_utt = Utterance(
-            id=next_pk,
-            speaker_id=speaker.id,
-            file_id=file.id,
-            begin=begin,
-            end=end,
-            channel=channel,
-            text=text,
-        )
-        self.addCommand.emit(undo.CreateUtteranceCommand(new_utt, self))
-        self.unsaved_files.add(file.id)
-
     def set_file_modified(self, file_id: typing.Union[int, typing.List[int]]):
         if isinstance(file_id, int):
             file_id = [file_id]
@@ -1699,32 +2214,6 @@ class CorpusModel(TableModel):
         )
         self.session.commit()
 
-    def update_utterance_text(self, utterance: Utterance, text):
-        if text != utterance.text:
-            self.addCommand.emit(undo.UpdateUtteranceTextCommand(utterance, text, self))
-            self.set_file_modified(utterance.file_id)
-
-    def update_utterance_times(
-        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
-    ):
-        if not self.editable:
-            return
-        self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
-        self.set_file_modified(utterance.file_id)
-
-    def update_utterance_speaker(self, utterance: Utterance, speaker: Speaker):
-        if not self.editable:
-            return
-        self.addCommand.emit(undo.UpdateUtteranceSpeakerCommand(utterance, speaker, self))
-
-    def delete_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        for u in utterances:
-            self.set_file_modified(u.file_id)
-            self.set_speaker_modified(u.speaker_id)
-        self.addCommand.emit(undo.DeleteUtteranceCommand(utterances, self))
-
     def check_align_lexicon_compiler(self):
         if self.acoustic_model is None:
             return
@@ -1743,150 +2232,13 @@ class CorpusModel(TableModel):
             dictionary_id, self.acoustic_model, disambiguation=True
         )
 
-    def split_vad_utterance(
-        self, original_utterance_id, replacement_utterance_data: typing.List[KalpyUtterance]
-    ):
-        utt = self.session.get(Utterance, original_utterance_id)
-        replacement_utterances = []
-        speaker_id = utt.speaker_id
-        file_id = utt.file_id
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        for new_utt in replacement_utterance_data:
-            replacement_utterances.append(
-                Utterance(
-                    id=next_pk,
-                    begin=new_utt.segment.begin,
-                    end=new_utt.segment.end,
-                    speaker_id=speaker_id,
-                    file_id=file_id,
-                    text=new_utt.transcript,
-                    normalized_text=new_utt.transcript,
-                    features="",
-                    in_subset=False,
-                    ignored=False,
-                    channel=new_utt.segment.channel,
-                )
-            )
-            next_pk += 1
-        splitting_utterances = [[utt, *replacement_utterances]]
-        self.addCommand.emit(
-            undo.SplitUtteranceCommand(splitting_utterances, self, update_table=False)
-        )
-        self.requestFileView.emit(utt.file_name)
-        self.set_file_modified(file_id)
-        self.set_speaker_modified(speaker_id)
-
-    def split_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        splitting_utterances = []
-        for utt in utterances:
-            duration = utt.duration
-            beg = utt.begin
-            end = utt.end
-            first_text = ""
-            second_text = ""
-            if " " not in utt.text and " " in utt.normalized_text:
-                t = utt.normalized_text.split()
-                mid_ind = int(len(t) / 2)
-                first_text = t[:mid_ind]
-                second_text = t[mid_ind:]
-            elif utt.text:
-                t = utt.text.split()
-                mid_ind = int(len(t) / 2)
-                first_text = t[:mid_ind]
-                second_text = t[mid_ind:]
-            split_time = beg + (duration / 2)
-            oovs = set()
-            for w in first_text:
-                if not self.dictionary_model.check_word(w, utt.speaker_id):
-                    oovs.add(w)
-            next_pk = self.corpus.get_next_primary_key(Utterance)
-            first_utt = Utterance(
-                id=next_pk,
-                speaker_id=utt.speaker_id,
-                file_id=utt.file_id,
-                begin=beg,
-                end=split_time,
-                channel=utt.channel,
-                text=" ".join(first_text),
-                normalized_text=" ".join(first_text),
-                oovs=" ".join(oovs),
-            )
-            next_pk += 1
-            oovs = set()
-            for w in second_text:
-                if not self.dictionary_model.check_word(w, utt.speaker_id):
-                    oovs.add(w)
-            second_utt = Utterance(
-                id=next_pk,
-                speaker_id=utt.speaker_id,
-                file_id=utt.file_id,
-                begin=split_time,
-                end=end,
-                channel=utt.channel,
-                text=" ".join(second_text),
-                normalized_text=" ".join(second_text),
-                oovs=" ".join(oovs),
-            )
-            splitting_utterances.append([utt, first_utt, second_utt])
-        self.addCommand.emit(undo.SplitUtteranceCommand(splitting_utterances, self))
-        self.set_file_modified([utt[0].file_id for utt in splitting_utterances])
-
     def merge_speakers(self, speakers: list[int]):
         self.addCommand.emit(undo.MergeSpeakersCommand(speakers, self))
 
-    def merge_utterances(self, utterances: list[Utterance]):
-        if not self.editable:
-            return
-        min_begin = 1000000000
-        max_end = 0
-        text = ""
-        normalized_text = ""
-        speaker = None
-        file = None
-        channel = None
-        for old_utt in sorted(utterances, key=lambda x: x.begin):
-            if speaker is None:
-                speaker = old_utt.speaker
-            if file is None:
-                file = old_utt.file
-            if channel is None:
-                channel = old_utt.channel
-            if old_utt.begin < min_begin:
-                min_begin = old_utt.begin
-            if old_utt.end > max_end:
-                max_end = old_utt.end
-            utt_text = old_utt.text
-            if utt_text == "speech" and text.strip() == "speech":
-                continue
-            text += utt_text + " "
-            normalized_text += old_utt.normalized_text + " "
-        text = text[:-1]
-        normalized_text = normalized_text[:-1]
-        next_pk = self.corpus.get_next_primary_key(Utterance)
-        oovs = set()
-        for w in text.split():
-            if not self.dictionary_model.check_word(w, speaker.id):
-                oovs.add(w)
-        new_utt = Utterance(
-            id=next_pk,
-            speaker=speaker,
-            file=file,
-            begin=min_begin,
-            end=max_end,
-            channel=channel,
-            text=text,
-            normalized_text=normalized_text,
-            oovs=" ".join(oovs),
-        )
-        self.set_file_modified(file.id)
-        self.addCommand.emit(undo.MergeUtteranceCommand(utterances, new_utt, self))
-
     def replace_all(self, search_query: TextFilterQuery, replacement: str):
         self.addCommand.emit(undo.ReplaceAllCommand(search_query, replacement, self))
 
-    def
+    def utterance_id_at(self, index) -> Optional[Utterance]:
         if not isinstance(index, int):
             if not index.isValid():
                 return None
@@ -1895,15 +2247,16 @@ class CorpusModel(TableModel):
             return None
         if len(self._indices) == 0:
             return None
-
-
-
-
-
-
-
+        return self._indices[index]
+
+    def audio_info_for_utterance(self, row: int):
+        return (
+            self._file_indices[row],
+            self._data[row][self.begin_column],
+            self._data[row][self.end_column],
+            self._indices[row],
+            self._speaker_indices[row],
         )
-        return utterance
 
     def fileAt(self, index) -> int:
         if not isinstance(index, int):