Anchor-annotator 0.8.1__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anchor/_version.py +9 -4
- anchor/command_line.py +1 -0
- anchor/main.py +113 -6
- anchor/models.py +402 -23
- anchor/plot.py +835 -104
- anchor/settings.py +6 -10
- anchor/ui_main_window.py +14 -8
- anchor/undo.py +672 -11
- anchor/widgets.py +308 -44
- anchor/workers.py +632 -351
- {Anchor_annotator-0.8.1.dist-info → anchor_annotator-0.9.0.dist-info}/METADATA +3 -2
- anchor_annotator-0.9.0.dist-info/RECORD +22 -0
- {Anchor_annotator-0.8.1.dist-info → anchor_annotator-0.9.0.dist-info}/WHEEL +1 -1
- Anchor_annotator-0.8.1.dist-info/RECORD +0 -22
- {Anchor_annotator-0.8.1.dist-info → anchor_annotator-0.9.0.dist-info/licenses}/LICENSE +0 -0
- {Anchor_annotator-0.8.1.dist-info → anchor_annotator-0.9.0.dist-info}/top_level.txt +0 -0
anchor/models.py
CHANGED
@@ -29,9 +29,12 @@ from montreal_forced_aligner.db import (
|
|
29
29
|
File,
|
30
30
|
Grapheme,
|
31
31
|
Phone,
|
32
|
+
PhoneInterval,
|
33
|
+
Pronunciation,
|
32
34
|
Speaker,
|
33
35
|
Utterance,
|
34
36
|
Word,
|
37
|
+
WordInterval,
|
35
38
|
)
|
36
39
|
from montreal_forced_aligner.g2p.generator import PyniniValidator
|
37
40
|
from montreal_forced_aligner.models import (
|
@@ -114,7 +117,7 @@ class TableModel(QtCore.QAbstractTableModel):
|
|
114
117
|
self.limit = 1
|
115
118
|
self.text_filter = None
|
116
119
|
|
117
|
-
def set_text_filter(self, text_filter: TextFilterQuery):
|
120
|
+
def set_text_filter(self, text_filter: typing.Optional[TextFilterQuery]):
|
118
121
|
if text_filter != self.text_filter:
|
119
122
|
self.current_offset = 0
|
120
123
|
self.text_filter = text_filter
|
@@ -180,6 +183,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
180
183
|
waveformReady = QtCore.Signal()
|
181
184
|
utterancesReady = QtCore.Signal()
|
182
185
|
speakersChanged = QtCore.Signal()
|
186
|
+
phoneTierChanged = QtCore.Signal(object)
|
183
187
|
|
184
188
|
def __init__(self, *args, **kwargs):
|
185
189
|
super().__init__(*args, **kwargs)
|
@@ -191,8 +195,10 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
191
195
|
self._speaker_indices = []
|
192
196
|
self.reversed_indices = {}
|
193
197
|
self.speaker_channel_mapping = {}
|
194
|
-
self.corpus_model: CorpusModel = None
|
198
|
+
self.corpus_model: typing.Optional[CorpusModel] = None
|
195
199
|
self.closing = False
|
200
|
+
self.cached_begin = None
|
201
|
+
self.cached_end = None
|
196
202
|
|
197
203
|
self.thread_pool = QtCore.QThreadPool()
|
198
204
|
self.thread_pool.setMaxThreadCount(4)
|
@@ -209,12 +215,12 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
209
215
|
def clean_up_for_close(self):
|
210
216
|
self.closing = True
|
211
217
|
|
212
|
-
def set_file(self, file_id):
|
218
|
+
def set_file(self, file_id, utterance_id=None, begin=None, end=None):
|
213
219
|
self.file = (
|
214
220
|
self.corpus_model.session.query(File).options(joinedload(File.sound_file)).get(file_id)
|
215
221
|
)
|
216
222
|
self.y = None
|
217
|
-
self.get_utterances()
|
223
|
+
self.get_utterances(utterance_id, begin, end)
|
218
224
|
waveform_worker = workers.WaveformWorker(self.file.sound_file.sound_file_path)
|
219
225
|
waveform_worker.signals.result.connect(self.finalize_loading_wave_form)
|
220
226
|
self.thread_pool.start(waveform_worker)
|
@@ -222,7 +228,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
222
228
|
def finalize_loading_utterances(self, results):
|
223
229
|
if self.closing:
|
224
230
|
return
|
225
|
-
utterances, file_id = results
|
231
|
+
utterances, file_id, self.cached_begin, self.cached_end = results
|
226
232
|
if file_id != self.file.id:
|
227
233
|
return
|
228
234
|
self.utterances = utterances
|
@@ -245,7 +251,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
245
251
|
self.y = y
|
246
252
|
self.waveformReady.emit()
|
247
253
|
|
248
|
-
def get_utterances(self):
|
254
|
+
def get_utterances(self, utterance_id=None, begin=None, end=None):
|
249
255
|
parent_index = self.index(0, 0)
|
250
256
|
self.beginRemoveRows(parent_index, 0, len(self.utterances))
|
251
257
|
self.utterances = []
|
@@ -265,6 +271,9 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
265
271
|
or self.corpus_model.has_reference_alignments
|
266
272
|
or self.corpus_model.has_transcribed_alignments
|
267
273
|
),
|
274
|
+
utterance_id=utterance_id,
|
275
|
+
begin=begin,
|
276
|
+
end=end,
|
268
277
|
)
|
269
278
|
speaker_tier_worker.signals.result.connect(self.finalize_loading_utterances)
|
270
279
|
self.thread_pool.start(speaker_tier_worker)
|
@@ -359,6 +368,107 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
359
368
|
self.delete_table_utterances([merged_utterance])
|
360
369
|
self.add_table_utterances(split_utterances)
|
361
370
|
|
371
|
+
def update_phone_boundaries(
|
372
|
+
self,
|
373
|
+
utterance: Utterance,
|
374
|
+
first_phone_interval: PhoneInterval,
|
375
|
+
second_phone_interval: PhoneInterval,
|
376
|
+
new_time: float,
|
377
|
+
):
|
378
|
+
if not self.corpus_model.editable:
|
379
|
+
return
|
380
|
+
if first_phone_interval.end == new_time and second_phone_interval.begin == new_time:
|
381
|
+
return
|
382
|
+
self.addCommand.emit(
|
383
|
+
undo.UpdatePhoneBoundariesCommand(
|
384
|
+
utterance, first_phone_interval, second_phone_interval, new_time, self
|
385
|
+
)
|
386
|
+
)
|
387
|
+
self.corpus_model.set_file_modified(self.file.id)
|
388
|
+
|
389
|
+
def delete_reference_alignments(self, utterance: Utterance):
|
390
|
+
if not self.corpus_model.editable:
|
391
|
+
return
|
392
|
+
self.addCommand.emit(undo.DeleteReferenceIntervalsCommand(utterance, self))
|
393
|
+
self.corpus_model.set_file_modified(self.file.id)
|
394
|
+
|
395
|
+
def update_phone_interval(
|
396
|
+
self, utterance: Utterance, phone_interval: PhoneInterval, phone: Phone
|
397
|
+
):
|
398
|
+
if not self.corpus_model.editable:
|
399
|
+
return
|
400
|
+
if phone_interval.phone_id == phone.id:
|
401
|
+
return
|
402
|
+
self.addCommand.emit(
|
403
|
+
undo.UpdatePhoneIntervalCommand(utterance, phone_interval, phone, self)
|
404
|
+
)
|
405
|
+
self.corpus_model.set_file_modified(self.file.id)
|
406
|
+
|
407
|
+
def update_word_pronunciation(
|
408
|
+
self, utterance: Utterance, word_interval: WordInterval, pronunciation: Pronunciation
|
409
|
+
):
|
410
|
+
if not self.corpus_model.editable:
|
411
|
+
return
|
412
|
+
if word_interval.pronunciation_id == pronunciation.id:
|
413
|
+
return
|
414
|
+
self.addCommand.emit(
|
415
|
+
undo.UpdateWordIntervalPronunciationCommand(
|
416
|
+
utterance, word_interval, pronunciation, self
|
417
|
+
)
|
418
|
+
)
|
419
|
+
self.corpus_model.set_file_modified(self.file.id)
|
420
|
+
|
421
|
+
def update_word(
|
422
|
+
self, utterance: Utterance, word_interval: WordInterval, word: typing.Union[Word, str]
|
423
|
+
):
|
424
|
+
if not self.corpus_model.editable:
|
425
|
+
return
|
426
|
+
if isinstance(word, Word) and word_interval.word_id == word.id:
|
427
|
+
return
|
428
|
+
self.addCommand.emit(
|
429
|
+
undo.UpdateWordIntervalWordCommand(utterance, word_interval, word, self)
|
430
|
+
)
|
431
|
+
self.corpus_model.set_file_modified(self.file.id)
|
432
|
+
|
433
|
+
def insert_phone_interval(
|
434
|
+
self,
|
435
|
+
utterance: Utterance,
|
436
|
+
phone_interval,
|
437
|
+
previous_interval: PhoneInterval,
|
438
|
+
following_interval: PhoneInterval,
|
439
|
+
word_interval: WordInterval,
|
440
|
+
):
|
441
|
+
if not self.corpus_model.editable:
|
442
|
+
return
|
443
|
+
self.addCommand.emit(
|
444
|
+
undo.InsertPhoneIntervalCommand(
|
445
|
+
utterance,
|
446
|
+
phone_interval,
|
447
|
+
previous_interval,
|
448
|
+
following_interval,
|
449
|
+
self,
|
450
|
+
word_interval,
|
451
|
+
)
|
452
|
+
)
|
453
|
+
self.corpus_model.set_file_modified(self.file.id)
|
454
|
+
|
455
|
+
def delete_phone_interval(
|
456
|
+
self,
|
457
|
+
utterance: Utterance,
|
458
|
+
phone_interval: PhoneInterval,
|
459
|
+
previous_interval: PhoneInterval,
|
460
|
+
following_interval: PhoneInterval,
|
461
|
+
time_point: float,
|
462
|
+
):
|
463
|
+
if not self.corpus_model.editable:
|
464
|
+
return
|
465
|
+
self.addCommand.emit(
|
466
|
+
undo.DeletePhoneIntervalCommand(
|
467
|
+
utterance, phone_interval, previous_interval, following_interval, time_point, self
|
468
|
+
)
|
469
|
+
)
|
470
|
+
self.corpus_model.set_file_modified(self.file.id)
|
471
|
+
|
362
472
|
def update_utterance_text(self, utterance: Utterance, text):
|
363
473
|
if not self.corpus_model.editable:
|
364
474
|
return
|
@@ -472,7 +582,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
472
582
|
begin=beg,
|
473
583
|
end=split_time,
|
474
584
|
channel=utterance.channel,
|
475
|
-
text=" ".join(first_text),
|
585
|
+
text=" ".join(first_text) + " ",
|
476
586
|
normalized_text=" ".join(first_text),
|
477
587
|
oovs=" ".join(oovs),
|
478
588
|
)
|
@@ -488,7 +598,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
488
598
|
begin=split_time,
|
489
599
|
end=end,
|
490
600
|
channel=utterance.channel,
|
491
|
-
text=" ".join(second_text),
|
601
|
+
text=" " + " ".join(second_text),
|
492
602
|
normalized_text=" ".join(second_text),
|
493
603
|
oovs=" ".join(oovs),
|
494
604
|
)
|
@@ -528,8 +638,8 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
528
638
|
continue
|
529
639
|
text += utt_text + " "
|
530
640
|
normalized_text += old_utt.normalized_text + " "
|
531
|
-
text = text[:-1]
|
532
|
-
normalized_text = normalized_text[:-1]
|
641
|
+
text = re.sub(r"\s+", " ", text[:-1])
|
642
|
+
normalized_text = re.sub(r"\s+", " ", normalized_text[:-1])
|
533
643
|
next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
|
534
644
|
oovs = set()
|
535
645
|
for w in text.split():
|
@@ -579,6 +689,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
579
689
|
selectionAudioChanged = QtCore.Signal(object)
|
580
690
|
currentUtteranceChanged = QtCore.Signal(object)
|
581
691
|
speakerRequested = QtCore.Signal(object)
|
692
|
+
searchTermChanged = QtCore.Signal(object)
|
582
693
|
|
583
694
|
spectrogramReady = QtCore.Signal()
|
584
695
|
waveformReady = QtCore.Signal()
|
@@ -631,6 +742,11 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
631
742
|
return
|
632
743
|
begin_samp = int(self.min_time * self.model().file.sample_rate)
|
633
744
|
end_samp = int(self.max_time * self.model().file.sample_rate)
|
745
|
+
if self.model().cached_begin is not None and (
|
746
|
+
self.min_time < self.model().cached_begin + 5
|
747
|
+
or self.max_time > self.model().cached_end - 5
|
748
|
+
):
|
749
|
+
self.model().get_utterances(begin=self.min_time, end=self.max_time)
|
634
750
|
if len(self.model().y.shape) > 1:
|
635
751
|
y = self.model().y[begin_samp:end_samp, self.selected_channel]
|
636
752
|
else:
|
@@ -890,23 +1006,44 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
890
1006
|
and not self.min_time <= self.selected_max_time <= self.max_time
|
891
1007
|
):
|
892
1008
|
self.selected_max_time = None
|
893
|
-
|
894
|
-
self.view_change_timer.start()
|
1009
|
+
self.view_change_timer.start()
|
895
1010
|
|
896
1011
|
def send_selection_update(self):
|
897
1012
|
self.viewChanged.emit(self.min_time, self.max_time)
|
898
1013
|
|
899
|
-
def
|
1014
|
+
def set_search_term(self, word):
|
1015
|
+
query = TextFilterQuery(word, word=True)
|
1016
|
+
self.searchTermChanged.emit(query)
|
1017
|
+
|
1018
|
+
def set_current_file(
|
1019
|
+
self,
|
1020
|
+
file_id,
|
1021
|
+
begin,
|
1022
|
+
end,
|
1023
|
+
utterance_id,
|
1024
|
+
speaker_id,
|
1025
|
+
force_update=False,
|
1026
|
+
single_utterance=False,
|
1027
|
+
):
|
900
1028
|
try:
|
901
|
-
new_file =
|
1029
|
+
new_file = (
|
1030
|
+
self.model().file is None
|
1031
|
+
or self.model().file.id != file_id
|
1032
|
+
or self.model().file.duration >= 500
|
1033
|
+
)
|
902
1034
|
except sqlalchemy.orm.exc.DetachedInstanceError:
|
903
1035
|
new_file = True
|
1036
|
+
if force_update and single_utterance:
|
1037
|
+
new_file = True
|
904
1038
|
self.requested_utterance_id = utterance_id
|
905
1039
|
self.selected_min_time = None
|
906
1040
|
self.selected_max_time = None
|
907
1041
|
if new_file:
|
908
1042
|
self.fileAboutToChange.emit()
|
909
|
-
|
1043
|
+
if single_utterance:
|
1044
|
+
self.model().set_file(file_id, utterance_id=utterance_id)
|
1045
|
+
else:
|
1046
|
+
self.model().set_file(file_id, begin=begin, end=end)
|
910
1047
|
self.speakerRequested.emit(speaker_id)
|
911
1048
|
else:
|
912
1049
|
self.finalize_set_new_file()
|
@@ -1496,6 +1633,7 @@ class SpeakerModel(TableModel):
|
|
1496
1633
|
self.mds = None
|
1497
1634
|
self.perplexity = 30.0
|
1498
1635
|
self.cluster_labels = None
|
1636
|
+
self.distances = None
|
1499
1637
|
self.ivectors = None
|
1500
1638
|
self.utterance_ids = None
|
1501
1639
|
self.alternate_speaker_ids = []
|
@@ -1623,10 +1761,11 @@ class SpeakerModel(TableModel):
|
|
1623
1761
|
def finish_clustering(self, result, *args, **kwargs):
|
1624
1762
|
if result is None:
|
1625
1763
|
return
|
1626
|
-
speaker_ids, c_labels = result
|
1764
|
+
speaker_ids, c_labels, distances = result
|
1627
1765
|
if speaker_ids != self.current_speakers:
|
1628
1766
|
return
|
1629
1767
|
self.cluster_labels = c_labels
|
1768
|
+
self.distances = distances
|
1630
1769
|
self.num_clusters = np.max(c_labels) + 1
|
1631
1770
|
self.clustered.emit()
|
1632
1771
|
|
@@ -1656,6 +1795,7 @@ class SpeakerModel(TableModel):
|
|
1656
1795
|
def change_current_speaker(self, speaker_id: typing.Union[int, typing.List[int]], reset=False):
|
1657
1796
|
self.mds = None
|
1658
1797
|
self.cluster_labels = None
|
1798
|
+
self.distances = None
|
1659
1799
|
if reset:
|
1660
1800
|
self.current_speakers = []
|
1661
1801
|
if isinstance(speaker_id, int):
|
@@ -1719,6 +1859,7 @@ class SpeakerModel(TableModel):
|
|
1719
1859
|
|
1720
1860
|
def cluster_speaker_utterances(self):
|
1721
1861
|
self.cluster_labels = None
|
1862
|
+
self.distances = None
|
1722
1863
|
self.num_clusters = None
|
1723
1864
|
if self.corpus_model.corpus is None:
|
1724
1865
|
return
|
@@ -1751,6 +1892,175 @@ class SpeakerModel(TableModel):
|
|
1751
1892
|
self.runFunction.emit("Generating speaker MDS", self.finish_mds, [kwargs])
|
1752
1893
|
|
1753
1894
|
|
1895
|
+
class AlignmentAnalysisModel(TableModel):
|
1896
|
+
def __init__(self, parent=None):
|
1897
|
+
columns = [
|
1898
|
+
"Utterance",
|
1899
|
+
"Speaker",
|
1900
|
+
"Phone",
|
1901
|
+
"Duration",
|
1902
|
+
"Log-likelihood",
|
1903
|
+
"Word",
|
1904
|
+
]
|
1905
|
+
super().__init__(columns, parent=parent)
|
1906
|
+
self.settings = AnchorSettings()
|
1907
|
+
self.utterance_ids = []
|
1908
|
+
self.file_ids = []
|
1909
|
+
self.speaker_ids = []
|
1910
|
+
self.indices = []
|
1911
|
+
self.reversed_indices = {}
|
1912
|
+
self.corpus_model: Optional[CorpusModel] = None
|
1913
|
+
self.set_limit(self.settings.value(self.settings.RESULTS_PER_PAGE))
|
1914
|
+
self.speaker_filter = None
|
1915
|
+
self.phone_filter = None
|
1916
|
+
self.less_than = None
|
1917
|
+
self.greater_than = None
|
1918
|
+
self.measure = "duration"
|
1919
|
+
self.exclude_manual = False
|
1920
|
+
self.relative_duration = False
|
1921
|
+
self.word_mode = False
|
1922
|
+
self.sort_index = None
|
1923
|
+
self.sort_order = None
|
1924
|
+
|
1925
|
+
def data(self, index, role=QtCore.Qt.ItemDataRole.DisplayRole):
|
1926
|
+
if not index.isValid() or index.column() > 5:
|
1927
|
+
return None
|
1928
|
+
if role == QtCore.Qt.ItemDataRole.DisplayRole:
|
1929
|
+
d = self._data[index.row()][index.column()]
|
1930
|
+
if index.column() in {3, 4}:
|
1931
|
+
try:
|
1932
|
+
return float(d)
|
1933
|
+
except TypeError:
|
1934
|
+
return "N/A"
|
1935
|
+
return d
|
1936
|
+
return super().data(index, role)
|
1937
|
+
|
1938
|
+
def set_less_than(self, less_than: float):
|
1939
|
+
if less_than != self.less_than:
|
1940
|
+
self.current_offset = 0
|
1941
|
+
self.less_than = less_than
|
1942
|
+
|
1943
|
+
def set_greater_than(self, greater_than: float):
|
1944
|
+
if greater_than != self.greater_than:
|
1945
|
+
self.current_offset = 0
|
1946
|
+
self.greater_than = greater_than
|
1947
|
+
|
1948
|
+
def set_exclude_manual(self, exclude_manual: bool):
|
1949
|
+
if exclude_manual != self.exclude_manual:
|
1950
|
+
self.current_offset = 0
|
1951
|
+
self.exclude_manual = exclude_manual
|
1952
|
+
|
1953
|
+
def set_measure(self, measure: str):
|
1954
|
+
measure = measure.lower() if measure == "Duration" else "phone_goodness"
|
1955
|
+
if measure != self.measure:
|
1956
|
+
self.current_offset = 0
|
1957
|
+
self.measure = measure
|
1958
|
+
|
1959
|
+
def set_speaker_filter(self, speaker_id: typing.Union[int, str, None]):
|
1960
|
+
if speaker_id and not isinstance(speaker_id, int):
|
1961
|
+
current_speaker = (
|
1962
|
+
self.corpus_model.corpus.session.query(Speaker)
|
1963
|
+
.filter(Speaker.name == speaker_id)
|
1964
|
+
.first()
|
1965
|
+
)
|
1966
|
+
speaker_id = current_speaker.id
|
1967
|
+
if speaker_id != self.speaker_filter:
|
1968
|
+
self.current_offset = 0
|
1969
|
+
self.speaker_filter = speaker_id
|
1970
|
+
|
1971
|
+
def set_word_mode(self, word_mode):
|
1972
|
+
if word_mode != self.word_mode:
|
1973
|
+
self.current_offset = 0
|
1974
|
+
self.word_mode = word_mode
|
1975
|
+
|
1976
|
+
def set_relative_duration(self, relative_duration):
|
1977
|
+
if relative_duration != self.relative_duration:
|
1978
|
+
self.current_offset = 0
|
1979
|
+
self.relative_duration = relative_duration
|
1980
|
+
|
1981
|
+
def set_phone_filter(self, phone_id: typing.Union[int, str, None]):
|
1982
|
+
if phone_id == "":
|
1983
|
+
phone_id = None
|
1984
|
+
if phone_id and not isinstance(phone_id, int):
|
1985
|
+
phone_id = self.corpus_model.phones[phone_id].id
|
1986
|
+
if phone_id != self.phone_filter:
|
1987
|
+
self.current_offset = 0
|
1988
|
+
self.phone_filter = phone_id
|
1989
|
+
|
1990
|
+
def set_word_filter(self, text_filter: typing.Optional[TextFilterQuery]):
|
1991
|
+
if text_filter != self.text_filter:
|
1992
|
+
self.current_offset = 0
|
1993
|
+
self.text_filter = text_filter
|
1994
|
+
|
1995
|
+
def set_corpus_model(self, corpus_model: CorpusModel):
|
1996
|
+
self.corpus_model = corpus_model
|
1997
|
+
self.corpus_model.corpusLoading.connect(self.update_data)
|
1998
|
+
|
1999
|
+
def update_sort(self, column, order):
|
2000
|
+
self.sort_index = column
|
2001
|
+
self.sort_order = order
|
2002
|
+
self.update_data()
|
2003
|
+
|
2004
|
+
def finish_update_data(self, result, *args, **kwargs):
|
2005
|
+
self.layoutAboutToBeChanged.emit()
|
2006
|
+
if result is None:
|
2007
|
+
self._data = []
|
2008
|
+
self.indices = []
|
2009
|
+
self.utterance_ids = []
|
2010
|
+
self.file_ids = []
|
2011
|
+
self.speaker_ids = []
|
2012
|
+
self.reversed_indices = {}
|
2013
|
+
else:
|
2014
|
+
(
|
2015
|
+
self._data,
|
2016
|
+
self.indices,
|
2017
|
+
self.utterance_ids,
|
2018
|
+
self.file_ids,
|
2019
|
+
self.speaker_ids,
|
2020
|
+
self.reversed_indices,
|
2021
|
+
) = result
|
2022
|
+
self.layoutChanged.emit()
|
2023
|
+
self.newResults.emit()
|
2024
|
+
|
2025
|
+
@property
|
2026
|
+
def query_kwargs(self) -> typing.Dict[str, typing.Any]:
|
2027
|
+
kwargs = {
|
2028
|
+
"limit": self.limit,
|
2029
|
+
"current_offset": self.current_offset,
|
2030
|
+
"speaker_id": self.speaker_filter if isinstance(self.speaker_filter, int) else None,
|
2031
|
+
"phone_id": self.phone_filter if isinstance(self.phone_filter, int) else None,
|
2032
|
+
"word_filter": self.text_filter,
|
2033
|
+
"word_mode": self.word_mode,
|
2034
|
+
"less_than": self.less_than,
|
2035
|
+
"greater_than": self.greater_than,
|
2036
|
+
"measure": self.measure,
|
2037
|
+
"relative_duration": self.relative_duration,
|
2038
|
+
"exclude_manual": self.exclude_manual,
|
2039
|
+
}
|
2040
|
+
if self.sort_index is not None:
|
2041
|
+
kwargs["sort_index"] = self.sort_index
|
2042
|
+
kwargs["sort_desc"] = self.sort_order == QtCore.Qt.SortOrder.DescendingOrder
|
2043
|
+
return kwargs
|
2044
|
+
|
2045
|
+
@property
|
2046
|
+
def count_kwargs(self) -> typing.Dict[str, typing.Any]:
|
2047
|
+
kwargs = self.query_kwargs
|
2048
|
+
kwargs["count"] = True
|
2049
|
+
return kwargs
|
2050
|
+
|
2051
|
+
def update_result_count(self):
|
2052
|
+
self.runFunction.emit(
|
2053
|
+
"Counting alignment analysis results",
|
2054
|
+
self.finalize_result_count,
|
2055
|
+
[self.count_kwargs],
|
2056
|
+
)
|
2057
|
+
|
2058
|
+
def update_data(self):
|
2059
|
+
if not self.corpus_model.has_alignments:
|
2060
|
+
return
|
2061
|
+
self.runFunction.emit("Analyzing alignments", self.finish_update_data, [self.query_kwargs])
|
2062
|
+
|
2063
|
+
|
1754
2064
|
class DiarizationModel(TableModel):
|
1755
2065
|
changeUtteranceSpeakerRequested = QtCore.Signal(object, object)
|
1756
2066
|
|
@@ -1781,22 +2091,26 @@ class DiarizationModel(TableModel):
|
|
1781
2091
|
self.metric = "cosine"
|
1782
2092
|
self.inverted = False
|
1783
2093
|
self.in_speakers = False
|
2094
|
+
self.selected_speaker_indices = {}
|
1784
2095
|
|
1785
2096
|
def data(self, index, role=None):
|
1786
2097
|
if not index.isValid() or index.column() > 5:
|
1787
2098
|
return None
|
1788
2099
|
if role == QtCore.Qt.ItemDataRole.DisplayRole:
|
2100
|
+
d = self._data[index.row()][index.column()]
|
2101
|
+
if isinstance(d, list):
|
2102
|
+
d = d[self.selected_speaker_indices.get(index.row(), 0)]
|
1789
2103
|
if index.column() == 5:
|
1790
2104
|
try:
|
1791
|
-
return float(
|
2105
|
+
return float(d)
|
1792
2106
|
except TypeError:
|
1793
2107
|
return "N/A"
|
1794
2108
|
elif index.column() in {2, 4}:
|
1795
2109
|
try:
|
1796
|
-
return int(
|
2110
|
+
return int(d)
|
1797
2111
|
except TypeError:
|
1798
2112
|
return "N/A"
|
1799
|
-
return
|
2113
|
+
return d
|
1800
2114
|
return super().data(index, role)
|
1801
2115
|
|
1802
2116
|
def set_threshold(self, threshold: float):
|
@@ -1858,10 +2172,18 @@ class DiarizationModel(TableModel):
|
|
1858
2172
|
self.alternate_speaker_filter = current_speaker.id
|
1859
2173
|
|
1860
2174
|
def reassign_utterance(self, row: int):
|
2175
|
+
if not self.corpus_model.editable:
|
2176
|
+
return
|
1861
2177
|
utterance_id = self.utterance_ids[row]
|
1862
2178
|
if utterance_id is None:
|
1863
2179
|
return
|
1864
|
-
|
2180
|
+
speaker_id = self.suggested_indices[row]
|
2181
|
+
if isinstance(speaker_id, list):
|
2182
|
+
speaker_id = speaker_id[self.selected_speaker_indices.pop(row, 0)]
|
2183
|
+
old_speaker_id = self.speaker_indices[row]
|
2184
|
+
self.corpus_model.addCommand.emit(
|
2185
|
+
undo.ChangeSpeakerCommand([utterance_id], old_speaker_id, speaker_id, self)
|
2186
|
+
)
|
1865
2187
|
self.layoutAboutToBeChanged.emit()
|
1866
2188
|
self._data.pop(row)
|
1867
2189
|
self.utterance_ids.pop(row)
|
@@ -1870,6 +2192,20 @@ class DiarizationModel(TableModel):
|
|
1870
2192
|
|
1871
2193
|
self.layoutChanged.emit()
|
1872
2194
|
|
2195
|
+
def can_cycle(self, index):
|
2196
|
+
return isinstance(self._data[index.row()][index.column()], list)
|
2197
|
+
|
2198
|
+
def change_suggested_speaker(self, row: int):
|
2199
|
+
d = self._data[row][1]
|
2200
|
+
if not isinstance(d, list):
|
2201
|
+
return
|
2202
|
+
ind = self.selected_speaker_indices.get(row, 0)
|
2203
|
+
ind += 1
|
2204
|
+
if ind > len(d) - 1:
|
2205
|
+
ind = 0
|
2206
|
+
self.selected_speaker_indices[row] = ind
|
2207
|
+
self.dataChanged.emit(self.index(row, 1), self.index(row, 5))
|
2208
|
+
|
1873
2209
|
def merge_speakers(self, row: int):
|
1874
2210
|
speaker_id = self.speaker_indices[row]
|
1875
2211
|
if self.inverted:
|
@@ -1878,7 +2214,10 @@ class DiarizationModel(TableModel):
|
|
1878
2214
|
undo.ChangeSpeakerCommand([utterance_id], speaker_id, 0, self)
|
1879
2215
|
)
|
1880
2216
|
else:
|
1881
|
-
self.
|
2217
|
+
suggested = self.suggested_indices[row]
|
2218
|
+
if isinstance(suggested, list):
|
2219
|
+
suggested = suggested[self.selected_speaker_indices.pop(row, 0)]
|
2220
|
+
self.corpus_model.merge_speakers([suggested, speaker_id])
|
1882
2221
|
self.layoutAboutToBeChanged.emit()
|
1883
2222
|
self._data.pop(row)
|
1884
2223
|
self.utterance_ids.pop(row)
|
@@ -1893,6 +2232,7 @@ class DiarizationModel(TableModel):
|
|
1893
2232
|
|
1894
2233
|
def finish_update_data(self, result, *args, **kwargs):
|
1895
2234
|
self.layoutAboutToBeChanged.emit()
|
2235
|
+
self.selected_speaker_indices = {}
|
1896
2236
|
if result is None:
|
1897
2237
|
self._data = []
|
1898
2238
|
self.utterance_ids = []
|
@@ -2016,11 +2356,13 @@ class CorpusModel(TableModel):
|
|
2016
2356
|
"Log-likelihood",
|
2017
2357
|
"Speech log-likelihood",
|
2018
2358
|
"Phone duration deviation",
|
2359
|
+
"SNR",
|
2019
2360
|
"PER",
|
2020
2361
|
"Overlap score",
|
2021
2362
|
"Transcription",
|
2022
2363
|
"WER",
|
2023
2364
|
"Ivector distance",
|
2365
|
+
"Diarization variance",
|
2024
2366
|
]
|
2025
2367
|
super().__init__(header, parent=parent)
|
2026
2368
|
self.oov_column = header.index("OOVs?")
|
@@ -2035,6 +2377,7 @@ class CorpusModel(TableModel):
|
|
2035
2377
|
header.index("Log-likelihood"),
|
2036
2378
|
header.index("Speech log-likelihood"),
|
2037
2379
|
header.index("Phone duration deviation"),
|
2380
|
+
header.index("SNR"),
|
2038
2381
|
]
|
2039
2382
|
self.alignment_evaluation_header_indices = [
|
2040
2383
|
header.index("PER"),
|
@@ -2046,7 +2389,9 @@ class CorpusModel(TableModel):
|
|
2046
2389
|
]
|
2047
2390
|
self.diarization_header_indices = [
|
2048
2391
|
header.index("Ivector distance"),
|
2392
|
+
header.index("Diarization variance"),
|
2049
2393
|
]
|
2394
|
+
self.filter_nulls = [False for _ in range(len(header))]
|
2050
2395
|
self.sort_index = None
|
2051
2396
|
self.sort_order = None
|
2052
2397
|
self.file_filter = None
|
@@ -2074,6 +2419,8 @@ class CorpusModel(TableModel):
|
|
2074
2419
|
self.unsaved_files = set()
|
2075
2420
|
self.files = []
|
2076
2421
|
self.speakers = {}
|
2422
|
+
self.phones = {}
|
2423
|
+
self.words = {}
|
2077
2424
|
self.speaker_id_mapping = {}
|
2078
2425
|
self.utterances = None
|
2079
2426
|
self.session: sqlalchemy.orm.scoped_session = None
|
@@ -2093,6 +2440,11 @@ class CorpusModel(TableModel):
|
|
2093
2440
|
self.latest_alignment_workflow = None
|
2094
2441
|
self.language = None
|
2095
2442
|
|
2443
|
+
def update_filter_nulls(self, toggled, header_index: int):
|
2444
|
+
self.filter_nulls[header_index] = toggled
|
2445
|
+
self.update_data()
|
2446
|
+
self.update_result_count()
|
2447
|
+
|
2096
2448
|
def update_latest_alignment_workflow(self):
|
2097
2449
|
with self.corpus.session() as session:
|
2098
2450
|
query = (
|
@@ -2143,7 +2495,10 @@ class CorpusModel(TableModel):
|
|
2143
2495
|
|
2144
2496
|
@property
|
2145
2497
|
def has_dictionary(self):
|
2146
|
-
if
|
2498
|
+
if (
|
2499
|
+
isinstance(self.corpus, AcousticCorpusWithPronunciations)
|
2500
|
+
and self.corpus.dictionary_model is not None
|
2501
|
+
):
|
2147
2502
|
return True
|
2148
2503
|
return False
|
2149
2504
|
|
@@ -2332,7 +2687,8 @@ class CorpusModel(TableModel):
|
|
2332
2687
|
def set_file_modified(self, file_id: typing.Union[int, typing.List[int]]):
|
2333
2688
|
if isinstance(file_id, int):
|
2334
2689
|
file_id = [file_id]
|
2335
|
-
|
2690
|
+
data = {File.modified: True}
|
2691
|
+
self.session.query(File).filter(File.id.in_(file_id)).update(data)
|
2336
2692
|
self.session.commit()
|
2337
2693
|
|
2338
2694
|
def set_speaker_modified(self, speaker_id: typing.Union[int, typing.List[int]]):
|
@@ -2400,6 +2756,8 @@ class CorpusModel(TableModel):
|
|
2400
2756
|
self.corpusLoading.emit()
|
2401
2757
|
self.refresh_files()
|
2402
2758
|
self.refresh_speakers()
|
2759
|
+
self.refresh_phones()
|
2760
|
+
self.refresh_words()
|
2403
2761
|
self.refresh_utterances()
|
2404
2762
|
self.update_latest_alignment_workflow()
|
2405
2763
|
|
@@ -2447,6 +2805,26 @@ class CorpusModel(TableModel):
|
|
2447
2805
|
def refresh_speakers(self):
|
2448
2806
|
self.runFunction.emit("Loading speakers", self.finish_update_speakers, [])
|
2449
2807
|
|
2808
|
+
def refresh_phones(self):
|
2809
|
+
self.phones = {}
|
2810
|
+
with self.corpus.session() as session:
|
2811
|
+
phones = (
|
2812
|
+
session.query(Phone)
|
2813
|
+
.filter(
|
2814
|
+
Phone.phone_type.in_([PhoneType.non_silence, PhoneType.oov, PhoneType.silence])
|
2815
|
+
)
|
2816
|
+
.all()
|
2817
|
+
)
|
2818
|
+
for p in phones:
|
2819
|
+
self.phones[p.phone] = p
|
2820
|
+
|
2821
|
+
def refresh_words(self):
|
2822
|
+
self.words = {}
|
2823
|
+
with self.corpus.session() as session:
|
2824
|
+
words = session.query(Word).order_by(Word.word).all()
|
2825
|
+
for w in words:
|
2826
|
+
self.words[w.word] = w
|
2827
|
+
|
2450
2828
|
def data(self, index, role):
|
2451
2829
|
if not index.isValid():
|
2452
2830
|
return None
|
@@ -2530,6 +2908,7 @@ class CorpusModel(TableModel):
|
|
2530
2908
|
"limit": self.limit,
|
2531
2909
|
"current_offset": self.current_offset,
|
2532
2910
|
"has_ivectors": self.corpus.has_any_ivectors(),
|
2911
|
+
"filter_nulls": self.filter_nulls,
|
2533
2912
|
}
|
2534
2913
|
if self.sort_index is not None:
|
2535
2914
|
kwargs["sort_index"] = self.sort_index
|