Anchor-annotator 0.8.2__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anchor/models.py CHANGED
@@ -29,9 +29,12 @@ from montreal_forced_aligner.db import (
29
29
  File,
30
30
  Grapheme,
31
31
  Phone,
32
+ PhoneInterval,
33
+ Pronunciation,
32
34
  Speaker,
33
35
  Utterance,
34
36
  Word,
37
+ WordInterval,
35
38
  )
36
39
  from montreal_forced_aligner.g2p.generator import PyniniValidator
37
40
  from montreal_forced_aligner.models import (
@@ -114,7 +117,7 @@ class TableModel(QtCore.QAbstractTableModel):
114
117
  self.limit = 1
115
118
  self.text_filter = None
116
119
 
117
- def set_text_filter(self, text_filter: TextFilterQuery):
120
+ def set_text_filter(self, text_filter: typing.Optional[TextFilterQuery]):
118
121
  if text_filter != self.text_filter:
119
122
  self.current_offset = 0
120
123
  self.text_filter = text_filter
@@ -180,6 +183,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
180
183
  waveformReady = QtCore.Signal()
181
184
  utterancesReady = QtCore.Signal()
182
185
  speakersChanged = QtCore.Signal()
186
+ phoneTierChanged = QtCore.Signal(object)
183
187
 
184
188
  def __init__(self, *args, **kwargs):
185
189
  super().__init__(*args, **kwargs)
@@ -191,8 +195,10 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
191
195
  self._speaker_indices = []
192
196
  self.reversed_indices = {}
193
197
  self.speaker_channel_mapping = {}
194
- self.corpus_model: CorpusModel = None
198
+ self.corpus_model: typing.Optional[CorpusModel] = None
195
199
  self.closing = False
200
+ self.cached_begin = None
201
+ self.cached_end = None
196
202
 
197
203
  self.thread_pool = QtCore.QThreadPool()
198
204
  self.thread_pool.setMaxThreadCount(4)
@@ -209,12 +215,12 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
209
215
  def clean_up_for_close(self):
210
216
  self.closing = True
211
217
 
212
- def set_file(self, file_id):
218
+ def set_file(self, file_id, utterance_id=None, begin=None, end=None):
213
219
  self.file = (
214
220
  self.corpus_model.session.query(File).options(joinedload(File.sound_file)).get(file_id)
215
221
  )
216
222
  self.y = None
217
- self.get_utterances()
223
+ self.get_utterances(utterance_id, begin, end)
218
224
  waveform_worker = workers.WaveformWorker(self.file.sound_file.sound_file_path)
219
225
  waveform_worker.signals.result.connect(self.finalize_loading_wave_form)
220
226
  self.thread_pool.start(waveform_worker)
@@ -222,7 +228,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
222
228
  def finalize_loading_utterances(self, results):
223
229
  if self.closing:
224
230
  return
225
- utterances, file_id = results
231
+ utterances, file_id, self.cached_begin, self.cached_end = results
226
232
  if file_id != self.file.id:
227
233
  return
228
234
  self.utterances = utterances
@@ -245,7 +251,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
245
251
  self.y = y
246
252
  self.waveformReady.emit()
247
253
 
248
- def get_utterances(self):
254
+ def get_utterances(self, utterance_id=None, begin=None, end=None):
249
255
  parent_index = self.index(0, 0)
250
256
  self.beginRemoveRows(parent_index, 0, len(self.utterances))
251
257
  self.utterances = []
@@ -265,6 +271,9 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
265
271
  or self.corpus_model.has_reference_alignments
266
272
  or self.corpus_model.has_transcribed_alignments
267
273
  ),
274
+ utterance_id=utterance_id,
275
+ begin=begin,
276
+ end=end,
268
277
  )
269
278
  speaker_tier_worker.signals.result.connect(self.finalize_loading_utterances)
270
279
  self.thread_pool.start(speaker_tier_worker)
@@ -359,6 +368,107 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
359
368
  self.delete_table_utterances([merged_utterance])
360
369
  self.add_table_utterances(split_utterances)
361
370
 
371
+ def update_phone_boundaries(
372
+ self,
373
+ utterance: Utterance,
374
+ first_phone_interval: PhoneInterval,
375
+ second_phone_interval: PhoneInterval,
376
+ new_time: float,
377
+ ):
378
+ if not self.corpus_model.editable:
379
+ return
380
+ if first_phone_interval.end == new_time and second_phone_interval.begin == new_time:
381
+ return
382
+ self.addCommand.emit(
383
+ undo.UpdatePhoneBoundariesCommand(
384
+ utterance, first_phone_interval, second_phone_interval, new_time, self
385
+ )
386
+ )
387
+ self.corpus_model.set_file_modified(self.file.id)
388
+
389
+ def delete_reference_alignments(self, utterance: Utterance):
390
+ if not self.corpus_model.editable:
391
+ return
392
+ self.addCommand.emit(undo.DeleteReferenceIntervalsCommand(utterance, self))
393
+ self.corpus_model.set_file_modified(self.file.id)
394
+
395
+ def update_phone_interval(
396
+ self, utterance: Utterance, phone_interval: PhoneInterval, phone: Phone
397
+ ):
398
+ if not self.corpus_model.editable:
399
+ return
400
+ if phone_interval.phone_id == phone.id:
401
+ return
402
+ self.addCommand.emit(
403
+ undo.UpdatePhoneIntervalCommand(utterance, phone_interval, phone, self)
404
+ )
405
+ self.corpus_model.set_file_modified(self.file.id)
406
+
407
+ def update_word_pronunciation(
408
+ self, utterance: Utterance, word_interval: WordInterval, pronunciation: Pronunciation
409
+ ):
410
+ if not self.corpus_model.editable:
411
+ return
412
+ if word_interval.pronunciation_id == pronunciation.id:
413
+ return
414
+ self.addCommand.emit(
415
+ undo.UpdateWordIntervalPronunciationCommand(
416
+ utterance, word_interval, pronunciation, self
417
+ )
418
+ )
419
+ self.corpus_model.set_file_modified(self.file.id)
420
+
421
+ def update_word(
422
+ self, utterance: Utterance, word_interval: WordInterval, word: typing.Union[Word, str]
423
+ ):
424
+ if not self.corpus_model.editable:
425
+ return
426
+ if isinstance(word, Word) and word_interval.word_id == word.id:
427
+ return
428
+ self.addCommand.emit(
429
+ undo.UpdateWordIntervalWordCommand(utterance, word_interval, word, self)
430
+ )
431
+ self.corpus_model.set_file_modified(self.file.id)
432
+
433
+ def insert_phone_interval(
434
+ self,
435
+ utterance: Utterance,
436
+ phone_interval,
437
+ previous_interval: PhoneInterval,
438
+ following_interval: PhoneInterval,
439
+ word_interval: WordInterval,
440
+ ):
441
+ if not self.corpus_model.editable:
442
+ return
443
+ self.addCommand.emit(
444
+ undo.InsertPhoneIntervalCommand(
445
+ utterance,
446
+ phone_interval,
447
+ previous_interval,
448
+ following_interval,
449
+ self,
450
+ word_interval,
451
+ )
452
+ )
453
+ self.corpus_model.set_file_modified(self.file.id)
454
+
455
+ def delete_phone_interval(
456
+ self,
457
+ utterance: Utterance,
458
+ phone_interval: PhoneInterval,
459
+ previous_interval: PhoneInterval,
460
+ following_interval: PhoneInterval,
461
+ time_point: float,
462
+ ):
463
+ if not self.corpus_model.editable:
464
+ return
465
+ self.addCommand.emit(
466
+ undo.DeletePhoneIntervalCommand(
467
+ utterance, phone_interval, previous_interval, following_interval, time_point, self
468
+ )
469
+ )
470
+ self.corpus_model.set_file_modified(self.file.id)
471
+
362
472
  def update_utterance_text(self, utterance: Utterance, text):
363
473
  if not self.corpus_model.editable:
364
474
  return
@@ -472,7 +582,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
472
582
  begin=beg,
473
583
  end=split_time,
474
584
  channel=utterance.channel,
475
- text=" ".join(first_text),
585
+ text=" ".join(first_text) + " ",
476
586
  normalized_text=" ".join(first_text),
477
587
  oovs=" ".join(oovs),
478
588
  )
@@ -488,7 +598,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
488
598
  begin=split_time,
489
599
  end=end,
490
600
  channel=utterance.channel,
491
- text=" ".join(second_text),
601
+ text=" " + " ".join(second_text),
492
602
  normalized_text=" ".join(second_text),
493
603
  oovs=" ".join(oovs),
494
604
  )
@@ -528,8 +638,8 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
528
638
  continue
529
639
  text += utt_text + " "
530
640
  normalized_text += old_utt.normalized_text + " "
531
- text = text[:-1]
532
- normalized_text = normalized_text[:-1]
641
+ text = re.sub(r"\s+", " ", text[:-1])
642
+ normalized_text = re.sub(r"\s+", " ", normalized_text[:-1])
533
643
  next_pk = self.corpus_model.corpus.get_next_primary_key(Utterance)
534
644
  oovs = set()
535
645
  for w in text.split():
@@ -579,6 +689,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
579
689
  selectionAudioChanged = QtCore.Signal(object)
580
690
  currentUtteranceChanged = QtCore.Signal(object)
581
691
  speakerRequested = QtCore.Signal(object)
692
+ searchTermChanged = QtCore.Signal(object)
582
693
 
583
694
  spectrogramReady = QtCore.Signal()
584
695
  waveformReady = QtCore.Signal()
@@ -631,6 +742,11 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
631
742
  return
632
743
  begin_samp = int(self.min_time * self.model().file.sample_rate)
633
744
  end_samp = int(self.max_time * self.model().file.sample_rate)
745
+ if self.model().cached_begin is not None and (
746
+ self.min_time < self.model().cached_begin + 5
747
+ or self.max_time > self.model().cached_end - 5
748
+ ):
749
+ self.model().get_utterances(begin=self.min_time, end=self.max_time)
634
750
  if len(self.model().y.shape) > 1:
635
751
  y = self.model().y[begin_samp:end_samp, self.selected_channel]
636
752
  else:
@@ -890,23 +1006,44 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
890
1006
  and not self.min_time <= self.selected_max_time <= self.max_time
891
1007
  ):
892
1008
  self.selected_max_time = None
893
- if not new_file:
894
- self.view_change_timer.start()
1009
+ self.view_change_timer.start()
895
1010
 
896
1011
  def send_selection_update(self):
897
1012
  self.viewChanged.emit(self.min_time, self.max_time)
898
1013
 
899
- def set_current_file(self, file_id, begin, end, utterance_id, speaker_id, force_update=False):
1014
+ def set_search_term(self, word):
1015
+ query = TextFilterQuery(word, word=True)
1016
+ self.searchTermChanged.emit(query)
1017
+
1018
+ def set_current_file(
1019
+ self,
1020
+ file_id,
1021
+ begin,
1022
+ end,
1023
+ utterance_id,
1024
+ speaker_id,
1025
+ force_update=False,
1026
+ single_utterance=False,
1027
+ ):
900
1028
  try:
901
- new_file = self.model().file is None or self.model().file.id != file_id
1029
+ new_file = (
1030
+ self.model().file is None
1031
+ or self.model().file.id != file_id
1032
+ or self.model().file.duration >= 500
1033
+ )
902
1034
  except sqlalchemy.orm.exc.DetachedInstanceError:
903
1035
  new_file = True
1036
+ if force_update and single_utterance:
1037
+ new_file = True
904
1038
  self.requested_utterance_id = utterance_id
905
1039
  self.selected_min_time = None
906
1040
  self.selected_max_time = None
907
1041
  if new_file:
908
1042
  self.fileAboutToChange.emit()
909
- self.model().set_file(file_id)
1043
+ if single_utterance:
1044
+ self.model().set_file(file_id, utterance_id=utterance_id)
1045
+ else:
1046
+ self.model().set_file(file_id, begin=begin, end=end)
910
1047
  self.speakerRequested.emit(speaker_id)
911
1048
  else:
912
1049
  self.finalize_set_new_file()
@@ -1496,6 +1633,7 @@ class SpeakerModel(TableModel):
1496
1633
  self.mds = None
1497
1634
  self.perplexity = 30.0
1498
1635
  self.cluster_labels = None
1636
+ self.distances = None
1499
1637
  self.ivectors = None
1500
1638
  self.utterance_ids = None
1501
1639
  self.alternate_speaker_ids = []
@@ -1623,10 +1761,11 @@ class SpeakerModel(TableModel):
1623
1761
  def finish_clustering(self, result, *args, **kwargs):
1624
1762
  if result is None:
1625
1763
  return
1626
- speaker_ids, c_labels = result
1764
+ speaker_ids, c_labels, distances = result
1627
1765
  if speaker_ids != self.current_speakers:
1628
1766
  return
1629
1767
  self.cluster_labels = c_labels
1768
+ self.distances = distances
1630
1769
  self.num_clusters = np.max(c_labels) + 1
1631
1770
  self.clustered.emit()
1632
1771
 
@@ -1656,6 +1795,7 @@ class SpeakerModel(TableModel):
1656
1795
  def change_current_speaker(self, speaker_id: typing.Union[int, typing.List[int]], reset=False):
1657
1796
  self.mds = None
1658
1797
  self.cluster_labels = None
1798
+ self.distances = None
1659
1799
  if reset:
1660
1800
  self.current_speakers = []
1661
1801
  if isinstance(speaker_id, int):
@@ -1719,6 +1859,7 @@ class SpeakerModel(TableModel):
1719
1859
 
1720
1860
  def cluster_speaker_utterances(self):
1721
1861
  self.cluster_labels = None
1862
+ self.distances = None
1722
1863
  self.num_clusters = None
1723
1864
  if self.corpus_model.corpus is None:
1724
1865
  return
@@ -1751,6 +1892,175 @@ class SpeakerModel(TableModel):
1751
1892
  self.runFunction.emit("Generating speaker MDS", self.finish_mds, [kwargs])
1752
1893
 
1753
1894
 
1895
+ class AlignmentAnalysisModel(TableModel):
1896
+ def __init__(self, parent=None):
1897
+ columns = [
1898
+ "Utterance",
1899
+ "Speaker",
1900
+ "Phone",
1901
+ "Duration",
1902
+ "Log-likelihood",
1903
+ "Word",
1904
+ ]
1905
+ super().__init__(columns, parent=parent)
1906
+ self.settings = AnchorSettings()
1907
+ self.utterance_ids = []
1908
+ self.file_ids = []
1909
+ self.speaker_ids = []
1910
+ self.indices = []
1911
+ self.reversed_indices = {}
1912
+ self.corpus_model: Optional[CorpusModel] = None
1913
+ self.set_limit(self.settings.value(self.settings.RESULTS_PER_PAGE))
1914
+ self.speaker_filter = None
1915
+ self.phone_filter = None
1916
+ self.less_than = None
1917
+ self.greater_than = None
1918
+ self.measure = "duration"
1919
+ self.exclude_manual = False
1920
+ self.relative_duration = False
1921
+ self.word_mode = False
1922
+ self.sort_index = None
1923
+ self.sort_order = None
1924
+
1925
+ def data(self, index, role=QtCore.Qt.ItemDataRole.DisplayRole):
1926
+ if not index.isValid() or index.column() > 5:
1927
+ return None
1928
+ if role == QtCore.Qt.ItemDataRole.DisplayRole:
1929
+ d = self._data[index.row()][index.column()]
1930
+ if index.column() in {3, 4}:
1931
+ try:
1932
+ return float(d)
1933
+ except TypeError:
1934
+ return "N/A"
1935
+ return d
1936
+ return super().data(index, role)
1937
+
1938
+ def set_less_than(self, less_than: float):
1939
+ if less_than != self.less_than:
1940
+ self.current_offset = 0
1941
+ self.less_than = less_than
1942
+
1943
+ def set_greater_than(self, greater_than: float):
1944
+ if greater_than != self.greater_than:
1945
+ self.current_offset = 0
1946
+ self.greater_than = greater_than
1947
+
1948
+ def set_exclude_manual(self, exclude_manual: bool):
1949
+ if exclude_manual != self.exclude_manual:
1950
+ self.current_offset = 0
1951
+ self.exclude_manual = exclude_manual
1952
+
1953
+ def set_measure(self, measure: str):
1954
+ measure = measure.lower() if measure == "Duration" else "phone_goodness"
1955
+ if measure != self.measure:
1956
+ self.current_offset = 0
1957
+ self.measure = measure
1958
+
1959
+ def set_speaker_filter(self, speaker_id: typing.Union[int, str, None]):
1960
+ if speaker_id and not isinstance(speaker_id, int):
1961
+ current_speaker = (
1962
+ self.corpus_model.corpus.session.query(Speaker)
1963
+ .filter(Speaker.name == speaker_id)
1964
+ .first()
1965
+ )
1966
+ speaker_id = current_speaker.id
1967
+ if speaker_id != self.speaker_filter:
1968
+ self.current_offset = 0
1969
+ self.speaker_filter = speaker_id
1970
+
1971
+ def set_word_mode(self, word_mode):
1972
+ if word_mode != self.word_mode:
1973
+ self.current_offset = 0
1974
+ self.word_mode = word_mode
1975
+
1976
+ def set_relative_duration(self, relative_duration):
1977
+ if relative_duration != self.relative_duration:
1978
+ self.current_offset = 0
1979
+ self.relative_duration = relative_duration
1980
+
1981
+ def set_phone_filter(self, phone_id: typing.Union[int, str, None]):
1982
+ if phone_id == "":
1983
+ phone_id = None
1984
+ if phone_id and not isinstance(phone_id, int):
1985
+ phone_id = self.corpus_model.phones[phone_id].id
1986
+ if phone_id != self.phone_filter:
1987
+ self.current_offset = 0
1988
+ self.phone_filter = phone_id
1989
+
1990
+ def set_word_filter(self, text_filter: typing.Optional[TextFilterQuery]):
1991
+ if text_filter != self.text_filter:
1992
+ self.current_offset = 0
1993
+ self.text_filter = text_filter
1994
+
1995
+ def set_corpus_model(self, corpus_model: CorpusModel):
1996
+ self.corpus_model = corpus_model
1997
+ self.corpus_model.corpusLoading.connect(self.update_data)
1998
+
1999
+ def update_sort(self, column, order):
2000
+ self.sort_index = column
2001
+ self.sort_order = order
2002
+ self.update_data()
2003
+
2004
+ def finish_update_data(self, result, *args, **kwargs):
2005
+ self.layoutAboutToBeChanged.emit()
2006
+ if result is None:
2007
+ self._data = []
2008
+ self.indices = []
2009
+ self.utterance_ids = []
2010
+ self.file_ids = []
2011
+ self.speaker_ids = []
2012
+ self.reversed_indices = {}
2013
+ else:
2014
+ (
2015
+ self._data,
2016
+ self.indices,
2017
+ self.utterance_ids,
2018
+ self.file_ids,
2019
+ self.speaker_ids,
2020
+ self.reversed_indices,
2021
+ ) = result
2022
+ self.layoutChanged.emit()
2023
+ self.newResults.emit()
2024
+
2025
+ @property
2026
+ def query_kwargs(self) -> typing.Dict[str, typing.Any]:
2027
+ kwargs = {
2028
+ "limit": self.limit,
2029
+ "current_offset": self.current_offset,
2030
+ "speaker_id": self.speaker_filter if isinstance(self.speaker_filter, int) else None,
2031
+ "phone_id": self.phone_filter if isinstance(self.phone_filter, int) else None,
2032
+ "word_filter": self.text_filter,
2033
+ "word_mode": self.word_mode,
2034
+ "less_than": self.less_than,
2035
+ "greater_than": self.greater_than,
2036
+ "measure": self.measure,
2037
+ "relative_duration": self.relative_duration,
2038
+ "exclude_manual": self.exclude_manual,
2039
+ }
2040
+ if self.sort_index is not None:
2041
+ kwargs["sort_index"] = self.sort_index
2042
+ kwargs["sort_desc"] = self.sort_order == QtCore.Qt.SortOrder.DescendingOrder
2043
+ return kwargs
2044
+
2045
+ @property
2046
+ def count_kwargs(self) -> typing.Dict[str, typing.Any]:
2047
+ kwargs = self.query_kwargs
2048
+ kwargs["count"] = True
2049
+ return kwargs
2050
+
2051
+ def update_result_count(self):
2052
+ self.runFunction.emit(
2053
+ "Counting alignment analysis results",
2054
+ self.finalize_result_count,
2055
+ [self.count_kwargs],
2056
+ )
2057
+
2058
+ def update_data(self):
2059
+ if not self.corpus_model.has_alignments:
2060
+ return
2061
+ self.runFunction.emit("Analyzing alignments", self.finish_update_data, [self.query_kwargs])
2062
+
2063
+
1754
2064
  class DiarizationModel(TableModel):
1755
2065
  changeUtteranceSpeakerRequested = QtCore.Signal(object, object)
1756
2066
 
@@ -1781,22 +2091,26 @@ class DiarizationModel(TableModel):
1781
2091
  self.metric = "cosine"
1782
2092
  self.inverted = False
1783
2093
  self.in_speakers = False
2094
+ self.selected_speaker_indices = {}
1784
2095
 
1785
2096
  def data(self, index, role=None):
1786
2097
  if not index.isValid() or index.column() > 5:
1787
2098
  return None
1788
2099
  if role == QtCore.Qt.ItemDataRole.DisplayRole:
2100
+ d = self._data[index.row()][index.column()]
2101
+ if isinstance(d, list):
2102
+ d = d[self.selected_speaker_indices.get(index.row(), 0)]
1789
2103
  if index.column() == 5:
1790
2104
  try:
1791
- return float(self._data[index.row()][index.column()])
2105
+ return float(d)
1792
2106
  except TypeError:
1793
2107
  return "N/A"
1794
2108
  elif index.column() in {2, 4}:
1795
2109
  try:
1796
- return int(self._data[index.row()][index.column()])
2110
+ return int(d)
1797
2111
  except TypeError:
1798
2112
  return "N/A"
1799
- return self._data[index.row()][index.column()]
2113
+ return d
1800
2114
  return super().data(index, role)
1801
2115
 
1802
2116
  def set_threshold(self, threshold: float):
@@ -1858,10 +2172,18 @@ class DiarizationModel(TableModel):
1858
2172
  self.alternate_speaker_filter = current_speaker.id
1859
2173
 
1860
2174
  def reassign_utterance(self, row: int):
2175
+ if not self.corpus_model.editable:
2176
+ return
1861
2177
  utterance_id = self.utterance_ids[row]
1862
2178
  if utterance_id is None:
1863
2179
  return
1864
- self.changeUtteranceSpeakerRequested.emit(utterance_id, self.suggested_indices[row])
2180
+ speaker_id = self.suggested_indices[row]
2181
+ if isinstance(speaker_id, list):
2182
+ speaker_id = speaker_id[self.selected_speaker_indices.pop(row, 0)]
2183
+ old_speaker_id = self.speaker_indices[row]
2184
+ self.corpus_model.addCommand.emit(
2185
+ undo.ChangeSpeakerCommand([utterance_id], old_speaker_id, speaker_id, self)
2186
+ )
1865
2187
  self.layoutAboutToBeChanged.emit()
1866
2188
  self._data.pop(row)
1867
2189
  self.utterance_ids.pop(row)
@@ -1870,6 +2192,20 @@ class DiarizationModel(TableModel):
1870
2192
 
1871
2193
  self.layoutChanged.emit()
1872
2194
 
2195
+ def can_cycle(self, index):
2196
+ return isinstance(self._data[index.row()][index.column()], list)
2197
+
2198
+ def change_suggested_speaker(self, row: int):
2199
+ d = self._data[row][1]
2200
+ if not isinstance(d, list):
2201
+ return
2202
+ ind = self.selected_speaker_indices.get(row, 0)
2203
+ ind += 1
2204
+ if ind > len(d) - 1:
2205
+ ind = 0
2206
+ self.selected_speaker_indices[row] = ind
2207
+ self.dataChanged.emit(self.index(row, 1), self.index(row, 5))
2208
+
1873
2209
  def merge_speakers(self, row: int):
1874
2210
  speaker_id = self.speaker_indices[row]
1875
2211
  if self.inverted:
@@ -1878,7 +2214,10 @@ class DiarizationModel(TableModel):
1878
2214
  undo.ChangeSpeakerCommand([utterance_id], speaker_id, 0, self)
1879
2215
  )
1880
2216
  else:
1881
- self.corpus_model.merge_speakers([self.suggested_indices[row], speaker_id])
2217
+ suggested = self.suggested_indices[row]
2218
+ if isinstance(suggested, list):
2219
+ suggested = suggested[self.selected_speaker_indices.pop(row, 0)]
2220
+ self.corpus_model.merge_speakers([suggested, speaker_id])
1882
2221
  self.layoutAboutToBeChanged.emit()
1883
2222
  self._data.pop(row)
1884
2223
  self.utterance_ids.pop(row)
@@ -1893,6 +2232,7 @@ class DiarizationModel(TableModel):
1893
2232
 
1894
2233
  def finish_update_data(self, result, *args, **kwargs):
1895
2234
  self.layoutAboutToBeChanged.emit()
2235
+ self.selected_speaker_indices = {}
1896
2236
  if result is None:
1897
2237
  self._data = []
1898
2238
  self.utterance_ids = []
@@ -2016,11 +2356,13 @@ class CorpusModel(TableModel):
2016
2356
  "Log-likelihood",
2017
2357
  "Speech log-likelihood",
2018
2358
  "Phone duration deviation",
2359
+ "SNR",
2019
2360
  "PER",
2020
2361
  "Overlap score",
2021
2362
  "Transcription",
2022
2363
  "WER",
2023
2364
  "Ivector distance",
2365
+ "Diarization variance",
2024
2366
  ]
2025
2367
  super().__init__(header, parent=parent)
2026
2368
  self.oov_column = header.index("OOVs?")
@@ -2035,6 +2377,7 @@ class CorpusModel(TableModel):
2035
2377
  header.index("Log-likelihood"),
2036
2378
  header.index("Speech log-likelihood"),
2037
2379
  header.index("Phone duration deviation"),
2380
+ header.index("SNR"),
2038
2381
  ]
2039
2382
  self.alignment_evaluation_header_indices = [
2040
2383
  header.index("PER"),
@@ -2046,7 +2389,9 @@ class CorpusModel(TableModel):
2046
2389
  ]
2047
2390
  self.diarization_header_indices = [
2048
2391
  header.index("Ivector distance"),
2392
+ header.index("Diarization variance"),
2049
2393
  ]
2394
+ self.filter_nulls = [False for _ in range(len(header))]
2050
2395
  self.sort_index = None
2051
2396
  self.sort_order = None
2052
2397
  self.file_filter = None
@@ -2074,6 +2419,8 @@ class CorpusModel(TableModel):
2074
2419
  self.unsaved_files = set()
2075
2420
  self.files = []
2076
2421
  self.speakers = {}
2422
+ self.phones = {}
2423
+ self.words = {}
2077
2424
  self.speaker_id_mapping = {}
2078
2425
  self.utterances = None
2079
2426
  self.session: sqlalchemy.orm.scoped_session = None
@@ -2093,6 +2440,11 @@ class CorpusModel(TableModel):
2093
2440
  self.latest_alignment_workflow = None
2094
2441
  self.language = None
2095
2442
 
2443
+ def update_filter_nulls(self, toggled, header_index: int):
2444
+ self.filter_nulls[header_index] = toggled
2445
+ self.update_data()
2446
+ self.update_result_count()
2447
+
2096
2448
  def update_latest_alignment_workflow(self):
2097
2449
  with self.corpus.session() as session:
2098
2450
  query = (
@@ -2143,7 +2495,10 @@ class CorpusModel(TableModel):
2143
2495
 
2144
2496
  @property
2145
2497
  def has_dictionary(self):
2146
- if isinstance(self.corpus, AcousticCorpusWithPronunciations):
2498
+ if (
2499
+ isinstance(self.corpus, AcousticCorpusWithPronunciations)
2500
+ and self.corpus.dictionary_model is not None
2501
+ ):
2147
2502
  return True
2148
2503
  return False
2149
2504
 
@@ -2332,7 +2687,8 @@ class CorpusModel(TableModel):
2332
2687
  def set_file_modified(self, file_id: typing.Union[int, typing.List[int]]):
2333
2688
  if isinstance(file_id, int):
2334
2689
  file_id = [file_id]
2335
- self.session.query(File).filter(File.id.in_(file_id)).update({File.modified: True})
2690
+ data = {File.modified: True}
2691
+ self.session.query(File).filter(File.id.in_(file_id)).update(data)
2336
2692
  self.session.commit()
2337
2693
 
2338
2694
  def set_speaker_modified(self, speaker_id: typing.Union[int, typing.List[int]]):
@@ -2400,6 +2756,8 @@ class CorpusModel(TableModel):
2400
2756
  self.corpusLoading.emit()
2401
2757
  self.refresh_files()
2402
2758
  self.refresh_speakers()
2759
+ self.refresh_phones()
2760
+ self.refresh_words()
2403
2761
  self.refresh_utterances()
2404
2762
  self.update_latest_alignment_workflow()
2405
2763
 
@@ -2447,6 +2805,26 @@ class CorpusModel(TableModel):
2447
2805
  def refresh_speakers(self):
2448
2806
  self.runFunction.emit("Loading speakers", self.finish_update_speakers, [])
2449
2807
 
2808
+ def refresh_phones(self):
2809
+ self.phones = {}
2810
+ with self.corpus.session() as session:
2811
+ phones = (
2812
+ session.query(Phone)
2813
+ .filter(
2814
+ Phone.phone_type.in_([PhoneType.non_silence, PhoneType.oov, PhoneType.silence])
2815
+ )
2816
+ .all()
2817
+ )
2818
+ for p in phones:
2819
+ self.phones[p.phone] = p
2820
+
2821
+ def refresh_words(self):
2822
+ self.words = {}
2823
+ with self.corpus.session() as session:
2824
+ words = session.query(Word).order_by(Word.word).all()
2825
+ for w in words:
2826
+ self.words[w.word] = w
2827
+
2450
2828
  def data(self, index, role):
2451
2829
  if not index.isValid():
2452
2830
  return None
@@ -2530,6 +2908,7 @@ class CorpusModel(TableModel):
2530
2908
  "limit": self.limit,
2531
2909
  "current_offset": self.current_offset,
2532
2910
  "has_ivectors": self.corpus.has_any_ivectors(),
2911
+ "filter_nulls": self.filter_nulls,
2533
2912
  }
2534
2913
  if self.sort_index is not None:
2535
2914
  kwargs["sort_index"] = self.sort_index