Anchor-annotator 0.3.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
anchor/models.py CHANGED
@@ -24,6 +24,12 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
24
24
  )
25
25
  from montreal_forced_aligner.data import PhoneType, WordType
26
26
  from montreal_forced_aligner.db import File, Phone, Speaker, Utterance
27
+ from montreal_forced_aligner.dictionary.mixins import (
28
+ DEFAULT_CLITIC_MARKERS,
29
+ DEFAULT_COMPOUND_MARKERS,
30
+ DEFAULT_PUNCTUATION,
31
+ DEFAULT_WORD_BREAK_MARKERS,
32
+ )
27
33
  from montreal_forced_aligner.g2p.generator import PyniniValidator
28
34
  from montreal_forced_aligner.models import (
29
35
  AcousticModel,
@@ -45,6 +51,23 @@ if typing.TYPE_CHECKING:
45
51
  logger = logging.getLogger("anchor")
46
52
 
47
53
 
54
+ WORD_BREAK_SET = "".join(
55
+ sorted(
56
+ set(
57
+ DEFAULT_WORD_BREAK_MARKERS
58
+ + DEFAULT_PUNCTUATION
59
+ + DEFAULT_CLITIC_MARKERS
60
+ + DEFAULT_COMPOUND_MARKERS
61
+ )
62
+ )
63
+ )
64
+
65
+ if "-" in WORD_BREAK_SET:
66
+ WORD_BREAK_SET = "" + WORD_BREAK_SET.replace("-", "")
67
+
68
+ WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"
69
+
70
+
48
71
  # noinspection PyUnresolvedReferences
49
72
  @dataclass(slots=True)
50
73
  class TextFilterQuery:
@@ -76,9 +99,9 @@ class TextFilterQuery:
76
99
  if posix:
77
100
  text = text.replace(r"\b", word_break_set)
78
101
  if text.startswith(r"\b"):
79
- text = r"((?<=\s)|(?<=^))" + text[2:]
102
+ text = rf"((?<={WORD_BREAK_REGEX_SET})|(?<=^))" + text[2:]
80
103
  if text.endswith(r"\b"):
81
- text = text[:-2] + r"((?=\s)|(?=$))"
104
+ text = text[:-2] + rf"((?={WORD_BREAK_REGEX_SET})|(?=$))"
82
105
  if self.regex or self.word:
83
106
  if not self.case_sensitive:
84
107
  text = "(?i)" + text
@@ -482,7 +505,13 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
482
505
  normalized_text = ""
483
506
  speaker_id = None
484
507
  channel = None
508
+ prev_index = None
485
509
  for old_utt in sorted(utterances, key=lambda x: x.begin):
510
+ index = self.reversed_indices[old_utt.id]
511
+ if prev_index is not None:
512
+ if index - prev_index != 1:
513
+ return
514
+ prev_index = index
486
515
  if speaker_id is None:
487
516
  speaker_id = old_utt.speaker_id
488
517
  if channel is None:
@@ -544,8 +573,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
544
573
  channelChanged = QtCore.Signal()
545
574
  resetView = QtCore.Signal()
546
575
  viewChanged = QtCore.Signal(object, object)
547
- selectionAudioChanged = QtCore.Signal()
548
- currentTimeChanged = QtCore.Signal(object)
576
+ selectionAudioChanged = QtCore.Signal(object)
549
577
  currentUtteranceChanged = QtCore.Signal()
550
578
  speakerRequested = QtCore.Signal(object)
551
579
 
@@ -584,6 +612,9 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
584
612
  self.model().utterancesReady.connect(self.finalize_set_new_file)
585
613
  self.viewChanged.connect(self.load_audio_selection)
586
614
  self.model().selectionRequested.connect(self.update_selected_utterances)
615
+ self.view_change_timer = QtCore.QTimer()
616
+ self.view_change_timer.setInterval(50)
617
+ self.view_change_timer.timeout.connect(self.send_selection_update)
587
618
 
588
619
  def selected_utterances(self):
589
620
  utts = []
@@ -706,20 +737,21 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
706
737
  self.waveformReady.emit()
707
738
 
708
739
  def select_audio(self, begin, end):
709
- if end is not None and end - begin < 0.025:
740
+ if end is not None and end - begin < 0.05:
710
741
  end = None
711
742
  self.selected_min_time = begin
712
743
  self.selected_max_time = end
713
- self.selectionAudioChanged.emit()
744
+ if self.selected_min_time != self.min_time:
745
+ self.selectionAudioChanged.emit(False)
714
746
 
715
- def request_start_time(self, start_time):
747
+ def request_start_time(self, start_time, update=False):
716
748
  if start_time >= self.max_time:
717
749
  return
718
750
  if start_time < self.min_time:
719
751
  return
720
752
  self.selected_min_time = start_time
721
753
  self.selected_max_time = None
722
- self.selectionAudioChanged.emit()
754
+ self.selectionAudioChanged.emit(update)
723
755
 
724
756
  def set_current_channel(self, channel):
725
757
  if channel == self.selected_channel:
@@ -789,7 +821,21 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
789
821
 
790
822
  def zoom_to_selection(self):
791
823
  if self.selected_min_time is not None and self.selected_max_time is not None:
792
- self.set_view_times(self.selected_min_time, self.selected_max_time)
824
+ begin = self.selected_min_time
825
+ end = self.selected_max_time
826
+ elif len(self.selectedRows(0)) > 0:
827
+ m = self.model()
828
+ begin = 100000
829
+ end = 0
830
+ for index in self.selectedRows(0):
831
+ u = m.utterances[index.row()]
832
+ if u.begin < begin:
833
+ begin = u.begin
834
+ if u.end > end:
835
+ end = u.end
836
+ else:
837
+ return
838
+ self.set_view_times(begin, end)
793
839
 
794
840
  def update_from_slider(self, value):
795
841
  if not self.max_time:
@@ -797,15 +843,6 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
797
843
  cur_window = self.max_time - self.min_time
798
844
  self.set_view_times(value, value + cur_window)
799
845
 
800
- def update_selection_audio(self, begin, end):
801
- if begin < self.min_time:
802
- begin = self.min_time
803
- if end > self.max_time:
804
- end = self.max_time
805
- self.selected_min_time = begin
806
- self.selected_max_time = end
807
- self.selectionAudioChanged.emit()
808
-
809
846
  def visible_utterances(self) -> typing.List[Utterance]:
810
847
  file_utts = []
811
848
  if not self.model().file:
@@ -841,10 +878,13 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
841
878
  and not self.min_time <= self.selected_max_time <= self.max_time
842
879
  ):
843
880
  self.selected_max_time = None
881
+ self.view_change_timer.start()
882
+
883
+ def send_selection_update(self):
884
+ self.view_change_timer.stop()
844
885
  self.viewChanged.emit(self.min_time, self.max_time)
845
886
 
846
- def set_current_file(self, info, force_update=False):
847
- file_id, begin, end, utterance_id, speaker_id = info
887
+ def set_current_file(self, file_id, begin, end, utterance_id, speaker_id, force_update=False):
848
888
  try:
849
889
  new_file = self.model().file is None or self.model().file.id != file_id
850
890
  except sqlalchemy.orm.exc.DetachedInstanceError:
@@ -926,7 +966,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
926
966
  channelChanged = QtCore.Signal()
927
967
  resetView = QtCore.Signal()
928
968
  fileAboutToChange = QtCore.Signal()
929
- fileViewRequested = QtCore.Signal(object)
969
+ fileViewRequested = QtCore.Signal(object, object, object, object, object)
930
970
  selectionAudioChanged = QtCore.Signal()
931
971
  currentTimeChanged = QtCore.Signal(object)
932
972
  currentUtteranceChanged = QtCore.Signal()
@@ -947,7 +987,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
947
987
  self.currentRowChanged.connect(self.switch_utterance)
948
988
  # self.selectionChanged.connect(self.update_selection_audio)
949
989
  # self.selectionChanged.connect(self.update_selection_audio)
950
- self.model().newResults.connect(self.check_selection)
990
+ # self.model().newResults.connect(self.check_selection)
951
991
  self.model().unlockCorpus.connect(self.fileChanged.emit)
952
992
 
953
993
  def set_current_utterance(self, utterance_id):
@@ -1023,7 +1063,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
1023
1063
  if focus:
1024
1064
  flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
1025
1065
  if row == self.currentIndex().row():
1026
- self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
1066
+ self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
1027
1067
 
1028
1068
  index = self.model().index(row, 0)
1029
1069
  if not index.isValid():
@@ -1082,7 +1122,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
1082
1122
  return
1083
1123
  self.current_utterance_id = utt
1084
1124
  self.currentUtteranceChanged.emit()
1085
- self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
1125
+ self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
1086
1126
 
1087
1127
  def model(self) -> CorpusModel:
1088
1128
  return super().model()
@@ -1099,7 +1139,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
1099
1139
  return
1100
1140
  self.current_utterance_id = utt_id
1101
1141
  self.currentUtteranceChanged.emit()
1102
- self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
1142
+ self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
1103
1143
 
1104
1144
 
1105
1145
  class OovModel(TableModel):
@@ -1266,7 +1306,7 @@ class DictionaryTableModel(TableModel):
1266
1306
  return True
1267
1307
  return False
1268
1308
 
1269
- def add_word(self, word, word_id):
1309
+ def add_word(self, word, word_id=None):
1270
1310
  self.requestLookup.emit(word)
1271
1311
  self.add_pronunciation(word, word_id)
1272
1312
 
@@ -1697,10 +1737,10 @@ class DiarizationModel(TableModel):
1697
1737
  super().__init__(columns, parent=parent)
1698
1738
  self.settings = AnchorSettings()
1699
1739
  self.speaker_count = None
1700
- self._utterance_ids = []
1701
- self._file_ids = []
1702
- self._speaker_indices = []
1703
- self._suggested_indices = []
1740
+ self.utterance_ids = []
1741
+ self.file_ids = []
1742
+ self.speaker_indices = []
1743
+ self.suggested_indices = []
1704
1744
  self.corpus_model: Optional[CorpusModel] = None
1705
1745
  self.set_limit(self.settings.value(self.settings.RESULTS_PER_PAGE))
1706
1746
  self.speaker_filter = None
@@ -1728,11 +1768,6 @@ class DiarizationModel(TableModel):
1728
1768
  return self._data[index.row()][index.column()]
1729
1769
  return super().data(index, role)
1730
1770
 
1731
- def utterance_id_at(self, row: int):
1732
- if row is None:
1733
- return None
1734
- return self._utterance_ids[row]
1735
-
1736
1771
  def set_threshold(self, threshold: float):
1737
1772
  if threshold != self.threshold:
1738
1773
  self.current_offset = 0
@@ -1792,32 +1827,32 @@ class DiarizationModel(TableModel):
1792
1827
  self.alternate_speaker_filter = current_speaker.id
1793
1828
 
1794
1829
  def reassign_utterance(self, row: int):
1795
- utterance_id = self.utterance_id_at(row)
1830
+ utterance_id = self.utterance_ids[row]
1796
1831
  if utterance_id is None:
1797
1832
  return
1798
- self.changeUtteranceSpeakerRequested.emit(utterance_id, self._suggested_indices[row])
1833
+ self.changeUtteranceSpeakerRequested.emit(utterance_id, self.suggested_indices[row])
1799
1834
  self.layoutAboutToBeChanged.emit()
1800
1835
  self._data.pop(row)
1801
- self._utterance_ids.pop(row)
1802
- self._suggested_indices.pop(row)
1803
- self._speaker_indices.pop(row)
1836
+ self.utterance_ids.pop(row)
1837
+ self.suggested_indices.pop(row)
1838
+ self.speaker_indices.pop(row)
1804
1839
 
1805
1840
  self.layoutChanged.emit()
1806
1841
 
1807
1842
  def merge_speakers(self, row: int):
1808
- speaker_id = self._speaker_indices[row]
1843
+ speaker_id = self.speaker_indices[row]
1809
1844
  if self.inverted:
1810
- utterance_id = self._utterance_ids[row]
1845
+ utterance_id = self.utterance_ids[row]
1811
1846
  self.corpus_model.addCommand.emit(
1812
1847
  undo.ChangeSpeakerCommand([utterance_id], speaker_id, 0, self)
1813
1848
  )
1814
1849
  else:
1815
- self.corpus_model.merge_speakers([self._suggested_indices[row], speaker_id])
1850
+ self.corpus_model.merge_speakers([self.suggested_indices[row], speaker_id])
1816
1851
  self.layoutAboutToBeChanged.emit()
1817
1852
  self._data.pop(row)
1818
- self._utterance_ids.pop(row)
1819
- self._suggested_indices.pop(row)
1820
- self._speaker_indices.pop(row)
1853
+ self.utterance_ids.pop(row)
1854
+ self.suggested_indices.pop(row)
1855
+ self.speaker_indices.pop(row)
1821
1856
 
1822
1857
  self.layoutChanged.emit()
1823
1858
 
@@ -1828,17 +1863,16 @@ class DiarizationModel(TableModel):
1828
1863
  def finish_update_data(self, result, *args, **kwargs):
1829
1864
  self.layoutAboutToBeChanged.emit()
1830
1865
  if result is None:
1831
- self._data, self._utterance_ids, self._suggested_indices, self._speaker_indices = (
1832
- [],
1833
- [],
1834
- [],
1835
- )
1866
+ self._data = []
1867
+ self.utterance_ids = []
1868
+ self.suggested_indices = []
1869
+ self.speaker_indices = []
1836
1870
  else:
1837
1871
  (
1838
1872
  self._data,
1839
- self._utterance_ids,
1840
- self._suggested_indices,
1841
- self._speaker_indices,
1873
+ self.utterance_ids,
1874
+ self.suggested_indices,
1875
+ self.speaker_indices,
1842
1876
  ) = result
1843
1877
  self.layoutChanged.emit()
1844
1878
  self.newResults.emit()