Anchor-annotator 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: Anchor_annotator
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Summary: Anchor annotator is a program for inspecting corpora for the Montreal Forced Aligner and correcting transcriptions and pronunciations.
5
5
  Home-page: https://github.com/MontrealCorpusTools/Anchor-annotator
6
6
  Author: Montreal Corpus Tools
@@ -0,0 +1,22 @@
1
+ anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
3
+ anchor/_version.py,sha256=j90u3VVU4UrJf1fgMUhaZarHK_Do2XGYXr-vZvOFzVo,411
4
+ anchor/command_line.py,sha256=EucG805HyWk_zkMO9RXv9Yj0I0JVdDLZb1_DX2_ISjM,503
5
+ anchor/db.py,sha256=ef4lO6HtCKoxC9CorIc0ZbPxKpjHa576a0ZIBOWNU9E,4956
6
+ anchor/main.py,sha256=cZjj_PbAC2CPDneEy8HGNfH7F1hZpQexevFjBev9YxE,120664
7
+ anchor/models.py,sha256=Uaz_IobsG6aPDH9xfZYwN8bBDzc7U-rcbgm0jqihyd4,95763
8
+ anchor/plot.py,sha256=fUIVvSV7MIvV1HyNo5eZmi1PKun0WFDrnSXHkJD70zA,105668
9
+ anchor/resources_rc.py,sha256=94wgxDTpP4Oy55Br7CZ_YnmvaqzHr4n-AydBPhZc-es,8427242
10
+ anchor/settings.py,sha256=OdJQl54rhQ-JmsDiWIULxMFZZatM4arZ37hnmkk_VM4,47583
11
+ anchor/ui_corpus_manager.py,sha256=e3ybOd4UdYarrLBATxI8vIFnioa4R_BHrbsEz5mJ5eA,8564
12
+ anchor/ui_error_dialog.py,sha256=c_QS0s1VaJEV9AhcrQZQyWHHpUPudWjJY1NI7Ytipio,3832
13
+ anchor/ui_main_window.py,sha256=MYb4PtV1sHYgnc3QwPphKjU3LepzBJpxllhN4nyDook,63525
14
+ anchor/ui_preferences.py,sha256=MOC2dY4qkViW9cUbC0DVSO7FLg-dGSbmR630WFQ6V9c,41843
15
+ anchor/undo.py,sha256=FrzTz9hSUXV6jFJ7EUurxY5NmftQ5NWhtVzzYuVmcRo,32959
16
+ anchor/widgets.py,sha256=Lw2y9bymDiu01eGqLR0M8CSjXYV5-e037XqRQiX7Wn8,157619
17
+ anchor/workers.py,sha256=SUrafStLUrdhi5b3QhkRYKdFghasDc8lxsUCZOF_FRg,171159
18
+ Anchor_annotator-0.4.0.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
19
+ Anchor_annotator-0.4.0.dist-info/METADATA,sha256=EMWnDUTa3Di2cpH4RiP6buwD67Nj-m5vdkkstIFf8M8,1500
20
+ Anchor_annotator-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
21
+ Anchor_annotator-0.4.0.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
22
+ Anchor_annotator-0.4.0.dist-info/RECORD,,
anchor/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.3.2'
16
- __version_tuple__ = version_tuple = (0, 3, 2)
15
+ __version__ = version = '0.4.0'
16
+ __version_tuple__ = version_tuple = (0, 4, 0)
anchor/main.py CHANGED
@@ -540,14 +540,14 @@ class MainWindow(QtWidgets.QMainWindow):
540
540
  self.dictionary_model,
541
541
  )
542
542
  self.ui.speakerWidget.set_models(
543
- self.corpus_model, self.selection_model, self.speaker_model
543
+ self.corpus_model, self.file_selection_model, self.speaker_model
544
544
  )
545
545
  self.ui.transcriptionWidget.set_models(self.corpus_model, self.dictionary_model)
546
546
  self.ui.alignmentWidget.set_models(self.corpus_model)
547
547
  self.ui.acousticModelWidget.set_models(self.corpus_model)
548
548
  self.ui.languageModelWidget.set_models(self.corpus_model)
549
549
  self.ui.dictionaryWidget.set_models(self.dictionary_model)
550
- self.ui.diarizationWidget.set_models(self.diarization_model, self.selection_model)
550
+ self.ui.diarizationWidget.set_models(self.diarization_model, self.file_selection_model)
551
551
  self.ui.oovWidget.set_models(self.oov_model)
552
552
  self.file_selection_model.currentUtteranceChanged.connect(self.change_utterance)
553
553
  self.selection_model.fileViewRequested.connect(self.file_selection_model.set_current_file)
@@ -1087,8 +1087,8 @@ class MainWindow(QtWidgets.QMainWindow):
1087
1087
  )
1088
1088
  for m in (
1089
1089
  session.query(anchor.db.AcousticModel)
1090
- .order_by(anchor.db.AcousticModel.last_used.desc())
1091
1090
  .filter_by(available_locally=True)
1091
+ .order_by(anchor.db.AcousticModel.last_used.desc())
1092
1092
  ):
1093
1093
  a = QtGui.QAction(f"{m.path} [{m.name}]", parent=self)
1094
1094
  a.setData(m.id)
@@ -1105,8 +1105,8 @@ class MainWindow(QtWidgets.QMainWindow):
1105
1105
 
1106
1106
  for m in (
1107
1107
  session.query(anchor.db.Dictionary)
1108
- .order_by(anchor.db.Dictionary.last_used.desc())
1109
1108
  .filter_by(available_locally=True)
1109
+ .order_by(anchor.db.Dictionary.last_used.desc())
1110
1110
  ):
1111
1111
  a = QtGui.QAction(text=f"{m.path} [{m.name}]", parent=self)
1112
1112
  a.setData(m.id)
@@ -1122,8 +1122,8 @@ class MainWindow(QtWidgets.QMainWindow):
1122
1122
 
1123
1123
  for m in (
1124
1124
  session.query(anchor.db.LanguageModel)
1125
- .order_by(anchor.db.LanguageModel.last_used.desc())
1126
1125
  .filter_by(available_locally=True)
1126
+ .order_by(anchor.db.LanguageModel.last_used.desc())
1127
1127
  ):
1128
1128
  a = QtGui.QAction(text=f"{m.path} [{m.name}]", parent=self)
1129
1129
  a.setData(m.id)
@@ -1139,8 +1139,8 @@ class MainWindow(QtWidgets.QMainWindow):
1139
1139
 
1140
1140
  for m in (
1141
1141
  session.query(anchor.db.G2PModel)
1142
- .order_by(anchor.db.G2PModel.last_used.desc())
1143
1142
  .filter_by(available_locally=True)
1143
+ .order_by(anchor.db.G2PModel.last_used.desc())
1144
1144
  ):
1145
1145
  a = QtGui.QAction(text=f"{m.path} [{m.name}]", parent=self)
1146
1146
  a.setData(m.id)
@@ -1385,6 +1385,8 @@ class MainWindow(QtWidgets.QMainWindow):
1385
1385
  self.ui.languageModelMenu.setEnabled(True)
1386
1386
 
1387
1387
  def finalize_load_g2p_model(self, generator: PyniniValidator):
1388
+ if generator is None:
1389
+ return
1388
1390
  self.dictionary_model.set_g2p_generator(generator)
1389
1391
  self.corpus_model.g2p_model = generator.g2p_model
1390
1392
  self.check_actions()
@@ -1932,7 +1934,7 @@ class MainWindow(QtWidgets.QMainWindow):
1932
1934
  session.query(anchor.db.AnchorCorpus).filter_by(current=True).update(
1933
1935
  {anchor.db.AnchorCorpus.acoustic_model_id: m_id}
1934
1936
  )
1935
- session.query(anchor.db.AcousticModel).filter_by(
1937
+ session.query(anchor.db.AcousticModel).filter(
1936
1938
  anchor.db.AcousticModel.id == m_id
1937
1939
  ).update(
1938
1940
  {
@@ -2003,9 +2005,7 @@ class MainWindow(QtWidgets.QMainWindow):
2003
2005
  session.query(anchor.db.AnchorCorpus).filter_by(current=True).update(
2004
2006
  {anchor.db.AnchorCorpus.dictionary_id: m_id}
2005
2007
  )
2006
- session.query(anchor.db.Dictionary).filter_by(
2007
- anchor.db.Dictionary.id == m_id
2008
- ).update(
2008
+ session.query(anchor.db.Dictionary).filter(anchor.db.Dictionary.id == m_id).update(
2009
2009
  {
2010
2010
  anchor.db.Dictionary.last_used: datetime.datetime.now(),
2011
2011
  }
@@ -2060,7 +2060,7 @@ class MainWindow(QtWidgets.QMainWindow):
2060
2060
  session.query(anchor.db.AnchorCorpus).filter_by(current=True).update(
2061
2061
  {anchor.db.AnchorCorpus.language_model_id: m_id}
2062
2062
  )
2063
- session.query(anchor.db.LanguageModel).filter_by(
2063
+ session.query(anchor.db.LanguageModel).filter(
2064
2064
  anchor.db.LanguageModel.id == m_id
2065
2065
  ).update(
2066
2066
  {
@@ -2139,7 +2139,7 @@ class MainWindow(QtWidgets.QMainWindow):
2139
2139
  anchor.db.AnchorCorpus.g2p_model_id: m_id,
2140
2140
  }
2141
2141
  )
2142
- session.query(anchor.db.G2PModel).filter_by(anchor.db.G2PModel.id == m_id).update(
2142
+ session.query(anchor.db.G2PModel).filter(anchor.db.G2PModel.id == m_id).update(
2143
2143
  {
2144
2144
  anchor.db.G2PModel.last_used: datetime.datetime.now(),
2145
2145
  }
@@ -2182,7 +2182,7 @@ class MainWindow(QtWidgets.QMainWindow):
2182
2182
  session.query(anchor.db.AnchorCorpus).filter_by(current=True).update(
2183
2183
  {anchor.db.AnchorCorpus.ivector_extractor_id: m_id}
2184
2184
  )
2185
- session.query(anchor.db.IvectorExtractor).filter_by(
2185
+ session.query(anchor.db.IvectorExtractor).filter(
2186
2186
  anchor.db.IvectorExtractor.id == m_id
2187
2187
  ).update(
2188
2188
  {
anchor/models.py CHANGED
@@ -24,6 +24,12 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
24
24
  )
25
25
  from montreal_forced_aligner.data import PhoneType, WordType
26
26
  from montreal_forced_aligner.db import File, Phone, Speaker, Utterance
27
+ from montreal_forced_aligner.dictionary.mixins import (
28
+ DEFAULT_CLITIC_MARKERS,
29
+ DEFAULT_COMPOUND_MARKERS,
30
+ DEFAULT_PUNCTUATION,
31
+ DEFAULT_WORD_BREAK_MARKERS,
32
+ )
27
33
  from montreal_forced_aligner.g2p.generator import PyniniValidator
28
34
  from montreal_forced_aligner.models import (
29
35
  AcousticModel,
@@ -45,6 +51,23 @@ if typing.TYPE_CHECKING:
45
51
  logger = logging.getLogger("anchor")
46
52
 
47
53
 
54
+ WORD_BREAK_SET = "".join(
55
+ sorted(
56
+ set(
57
+ DEFAULT_WORD_BREAK_MARKERS
58
+ + DEFAULT_PUNCTUATION
59
+ + DEFAULT_CLITIC_MARKERS
60
+ + DEFAULT_COMPOUND_MARKERS
61
+ )
62
+ )
63
+ )
64
+
65
+ if "-" in WORD_BREAK_SET:
66
+ WORD_BREAK_SET = "" + WORD_BREAK_SET.replace("-", "")
67
+
68
+ WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"
69
+
70
+
48
71
  # noinspection PyUnresolvedReferences
49
72
  @dataclass(slots=True)
50
73
  class TextFilterQuery:
@@ -76,9 +99,9 @@ class TextFilterQuery:
76
99
  if posix:
77
100
  text = text.replace(r"\b", word_break_set)
78
101
  if text.startswith(r"\b"):
79
- text = r"((?<=\s)|(?<=^))" + text[2:]
102
+ text = rf"((?<={WORD_BREAK_REGEX_SET})|(?<=^))" + text[2:]
80
103
  if text.endswith(r"\b"):
81
- text = text[:-2] + r"((?=\s)|(?=$))"
104
+ text = text[:-2] + rf"((?={WORD_BREAK_REGEX_SET})|(?=$))"
82
105
  if self.regex or self.word:
83
106
  if not self.case_sensitive:
84
107
  text = "(?i)" + text
@@ -545,7 +568,6 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
545
568
  resetView = QtCore.Signal()
546
569
  viewChanged = QtCore.Signal(object, object)
547
570
  selectionAudioChanged = QtCore.Signal()
548
- currentTimeChanged = QtCore.Signal(object)
549
571
  currentUtteranceChanged = QtCore.Signal()
550
572
  speakerRequested = QtCore.Signal(object)
551
573
 
@@ -710,7 +732,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
710
732
  end = None
711
733
  self.selected_min_time = begin
712
734
  self.selected_max_time = end
713
- self.selectionAudioChanged.emit()
735
+ if self.selected_min_time != self.min_time:
736
+ self.selectionAudioChanged.emit()
714
737
 
715
738
  def request_start_time(self, start_time):
716
739
  if start_time >= self.max_time:
@@ -719,7 +742,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
719
742
  return
720
743
  self.selected_min_time = start_time
721
744
  self.selected_max_time = None
722
- self.selectionAudioChanged.emit()
745
+ if self.selected_min_time != self.min_time:
746
+ self.selectionAudioChanged.emit()
723
747
 
724
748
  def set_current_channel(self, channel):
725
749
  if channel == self.selected_channel:
@@ -804,7 +828,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
804
828
  end = self.max_time
805
829
  self.selected_min_time = begin
806
830
  self.selected_max_time = end
807
- self.selectionAudioChanged.emit()
831
+ if self.selected_min_time != self.min_time:
832
+ self.selectionAudioChanged.emit()
808
833
 
809
834
  def visible_utterances(self) -> typing.List[Utterance]:
810
835
  file_utts = []
@@ -843,8 +868,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
843
868
  self.selected_max_time = None
844
869
  self.viewChanged.emit(self.min_time, self.max_time)
845
870
 
846
- def set_current_file(self, info, force_update=False):
847
- file_id, begin, end, utterance_id, speaker_id = info
871
+ def set_current_file(self, file_id, begin, end, utterance_id, speaker_id, force_update=False):
848
872
  try:
849
873
  new_file = self.model().file is None or self.model().file.id != file_id
850
874
  except sqlalchemy.orm.exc.DetachedInstanceError:
@@ -926,7 +950,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
926
950
  channelChanged = QtCore.Signal()
927
951
  resetView = QtCore.Signal()
928
952
  fileAboutToChange = QtCore.Signal()
929
- fileViewRequested = QtCore.Signal(object)
953
+ fileViewRequested = QtCore.Signal(object, object, object, object, object)
930
954
  selectionAudioChanged = QtCore.Signal()
931
955
  currentTimeChanged = QtCore.Signal(object)
932
956
  currentUtteranceChanged = QtCore.Signal()
@@ -1023,7 +1047,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
1023
1047
  if focus:
1024
1048
  flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
1025
1049
  if row == self.currentIndex().row():
1026
- self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
1050
+ self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
1027
1051
 
1028
1052
  index = self.model().index(row, 0)
1029
1053
  if not index.isValid():
@@ -1082,7 +1106,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
1082
1106
  return
1083
1107
  self.current_utterance_id = utt
1084
1108
  self.currentUtteranceChanged.emit()
1085
- self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
1109
+ self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
1086
1110
 
1087
1111
  def model(self) -> CorpusModel:
1088
1112
  return super().model()
@@ -1099,7 +1123,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
1099
1123
  return
1100
1124
  self.current_utterance_id = utt_id
1101
1125
  self.currentUtteranceChanged.emit()
1102
- self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
1126
+ self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
1103
1127
 
1104
1128
 
1105
1129
  class OovModel(TableModel):
@@ -1266,7 +1290,7 @@ class DictionaryTableModel(TableModel):
1266
1290
  return True
1267
1291
  return False
1268
1292
 
1269
- def add_word(self, word, word_id):
1293
+ def add_word(self, word, word_id=None):
1270
1294
  self.requestLookup.emit(word)
1271
1295
  self.add_pronunciation(word, word_id)
1272
1296
 
@@ -1697,10 +1721,10 @@ class DiarizationModel(TableModel):
1697
1721
  super().__init__(columns, parent=parent)
1698
1722
  self.settings = AnchorSettings()
1699
1723
  self.speaker_count = None
1700
- self._utterance_ids = []
1701
- self._file_ids = []
1702
- self._speaker_indices = []
1703
- self._suggested_indices = []
1724
+ self.utterance_ids = []
1725
+ self.file_ids = []
1726
+ self.speaker_indices = []
1727
+ self.suggested_indices = []
1704
1728
  self.corpus_model: Optional[CorpusModel] = None
1705
1729
  self.set_limit(self.settings.value(self.settings.RESULTS_PER_PAGE))
1706
1730
  self.speaker_filter = None
@@ -1728,11 +1752,6 @@ class DiarizationModel(TableModel):
1728
1752
  return self._data[index.row()][index.column()]
1729
1753
  return super().data(index, role)
1730
1754
 
1731
- def utterance_id_at(self, row: int):
1732
- if row is None:
1733
- return None
1734
- return self._utterance_ids[row]
1735
-
1736
1755
  def set_threshold(self, threshold: float):
1737
1756
  if threshold != self.threshold:
1738
1757
  self.current_offset = 0
@@ -1792,32 +1811,32 @@ class DiarizationModel(TableModel):
1792
1811
  self.alternate_speaker_filter = current_speaker.id
1793
1812
 
1794
1813
  def reassign_utterance(self, row: int):
1795
- utterance_id = self.utterance_id_at(row)
1814
+ utterance_id = self.utterance_ids[row]
1796
1815
  if utterance_id is None:
1797
1816
  return
1798
- self.changeUtteranceSpeakerRequested.emit(utterance_id, self._suggested_indices[row])
1817
+ self.changeUtteranceSpeakerRequested.emit(utterance_id, self.suggested_indices[row])
1799
1818
  self.layoutAboutToBeChanged.emit()
1800
1819
  self._data.pop(row)
1801
- self._utterance_ids.pop(row)
1802
- self._suggested_indices.pop(row)
1803
- self._speaker_indices.pop(row)
1820
+ self.utterance_ids.pop(row)
1821
+ self.suggested_indices.pop(row)
1822
+ self.speaker_indices.pop(row)
1804
1823
 
1805
1824
  self.layoutChanged.emit()
1806
1825
 
1807
1826
  def merge_speakers(self, row: int):
1808
- speaker_id = self._speaker_indices[row]
1827
+ speaker_id = self.speaker_indices[row]
1809
1828
  if self.inverted:
1810
- utterance_id = self._utterance_ids[row]
1829
+ utterance_id = self.utterance_ids[row]
1811
1830
  self.corpus_model.addCommand.emit(
1812
1831
  undo.ChangeSpeakerCommand([utterance_id], speaker_id, 0, self)
1813
1832
  )
1814
1833
  else:
1815
- self.corpus_model.merge_speakers([self._suggested_indices[row], speaker_id])
1834
+ self.corpus_model.merge_speakers([self.suggested_indices[row], speaker_id])
1816
1835
  self.layoutAboutToBeChanged.emit()
1817
1836
  self._data.pop(row)
1818
- self._utterance_ids.pop(row)
1819
- self._suggested_indices.pop(row)
1820
- self._speaker_indices.pop(row)
1837
+ self.utterance_ids.pop(row)
1838
+ self.suggested_indices.pop(row)
1839
+ self.speaker_indices.pop(row)
1821
1840
 
1822
1841
  self.layoutChanged.emit()
1823
1842
 
@@ -1828,17 +1847,16 @@ class DiarizationModel(TableModel):
1828
1847
  def finish_update_data(self, result, *args, **kwargs):
1829
1848
  self.layoutAboutToBeChanged.emit()
1830
1849
  if result is None:
1831
- self._data, self._utterance_ids, self._suggested_indices, self._speaker_indices = (
1832
- [],
1833
- [],
1834
- [],
1835
- )
1850
+ self._data = []
1851
+ self.utterance_ids = []
1852
+ self.suggested_indices = []
1853
+ self.speaker_indices = []
1836
1854
  else:
1837
1855
  (
1838
1856
  self._data,
1839
- self._utterance_ids,
1840
- self._suggested_indices,
1841
- self._speaker_indices,
1857
+ self.utterance_ids,
1858
+ self.suggested_indices,
1859
+ self.speaker_indices,
1842
1860
  ) = result
1843
1861
  self.layoutChanged.emit()
1844
1862
  self.newResults.emit()
anchor/plot.py CHANGED
@@ -13,6 +13,11 @@ import sqlalchemy
13
13
  from Bio import pairwise2
14
14
  from montreal_forced_aligner.data import CtmInterval
15
15
  from montreal_forced_aligner.db import Speaker, Utterance
16
+ from montreal_forced_aligner.dictionary.mixins import (
17
+ DEFAULT_PUNCTUATION,
18
+ DEFAULT_WORD_BREAK_MARKERS,
19
+ )
20
+ from montreal_forced_aligner.tokenization.simple import SimpleTokenizer
16
21
  from PySide6 import QtCore, QtGui, QtWidgets
17
22
 
18
23
  from anchor import workers
@@ -161,7 +166,7 @@ class UtteranceClusterView(pg.PlotWidget):
161
166
  self.setBackground(self.settings.value(self.settings.PRIMARY_VERY_DARK_COLOR))
162
167
  self.corpus_model = None
163
168
  self.speaker_model: SpeakerModel = None
164
- self.selection_model: CorpusSelectionModel = None
169
+ self.selection_model: FileSelectionModel = None
165
170
  self.updated_indices = set()
166
171
  self.brushes = {-1: pg.mkBrush(0.5)}
167
172
  self.scatter_item = ScatterPlot()
@@ -226,7 +231,7 @@ class UtteranceClusterView(pg.PlotWidget):
226
231
  def set_models(
227
232
  self,
228
233
  corpus_model: CorpusModel,
229
- selection_model: CorpusSelectionModel,
234
+ selection_model: FileSelectionModel,
230
235
  speaker_model: SpeakerModel,
231
236
  ):
232
237
  self.corpus_model = corpus_model
@@ -248,13 +253,12 @@ class UtteranceClusterView(pg.PlotWidget):
248
253
  if ev.button() == QtCore.Qt.MouseButton.LeftButton:
249
254
  utterance_id = int(self.speaker_model.utterance_ids[index])
250
255
  utterance = self.corpus_model.session.query(Utterance).get(utterance_id)
251
- self.selection_model.set_current_utterance(utterance_id)
252
- self.selection_model.current_utterance_id = utterance_id
253
256
  self.selection_model.set_current_file(
254
257
  utterance.file_id,
255
258
  utterance.begin,
256
259
  utterance.end,
257
- utterance.channel,
260
+ utterance.id,
261
+ utterance.speaker_id,
258
262
  force_update=True,
259
263
  )
260
264
  else:
@@ -1395,7 +1399,7 @@ class NormalizedTextRegion(TextAttributeRegion):
1395
1399
 
1396
1400
 
1397
1401
  class Highlighter(QtGui.QSyntaxHighlighter):
1398
- WORDS = r"\S+"
1402
+ WORDS = rf"[^\s{''.join(DEFAULT_WORD_BREAK_MARKERS)+''.join(DEFAULT_PUNCTUATION)}]+"
1399
1403
 
1400
1404
  def __init__(self, *args):
1401
1405
  super(Highlighter, self).__init__(*args)
@@ -1424,8 +1428,20 @@ class Highlighter(QtGui.QSyntaxHighlighter):
1424
1428
  def highlightBlock(self, text):
1425
1429
  self.settings.sync()
1426
1430
  self.spellcheck_format.setUnderlineColor(self.settings.error_color)
1431
+ tokenizers = self.dictionary_model.corpus_model.corpus.get_tokenizers()
1432
+ dictionary_id = self.dictionary_model.corpus_model.corpus.get_dict_id_for_speaker(
1433
+ self.speaker_id
1434
+ )
1435
+ words = self.WORDS
1436
+ if isinstance(tokenizers, dict) and dictionary_id is not None:
1437
+ tokenizer = self.dictionary_model.corpus_model.corpus.get_tokenizer(dictionary_id)
1438
+ else:
1439
+ tokenizer = tokenizers
1440
+ if isinstance(tokenizer, SimpleTokenizer):
1441
+ extra_symbols = "".join(tokenizer.punctuation) + "".join(tokenizer.word_break_markers)
1442
+ words = rf"[^\s{extra_symbols}]+"
1427
1443
  if self.dictionary_model is not None and self.dictionary_model.word_sets:
1428
- for word_object in re.finditer(self.WORDS, text):
1444
+ for word_object in re.finditer(words, text):
1429
1445
  if not self.dictionary_model.check_word(word_object.group(), self.speaker_id):
1430
1446
  self.setFormat(
1431
1447
  word_object.start(),
anchor/settings.py CHANGED
@@ -855,12 +855,27 @@ class AnchorSettings(QtCore.QSettings):
855
855
  margin: 0 -2px; /* expand outside the groove */
856
856
  }}
857
857
  QTableWidget, QTableView, QTreeView, QTreeWidget {{
858
- alternate-background-color: {table_even_color};
858
+ border: 4px solid {enabled_color};
859
+ }}
860
+ QTableView::item, QTreeWidget::item {{
861
+ background-color: {table_odd_color};
862
+ color: {table_text_color};
859
863
  selection-background-color: {selection_color};
860
864
  selection-color: {text_edit_color};
861
- background-color: {table_odd_color};
865
+ }}
866
+ QTableView::item:selected, QTreeWidget::item:selected {{
867
+ background-color: {selection_color};
868
+ color: {text_edit_color};
869
+ }}
870
+ QTableView::item:alternate, QTreeWidget::item:alternate {{
871
+ background-color: {table_even_color};
862
872
  color: {table_text_color};
863
- border: 4px solid {enabled_color};
873
+ selection-background-color: {selection_color};
874
+ selection-color: {text_edit_color};
875
+ }}
876
+ QTableView::item:alternate:selected, QTreeWidget::item:alternate:selected {{
877
+ background-color: {selection_color};
878
+ color: {text_edit_color};
864
879
  }}
865
880
  QTreeView QLabel, QTreeWidget QLabel{{
866
881
  color: {table_text_color};
anchor/undo.py CHANGED
@@ -39,8 +39,11 @@ class CorpusCommand(QtGui.QUndoCommand):
39
39
 
40
40
  def redo(self) -> None:
41
41
  with self.corpus_model.edit_lock:
42
- self._redo(self.corpus_model.session)
43
- self.corpus_model.session.commit()
42
+ try:
43
+ self._redo(self.corpus_model.session)
44
+ self.corpus_model.session.commit()
45
+ except Exception:
46
+ self.corpus_model.session.rollback()
44
47
  # while True:
45
48
  # try:
46
49
  # with self.corpus_model.session.begin_nested():
@@ -53,8 +56,11 @@ class CorpusCommand(QtGui.QUndoCommand):
53
56
 
54
57
  def undo(self) -> None:
55
58
  with self.corpus_model.edit_lock:
56
- self._undo(self.corpus_model.session)
57
- self.corpus_model.session.commit()
59
+ try:
60
+ self._undo(self.corpus_model.session)
61
+ self.corpus_model.session.commit()
62
+ except Exception:
63
+ self.corpus_model.session.rollback()
58
64
  # while True:
59
65
  # try:
60
66
  # with self.corpus_model.session.begin_nested():
@@ -453,6 +459,11 @@ class UpdateUtteranceTextCommand(FileCommand):
453
459
  self.new_text = other.new_text
454
460
  return True
455
461
 
462
+ def update_data(self):
463
+ super().update_data()
464
+ self.corpus_model.changeCommandFired.emit()
465
+ self.corpus_model.update_utterance_table_row(self.utterance)
466
+
456
467
 
457
468
  class ReplaceAllCommand(CorpusCommand):
458
469
  def __init__(
anchor/widgets.py CHANGED
@@ -74,10 +74,7 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
74
74
  self.start_load_time = None
75
75
  self.min_time = None
76
76
  self.selection_model = None
77
- self.timer = QtCore.QTimer(self)
78
- self.timer.setInterval(1)
79
- self.timer.timeout.connect(self.checkStop)
80
- # self.positionChanged.connect(self.checkStop)
77
+ self.positionChanged.connect(self.checkStop)
81
78
  # self.positionChanged.connect(self.positionDebug)
82
79
  self.errorOccurred.connect(self.handle_error)
83
80
  o = None
@@ -89,7 +86,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
89
86
  self._audio_output.setDevice(self.devices.defaultAudioOutput())
90
87
  self.setAudioOutput(self._audio_output)
91
88
  self.playbackStateChanged.connect(self.reset_position)
92
- self.set_volume(self.settings.value(self.settings.VOLUME))
93
89
  self.fade_in_anim = QtCore.QPropertyAnimation(self._audio_output, b"volume")
94
90
  self.fade_in_anim.setDuration(10)
95
91
  self.fade_in_anim.setStartValue(0.1)
@@ -105,6 +101,7 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
105
101
  self.fade_out_anim.setKeyValueAt(0.1, self._audio_output.volume())
106
102
  self.fade_out_anim.finished.connect(super().pause)
107
103
  self.file_path = None
104
+ self.set_volume(self.settings.value(self.settings.VOLUME))
108
105
 
109
106
  def setMuted(self, muted: bool):
110
107
  self.audioOutput().setMuted(muted)
@@ -125,7 +122,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
125
122
  or self.currentTime() >= self.maxTime()
126
123
  ):
127
124
  self.setCurrentTime(self.startTime())
128
- self.timer.start()
129
125
  super(MediaPlayer, self).play()
130
126
  if fade_in:
131
127
  self.fade_in_anim.start()
@@ -153,14 +149,11 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
153
149
  def reset_position(self):
154
150
  state = self.playbackState()
155
151
  if state == QtMultimedia.QMediaPlayer.PlaybackState.StoppedState:
156
- self.timer.stop()
157
152
  self.setCurrentTime(self.startTime())
158
- self.timeChanged.emit(self.currentTime())
159
- elif state == QtMultimedia.QMediaPlayer.PlaybackState.PausedState:
160
- self.timer.stop()
161
153
 
162
154
  def update_audio_device(self):
163
155
  self._audio_output.setDevice(self.devices.defaultAudioOutput())
156
+ self.setAudioOutput(self._audio_output)
164
157
 
165
158
  def refresh_settings(self):
166
159
  self.settings.sync()
@@ -177,9 +170,9 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
177
170
  self.selection_model.fileChanged.connect(self.loadNewFile)
178
171
  self.selection_model.viewChanged.connect(self.update_times)
179
172
  self.selection_model.selectionAudioChanged.connect(self.update_selection_times)
180
- self.selection_model.currentTimeChanged.connect(self.update_selection_times)
181
173
 
182
174
  def set_volume(self, volume: int):
175
+ self.settings.setValue(self.settings.VOLUME, volume)
183
176
  if self.audioOutput() is None:
184
177
  return
185
178
  linearVolume = QtMultimedia.QtAudio.convertVolume(
@@ -188,6 +181,8 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
188
181
  QtMultimedia.QtAudio.VolumeScale.LinearVolumeScale,
189
182
  )
190
183
  self.audioOutput().setVolume(linearVolume)
184
+ self.fade_in_anim.setEndValue(linearVolume)
185
+ self.fade_out_anim.setStartValue(linearVolume)
191
186
 
192
187
  def volume(self) -> int:
193
188
  if self.audioOutput() is None:
@@ -207,9 +202,12 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
207
202
  self.setCurrentTime(self.startTime())
208
203
 
209
204
  def update_times(self):
205
+ if self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
206
+ return
210
207
  if self.currentTime() < self.startTime() or self.currentTime() > self.maxTime():
211
208
  self.stop()
212
209
  if self.playbackState() != QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
210
+ self.stop()
213
211
  self.setCurrentTime(self.startTime())
214
212
 
215
213
  def loadNewFile(self, *args):
@@ -238,27 +236,14 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
238
236
  def setCurrentTime(self, time):
239
237
  if time is None:
240
238
  time = 0
241
- if self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
242
- return
243
239
  pos = int(time * 1000)
244
240
  self.setPosition(pos)
245
- self.timeChanged.emit(self.currentTime())
246
241
 
247
242
  def checkStop(self):
248
- if not self.hasAudio():
249
- self.stop()
250
- self.setSource(
251
- QtCore.QUrl.fromLocalFile(
252
- self.selection_model.model().file.sound_file.sound_file_path
253
- )
254
- )
255
- self.play()
256
- return
243
+ self.timeChanged.emit(self.currentTime())
257
244
  if self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
258
245
  if self.maxTime() is None or self.currentTime() > self.maxTime():
259
246
  self.stop()
260
- self.reset_position()
261
- self.timeChanged.emit(self.currentTime())
262
247
 
263
248
 
264
249
  class NewSpeakerField(QtWidgets.QLineEdit):
@@ -2546,6 +2531,7 @@ class SpeakerClustersWidget(QtWidgets.QWidget):
2546
2531
 
2547
2532
  def __init__(self, *args, **kwargs):
2548
2533
  super().__init__(*args, **kwargs)
2534
+ self.speaker_model = None
2549
2535
  self.settings = AnchorSettings()
2550
2536
  self.settings.sync()
2551
2537
  form_layout = QtWidgets.QHBoxLayout()
@@ -2640,7 +2626,7 @@ class DiarizationTable(AnchorTableView):
2640
2626
  self.doubleClicked.connect(self.search_utterance)
2641
2627
  self.clicked.connect(self.reassign_utterance)
2642
2628
  self.diarization_model: Optional[DiarizationModel] = None
2643
- self.selection_model: Optional[CorpusSelectionModel] = None
2629
+ self.selection_model: Optional[FileSelectionModel] = None
2644
2630
  self.set_reference_utterance_action = QtGui.QAction("Use utterance as reference", self)
2645
2631
  self.set_reference_utterance_action.triggered.connect(self.set_reference_utterance)
2646
2632
  self.setContextMenuPolicy(QtCore.Qt.ContextMenuPolicy.CustomContextMenu)
@@ -2648,7 +2634,7 @@ class DiarizationTable(AnchorTableView):
2648
2634
 
2649
2635
  def generate_context_menu(self, location):
2650
2636
  menu = QtWidgets.QMenu()
2651
- # menu.setStyleSheet(self.settings.menu_style_sheet)
2637
+ menu.setStyleSheet(self.settings.menu_style_sheet)
2652
2638
  menu.addAction(self.set_reference_utterance_action)
2653
2639
  menu.exec_(self.mapToGlobal(location))
2654
2640
 
@@ -2656,7 +2642,7 @@ class DiarizationTable(AnchorTableView):
2656
2642
  rows = self.selectionModel().selectedRows()
2657
2643
  if not rows:
2658
2644
  return
2659
- utterance_id = self.diarization_model._utterance_ids[rows[0].row()]
2645
+ utterance_id = self.diarization_model.utterance_ids[rows[0].row()]
2660
2646
  self.diarization_model.set_utterance_filter(utterance_id)
2661
2647
  self.referenceUtteranceSelected.emit(
2662
2648
  self.diarization_model.data(
@@ -2665,7 +2651,7 @@ class DiarizationTable(AnchorTableView):
2665
2651
  )
2666
2652
  )
2667
2653
 
2668
- def set_models(self, model: DiarizationModel, selection_model: CorpusSelectionModel):
2654
+ def set_models(self, model: DiarizationModel, selection_model: FileSelectionModel):
2669
2655
  self.diarization_model = model
2670
2656
  self.selection_model = selection_model
2671
2657
  self.setModel(model)
@@ -2684,14 +2670,14 @@ class DiarizationTable(AnchorTableView):
2684
2670
  return
2685
2671
  if index.column() == 0:
2686
2672
  row = index.row()
2687
- utterance_id = self.diarization_model._utterance_ids[row]
2673
+ utterance_id = self.diarization_model.utterance_ids[row]
2688
2674
  if utterance_id is None:
2689
2675
  return
2690
2676
  with self.diarization_model.corpus_model.corpus.session() as session:
2691
2677
  try:
2692
- file_id, begin, end, channel = (
2678
+ file_id, begin, end, speaker_id = (
2693
2679
  session.query(
2694
- Utterance.file_id, Utterance.begin, Utterance.end, Utterance.channel
2680
+ Utterance.file_id, Utterance.begin, Utterance.end, Utterance.speaker_id
2695
2681
  )
2696
2682
  .filter(Utterance.id == utterance_id)
2697
2683
  .first()
@@ -2701,19 +2687,18 @@ class DiarizationTable(AnchorTableView):
2701
2687
  return
2702
2688
  else:
2703
2689
  if index.column() == 1:
2704
- speaker_id = self.diarization_model._suggested_indices[index.row()]
2690
+ speaker_id = self.diarization_model.suggested_indices[index.row()]
2705
2691
  else:
2706
- speaker_id = self.diarization_model._speaker_indices[index.row()]
2692
+ speaker_id = self.diarization_model.speaker_indices[index.row()]
2707
2693
  with self.diarization_model.corpus_model.corpus.session() as session:
2708
2694
  c = session.query(Corpus).first()
2709
2695
  try:
2710
- utterance_id, file_id, begin, end, channel = (
2696
+ utterance_id, file_id, begin, end = (
2711
2697
  session.query(
2712
2698
  Utterance.id,
2713
2699
  Utterance.file_id,
2714
2700
  Utterance.begin,
2715
2701
  Utterance.end,
2716
- Utterance.channel,
2717
2702
  )
2718
2703
  .join(Utterance.speaker)
2719
2704
  .filter(Utterance.speaker_id == speaker_id)
@@ -2725,12 +2710,12 @@ class DiarizationTable(AnchorTableView):
2725
2710
  except TypeError:
2726
2711
  self.selection_model.clearSelection()
2727
2712
  return
2728
- self.selection_model.set_current_utterance(utterance_id)
2729
2713
  self.selection_model.set_current_file(
2730
2714
  file_id,
2731
2715
  begin,
2732
2716
  end,
2733
- channel,
2717
+ utterance_id,
2718
+ speaker_id,
2734
2719
  force_update=True,
2735
2720
  )
2736
2721
 
@@ -3280,7 +3265,7 @@ class SpeakerWidget(QtWidgets.QWidget):
3280
3265
  def set_models(
3281
3266
  self,
3282
3267
  corpus_model: CorpusModel,
3283
- selection_model: CorpusSelectionModel,
3268
+ selection_model: FileSelectionModel,
3284
3269
  speaker_model: SpeakerModel,
3285
3270
  ):
3286
3271
  self.speaker_model = speaker_model
@@ -3576,7 +3561,6 @@ class ModelSelectWidget(QtWidgets.QWidget):
3576
3561
  for i, m in enumerate(self.model.models):
3577
3562
  if not m.available_locally or not os.path.exists(m.path):
3578
3563
  continue
3579
- print(m.name, m.path, os.path.exists(m.path))
3580
3564
  self.model_select.addItem(m.name, userData=m.id)
3581
3565
  if m.id == current_model:
3582
3566
  index = i
anchor/workers.py CHANGED
@@ -3650,6 +3650,8 @@ class ImportG2PModelWorker(FunctionWorker): # pragma: no cover
3650
3650
  try:
3651
3651
  generator = Generator(g2p_model_path=self.model_path, num_pronunciations=5)
3652
3652
  generator.setup()
3653
+ except shutil.ReadError:
3654
+ self.signals.result.emit(None)
3653
3655
  except Exception:
3654
3656
  exctype, value = sys.exc_info()[:2]
3655
3657
  self.signals.error.emit((exctype, value, traceback.format_exc()))
@@ -1,22 +0,0 @@
1
- anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
3
- anchor/_version.py,sha256=9jP8Fo8egXoMs_T3DFqSuJYg4n9o9mnwYubl_hnut4k,411
4
- anchor/command_line.py,sha256=EucG805HyWk_zkMO9RXv9Yj0I0JVdDLZb1_DX2_ISjM,503
5
- anchor/db.py,sha256=ef4lO6HtCKoxC9CorIc0ZbPxKpjHa576a0ZIBOWNU9E,4956
6
- anchor/main.py,sha256=eazWSZC94_nyBx_6rJ85hxC9wkCJYVP5FN_t9vnRm44,120658
7
- anchor/models.py,sha256=sCZf5wF6g1KorMgb0AifmqhaxZp5fiYmlmaJoow-tFI,95230
8
- anchor/plot.py,sha256=eNVG9sDdRA9_KKrHKSb2TF66bIluJsJzVnTGN3q-Brk,104878
9
- anchor/resources_rc.py,sha256=94wgxDTpP4Oy55Br7CZ_YnmvaqzHr4n-AydBPhZc-es,8427242
10
- anchor/settings.py,sha256=QdfBtJowHpkBLzJ_3bZRKxF1rJDBW9Z5kp83sJVz0pA,46965
11
- anchor/ui_corpus_manager.py,sha256=e3ybOd4UdYarrLBATxI8vIFnioa4R_BHrbsEz5mJ5eA,8564
12
- anchor/ui_error_dialog.py,sha256=c_QS0s1VaJEV9AhcrQZQyWHHpUPudWjJY1NI7Ytipio,3832
13
- anchor/ui_main_window.py,sha256=MYb4PtV1sHYgnc3QwPphKjU3LepzBJpxllhN4nyDook,63525
14
- anchor/ui_preferences.py,sha256=MOC2dY4qkViW9cUbC0DVSO7FLg-dGSbmR630WFQ6V9c,41843
15
- anchor/undo.py,sha256=HXhrzV-T2JKwPd28KCaR9S6GWmi2Wr2Xk7IBEonRRCs,32564
16
- anchor/widgets.py,sha256=arL006v_cOGWudEF4Adbh_wRjiyFRGHP9BXOtfCr0h0,158301
17
- anchor/workers.py,sha256=iWLBGCg6jJr_OmVFJCRGMgF5Rw5G9IhpdNiqDv8ZBxU,171083
18
- Anchor_annotator-0.3.2.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
19
- Anchor_annotator-0.3.2.dist-info/METADATA,sha256=YcbnBTVx6O59V8W-6t9wu5GVrQBvGcXWDK3fCWHhkvE,1500
20
- Anchor_annotator-0.3.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
21
- Anchor_annotator-0.3.2.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
22
- Anchor_annotator-0.3.2.dist-info/RECORD,,