Anchor-annotator 0.3.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.5.0.dist-info}/METADATA +1 -1
- Anchor_annotator-0.5.0.dist-info/RECORD +22 -0
- anchor/_version.py +2 -2
- anchor/main.py +178 -62
- anchor/models.py +88 -54
- anchor/plot.py +316 -115
- anchor/resources_rc.py +6475 -4801
- anchor/settings.py +195 -95
- anchor/ui_error_dialog.py +15 -16
- anchor/ui_main_window.py +40 -612
- anchor/ui_preferences.py +75 -44
- anchor/undo.py +15 -5
- anchor/widgets.py +98 -132
- anchor/workers.py +29 -1
- Anchor_annotator-0.3.3.dist-info/RECORD +0 -22
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.5.0.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.5.0.dist-info}/WHEEL +0 -0
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.5.0.dist-info}/top_level.txt +0 -0
anchor/models.py
CHANGED
@@ -24,6 +24,12 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
|
|
24
24
|
)
|
25
25
|
from montreal_forced_aligner.data import PhoneType, WordType
|
26
26
|
from montreal_forced_aligner.db import File, Phone, Speaker, Utterance
|
27
|
+
from montreal_forced_aligner.dictionary.mixins import (
|
28
|
+
DEFAULT_CLITIC_MARKERS,
|
29
|
+
DEFAULT_COMPOUND_MARKERS,
|
30
|
+
DEFAULT_PUNCTUATION,
|
31
|
+
DEFAULT_WORD_BREAK_MARKERS,
|
32
|
+
)
|
27
33
|
from montreal_forced_aligner.g2p.generator import PyniniValidator
|
28
34
|
from montreal_forced_aligner.models import (
|
29
35
|
AcousticModel,
|
@@ -45,6 +51,23 @@ if typing.TYPE_CHECKING:
|
|
45
51
|
logger = logging.getLogger("anchor")
|
46
52
|
|
47
53
|
|
54
|
+
WORD_BREAK_SET = "".join(
|
55
|
+
sorted(
|
56
|
+
set(
|
57
|
+
DEFAULT_WORD_BREAK_MARKERS
|
58
|
+
+ DEFAULT_PUNCTUATION
|
59
|
+
+ DEFAULT_CLITIC_MARKERS
|
60
|
+
+ DEFAULT_COMPOUND_MARKERS
|
61
|
+
)
|
62
|
+
)
|
63
|
+
)
|
64
|
+
|
65
|
+
if "-" in WORD_BREAK_SET:
|
66
|
+
WORD_BREAK_SET = "" + WORD_BREAK_SET.replace("-", "")
|
67
|
+
|
68
|
+
WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"
|
69
|
+
|
70
|
+
|
48
71
|
# noinspection PyUnresolvedReferences
|
49
72
|
@dataclass(slots=True)
|
50
73
|
class TextFilterQuery:
|
@@ -76,9 +99,9 @@ class TextFilterQuery:
|
|
76
99
|
if posix:
|
77
100
|
text = text.replace(r"\b", word_break_set)
|
78
101
|
if text.startswith(r"\b"):
|
79
|
-
text =
|
102
|
+
text = rf"((?<={WORD_BREAK_REGEX_SET})|(?<=^))" + text[2:]
|
80
103
|
if text.endswith(r"\b"):
|
81
|
-
text = text[:-2] +
|
104
|
+
text = text[:-2] + rf"((?={WORD_BREAK_REGEX_SET})|(?=$))"
|
82
105
|
if self.regex or self.word:
|
83
106
|
if not self.case_sensitive:
|
84
107
|
text = "(?i)" + text
|
@@ -482,7 +505,13 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
|
|
482
505
|
normalized_text = ""
|
483
506
|
speaker_id = None
|
484
507
|
channel = None
|
508
|
+
prev_index = None
|
485
509
|
for old_utt in sorted(utterances, key=lambda x: x.begin):
|
510
|
+
index = self.reversed_indices[old_utt.id]
|
511
|
+
if prev_index is not None:
|
512
|
+
if index - prev_index != 1:
|
513
|
+
return
|
514
|
+
prev_index = index
|
486
515
|
if speaker_id is None:
|
487
516
|
speaker_id = old_utt.speaker_id
|
488
517
|
if channel is None:
|
@@ -544,8 +573,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
544
573
|
channelChanged = QtCore.Signal()
|
545
574
|
resetView = QtCore.Signal()
|
546
575
|
viewChanged = QtCore.Signal(object, object)
|
547
|
-
selectionAudioChanged = QtCore.Signal()
|
548
|
-
currentTimeChanged = QtCore.Signal(object)
|
576
|
+
selectionAudioChanged = QtCore.Signal(object)
|
549
577
|
currentUtteranceChanged = QtCore.Signal()
|
550
578
|
speakerRequested = QtCore.Signal(object)
|
551
579
|
|
@@ -584,6 +612,9 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
584
612
|
self.model().utterancesReady.connect(self.finalize_set_new_file)
|
585
613
|
self.viewChanged.connect(self.load_audio_selection)
|
586
614
|
self.model().selectionRequested.connect(self.update_selected_utterances)
|
615
|
+
self.view_change_timer = QtCore.QTimer()
|
616
|
+
self.view_change_timer.setInterval(50)
|
617
|
+
self.view_change_timer.timeout.connect(self.send_selection_update)
|
587
618
|
|
588
619
|
def selected_utterances(self):
|
589
620
|
utts = []
|
@@ -706,20 +737,21 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
706
737
|
self.waveformReady.emit()
|
707
738
|
|
708
739
|
def select_audio(self, begin, end):
|
709
|
-
if end is not None and end - begin < 0.
|
740
|
+
if end is not None and end - begin < 0.05:
|
710
741
|
end = None
|
711
742
|
self.selected_min_time = begin
|
712
743
|
self.selected_max_time = end
|
713
|
-
self.
|
744
|
+
if self.selected_min_time != self.min_time:
|
745
|
+
self.selectionAudioChanged.emit(False)
|
714
746
|
|
715
|
-
def request_start_time(self, start_time):
|
747
|
+
def request_start_time(self, start_time, update=False):
|
716
748
|
if start_time >= self.max_time:
|
717
749
|
return
|
718
750
|
if start_time < self.min_time:
|
719
751
|
return
|
720
752
|
self.selected_min_time = start_time
|
721
753
|
self.selected_max_time = None
|
722
|
-
self.selectionAudioChanged.emit()
|
754
|
+
self.selectionAudioChanged.emit(update)
|
723
755
|
|
724
756
|
def set_current_channel(self, channel):
|
725
757
|
if channel == self.selected_channel:
|
@@ -789,7 +821,21 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
789
821
|
|
790
822
|
def zoom_to_selection(self):
|
791
823
|
if self.selected_min_time is not None and self.selected_max_time is not None:
|
792
|
-
self.
|
824
|
+
begin = self.selected_min_time
|
825
|
+
end = self.selected_max_time
|
826
|
+
elif len(self.selectedRows(0)) > 0:
|
827
|
+
m = self.model()
|
828
|
+
begin = 100000
|
829
|
+
end = 0
|
830
|
+
for index in self.selectedRows(0):
|
831
|
+
u = m.utterances[index.row()]
|
832
|
+
if u.begin < begin:
|
833
|
+
begin = u.begin
|
834
|
+
if u.end > end:
|
835
|
+
end = u.end
|
836
|
+
else:
|
837
|
+
return
|
838
|
+
self.set_view_times(begin, end)
|
793
839
|
|
794
840
|
def update_from_slider(self, value):
|
795
841
|
if not self.max_time:
|
@@ -797,15 +843,6 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
797
843
|
cur_window = self.max_time - self.min_time
|
798
844
|
self.set_view_times(value, value + cur_window)
|
799
845
|
|
800
|
-
def update_selection_audio(self, begin, end):
|
801
|
-
if begin < self.min_time:
|
802
|
-
begin = self.min_time
|
803
|
-
if end > self.max_time:
|
804
|
-
end = self.max_time
|
805
|
-
self.selected_min_time = begin
|
806
|
-
self.selected_max_time = end
|
807
|
-
self.selectionAudioChanged.emit()
|
808
|
-
|
809
846
|
def visible_utterances(self) -> typing.List[Utterance]:
|
810
847
|
file_utts = []
|
811
848
|
if not self.model().file:
|
@@ -841,10 +878,13 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
|
|
841
878
|
and not self.min_time <= self.selected_max_time <= self.max_time
|
842
879
|
):
|
843
880
|
self.selected_max_time = None
|
881
|
+
self.view_change_timer.start()
|
882
|
+
|
883
|
+
def send_selection_update(self):
|
884
|
+
self.view_change_timer.stop()
|
844
885
|
self.viewChanged.emit(self.min_time, self.max_time)
|
845
886
|
|
846
|
-
def set_current_file(self,
|
847
|
-
file_id, begin, end, utterance_id, speaker_id = info
|
887
|
+
def set_current_file(self, file_id, begin, end, utterance_id, speaker_id, force_update=False):
|
848
888
|
try:
|
849
889
|
new_file = self.model().file is None or self.model().file.id != file_id
|
850
890
|
except sqlalchemy.orm.exc.DetachedInstanceError:
|
@@ -926,7 +966,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
|
|
926
966
|
channelChanged = QtCore.Signal()
|
927
967
|
resetView = QtCore.Signal()
|
928
968
|
fileAboutToChange = QtCore.Signal()
|
929
|
-
fileViewRequested = QtCore.Signal(object)
|
969
|
+
fileViewRequested = QtCore.Signal(object, object, object, object, object)
|
930
970
|
selectionAudioChanged = QtCore.Signal()
|
931
971
|
currentTimeChanged = QtCore.Signal(object)
|
932
972
|
currentUtteranceChanged = QtCore.Signal()
|
@@ -947,7 +987,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
|
|
947
987
|
self.currentRowChanged.connect(self.switch_utterance)
|
948
988
|
# self.selectionChanged.connect(self.update_selection_audio)
|
949
989
|
# self.selectionChanged.connect(self.update_selection_audio)
|
950
|
-
self.model().newResults.connect(self.check_selection)
|
990
|
+
# self.model().newResults.connect(self.check_selection)
|
951
991
|
self.model().unlockCorpus.connect(self.fileChanged.emit)
|
952
992
|
|
953
993
|
def set_current_utterance(self, utterance_id):
|
@@ -1023,7 +1063,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
|
|
1023
1063
|
if focus:
|
1024
1064
|
flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
|
1025
1065
|
if row == self.currentIndex().row():
|
1026
|
-
self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
|
1066
|
+
self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
|
1027
1067
|
|
1028
1068
|
index = self.model().index(row, 0)
|
1029
1069
|
if not index.isValid():
|
@@ -1082,7 +1122,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
|
|
1082
1122
|
return
|
1083
1123
|
self.current_utterance_id = utt
|
1084
1124
|
self.currentUtteranceChanged.emit()
|
1085
|
-
self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
|
1125
|
+
self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
|
1086
1126
|
|
1087
1127
|
def model(self) -> CorpusModel:
|
1088
1128
|
return super().model()
|
@@ -1099,7 +1139,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
|
|
1099
1139
|
return
|
1100
1140
|
self.current_utterance_id = utt_id
|
1101
1141
|
self.currentUtteranceChanged.emit()
|
1102
|
-
self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
|
1142
|
+
self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))
|
1103
1143
|
|
1104
1144
|
|
1105
1145
|
class OovModel(TableModel):
|
@@ -1266,7 +1306,7 @@ class DictionaryTableModel(TableModel):
|
|
1266
1306
|
return True
|
1267
1307
|
return False
|
1268
1308
|
|
1269
|
-
def add_word(self, word, word_id):
|
1309
|
+
def add_word(self, word, word_id=None):
|
1270
1310
|
self.requestLookup.emit(word)
|
1271
1311
|
self.add_pronunciation(word, word_id)
|
1272
1312
|
|
@@ -1697,10 +1737,10 @@ class DiarizationModel(TableModel):
|
|
1697
1737
|
super().__init__(columns, parent=parent)
|
1698
1738
|
self.settings = AnchorSettings()
|
1699
1739
|
self.speaker_count = None
|
1700
|
-
self.
|
1701
|
-
self.
|
1702
|
-
self.
|
1703
|
-
self.
|
1740
|
+
self.utterance_ids = []
|
1741
|
+
self.file_ids = []
|
1742
|
+
self.speaker_indices = []
|
1743
|
+
self.suggested_indices = []
|
1704
1744
|
self.corpus_model: Optional[CorpusModel] = None
|
1705
1745
|
self.set_limit(self.settings.value(self.settings.RESULTS_PER_PAGE))
|
1706
1746
|
self.speaker_filter = None
|
@@ -1728,11 +1768,6 @@ class DiarizationModel(TableModel):
|
|
1728
1768
|
return self._data[index.row()][index.column()]
|
1729
1769
|
return super().data(index, role)
|
1730
1770
|
|
1731
|
-
def utterance_id_at(self, row: int):
|
1732
|
-
if row is None:
|
1733
|
-
return None
|
1734
|
-
return self._utterance_ids[row]
|
1735
|
-
|
1736
1771
|
def set_threshold(self, threshold: float):
|
1737
1772
|
if threshold != self.threshold:
|
1738
1773
|
self.current_offset = 0
|
@@ -1792,32 +1827,32 @@ class DiarizationModel(TableModel):
|
|
1792
1827
|
self.alternate_speaker_filter = current_speaker.id
|
1793
1828
|
|
1794
1829
|
def reassign_utterance(self, row: int):
|
1795
|
-
utterance_id = self.
|
1830
|
+
utterance_id = self.utterance_ids[row]
|
1796
1831
|
if utterance_id is None:
|
1797
1832
|
return
|
1798
|
-
self.changeUtteranceSpeakerRequested.emit(utterance_id, self.
|
1833
|
+
self.changeUtteranceSpeakerRequested.emit(utterance_id, self.suggested_indices[row])
|
1799
1834
|
self.layoutAboutToBeChanged.emit()
|
1800
1835
|
self._data.pop(row)
|
1801
|
-
self.
|
1802
|
-
self.
|
1803
|
-
self.
|
1836
|
+
self.utterance_ids.pop(row)
|
1837
|
+
self.suggested_indices.pop(row)
|
1838
|
+
self.speaker_indices.pop(row)
|
1804
1839
|
|
1805
1840
|
self.layoutChanged.emit()
|
1806
1841
|
|
1807
1842
|
def merge_speakers(self, row: int):
|
1808
|
-
speaker_id = self.
|
1843
|
+
speaker_id = self.speaker_indices[row]
|
1809
1844
|
if self.inverted:
|
1810
|
-
utterance_id = self.
|
1845
|
+
utterance_id = self.utterance_ids[row]
|
1811
1846
|
self.corpus_model.addCommand.emit(
|
1812
1847
|
undo.ChangeSpeakerCommand([utterance_id], speaker_id, 0, self)
|
1813
1848
|
)
|
1814
1849
|
else:
|
1815
|
-
self.corpus_model.merge_speakers([self.
|
1850
|
+
self.corpus_model.merge_speakers([self.suggested_indices[row], speaker_id])
|
1816
1851
|
self.layoutAboutToBeChanged.emit()
|
1817
1852
|
self._data.pop(row)
|
1818
|
-
self.
|
1819
|
-
self.
|
1820
|
-
self.
|
1853
|
+
self.utterance_ids.pop(row)
|
1854
|
+
self.suggested_indices.pop(row)
|
1855
|
+
self.speaker_indices.pop(row)
|
1821
1856
|
|
1822
1857
|
self.layoutChanged.emit()
|
1823
1858
|
|
@@ -1828,17 +1863,16 @@ class DiarizationModel(TableModel):
|
|
1828
1863
|
def finish_update_data(self, result, *args, **kwargs):
|
1829
1864
|
self.layoutAboutToBeChanged.emit()
|
1830
1865
|
if result is None:
|
1831
|
-
self._data
|
1832
|
-
|
1833
|
-
|
1834
|
-
|
1835
|
-
)
|
1866
|
+
self._data = []
|
1867
|
+
self.utterance_ids = []
|
1868
|
+
self.suggested_indices = []
|
1869
|
+
self.speaker_indices = []
|
1836
1870
|
else:
|
1837
1871
|
(
|
1838
1872
|
self._data,
|
1839
|
-
self.
|
1840
|
-
self.
|
1841
|
-
self.
|
1873
|
+
self.utterance_ids,
|
1874
|
+
self.suggested_indices,
|
1875
|
+
self.speaker_indices,
|
1842
1876
|
) = result
|
1843
1877
|
self.layoutChanged.emit()
|
1844
1878
|
self.newResults.emit()
|