Anchor-annotator 0.3.3-py3-none-any.whl → 0.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/METADATA +1 -1
- Anchor_annotator-0.4.0.dist-info/RECORD +22 -0
- anchor/_version.py +2 -2
- anchor/main.py +4 -2
- anchor/models.py +59 -41
- anchor/plot.py +23 -7
- anchor/settings.py +18 -3
- anchor/undo.py +15 -4
- anchor/widgets.py +24 -40
- anchor/workers.py +2 -0
- Anchor_annotator-0.3.3.dist-info/RECORD +0 -22
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/WHEEL +0 -0
- {Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/top_level.txt +0 -0
{Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: Anchor_annotator
-Version: 0.3.3
+Version: 0.4.0
 Summary: Anchor annotator is a program for inspecting corpora for the Montreal Forced Aligner and correcting transcriptions and pronunciations.
 Home-page: https://github.com/MontrealCorpusTools/Anchor-annotator
 Author: Montreal Corpus Tools
Anchor_annotator-0.4.0.dist-info/RECORD
ADDED
@@ -0,0 +1,22 @@
+anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
+anchor/_version.py,sha256=j90u3VVU4UrJf1fgMUhaZarHK_Do2XGYXr-vZvOFzVo,411
+anchor/command_line.py,sha256=EucG805HyWk_zkMO9RXv9Yj0I0JVdDLZb1_DX2_ISjM,503
+anchor/db.py,sha256=ef4lO6HtCKoxC9CorIc0ZbPxKpjHa576a0ZIBOWNU9E,4956
+anchor/main.py,sha256=cZjj_PbAC2CPDneEy8HGNfH7F1hZpQexevFjBev9YxE,120664
+anchor/models.py,sha256=Uaz_IobsG6aPDH9xfZYwN8bBDzc7U-rcbgm0jqihyd4,95763
+anchor/plot.py,sha256=fUIVvSV7MIvV1HyNo5eZmi1PKun0WFDrnSXHkJD70zA,105668
+anchor/resources_rc.py,sha256=94wgxDTpP4Oy55Br7CZ_YnmvaqzHr4n-AydBPhZc-es,8427242
+anchor/settings.py,sha256=OdJQl54rhQ-JmsDiWIULxMFZZatM4arZ37hnmkk_VM4,47583
+anchor/ui_corpus_manager.py,sha256=e3ybOd4UdYarrLBATxI8vIFnioa4R_BHrbsEz5mJ5eA,8564
+anchor/ui_error_dialog.py,sha256=c_QS0s1VaJEV9AhcrQZQyWHHpUPudWjJY1NI7Ytipio,3832
+anchor/ui_main_window.py,sha256=MYb4PtV1sHYgnc3QwPphKjU3LepzBJpxllhN4nyDook,63525
+anchor/ui_preferences.py,sha256=MOC2dY4qkViW9cUbC0DVSO7FLg-dGSbmR630WFQ6V9c,41843
+anchor/undo.py,sha256=FrzTz9hSUXV6jFJ7EUurxY5NmftQ5NWhtVzzYuVmcRo,32959
+anchor/widgets.py,sha256=Lw2y9bymDiu01eGqLR0M8CSjXYV5-e037XqRQiX7Wn8,157619
+anchor/workers.py,sha256=SUrafStLUrdhi5b3QhkRYKdFghasDc8lxsUCZOF_FRg,171159
+Anchor_annotator-0.4.0.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
+Anchor_annotator-0.4.0.dist-info/METADATA,sha256=EMWnDUTa3Di2cpH4RiP6buwD67Nj-m5vdkkstIFf8M8,1500
+Anchor_annotator-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+Anchor_annotator-0.4.0.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
+Anchor_annotator-0.4.0.dist-info/RECORD,,
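Each RECORD row above is the standard wheel manifest triple: file path, sha256= digest encoded as unpadded URL-safe base64, and file size in bytes. A minimal sketch of how such a row is produced, using only the Python standard library (the helper name is illustrative, not part of the package):

import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    # Build a RECORD-style line: <path>,sha256=<urlsafe-b64 digest>,<size in bytes>
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode('ascii')},{len(data)}"


# record_entry("anchor/_version.py") yields a line shaped like the entries above.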
anchor/_version.py
CHANGED
anchor/main.py
CHANGED
@@ -540,14 +540,14 @@ class MainWindow(QtWidgets.QMainWindow):
             self.dictionary_model,
         )
         self.ui.speakerWidget.set_models(
-            self.corpus_model, self.
+            self.corpus_model, self.file_selection_model, self.speaker_model
         )
         self.ui.transcriptionWidget.set_models(self.corpus_model, self.dictionary_model)
         self.ui.alignmentWidget.set_models(self.corpus_model)
         self.ui.acousticModelWidget.set_models(self.corpus_model)
         self.ui.languageModelWidget.set_models(self.corpus_model)
         self.ui.dictionaryWidget.set_models(self.dictionary_model)
-        self.ui.diarizationWidget.set_models(self.diarization_model, self.
+        self.ui.diarizationWidget.set_models(self.diarization_model, self.file_selection_model)
         self.ui.oovWidget.set_models(self.oov_model)
         self.file_selection_model.currentUtteranceChanged.connect(self.change_utterance)
         self.selection_model.fileViewRequested.connect(self.file_selection_model.set_current_file)
@@ -1385,6 +1385,8 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.languageModelMenu.setEnabled(True)

     def finalize_load_g2p_model(self, generator: PyniniValidator):
+        if generator is None:
+            return
         self.dictionary_model.set_g2p_generator(generator)
         self.corpus_model.g2p_model = generator.g2p_model
         self.check_actions()
anchor/models.py
CHANGED
@@ -24,6 +24,12 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
 )
 from montreal_forced_aligner.data import PhoneType, WordType
 from montreal_forced_aligner.db import File, Phone, Speaker, Utterance
+from montreal_forced_aligner.dictionary.mixins import (
+    DEFAULT_CLITIC_MARKERS,
+    DEFAULT_COMPOUND_MARKERS,
+    DEFAULT_PUNCTUATION,
+    DEFAULT_WORD_BREAK_MARKERS,
+)
 from montreal_forced_aligner.g2p.generator import PyniniValidator
 from montreal_forced_aligner.models import (
     AcousticModel,
@@ -45,6 +51,23 @@ if typing.TYPE_CHECKING:
 logger = logging.getLogger("anchor")


+WORD_BREAK_SET = "".join(
+    sorted(
+        set(
+            DEFAULT_WORD_BREAK_MARKERS
+            + DEFAULT_PUNCTUATION
+            + DEFAULT_CLITIC_MARKERS
+            + DEFAULT_COMPOUND_MARKERS
+        )
+    )
+)
+
+if "-" in WORD_BREAK_SET:
+    WORD_BREAK_SET = "" + WORD_BREAK_SET.replace("-", "")
+
+WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"
+
+
 # noinspection PyUnresolvedReferences
 @dataclass(slots=True)
 class TextFilterQuery:
@@ -76,9 +99,9 @@ class TextFilterQuery:
         if posix:
             text = text.replace(r"\b", word_break_set)
         if text.startswith(r"\b"):
-            text =
+            text = rf"((?<={WORD_BREAK_REGEX_SET})|(?<=^))" + text[2:]
         if text.endswith(r"\b"):
-            text = text[:-2] +
+            text = text[:-2] + rf"((?={WORD_BREAK_REGEX_SET})|(?=$))"
         if self.regex or self.word:
             if not self.case_sensitive:
                 text = "(?i)" + text
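The two hunks above replace Python's \b in user queries with lookarounds built from WORD_BREAK_REGEX_SET, so that MFA's punctuation, clitic, and compound markers count as word boundaries. A minimal sketch of the same idea, assuming an illustrative marker set rather than the real MFA constants:

import re

# Illustrative stand-in for the set assembled from the MFA defaults above.
WORD_BREAK_SET = ".?!,;:'"
WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"


def word_query(term: str) -> re.Pattern:
    # Mirror TextFilterQuery: wrap the term in custom word-boundary lookarounds.
    pattern = rf"((?<={WORD_BREAK_REGEX_SET})|(?<=^))" + re.escape(term)
    pattern += rf"((?={WORD_BREAK_REGEX_SET})|(?=$))"
    return re.compile(pattern)


print(bool(word_query("cat").search("the cat, sat")))  # True: space and comma are boundaries
print(bool(word_query("cat").search("concatenate")))   # False: no boundary on either side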
@@ -545,7 +568,6 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
     resetView = QtCore.Signal()
     viewChanged = QtCore.Signal(object, object)
     selectionAudioChanged = QtCore.Signal()
-    currentTimeChanged = QtCore.Signal(object)
     currentUtteranceChanged = QtCore.Signal()
     speakerRequested = QtCore.Signal(object)

@@ -710,7 +732,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             end = None
         self.selected_min_time = begin
         self.selected_max_time = end
-        self.
+        if self.selected_min_time != self.min_time:
+            self.selectionAudioChanged.emit()

     def request_start_time(self, start_time):
         if start_time >= self.max_time:
@@ -719,7 +742,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             return
         self.selected_min_time = start_time
         self.selected_max_time = None
-        self.
+        if self.selected_min_time != self.min_time:
+            self.selectionAudioChanged.emit()

     def set_current_channel(self, channel):
         if channel == self.selected_channel:
@@ -804,7 +828,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             end = self.max_time
         self.selected_min_time = begin
         self.selected_max_time = end
-        self.
+        if self.selected_min_time != self.min_time:
+            self.selectionAudioChanged.emit()

     def visible_utterances(self) -> typing.List[Utterance]:
         file_utts = []
@@ -843,8 +868,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
         self.selected_max_time = None
         self.viewChanged.emit(self.min_time, self.max_time)

-    def set_current_file(self,
-        file_id, begin, end, utterance_id, speaker_id = info
+    def set_current_file(self, file_id, begin, end, utterance_id, speaker_id, force_update=False):
         try:
             new_file = self.model().file is None or self.model().file.id != file_id
         except sqlalchemy.orm.exc.DetachedInstanceError:
@@ -926,7 +950,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
     channelChanged = QtCore.Signal()
     resetView = QtCore.Signal()
     fileAboutToChange = QtCore.Signal()
-    fileViewRequested = QtCore.Signal(object)
+    fileViewRequested = QtCore.Signal(object, object, object, object, object)
     selectionAudioChanged = QtCore.Signal()
     currentTimeChanged = QtCore.Signal(object)
     currentUtteranceChanged = QtCore.Signal()
@@ -1023,7 +1047,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         if focus:
             flags |= QtCore.QItemSelectionModel.SelectionFlag.Current
         if row == self.currentIndex().row():
-            self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
+            self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))

         index = self.model().index(row, 0)
         if not index.isValid():
@@ -1082,7 +1106,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             return
         self.current_utterance_id = utt
         self.currentUtteranceChanged.emit()
-        self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
+        self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))

     def model(self) -> CorpusModel:
         return super().model()
@@ -1099,7 +1123,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
             return
         self.current_utterance_id = utt_id
         self.currentUtteranceChanged.emit()
-        self.fileViewRequested.emit(self.model().audio_info_for_utterance(row))
+        self.fileViewRequested.emit(*self.model().audio_info_for_utterance(row))


 class OovModel(TableModel):
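fileViewRequested now declares five separate object arguments instead of one packed tuple, and the emit sites unpack audio_info_for_utterance so the connected slot (FileSelectionModel.set_current_file above) receives file_id, begin, end, utterance_id, and speaker_id positionally. A standalone sketch of that pattern, assuming PySide6 and illustrative class and value names:

from PySide6 import QtCore


class SelectionSignals(QtCore.QObject):
    # Five loosely typed arguments, matching the widened signal signature above.
    fileViewRequested = QtCore.Signal(object, object, object, object, object)


def set_current_file(file_id, begin, end, utterance_id, speaker_id, force_update=False):
    print(file_id, begin, end, utterance_id, speaker_id, force_update)


app = QtCore.QCoreApplication([])
signals = SelectionSignals()
signals.fileViewRequested.connect(set_current_file)

audio_info = (3, 0.0, 2.5, 42, 7)  # file_id, begin, end, utterance_id, speaker_id
signals.fileViewRequested.emit(*audio_info)  # unpack rather than passing the tuple itself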
@@ -1266,7 +1290,7 @@ class DictionaryTableModel(TableModel):
             return True
         return False

-    def add_word(self, word, word_id):
+    def add_word(self, word, word_id=None):
         self.requestLookup.emit(word)
         self.add_pronunciation(word, word_id)

@@ -1697,10 +1721,10 @@ class DiarizationModel(TableModel):
         super().__init__(columns, parent=parent)
         self.settings = AnchorSettings()
         self.speaker_count = None
-        self.
-        self.
-        self.
-        self.
+        self.utterance_ids = []
+        self.file_ids = []
+        self.speaker_indices = []
+        self.suggested_indices = []
         self.corpus_model: Optional[CorpusModel] = None
         self.set_limit(self.settings.value(self.settings.RESULTS_PER_PAGE))
         self.speaker_filter = None
@@ -1728,11 +1752,6 @@ class DiarizationModel(TableModel):
             return self._data[index.row()][index.column()]
         return super().data(index, role)

-    def utterance_id_at(self, row: int):
-        if row is None:
-            return None
-        return self._utterance_ids[row]
-
     def set_threshold(self, threshold: float):
         if threshold != self.threshold:
             self.current_offset = 0
@@ -1792,32 +1811,32 @@ class DiarizationModel(TableModel):
         self.alternate_speaker_filter = current_speaker.id

     def reassign_utterance(self, row: int):
-        utterance_id = self.
+        utterance_id = self.utterance_ids[row]
         if utterance_id is None:
             return
-        self.changeUtteranceSpeakerRequested.emit(utterance_id, self.
+        self.changeUtteranceSpeakerRequested.emit(utterance_id, self.suggested_indices[row])
         self.layoutAboutToBeChanged.emit()
         self._data.pop(row)
-        self.
-        self.
-        self.
+        self.utterance_ids.pop(row)
+        self.suggested_indices.pop(row)
+        self.speaker_indices.pop(row)

         self.layoutChanged.emit()

     def merge_speakers(self, row: int):
-        speaker_id = self.
+        speaker_id = self.speaker_indices[row]
         if self.inverted:
-            utterance_id = self.
+            utterance_id = self.utterance_ids[row]
             self.corpus_model.addCommand.emit(
                 undo.ChangeSpeakerCommand([utterance_id], speaker_id, 0, self)
             )
         else:
-            self.corpus_model.merge_speakers([self.
+            self.corpus_model.merge_speakers([self.suggested_indices[row], speaker_id])
         self.layoutAboutToBeChanged.emit()
         self._data.pop(row)
-        self.
-        self.
-        self.
+        self.utterance_ids.pop(row)
+        self.suggested_indices.pop(row)
+        self.speaker_indices.pop(row)

         self.layoutChanged.emit()

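reassign_utterance and merge_speakers keep the row data and the renamed id lists (utterance_ids, suggested_indices, speaker_indices) in lockstep, popping the same index from each between the layout-change signals. A condensed sketch of that bookkeeping with illustrative data; beginRemoveRows/endRemoveRows would be the more targeted Qt idiom for a single-row removal, but the layout signals mirror what this model does:

from PySide6 import QtCore


class ParallelListModel(QtCore.QAbstractTableModel):
    # Minimal model mirroring DiarizationModel's parallel-list bookkeeping.

    def __init__(self):
        super().__init__()
        self._data = [["utt 1", "spk A"], ["utt 2", "spk B"]]
        self.utterance_ids = [101, 102]
        self.suggested_indices = [11, 12]
        self.speaker_indices = [21, 22]

    def rowCount(self, parent=QtCore.QModelIndex()):
        return len(self._data)

    def columnCount(self, parent=QtCore.QModelIndex()):
        return 2

    def data(self, index, role=QtCore.Qt.ItemDataRole.DisplayRole):
        if role == QtCore.Qt.ItemDataRole.DisplayRole:
            return self._data[index.row()][index.column()]
        return None

    def drop_row(self, row: int):
        # Pop the same index from every parallel list so they stay aligned;
        # attached views re-query the model after layoutChanged.
        self.layoutAboutToBeChanged.emit()
        self._data.pop(row)
        self.utterance_ids.pop(row)
        self.suggested_indices.pop(row)
        self.speaker_indices.pop(row)
        self.layoutChanged.emit()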
@@ -1828,17 +1847,16 @@ class DiarizationModel(TableModel):
     def finish_update_data(self, result, *args, **kwargs):
         self.layoutAboutToBeChanged.emit()
         if result is None:
-            self._data
-
-
-
-            )
+            self._data = []
+            self.utterance_ids = []
+            self.suggested_indices = []
+            self.speaker_indices = []
         else:
             (
                 self._data,
-                self.
-                self.
-                self.
+                self.utterance_ids,
+                self.suggested_indices,
+                self.speaker_indices,
             ) = result
         self.layoutChanged.emit()
         self.newResults.emit()
anchor/plot.py
CHANGED
@@ -13,6 +13,11 @@ import sqlalchemy
 from Bio import pairwise2
 from montreal_forced_aligner.data import CtmInterval
 from montreal_forced_aligner.db import Speaker, Utterance
+from montreal_forced_aligner.dictionary.mixins import (
+    DEFAULT_PUNCTUATION,
+    DEFAULT_WORD_BREAK_MARKERS,
+)
+from montreal_forced_aligner.tokenization.simple import SimpleTokenizer
 from PySide6 import QtCore, QtGui, QtWidgets

 from anchor import workers
@@ -161,7 +166,7 @@ class UtteranceClusterView(pg.PlotWidget):
         self.setBackground(self.settings.value(self.settings.PRIMARY_VERY_DARK_COLOR))
         self.corpus_model = None
         self.speaker_model: SpeakerModel = None
-        self.selection_model:
+        self.selection_model: FileSelectionModel = None
         self.updated_indices = set()
         self.brushes = {-1: pg.mkBrush(0.5)}
         self.scatter_item = ScatterPlot()
@@ -226,7 +231,7 @@ class UtteranceClusterView(pg.PlotWidget):
     def set_models(
         self,
         corpus_model: CorpusModel,
-        selection_model:
+        selection_model: FileSelectionModel,
         speaker_model: SpeakerModel,
     ):
         self.corpus_model = corpus_model
@@ -248,13 +253,12 @@ class UtteranceClusterView(pg.PlotWidget):
         if ev.button() == QtCore.Qt.MouseButton.LeftButton:
             utterance_id = int(self.speaker_model.utterance_ids[index])
             utterance = self.corpus_model.session.query(Utterance).get(utterance_id)
-            self.selection_model.set_current_utterance(utterance_id)
-            self.selection_model.current_utterance_id = utterance_id
            self.selection_model.set_current_file(
                 utterance.file_id,
                 utterance.begin,
                 utterance.end,
-                utterance.
+                utterance.id,
+                utterance.speaker_id,
                 force_update=True,
             )
         else:
@@ -1395,7 +1399,7 @@ class NormalizedTextRegion(TextAttributeRegion):


 class Highlighter(QtGui.QSyntaxHighlighter):
-    WORDS =
+    WORDS = rf"[^\s{''.join(DEFAULT_WORD_BREAK_MARKERS)+''.join(DEFAULT_PUNCTUATION)}]+"

     def __init__(self, *args):
         super(Highlighter, self).__init__(*args)
@@ -1424,8 +1428,20 @@ class Highlighter(QtGui.QSyntaxHighlighter):
     def highlightBlock(self, text):
         self.settings.sync()
         self.spellcheck_format.setUnderlineColor(self.settings.error_color)
+        tokenizers = self.dictionary_model.corpus_model.corpus.get_tokenizers()
+        dictionary_id = self.dictionary_model.corpus_model.corpus.get_dict_id_for_speaker(
+            self.speaker_id
+        )
+        words = self.WORDS
+        if isinstance(tokenizers, dict) and dictionary_id is not None:
+            tokenizer = self.dictionary_model.corpus_model.corpus.get_tokenizer(dictionary_id)
+        else:
+            tokenizer = tokenizers
+        if isinstance(tokenizer, SimpleTokenizer):
+            extra_symbols = "".join(tokenizer.punctuation) + "".join(tokenizer.word_break_markers)
+            words = rf"[^\s{extra_symbols}]+"
         if self.dictionary_model is not None and self.dictionary_model.word_sets:
-            for word_object in re.finditer(
+            for word_object in re.finditer(words, text):
                 if not self.dictionary_model.check_word(word_object.group(), self.speaker_id):
                     self.setFormat(
                         word_object.start(),
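highlightBlock now derives its word pattern from the corpus tokenizer's punctuation and word-break markers rather than the fixed WORDS constant, so spell-check highlighting splits text the same way the tokenizer does. A minimal sketch of that loop with an illustrative symbol set and word list (re.escape is used here to keep characters such as ] safe inside the character class):

import re

# Illustrative stand-ins for tokenizer.punctuation and tokenizer.word_break_markers.
extra_symbols = re.escape(".,;:!?'\"()-")
words = rf"[^\s{extra_symbols}]+"

known_words = {"the", "quick", "fox"}
text = "the qiuck fox, jumps"

for word_object in re.finditer(words, text):
    if word_object.group().lower() not in known_words:
        # Highlighter.setFormat would underline this span; here it is just reported.
        print(word_object.start(), word_object.group())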
anchor/settings.py
CHANGED
@@ -855,12 +855,27 @@ class AnchorSettings(QtCore.QSettings):
                 margin: 0 -2px; /* expand outside the groove */
             }}
             QTableWidget, QTableView, QTreeView, QTreeWidget {{
-
+                border: 4px solid {enabled_color};
+            }}
+            QTableView::item, QTreeWidget::item {{
+                background-color: {table_odd_color};
+                color: {table_text_color};
                 selection-background-color: {selection_color};
                 selection-color: {text_edit_color};
-
+            }}
+            QTableView::item:selected, QTreeWidget::item:selected {{
+                background-color: {selection_color};
+                color: {text_edit_color};
+            }}
+            QTableView::item:alternate, QTreeWidget::item:alternate {{
+                background-color: {table_even_color};
                 color: {table_text_color};
-
+                selection-background-color: {selection_color};
+                selection-color: {text_edit_color};
+            }}
+            QTableView::item:alternate:selected, QTreeWidget::item:alternate:selected {{
+                background-color: {selection_color};
+                color: {text_edit_color};
             }}
             QTreeView QLabel, QTreeWidget QLabel{{
                 color: {table_text_color};
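The expanded stylesheet spells out background and text colors for every item state, including alternating and selected rows, instead of relying on widget-level defaults. A minimal sketch applying rules of the same shape to a plain QTableWidget, with placeholder colors (alternatingRowColors must be enabled for the :alternate selector to apply):

from PySide6 import QtWidgets

app = QtWidgets.QApplication([])

table = QtWidgets.QTableWidget(4, 2)
table.setAlternatingRowColors(True)
table.setStyleSheet(
    """
    QTableView::item { background-color: #2b2b2b; color: #e0e0e0; }
    QTableView::item:selected { background-color: #4455aa; color: #ffffff; }
    QTableView::item:alternate { background-color: #333333; color: #e0e0e0; }
    QTableView::item:alternate:selected { background-color: #4455aa; color: #ffffff; }
    """
)
table.show()
# app.exec()  # uncomment to start the event loop and see the styling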
anchor/undo.py
CHANGED
@@ -39,8 +39,11 @@ class CorpusCommand(QtGui.QUndoCommand):

     def redo(self) -> None:
         with self.corpus_model.edit_lock:
-
-
+            try:
+                self._redo(self.corpus_model.session)
+                self.corpus_model.session.commit()
+            except Exception:
+                self.corpus_model.session.rollback()
         # while True:
         #     try:
         #         with self.corpus_model.session.begin_nested():
@@ -53,8 +56,11 @@ class CorpusCommand(QtGui.QUndoCommand):

     def undo(self) -> None:
         with self.corpus_model.edit_lock:
-
-
+            try:
+                self._undo(self.corpus_model.session)
+                self.corpus_model.session.commit()
+            except Exception:
+                self.corpus_model.session.rollback()
         # while True:
         #     try:
         #         with self.corpus_model.session.begin_nested():
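redo and undo now commit the session after each _redo/_undo call and roll back if anything raises, so a failed edit no longer leaves the shared SQLAlchemy session in a broken state for later commands. A standalone sketch of the same pattern with plain SQLAlchemy and an illustrative command class (no Qt involved):

from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session


class ExampleCommand:
    # Mirrors CorpusCommand.redo: mutate, commit, roll back on any failure.

    def __init__(self, session: Session):
        self.session = session

    def _redo(self, session: Session) -> None:
        session.execute(text("UPDATE utterance SET text = 'fixed' WHERE id = 1"))

    def redo(self) -> None:
        try:
            self._redo(self.session)
            self.session.commit()
        except Exception:
            # Keep the session usable for the next command instead of failing hard.
            self.session.rollback()


engine = create_engine("sqlite:///:memory:")
with Session(engine) as session:
    session.execute(text("CREATE TABLE utterance (id INTEGER PRIMARY KEY, text TEXT)"))
    session.execute(text("INSERT INTO utterance (id, text) VALUES (1, 'broken')"))
    ExampleCommand(session).redo()
    print(session.execute(text("SELECT text FROM utterance WHERE id = 1")).scalar_one())  # fixed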
@@ -453,6 +459,11 @@ class UpdateUtteranceTextCommand(FileCommand):
         self.new_text = other.new_text
         return True

+    def update_data(self):
+        super().update_data()
+        self.corpus_model.changeCommandFired.emit()
+        self.corpus_model.update_utterance_table_row(self.utterance)
+

 class ReplaceAllCommand(CorpusCommand):
     def __init__(
anchor/widgets.py
CHANGED
@@ -74,10 +74,7 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
         self.start_load_time = None
         self.min_time = None
         self.selection_model = None
-        self.
-        self.timer.setInterval(1)
-        self.timer.timeout.connect(self.checkStop)
-        # self.positionChanged.connect(self.checkStop)
+        self.positionChanged.connect(self.checkStop)
         # self.positionChanged.connect(self.positionDebug)
         self.errorOccurred.connect(self.handle_error)
         o = None
@@ -89,7 +86,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
         self._audio_output.setDevice(self.devices.defaultAudioOutput())
         self.setAudioOutput(self._audio_output)
         self.playbackStateChanged.connect(self.reset_position)
-        self.set_volume(self.settings.value(self.settings.VOLUME))
         self.fade_in_anim = QtCore.QPropertyAnimation(self._audio_output, b"volume")
         self.fade_in_anim.setDuration(10)
         self.fade_in_anim.setStartValue(0.1)
@@ -105,6 +101,7 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
         self.fade_out_anim.setKeyValueAt(0.1, self._audio_output.volume())
         self.fade_out_anim.finished.connect(super().pause)
         self.file_path = None
+        self.set_volume(self.settings.value(self.settings.VOLUME))

     def setMuted(self, muted: bool):
         self.audioOutput().setMuted(muted)
@@ -125,7 +122,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
             or self.currentTime() >= self.maxTime()
         ):
             self.setCurrentTime(self.startTime())
-        self.timer.start()
         super(MediaPlayer, self).play()
         if fade_in:
             self.fade_in_anim.start()
@@ -153,14 +149,11 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
     def reset_position(self):
         state = self.playbackState()
         if state == QtMultimedia.QMediaPlayer.PlaybackState.StoppedState:
-            self.timer.stop()
             self.setCurrentTime(self.startTime())
-            self.timeChanged.emit(self.currentTime())
-        elif state == QtMultimedia.QMediaPlayer.PlaybackState.PausedState:
-            self.timer.stop()

     def update_audio_device(self):
         self._audio_output.setDevice(self.devices.defaultAudioOutput())
+        self.setAudioOutput(self._audio_output)

     def refresh_settings(self):
         self.settings.sync()
@@ -177,9 +170,9 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
         self.selection_model.fileChanged.connect(self.loadNewFile)
         self.selection_model.viewChanged.connect(self.update_times)
         self.selection_model.selectionAudioChanged.connect(self.update_selection_times)
-        self.selection_model.currentTimeChanged.connect(self.update_selection_times)

     def set_volume(self, volume: int):
+        self.settings.setValue(self.settings.VOLUME, volume)
         if self.audioOutput() is None:
             return
         linearVolume = QtMultimedia.QtAudio.convertVolume(
@@ -188,6 +181,8 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
             QtMultimedia.QtAudio.VolumeScale.LinearVolumeScale,
         )
         self.audioOutput().setVolume(linearVolume)
+        self.fade_in_anim.setEndValue(linearVolume)
+        self.fade_out_anim.setStartValue(linearVolume)

     def volume(self) -> int:
         if self.audioOutput() is None:
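set_volume now also persists the value to settings and keeps the fade animations' endpoints aligned with the converted linear volume. The visible context only shows the target LinearVolumeScale; assuming the conventional Qt mapping from a 0-100 slider value on the logarithmic scale, the conversion looks like this sketch:

from PySide6 import QtMultimedia


def slider_to_linear_volume(volume: int) -> float:
    # Map a 0-100 UI slider value onto the linear scale QAudioOutput expects.
    return QtMultimedia.QtAudio.convertVolume(
        volume / 100.0,
        QtMultimedia.QtAudio.VolumeScale.LogarithmicVolumeScale,
        QtMultimedia.QtAudio.VolumeScale.LinearVolumeScale,
    )


print(round(slider_to_linear_volume(50), 3))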
@@ -207,9 +202,12 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
         self.setCurrentTime(self.startTime())

     def update_times(self):
+        if self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
+            return
         if self.currentTime() < self.startTime() or self.currentTime() > self.maxTime():
             self.stop()
         if self.playbackState() != QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
+            self.stop()
             self.setCurrentTime(self.startTime())

     def loadNewFile(self, *args):
@@ -238,27 +236,14 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
     def setCurrentTime(self, time):
         if time is None:
             time = 0
-        if self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
-            return
         pos = int(time * 1000)
         self.setPosition(pos)
-        self.timeChanged.emit(self.currentTime())

     def checkStop(self):
-
-        self.stop()
-        self.setSource(
-            QtCore.QUrl.fromLocalFile(
-                self.selection_model.model().file.sound_file.sound_file_path
-            )
-        )
-        self.play()
-        return
+        self.timeChanged.emit(self.currentTime())
         if self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
             if self.maxTime() is None or self.currentTime() > self.maxTime():
                 self.stop()
-                self.reset_position()
-                self.timeChanged.emit(self.currentTime())


 class NewSpeakerField(QtWidgets.QLineEdit):
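Taken together, the MediaPlayer hunks drop the 1 ms polling QTimer: QMediaPlayer's own positionChanged signal now drives checkStop, which re-emits the position as timeChanged and stops playback once it passes the selection end. A trimmed sketch of that wiring, with an illustrative bound in place of the selection model:

from PySide6 import QtCore, QtMultimedia


class BoundedPlayer(QtMultimedia.QMediaPlayer):
    # Stops itself once playback passes a selection end, driven by positionChanged.

    timeChanged = QtCore.Signal(object)

    def __init__(self, max_time_seconds: float):
        super().__init__()
        self.max_time = max_time_seconds
        # No polling timer: the player reports its own position changes (in ms).
        self.positionChanged.connect(self.check_stop)

    def check_stop(self, position_ms: int):
        current = position_ms / 1000.0
        self.timeChanged.emit(current)
        if (
            self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.PlayingState
            and current > self.max_time
        ):
            self.stop()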
@@ -2546,6 +2531,7 @@ class SpeakerClustersWidget(QtWidgets.QWidget):

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        self.speaker_model = None
         self.settings = AnchorSettings()
         self.settings.sync()
         form_layout = QtWidgets.QHBoxLayout()
@@ -2640,7 +2626,7 @@ class DiarizationTable(AnchorTableView):
         self.doubleClicked.connect(self.search_utterance)
         self.clicked.connect(self.reassign_utterance)
         self.diarization_model: Optional[DiarizationModel] = None
-        self.selection_model: Optional[
+        self.selection_model: Optional[FileSelectionModel] = None
         self.set_reference_utterance_action = QtGui.QAction("Use utterance as reference", self)
         self.set_reference_utterance_action.triggered.connect(self.set_reference_utterance)
         self.setContextMenuPolicy(QtCore.Qt.ContextMenuPolicy.CustomContextMenu)
@@ -2648,7 +2634,7 @@ class DiarizationTable(AnchorTableView):

     def generate_context_menu(self, location):
         menu = QtWidgets.QMenu()
-
+        menu.setStyleSheet(self.settings.menu_style_sheet)
         menu.addAction(self.set_reference_utterance_action)
         menu.exec_(self.mapToGlobal(location))

@@ -2656,7 +2642,7 @@ class DiarizationTable(AnchorTableView):
         rows = self.selectionModel().selectedRows()
         if not rows:
             return
-        utterance_id = self.diarization_model.
+        utterance_id = self.diarization_model.utterance_ids[rows[0].row()]
         self.diarization_model.set_utterance_filter(utterance_id)
         self.referenceUtteranceSelected.emit(
             self.diarization_model.data(
@@ -2665,7 +2651,7 @@ class DiarizationTable(AnchorTableView):
             )
         )

-    def set_models(self, model: DiarizationModel, selection_model:
+    def set_models(self, model: DiarizationModel, selection_model: FileSelectionModel):
         self.diarization_model = model
         self.selection_model = selection_model
         self.setModel(model)
@@ -2684,14 +2670,14 @@ class DiarizationTable(AnchorTableView):
             return
         if index.column() == 0:
             row = index.row()
-            utterance_id = self.diarization_model.
+            utterance_id = self.diarization_model.utterance_ids[row]
             if utterance_id is None:
                 return
             with self.diarization_model.corpus_model.corpus.session() as session:
                 try:
-                    file_id, begin, end,
+                    file_id, begin, end, speaker_id = (
                         session.query(
-                            Utterance.file_id, Utterance.begin, Utterance.end, Utterance.
+                            Utterance.file_id, Utterance.begin, Utterance.end, Utterance.speaker_id
                         )
                         .filter(Utterance.id == utterance_id)
                         .first()
@@ -2701,19 +2687,18 @@ class DiarizationTable(AnchorTableView):
                     return
         else:
             if index.column() == 1:
-                speaker_id = self.diarization_model.
+                speaker_id = self.diarization_model.suggested_indices[index.row()]
             else:
-                speaker_id = self.diarization_model.
+                speaker_id = self.diarization_model.speaker_indices[index.row()]
             with self.diarization_model.corpus_model.corpus.session() as session:
                 c = session.query(Corpus).first()
                 try:
-                    utterance_id, file_id, begin, end
+                    utterance_id, file_id, begin, end = (
                         session.query(
                             Utterance.id,
                             Utterance.file_id,
                             Utterance.begin,
                             Utterance.end,
-                            Utterance.channel,
                         )
                         .join(Utterance.speaker)
                         .filter(Utterance.speaker_id == speaker_id)
@@ -2725,12 +2710,12 @@ class DiarizationTable(AnchorTableView):
                 except TypeError:
                     self.selection_model.clearSelection()
                     return
-            self.selection_model.set_current_utterance(utterance_id)
             self.selection_model.set_current_file(
                 file_id,
                 begin,
                 end,
-
+                utterance_id,
+                speaker_id,
                 force_update=True,
             )

@@ -3280,7 +3265,7 @@ class SpeakerWidget(QtWidgets.QWidget):
     def set_models(
         self,
         corpus_model: CorpusModel,
-        selection_model:
+        selection_model: FileSelectionModel,
         speaker_model: SpeakerModel,
     ):
         self.speaker_model = speaker_model
@@ -3576,7 +3561,6 @@ class ModelSelectWidget(QtWidgets.QWidget):
         for i, m in enumerate(self.model.models):
             if not m.available_locally or not os.path.exists(m.path):
                 continue
-            print(m.name, m.path, os.path.exists(m.path))
             self.model_select.addItem(m.name, userData=m.id)
             if m.id == current_model:
                 index = i
anchor/workers.py
CHANGED
@@ -3650,6 +3650,8 @@ class ImportG2PModelWorker(FunctionWorker): # pragma: no cover
         try:
             generator = Generator(g2p_model_path=self.model_path, num_pronunciations=5)
             generator.setup()
+        except shutil.ReadError:
+            self.signals.result.emit(None)
         except Exception:
             exctype, value = sys.exc_info()[:2]
             self.signals.error.emit((exctype, value, traceback.format_exc()))
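This pairs with the finalize_load_g2p_model guard in the main.py hunk above: when the model archive cannot be unpacked, the worker reports None as its result instead of surfacing an error, and the callback returns early on None. A minimal sketch of that contract with an illustrative loader/callback pair rather than the real worker classes:

import shutil


def load_model(archive_path: str):
    # Return a loaded object, or None when the archive is unreadable.
    try:
        shutil.unpack_archive(archive_path, "extracted_model")
    except (shutil.ReadError, FileNotFoundError):
        return None  # mirrors self.signals.result.emit(None)
    return object()  # stand-in for the real generator


def finalize_load(generator) -> None:
    if generator is None:
        return  # same early-out that finalize_load_g2p_model now performs
    print("model ready:", generator)


finalize_load(load_model("missing_or_corrupt.zip"))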
Anchor_annotator-0.3.3.dist-info/RECORD
REMOVED
@@ -1,22 +0,0 @@
-anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
-anchor/_version.py,sha256=FKnJIExgNrZG2xJ0y_dGNBpxGbGBYylvfat-jHhLUuM,411
-anchor/command_line.py,sha256=EucG805HyWk_zkMO9RXv9Yj0I0JVdDLZb1_DX2_ISjM,503
-anchor/db.py,sha256=ef4lO6HtCKoxC9CorIc0ZbPxKpjHa576a0ZIBOWNU9E,4956
-anchor/main.py,sha256=wX6LLudTX5-HOxqBB-BGLXTdYJuDGQbV11gA2XBvQ8w,120605
-anchor/models.py,sha256=sCZf5wF6g1KorMgb0AifmqhaxZp5fiYmlmaJoow-tFI,95230
-anchor/plot.py,sha256=eNVG9sDdRA9_KKrHKSb2TF66bIluJsJzVnTGN3q-Brk,104878
-anchor/resources_rc.py,sha256=94wgxDTpP4Oy55Br7CZ_YnmvaqzHr4n-AydBPhZc-es,8427242
-anchor/settings.py,sha256=QdfBtJowHpkBLzJ_3bZRKxF1rJDBW9Z5kp83sJVz0pA,46965
-anchor/ui_corpus_manager.py,sha256=e3ybOd4UdYarrLBATxI8vIFnioa4R_BHrbsEz5mJ5eA,8564
-anchor/ui_error_dialog.py,sha256=c_QS0s1VaJEV9AhcrQZQyWHHpUPudWjJY1NI7Ytipio,3832
-anchor/ui_main_window.py,sha256=MYb4PtV1sHYgnc3QwPphKjU3LepzBJpxllhN4nyDook,63525
-anchor/ui_preferences.py,sha256=MOC2dY4qkViW9cUbC0DVSO7FLg-dGSbmR630WFQ6V9c,41843
-anchor/undo.py,sha256=HXhrzV-T2JKwPd28KCaR9S6GWmi2Wr2Xk7IBEonRRCs,32564
-anchor/widgets.py,sha256=arL006v_cOGWudEF4Adbh_wRjiyFRGHP9BXOtfCr0h0,158301
-anchor/workers.py,sha256=iWLBGCg6jJr_OmVFJCRGMgF5Rw5G9IhpdNiqDv8ZBxU,171083
-Anchor_annotator-0.3.3.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
-Anchor_annotator-0.3.3.dist-info/METADATA,sha256=KPemKhTKt7Rh3N4-k-f8V4tHoAkvoUKrhUlonplJrLo,1500
-Anchor_annotator-0.3.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-Anchor_annotator-0.3.3.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
-Anchor_annotator-0.3.3.dist-info/RECORD,,
{Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/LICENSE
File without changes
{Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/WHEEL
File without changes
{Anchor_annotator-0.3.3.dist-info → Anchor_annotator-0.4.0.dist-info}/top_level.txt
File without changes