Anchor-annotator 0.7.0-py3-none-any.whl → 0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {Anchor_annotator-0.7.0.dist-info → Anchor_annotator-0.8.0.dist-info}/METADATA +1 -1
- Anchor_annotator-0.8.0.dist-info/RECORD +22 -0
- {Anchor_annotator-0.7.0.dist-info → Anchor_annotator-0.8.0.dist-info}/WHEEL +1 -1
- anchor/_version.py +2 -2
- anchor/main.py +151 -16
- anchor/models.py +76 -45
- anchor/plot.py +155 -66
- anchor/resources_rc.py +32928 -121948
- anchor/settings.py +8 -1
- anchor/ui_main_window.py +81 -18
- anchor/ui_preferences.py +27 -14
- anchor/undo.py +15 -9
- anchor/widgets.py +16 -17
- anchor/workers.py +218 -8
- Anchor_annotator-0.7.0.dist-info/RECORD +0 -22
- {Anchor_annotator-0.7.0.dist-info → Anchor_annotator-0.8.0.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.7.0.dist-info → Anchor_annotator-0.8.0.dist-info}/top_level.txt +0 -0
{Anchor_annotator-0.7.0.dist-info → Anchor_annotator-0.8.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: Anchor_annotator
-Version: 0.7.0
+Version: 0.8.0
 Summary: Anchor annotator is a program for inspecting corpora for the Montreal Forced Aligner and correcting transcriptions and pronunciations.
 Home-page: https://github.com/MontrealCorpusTools/Anchor-annotator
 Author: Montreal Corpus Tools
Anchor_annotator-0.8.0.dist-info/RECORD
ADDED
@@ -0,0 +1,22 @@
+anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
+anchor/_version.py,sha256=vspFLRfYI6gAAN7kyihey2lhPos0jxqKaNDWFlKPlmU,411
+anchor/command_line.py,sha256=EucG805HyWk_zkMO9RXv9Yj0I0JVdDLZb1_DX2_ISjM,503
+anchor/db.py,sha256=LlZzAy4bjmJIu0v4ev5Qjg_Fh2n9sMsKI2nAY1pwd0A,5057
+anchor/main.py,sha256=3CN6wlIzdWjGxsYRSIIuqB2jQZPUdLX9PlQSRMSQ5aI,134355
+anchor/models.py,sha256=0-CRXwRlLnFNelj0pbhfVTLTXAxfj1eJvJ8qEQAGrn4,98864
+anchor/plot.py,sha256=KCSXgyAT5v2BpsX6MvPJuaq5aphTzfYvZ69jLzvcM0k,117879
+anchor/resources_rc.py,sha256=5eFkwVBdDzfc6rkcC-HSzy8EYDmxiMXnYucgAoGVGsA,3599114
+anchor/settings.py,sha256=H8RBeB-MhjE7adrwUcxOTZv2pPkMfgaIwCdxb8f012Q,52396
+anchor/ui_corpus_manager.py,sha256=e3ybOd4UdYarrLBATxI8vIFnioa4R_BHrbsEz5mJ5eA,8564
+anchor/ui_error_dialog.py,sha256=HKbjGT_jtdb9jfn9THQMbl1fmcdWyjYDazM4hCwZ5Yo,3931
+anchor/ui_main_window.py,sha256=qPHaJWMd4k6nVIelcBRqNcATBuMbkv4j2kOpggG4DoY,40191
+anchor/ui_preferences.py,sha256=_1U67al_FoTjVizcK1He4JKBEqfmh3KxUo0UEB7kt5Q,43822
+anchor/undo.py,sha256=fDy8PA2Rckd9_dsa_lM_ohvQJS-l-VdQwB_P0i-Kvbw,33098
+anchor/widgets.py,sha256=6opesi2nGs4_hv8NWdha3e2kUW9hlRAzbo-6gcMdMG0,159347
+anchor/workers.py,sha256=T81tr2wgQh5oLOXZteGfccgvSwXJrJbk3SSmTLHdnmA,191386
+Anchor_annotator-0.8.0.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
+Anchor_annotator-0.8.0.dist-info/METADATA,sha256=FWSJbc9J1GcvsZF9hTHTnRTtLH_Alv5rbTwmXtPr3Gg,1500
+Anchor_annotator-0.8.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+Anchor_annotator-0.8.0.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
+Anchor_annotator-0.8.0.dist-info/RECORD,,
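Note: each RECORD row above is plain CSV: file path, a sha256=<urlsafe-base64 digest> hash, and the file size in bytes; the RECORD file lists itself with empty hash and size fields. A minimal sketch of reading such a file with Python's csv module (the local path is illustrative):

import csv

# Wheel RECORD rows: path, "sha256=<digest>", size in bytes.
with open("Anchor_annotator-0.8.0.dist-info/RECORD", newline="") as f:
    for path, digest, size in csv.reader(f):
        # RECORD's own row has empty hash and size fields.
        print(path, digest or "<no hash>", size or "<no size>")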
anchor/_version.py
CHANGED
anchor/main.py
CHANGED
@@ -13,7 +13,7 @@ from montreal_forced_aligner import config
 from montreal_forced_aligner.command_line.utils import check_databases
 from montreal_forced_aligner.config import MfaConfiguration, get_temporary_directory
 from montreal_forced_aligner.corpus import AcousticCorpus
-from montreal_forced_aligner.data import WorkflowType
+from montreal_forced_aligner.data import Language, WorkflowType
 from montreal_forced_aligner.db import CorpusWorkflow
 from montreal_forced_aligner.diarization.speaker_diarizer import FOUND_SPEECHBRAIN
 from montreal_forced_aligner.exceptions import DatabaseError
@@ -24,7 +24,9 @@ from montreal_forced_aligner.models import (
     LanguageModel,
     ModelManager,
 )
+from montreal_forced_aligner.transcription.models import FOUND_WHISPERX
 from montreal_forced_aligner.utils import DatasetType, inspect_database
+from montreal_forced_aligner.vad.models import MfaVAD
 from PySide6 import QtCore, QtGui, QtMultimedia, QtWidgets

 import anchor.db
@@ -246,11 +248,23 @@ class MainWindow(QtWidgets.QMainWindow):
         self.alignment_utterance_worker.signals.result.connect(self.finalize_utterance_alignment)
         self.workers.append(self.alignment_utterance_worker)

+        self.transcribe_utterance_worker = workers.TranscribeUtteranceWorker(self)
+        self.transcribe_utterance_worker.signals.error.connect(self.handle_error)
+        self.transcribe_utterance_worker.signals.result.connect(
+            self.finalize_utterance_transcription
+        )
+        self.workers.append(self.transcribe_utterance_worker)
+
         self.segment_utterance_worker = workers.SegmentUtteranceWorker(self)
         self.segment_utterance_worker.signals.error.connect(self.handle_error)
         self.segment_utterance_worker.signals.result.connect(self.finalize_segmentation)
         self.workers.append(self.segment_utterance_worker)

+        self.trim_utterance_worker = workers.TrimUtteranceWorker(self)
+        self.trim_utterance_worker.signals.error.connect(self.handle_error)
+        self.trim_utterance_worker.signals.result.connect(self.finalize_trimming)
+        self.workers.append(self.trim_utterance_worker)
+
         self.alignment_evaluation_worker = workers.AlignmentEvaluationWorker(self)
         self.alignment_evaluation_worker.signals.error.connect(self.handle_error)
         self.alignment_evaluation_worker.signals.finished.connect(self.finalize_adding_intervals)
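Note: the two new workers are wired exactly like the existing alignment and segmentation workers: a signals object whose error and result signals are connected before the worker is appended to self.workers. A rough standalone sketch of the pattern with hypothetical names (the real TranscribeUtteranceWorker/TrimUtteranceWorker live in anchor/workers.py and may be structured differently):

from PySide6 import QtCore


class WorkerSignals(QtCore.QObject):
    error = QtCore.Signal(object)
    result = QtCore.Signal(object)


class ExampleWorker(QtCore.QThread):
    def __init__(self, parent=None):
        super().__init__(parent)
        self.signals = WorkerSignals()
        self.utterance_id = None

    def set_params(self, utterance_id):
        self.utterance_id = utterance_id

    def run(self):
        try:
            # Stand-in for the actual transcription/trimming work;
            # the finalize_* slot receives this tuple on the GUI side.
            self.signals.result.emit((self.utterance_id, "result"))
        except Exception as e:
            self.signals.error.emit(e)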
@@ -270,6 +284,11 @@ class MainWindow(QtWidgets.QMainWindow):
         self.corpus_undo_stack = QtGui.QUndoStack(self)
         self.dictionary_undo_stack = QtGui.QUndoStack(self)

+        self.g2p_model = None
+        self.acoustic_model = None
+        self.vad_model = None
+        self.language_model = None
+        self.ivector_extractor = None
         self.set_up_models()
         if self.settings.value(AnchorSettings.AUTOLOAD):
             self.load_corpus()
@@ -280,6 +299,7 @@ class MainWindow(QtWidgets.QMainWindow):
         self.load_acoustic_model()
         self.load_language_model()
         self.load_g2p()
+        self.load_vad()
         self.create_actions()
         self.refresh_settings()
@@ -557,19 +577,18 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.diarizationWidget.set_models(self.diarization_model, self.file_selection_model)
         self.ui.oovWidget.set_models(self.oov_model)
         self.file_selection_model.currentUtteranceChanged.connect(self.change_utterance)
+        self.file_selection_model.currentUtteranceChanged.connect(
+            self.selection_model.set_current_utterance
+        )
         self.selection_model.fileViewRequested.connect(self.file_selection_model.set_current_file)
         self.file_selection_model.fileChanged.connect(self.change_file)
         self.selection_model.fileAboutToChange.connect(self.check_media_stop)
         self.media_player.set_models(self.file_selection_model)
         self.corpus_model.addCommand.connect(self.update_corpus_stack)
+        self.corpus_model.transcribeRequested.connect(self.begin_utterance_transcription)
         self.file_utterances_model.addCommand.connect(self.update_corpus_stack)
         self.file_selection_model.selectionChanged.connect(self.sync_selected_utterances)

-        self.g2p_model = None
-        self.acoustic_model = None
-        self.language_model = None
-        self.ivector_extractor = None
-
     def sync_selected_utterances(self):
         self.selection_model.update_selected_utterances(
             self.file_selection_model.selected_utterances()
@@ -706,15 +725,21 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.deleteUtterancesAct.setEnabled(False)
         self.ui.splitUtterancesAct.setEnabled(False)
         self.ui.alignUtteranceAct.setEnabled(False)
+        self.ui.transcribeUtteranceAct.setEnabled(False)
+        self.ui.trimUtteranceAct.setEnabled(False)
         self.ui.segmentUtteranceAct.setEnabled(False)
         if not selection and self.selection_model.current_utterance_id is None:
             return
-
         if len(selection) == 1 or self.selection_model.current_utterance_id is not None:
             self.ui.splitUtterancesAct.setEnabled(True)
-
-
-            self.
+            self.ui.trimUtteranceAct.setEnabled(True)
+            if self.corpus_model.acoustic_model is not None:
+                if self.corpus_model.has_dictionary:
+                    self.ui.alignUtteranceAct.setEnabled(True)
+                    self.ui.transcribeUtteranceAct.setEnabled(True)
+                    self.ui.segmentUtteranceAct.setEnabled(True)
+                elif not isinstance(self.acoustic_model, AcousticModel):
+                    self.ui.transcribeUtteranceAct.setEnabled(True)
         if len(selection) > 1:
             self.ui.mergeUtterancesAct.setEnabled(True)
         else:
@@ -801,6 +826,8 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.openPreferencesAct.triggered.connect(self.open_options)
         self.ui.openCorpusManagerAct.triggered.connect(self.open_corpus_manager)
         self.ui.loadAcousticModelAct.triggered.connect(self.change_acoustic_model)
+        self.ui.kaldiVadAct.triggered.connect(self.change_vad)
+        self.ui.speechbrainVadAct.triggered.connect(self.change_vad)
         self.ui.loadLanguageModelAct.triggered.connect(self.change_language_model)
         self.ui.loadIvectorExtractorAct.triggered.connect(self.change_ivector_extractor)
         self.ui.loadDictionaryAct.triggered.connect(self.change_dictionary)
@@ -870,6 +897,8 @@ class MainWindow(QtWidgets.QMainWindow):
             self.begin_reset_ivectors
         )
         self.ui.alignUtteranceAct.triggered.connect(self.begin_utterance_alignment)
+        self.ui.transcribeUtteranceAct.triggered.connect(self.begin_utterance_transcription)
+        self.ui.trimUtteranceAct.triggered.connect(self.begin_utterance_trimming)
         self.ui.segmentUtteranceAct.triggered.connect(self.begin_utterance_segmentation)
         self.ui.evaluateAlignmentsAct.triggered.connect(self.begin_alignment_evaluation)
         self.ui.selectMappingFileAct.triggered.connect(self.change_custom_mapping)
@@ -918,6 +947,9 @@ class MainWindow(QtWidgets.QMainWindow):
         self.acoustic_action_group = QtGui.QActionGroup(self)
         self.acoustic_action_group.setExclusive(True)

+        self.langauge_action_group = QtGui.QActionGroup(self)
+        self.langauge_action_group.setExclusive(True)
+
         self.g2p_action_group = QtGui.QActionGroup(self)
         self.g2p_action_group.setExclusive(True)

@@ -1032,6 +1064,25 @@ class MainWindow(QtWidgets.QMainWindow):
         )

     def refresh_model_actions(self):
+        self.ui.menuLanguage.clear()
+        for lang in sorted(Language, key=lambda x: x.display_name):
+            a = QtGui.QAction(lang.display_name, parent=self)
+            a.setCheckable(True)
+            if lang.name == self.settings.value(self.settings.LANGUAGE):
+                a.setChecked(True)
+            self.langauge_action_group.addAction(a)
+            a.triggered.connect(self.change_language)
+            self.ui.menuLanguage.addAction(a)
+        if not FOUND_SPEECHBRAIN:
+            self.ui.speechbrainVadAct.setChecked(False)
+            self.ui.speechbrainVadAct.setEnabled(False)
+            self.settings.setValue(self.settings.VAD_MODEL, "kaldi")
+            self.ui.kaldiVadAct.setChecked(True)
+        else:
+            self.ui.speechbrainVadAct.setEnabled(True)
+            if self.settings.value(self.settings.VAD_MODEL) == "speechbrain":
+                self.ui.speechbrainVadAct.setChecked(True)
+
         self.ui.menuDownload_acoustic_model.clear()
         self.ui.menuDownload_G2P_model.clear()
         self.ui.menuDownload_language_model.clear()
@@ -1168,11 +1219,10 @@ class MainWindow(QtWidgets.QMainWindow):
             .first()
         )
         if m is None:
-
-
-                name="speechbrain", path="speechbrain", available_locally=True
-            )
+            m = anchor.db.IvectorExtractor(
+                name="speechbrain", path="speechbrain", available_locally=True
             )
+            session.add(m)
         session.flush()
         session.commit()
         a = QtGui.QAction(text="speechbrain", parent=self)
@@ -1180,6 +1230,24 @@ class MainWindow(QtWidgets.QMainWindow):
             a.triggered.connect(self.change_ivector_extractor)
             self.ui.ivectorExtractorMenu.addAction(a)
             self.ivector_action_group.addAction(a)
+            for m_name, found in [("speechbrain", FOUND_SPEECHBRAIN), ("whisper", FOUND_WHISPERX)]:
+                if not found:
+                    continue
+                m = (
+                    session.query(anchor.db.AcousticModel)
+                    .filter(anchor.db.AcousticModel.path == m_name)
+                    .first()
+                )
+                if m is None:
+                    m = anchor.db.AcousticModel(name=m_name, path=m_name, available_locally=True)
+                    session.add(m)
+                    session.flush()
+                    session.commit()
+                a = QtGui.QAction(text=m_name, parent=self)
+                a.setData(m.id)
+                a.triggered.connect(self.change_acoustic_model)
+                self.ui.acousticModelMenu.addAction(a)
+                self.acoustic_action_group.addAction(a)

             for m in (
                 session.query(anchor.db.IvectorExtractor)
@@ -1411,7 +1479,10 @@ class MainWindow(QtWidgets.QMainWindow):
     def begin_alignment(self):
         self.enableMfaActions(False)
         self.alignment_worker.set_params(
-            self.corpus_model.corpus,
+            self.corpus_model.corpus,
+            self.acoustic_model,
+            verify_transcripts=False,
+            parameters=self.ui.alignmentWidget.parameters(),
         )
         self.alignment_worker.start()
         self.set_application_state("loading")
@@ -1420,7 +1491,10 @@ class MainWindow(QtWidgets.QMainWindow):
     def begin_verify_transcripts(self):
         self.enableMfaActions(False)
         self.alignment_worker.set_params(
-            self.corpus_model.corpus,
+            self.corpus_model.corpus,
+            self.acoustic_model,
+            verify_transcripts=True,
+            parameters=self.ui.alignmentWidget.parameters(),
         )
         self.alignment_worker.start()
         self.set_application_state("loading")
@@ -1475,6 +1549,12 @@ class MainWindow(QtWidgets.QMainWindow):
         self.set_application_state("loading")
         self.ui.loadingScreen.setCorpusName("Performing alignment...")

+    def begin_utterance_transcription(self, utterance_id: int = None):
+        if not utterance_id:
+            utterance_id = self.selection_model.current_utterance_id
+        self.transcribe_utterance_worker.set_params(self.corpus_model, utterance_id)
+        self.transcribe_utterance_worker.start()
+
     def begin_utterance_segmentation(self):
         if self.selection_model.current_utterance_id is None:
             return
@@ -1483,6 +1563,14 @@ class MainWindow(QtWidgets.QMainWindow):
         )
         self.segment_utterance_worker.start()

+    def begin_utterance_trimming(self):
+        if self.selection_model.current_utterance_id is None:
+            return
+        self.trim_utterance_worker.set_params(
+            self.corpus_model, self.selection_model.current_utterance_id
+        )
+        self.trim_utterance_worker.start()
+
     def begin_alignment_evaluation(self):
         self.enableMfaActions(False)
         with sqlalchemy.orm.Session(self.db_engine) as session:
@@ -1588,11 +1676,23 @@ class MainWindow(QtWidgets.QMainWindow):
         self.check_actions()
         self.set_application_state("loaded")

+    def finalize_utterance_transcription(self, data):
+        utterance_id, transcription = data
+
+        utt = self.file_utterances_model.get_utterance(utterance_id)
+        utt.transcription_text = transcription
+        self.file_utterances_model.utterancesReady.emit()
+
     def finalize_segmentation(self, data):
         original_utterance_id, split_data = data
         self.file_utterances_model.split_vad_utterance(original_utterance_id, split_data)
         self.ensure_utterance_panel_visible()

+    def finalize_trimming(self, data):
+        original_utterance_id, begin, end = data
+        self.file_utterances_model.update_utterance_times(original_utterance_id, begin, end)
+        self.ui.utteranceDetailWidget.plot_widget.refresh_text_grid()
+
     def finalize_saving(self):
         self.check_actions()
@@ -1914,6 +2014,9 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.alignUtteranceAct.setIcon(
             QtGui.QIcon.fromTheme(QtGui.QIcon.ThemeIcon.FormatTextUnderline)
         )
+        self.ui.trimUtteranceAct.setIcon(
+            QtGui.QIcon.fromTheme(QtGui.QIcon.ThemeIcon.FormatTextUnderline)
+        )
         self.ui.cancelCorpusLoadAct.setIcon(
             QtGui.QIcon.fromTheme(QtGui.QIcon.ThemeIcon.ProcessStop)
         )
@@ -1961,6 +2064,31 @@ class MainWindow(QtWidgets.QMainWindow):
         )
         self.download_worker.start()

+    def change_language(self):
+        from montreal_forced_aligner.transcription.models import MfaFasterWhisperPipeline
+
+        self.settings.setValue(self.settings.LANGUAGE, self.sender().text().lower())
+        if isinstance(self.acoustic_model, MfaFasterWhisperPipeline):
+            self.acoustic_model.set_language(self.sender().text().lower())
+
+    def change_vad(self):
+        self.settings.setValue(self.settings.VAD_MODEL, self.sender().text().lower())
+        self.load_vad()
+
+    def load_vad(self):
+        if self.settings.value(self.settings.VAD_MODEL) == "speechbrain":
+            model_dir = os.path.join(config.TEMPORARY_DIRECTORY, "models", "VAD")
+            os.makedirs(model_dir, exist_ok=True)
+            run_opts = None
+            if self.settings.value(self.settings.CUDA):
+                run_opts = {"device": "cuda"}
+            self.vad_model = MfaVAD.from_hparams(
+                source="speechbrain/vad-crdnn-libriparty", savedir=model_dir, run_opts=run_opts
+            )
+        else:
+            self.vad_model = None
+        self.trim_utterance_worker.set_vad_model(self.vad_model)
+
     def download_acoustic_model(self):
         self.download_worker.set_params(
             self.db_string, "acoustic", self.sender().text(), self.model_manager
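Note: load_vad above follows SpeechBrain's standard from_hparams download-and-cache flow. For orientation, the equivalent call against stock SpeechBrain looks roughly like this (MfaVAD is MFA's subclass, so the exact class path and helper methods may differ; this sketch assumes SpeechBrain >= 1.0's inference API):

import os
from speechbrain.inference.VAD import VAD

save_dir = os.path.join(os.path.expanduser("~"), ".cache", "vad")  # hypothetical cache dir
os.makedirs(save_dir, exist_ok=True)
vad = VAD.from_hparams(
    source="speechbrain/vad-crdnn-libriparty",
    savedir=save_dir,
    run_opts={"device": "cpu"},  # {"device": "cuda"} when the CUDA setting is on
)
# Returns begin/end times of detected speech regions for a 16 kHz wav file.
boundaries = vad.get_speech_segments("utterance.wav")  # hypothetical input file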
@@ -2502,6 +2630,8 @@ class OptionsDialog(QtWidgets.QDialog):
         self.ui.cudaCheckBox.setChecked(self.settings.value(self.settings.CUDA))
         if config.GITHUB_TOKEN is not None:
             self.ui.githubTokenEdit.setText(config.GITHUB_TOKEN)
+        if config.HF_TOKEN is not None:
+            self.ui.hfTokenEdit.setText(config.HF_TOKEN)

         self.ui.autoloadLastUsedCorpusCheckBox.setChecked(
             self.settings.value(self.settings.AUTOLOAD)
@@ -2634,11 +2764,16 @@ class OptionsDialog(QtWidgets.QDialog):
         config.NUM_JOBS = self.ui.numJobsEdit.value()
         config.USE_MP = self.ui.useMpCheckBox.isChecked()
         config.GITHUB_TOKEN = self.ui.githubTokenEdit.text()
+        config.HF_TOKEN = self.ui.hfTokenEdit.text()
         config.GLOBAL_CONFIG.current_profile.num_jobs = config.NUM_JOBS
         config.GLOBAL_CONFIG.current_profile.use_mp = config.USE_MP
         config.GLOBAL_CONFIG.current_profile.github_token = config.GITHUB_TOKEN
+        config.GLOBAL_CONFIG.current_profile.hf_token = config.HF_TOKEN
         config.GLOBAL_CONFIG.save()

+        self.settings.setValue(self.settings.GITHUB_TOKEN, self.ui.githubTokenEdit.text())
+        self.settings.setValue(self.settings.HF_TOKEN, self.ui.hfTokenEdit.text())
+
         self.settings.setValue(
             self.settings.SPEC_DYNAMIC_RANGE, int(self.ui.dynamicRangeEdit.value())
         )
anchor/models.py
CHANGED
@@ -6,6 +6,7 @@ import os
 import re
 import subprocess
 import typing
+import unicodedata
 from threading import Lock
 from typing import Any, Optional, Union

@@ -23,12 +24,14 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
     AcousticCorpusWithPronunciations,
 )
 from montreal_forced_aligner.data import PhoneType, WordType, WorkflowType
-from montreal_forced_aligner.db import
-
-
-
-
-
+from montreal_forced_aligner.db import (
+    CorpusWorkflow,
+    File,
+    Grapheme,
+    Phone,
+    Speaker,
+    Utterance,
+    Word,
 )
 from montreal_forced_aligner.g2p.generator import PyniniValidator
 from montreal_forced_aligner.models import (
@@ -51,23 +54,6 @@ if typing.TYPE_CHECKING:
 logger = logging.getLogger("anchor")


-WORD_BREAK_SET = "".join(
-    sorted(
-        set(
-            DEFAULT_WORD_BREAK_MARKERS
-            + DEFAULT_PUNCTUATION
-            + DEFAULT_CLITIC_MARKERS
-            + DEFAULT_COMPOUND_MARKERS
-        )
-    )
-)
-
-if "-" in WORD_BREAK_SET:
-    WORD_BREAK_SET = "-" + WORD_BREAK_SET.replace("-", "")
-
-WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"
-
-
 # noinspection PyUnresolvedReferences
 @dataclass(slots=True)
 class TextFilterQuery:
@@ -75,6 +61,7 @@ class TextFilterQuery:
     regex: bool = False
     word: bool = False
     case_sensitive: bool = False
+    graphemes: typing.Collection[str] = None

     @property
     def search_text(self):
@@ -83,6 +70,11 @@ class TextFilterQuery:
         return self.text

     def generate_expression(self, posix=False):
+        word_symbols = r"\w"
+        if self.graphemes:
+            dash_prefix = "-" if "-" in self.graphemes else ""
+            graphemes = "".join([x for x in self.graphemes if x != "-"])
+            word_symbols = rf"[{dash_prefix}\w{graphemes}]"
         text = self.text
         if not self.case_sensitive:
             text = text.lower()
@@ -97,11 +89,10 @@ class TextFilterQuery:
         if not text.endswith(word_break_set):
             text += word_break_set
         if posix:
-            text = text.replace(r"\b", r"\y")
         if text.startswith(r"\b"):
-            text = rf"((
+            text = rf"((?<!{word_symbols})|(?<=^))" + text[2:]
         if text.endswith(r"\b"):
-            text = text[:-2] + rf"((
+            text = text[:-2] + rf"((?!{word_symbols})|(?=$))"
         if not self.case_sensitive:
             text = "(?i)" + text
         return text
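Note: the rewritten boundary handling spells out \b as lookaround groups built from word_symbols, so dictionary graphemes that fall outside \w (apostrophes, for example) no longer terminate a word match, and the same expression form works for regex engines without \b. A small illustration of the construction in Python's re module, assuming an apostrophe is in the grapheme set:

import re

word_symbols = r"[\w']"  # \w plus the apostrophe grapheme
# Emulate \bdon't\b with explicit lookarounds instead of \b.
pattern = rf"((?<!{word_symbols})|(?<=^))don't((?!{word_symbols})|(?=$))"
print(bool(re.search(pattern, "i don't know")))  # True: bounded by spaces
print(bool(re.search(pattern, "don'ts")))        # False: trailing s continues the word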
@@ -385,7 +376,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
         if isinstance(utterance, int):
             if utterance not in self.reversed_indices:
                 return
-            utterance = self.reversed_indices[utterance]
+            utterance = self.utterances[self.reversed_indices[utterance]]
         old_speaker_id = utterance.speaker_id
         if old_speaker_id == speaker_id:
             return
@@ -395,10 +386,18 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
         self.corpus_model.set_speaker_modified(old_speaker_id)

     def update_utterance_times(
-        self,
+        self,
+        utterance: typing.Union[Utterance, int],
+        begin: Optional[float] = None,
+        end: Optional[float] = None,
     ):
         if not self.corpus_model.editable:
             return
+        if isinstance(utterance, int):
+            if utterance not in self.reversed_indices:
+                return
+            utterance = self.utterances[self.reversed_indices[utterance]]
+
         if utterance.begin == begin and utterance.end == end:
             return
         self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
@@ -578,7 +577,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
     resetView = QtCore.Signal()
     viewChanged = QtCore.Signal(object, object)
     selectionAudioChanged = QtCore.Signal(object)
-    currentUtteranceChanged = QtCore.Signal()
+    currentUtteranceChanged = QtCore.Signal(object)
     speakerRequested = QtCore.Signal(object)

     spectrogramReady = QtCore.Signal()
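Note: switching currentUtteranceChanged from Signal() to Signal(object) is what lets the emit sites further down pass the utterance id straight to slots such as set_current_utterance. In miniature:

from PySide6 import QtCore


class Selection(QtCore.QObject):
    # The payload type lets connected slots receive the id directly.
    currentUtteranceChanged = QtCore.Signal(object)


sel = Selection()
sel.currentUtteranceChanged.connect(lambda utterance_id: print("current:", utterance_id))
sel.currentUtteranceChanged.emit(42)  # prints: current: 42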
@@ -636,13 +635,22 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             y = self.model().y[begin_samp:end_samp, self.selected_channel]
         else:
             y = self.model().y[begin_samp:end_samp]
-
-
-
-
-
-
-
+        if self.settings.value(self.settings.SPECTRAL_FEATURES) == "mfcc":
+            spectrogram_worker = workers.MfccWorker(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+            )
+        else:
+            spectrogram_worker = workers.SpectrogramWorker(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+            )
         spectrogram_worker.signals.result.connect(self.finalize_loading_spectrogram)
         self.thread_pool.start(spectrogram_worker)
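Note: the worker choice above keys off a new SPECTRAL_FEATURES setting: "mfcc" renders mel-frequency cepstral coefficients instead of the usual log-magnitude spectrogram. As a rough standalone illustration of the two feature types using librosa (the in-app MfccWorker/SpectrogramWorker may compute them differently):

import numpy as np
import librosa

y, sr = librosa.load(librosa.ex("trumpet"))  # bundled example clip (downloads once)
# Log-magnitude spectrogram, the default view.
spec_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
# MFCCs, the compact alternative selected by the "mfcc" setting.
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
print(spec_db.shape, mfccs.shape)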
@@ -865,7 +873,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
     def model(self) -> FileUtterancesModel:
         return super().model()

-    def set_view_times(self, begin, end):
+    def set_view_times(self, begin, end, new_file=False):
         begin = max(begin, 0)
         end = min(end, self.model().file.duration)
         if (begin, end) == (self.min_time, self.max_time):
@@ -882,7 +890,8 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             and not self.min_time <= self.selected_max_time <= self.max_time
         ):
             self.selected_max_time = None
-
+        if not new_file:
+            self.view_change_timer.start()

     def send_selection_update(self):
         self.viewChanged.emit(self.min_time, self.max_time)
@@ -902,7 +911,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
         else:
             self.finalize_set_new_file()
         self.speakerRequested.emit(speaker_id)
-        self.set_view_times(begin, end)
+        self.set_view_times(begin, end, new_file=True)

     def finalize_set_new_file(self):
         if self.requested_utterance_id is None:
@@ -930,16 +939,18 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             return
         flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
         flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        current_index = None
         for u in utterances:
             if u.id not in self.model().reversed_indices:
                 continue
+            current_index = u.id
             row = self.model().reversed_indices[u.id]

             index = self.model().index(row, 0)
             if not index.isValid():
                 return
             self.select(index, flags)
-        self.currentUtteranceChanged.emit()
+        self.currentUtteranceChanged.emit(current_index)

     def update_select(self, utterance_id: int, deselect=False, reset=False):
         if reset and [x.id for x in self.selected_utterances()] == [utterance_id]:
@@ -961,7 +972,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
         self.select(index, flags)
         if not deselect:
             self.select_audio(self.model().utterances[row].begin, self.model().utterances[row].end)
-        self.currentUtteranceChanged.emit()
+        self.currentUtteranceChanged.emit(utterance_id)


 class CorpusSelectionModel(QtCore.QItemSelectionModel):
@@ -992,6 +1003,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         # self.selectionChanged.connect(self.update_selection_audio)
         # self.model().newResults.connect(self.check_selection)
         self.model().unlockCorpus.connect(self.fileChanged.emit)
+        self.model().layoutChanged.connect(self._update_selection)

     def set_current_utterance(self, utterance_id):
         self.current_utterance_id = utterance_id
@@ -1078,8 +1090,10 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         if not index.isValid():
             return
         m = self.model()
-
-
+        try:
+            self.current_utterance_id = m._indices[index.row()]
+        except IndexError:
+            self.current_utterance_id = None

     def selected_utterances(self):
         current_utterance = self.current_utterance_id
@@ -1225,6 +1239,7 @@ class DictionaryTableModel(TableModel):
         self.word_sets = {}
         self.speaker_mapping = {}
         self.phones = []
+        self.graphemes = []
         self.reference_phone_set = set()
         self.custom_mapping = {}
@@ -1240,7 +1255,7 @@ class DictionaryTableModel(TableModel):
         except KeyError:
             return True
         if dictionary_id is not None and self.word_sets[dictionary_id]:
-            return word.lower() in self.word_sets[dictionary_id]
+            return unicodedata.normalize("NFKC", word.lower()) in self.word_sets[dictionary_id]
         return True

     def lookup_word(self, word: str) -> None:
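Note: the NFKC normalization added above makes dictionary membership robust to visually identical but differently encoded input, e.g. full-width characters or ligatures typed into a transcript:

import unicodedata

word_set = {"office"}
for word in ["ｏｆｆｉｃｅ", "oﬃce"]:  # full-width letters; an "ffi" ligature
    normalized = unicodedata.normalize("NFKC", word.lower())
    print(repr(word), "->", normalized, normalized in word_set)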
@@ -1270,6 +1285,19 @@ class DictionaryTableModel(TableModel):
         if self.corpus_model.corpus.position_dependent_phones:
             phones = sorted(set(x.rsplit("_", maxsplit=1)[0] for x in phones))
         self.phones = phones
+        specials = self.corpus_model.corpus.specials_set
+        specials.update(
+            [
+                "#0",
+                "<space>",
+            ]
+        )
+        self.graphemes = [
+            x
+            for x, in self.corpus_model.session.query(Grapheme.grapheme).filter(
+                ~Grapheme.grapheme.in_(specials)
+            )
+        ]

     def flags(
         self, index: Union[QtCore.QModelIndex, QtCore.QPersistentModelIndex]
@@ -1339,7 +1367,7 @@ class DictionaryTableModel(TableModel):
                 continue
             existing_pronunciations.add(self._data[r][2])
             candidates = self.g2p_generator.rewriter(word)
-            for c in candidates:
+            for c, _ in candidates:
                 if c in existing_pronunciations:
                     continue
                 pronunciation = c
@@ -1969,6 +1997,8 @@ class CorpusModel(TableModel):
     filesSaved = QtCore.Signal()
     dictionarySaved = QtCore.Signal()
     selectionRequested = QtCore.Signal(object)
+    transcribeRequested = QtCore.Signal(object)
+    alignRequested = QtCore.Signal(object)
     requestFileView = QtCore.Signal(object)
     utteranceTextUpdated = QtCore.Signal(object, object)
     refreshUtteranceText = QtCore.Signal(object, object)
@@ -2061,6 +2091,7 @@ class CorpusModel(TableModel):
         self.has_per_speaker_transcribed_alignments = False
         self.has_transcript_verification_alignments = False
         self.latest_alignment_workflow = None
+        self.language = None

     def update_latest_alignment_workflow(self):
         with self.corpus.session() as session: