Anchor_annotator-0.7.0-py3-none-any.whl → Anchor_annotator-0.8.0-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Anchor_annotator-0.8.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: Anchor_annotator
-Version: 0.7.0
+Version: 0.8.0
 Summary: Anchor annotator is a program for inspecting corpora for the Montreal Forced Aligner and correcting transcriptions and pronunciations.
 Home-page: https://github.com/MontrealCorpusTools/Anchor-annotator
 Author: Montreal Corpus Tools
Anchor_annotator-0.8.0.dist-info/RECORD ADDED
@@ -0,0 +1,22 @@
+anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
+anchor/_version.py,sha256=vspFLRfYI6gAAN7kyihey2lhPos0jxqKaNDWFlKPlmU,411
+anchor/command_line.py,sha256=EucG805HyWk_zkMO9RXv9Yj0I0JVdDLZb1_DX2_ISjM,503
+anchor/db.py,sha256=LlZzAy4bjmJIu0v4ev5Qjg_Fh2n9sMsKI2nAY1pwd0A,5057
+anchor/main.py,sha256=3CN6wlIzdWjGxsYRSIIuqB2jQZPUdLX9PlQSRMSQ5aI,134355
+anchor/models.py,sha256=0-CRXwRlLnFNelj0pbhfVTLTXAxfj1eJvJ8qEQAGrn4,98864
+anchor/plot.py,sha256=KCSXgyAT5v2BpsX6MvPJuaq5aphTzfYvZ69jLzvcM0k,117879
+anchor/resources_rc.py,sha256=5eFkwVBdDzfc6rkcC-HSzy8EYDmxiMXnYucgAoGVGsA,3599114
+anchor/settings.py,sha256=H8RBeB-MhjE7adrwUcxOTZv2pPkMfgaIwCdxb8f012Q,52396
+anchor/ui_corpus_manager.py,sha256=e3ybOd4UdYarrLBATxI8vIFnioa4R_BHrbsEz5mJ5eA,8564
+anchor/ui_error_dialog.py,sha256=HKbjGT_jtdb9jfn9THQMbl1fmcdWyjYDazM4hCwZ5Yo,3931
+anchor/ui_main_window.py,sha256=qPHaJWMd4k6nVIelcBRqNcATBuMbkv4j2kOpggG4DoY,40191
+anchor/ui_preferences.py,sha256=_1U67al_FoTjVizcK1He4JKBEqfmh3KxUo0UEB7kt5Q,43822
+anchor/undo.py,sha256=fDy8PA2Rckd9_dsa_lM_ohvQJS-l-VdQwB_P0i-Kvbw,33098
+anchor/widgets.py,sha256=6opesi2nGs4_hv8NWdha3e2kUW9hlRAzbo-6gcMdMG0,159347
+anchor/workers.py,sha256=T81tr2wgQh5oLOXZteGfccgvSwXJrJbk3SSmTLHdnmA,191386
+Anchor_annotator-0.8.0.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
+Anchor_annotator-0.8.0.dist-info/METADATA,sha256=FWSJbc9J1GcvsZF9hTHTnRTtLH_Alv5rbTwmXtPr3Gg,1500
+Anchor_annotator-0.8.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+Anchor_annotator-0.8.0.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
+Anchor_annotator-0.8.0.dist-info/RECORD,,
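For reference, each RECORD line is `path,sha256=<digest>,<size>`, where the digest is an unpadded urlsafe-base64 sha256 (PEP 376/PEP 427). A minimal sketch of how such an entry can be checked; note that the zero-byte `anchor/__init__.py` hashes to exactly the digest recorded above:

# Minimal sketch: verify a wheel RECORD entry against the file on disk.
import base64
import hashlib
from pathlib import Path

def record_digest(path: Path) -> str:
    """Return a file's sha256 digest, encoded the way RECORD stores it."""
    digest = hashlib.sha256(path.read_bytes()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# The empty __init__.py (size 0) matches the digest listed in the RECORD above.
assert record_digest(Path("anchor/__init__.py")) == "47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU"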
Anchor_annotator-0.8.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (71.0.1)
+Generator: setuptools (75.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
anchor/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.7.0'
-__version_tuple__ = version_tuple = (0, 7, 0)
+__version__ = version = '0.8.0'
+__version_tuple__ = version_tuple = (0, 8, 0)
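These assignments follow the setuptools-scm `_version.py` template; downstream code can gate on the tuple form instead of parsing the string. A minimal sketch:

# Minimal sketch: feature-gate on the machine-readable version tuple.
from anchor._version import version_tuple

if version_tuple >= (0, 8, 0):
    ...  # 0.8.0 adds per-utterance transcription and trimming (see main.py below)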
anchor/main.py CHANGED
@@ -13,7 +13,7 @@ from montreal_forced_aligner import config
 from montreal_forced_aligner.command_line.utils import check_databases
 from montreal_forced_aligner.config import MfaConfiguration, get_temporary_directory
 from montreal_forced_aligner.corpus import AcousticCorpus
-from montreal_forced_aligner.data import WorkflowType
+from montreal_forced_aligner.data import Language, WorkflowType
 from montreal_forced_aligner.db import CorpusWorkflow
 from montreal_forced_aligner.diarization.speaker_diarizer import FOUND_SPEECHBRAIN
 from montreal_forced_aligner.exceptions import DatabaseError
@@ -24,7 +24,9 @@ from montreal_forced_aligner.models import (
     LanguageModel,
     ModelManager,
 )
+from montreal_forced_aligner.transcription.models import FOUND_WHISPERX
 from montreal_forced_aligner.utils import DatasetType, inspect_database
+from montreal_forced_aligner.vad.models import MfaVAD
 from PySide6 import QtCore, QtGui, QtMultimedia, QtWidgets
 
 import anchor.db
@@ -246,11 +248,23 @@ class MainWindow(QtWidgets.QMainWindow):
         self.alignment_utterance_worker.signals.result.connect(self.finalize_utterance_alignment)
         self.workers.append(self.alignment_utterance_worker)
 
+        self.transcribe_utterance_worker = workers.TranscribeUtteranceWorker(self)
+        self.transcribe_utterance_worker.signals.error.connect(self.handle_error)
+        self.transcribe_utterance_worker.signals.result.connect(
+            self.finalize_utterance_transcription
+        )
+        self.workers.append(self.transcribe_utterance_worker)
+
         self.segment_utterance_worker = workers.SegmentUtteranceWorker(self)
         self.segment_utterance_worker.signals.error.connect(self.handle_error)
         self.segment_utterance_worker.signals.result.connect(self.finalize_segmentation)
         self.workers.append(self.segment_utterance_worker)
 
+        self.trim_utterance_worker = workers.TrimUtteranceWorker(self)
+        self.trim_utterance_worker.signals.error.connect(self.handle_error)
+        self.trim_utterance_worker.signals.result.connect(self.finalize_trimming)
+        self.workers.append(self.trim_utterance_worker)
+
         self.alignment_evaluation_worker = workers.AlignmentEvaluationWorker(self)
         self.alignment_evaluation_worker.signals.error.connect(self.handle_error)
         self.alignment_evaluation_worker.signals.finished.connect(self.finalize_adding_intervals)
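The new transcribe/trim workers are wired exactly like the existing ones: each exposes a `signals` object with `error`/`result`, is parameterized via `set_params()`, and launched with `start()`. A generic sketch of that pattern (illustrative only; the real base class lives in anchor/workers.py and may differ):

# Illustrative sketch of a signal-carrying worker, not Anchor's actual base class.
from PySide6 import QtCore

class WorkerSignals(QtCore.QObject):
    result = QtCore.Signal(object)
    error = QtCore.Signal(object)

class ExampleWorker(QtCore.QThread):
    def __init__(self, parent=None):
        super().__init__(parent)
        self.signals = WorkerSignals()
        self.args = None

    def set_params(self, *args):
        self.args = args

    def run(self):  # executed on the worker thread after start()
        try:
            self.signals.result.emit(self.args)  # stand-in for the real work
        except Exception as exc:
            self.signals.error.emit(exc)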
@@ -270,6 +284,11 @@ class MainWindow(QtWidgets.QMainWindow):
         self.corpus_undo_stack = QtGui.QUndoStack(self)
         self.dictionary_undo_stack = QtGui.QUndoStack(self)
 
+        self.g2p_model = None
+        self.acoustic_model = None
+        self.vad_model = None
+        self.language_model = None
+        self.ivector_extractor = None
         self.set_up_models()
         if self.settings.value(AnchorSettings.AUTOLOAD):
             self.load_corpus()
@@ -280,6 +299,7 @@ class MainWindow(QtWidgets.QMainWindow):
         self.load_acoustic_model()
         self.load_language_model()
         self.load_g2p()
+        self.load_vad()
         self.create_actions()
         self.refresh_settings()
 
@@ -557,19 +577,18 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.diarizationWidget.set_models(self.diarization_model, self.file_selection_model)
         self.ui.oovWidget.set_models(self.oov_model)
         self.file_selection_model.currentUtteranceChanged.connect(self.change_utterance)
+        self.file_selection_model.currentUtteranceChanged.connect(
+            self.selection_model.set_current_utterance
+        )
         self.selection_model.fileViewRequested.connect(self.file_selection_model.set_current_file)
         self.file_selection_model.fileChanged.connect(self.change_file)
         self.selection_model.fileAboutToChange.connect(self.check_media_stop)
         self.media_player.set_models(self.file_selection_model)
         self.corpus_model.addCommand.connect(self.update_corpus_stack)
+        self.corpus_model.transcribeRequested.connect(self.begin_utterance_transcription)
         self.file_utterances_model.addCommand.connect(self.update_corpus_stack)
         self.file_selection_model.selectionChanged.connect(self.sync_selected_utterances)
 
-        self.g2p_model = None
-        self.acoustic_model = None
-        self.language_model = None
-        self.ivector_extractor = None
-
     def sync_selected_utterances(self):
         self.selection_model.update_selected_utterances(
             self.file_selection_model.selected_utterances()
@@ -706,15 +725,21 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.deleteUtterancesAct.setEnabled(False)
         self.ui.splitUtterancesAct.setEnabled(False)
         self.ui.alignUtteranceAct.setEnabled(False)
+        self.ui.transcribeUtteranceAct.setEnabled(False)
+        self.ui.trimUtteranceAct.setEnabled(False)
         self.ui.segmentUtteranceAct.setEnabled(False)
         if not selection and self.selection_model.current_utterance_id is None:
             return
-
         if len(selection) == 1 or self.selection_model.current_utterance_id is not None:
             self.ui.splitUtterancesAct.setEnabled(True)
-            if self.corpus_model.acoustic_model is not None and self.corpus_model.has_dictionary:
-                self.ui.alignUtteranceAct.setEnabled(True)
-                self.ui.segmentUtteranceAct.setEnabled(True)
+            self.ui.trimUtteranceAct.setEnabled(True)
+            if self.corpus_model.acoustic_model is not None:
+                if self.corpus_model.has_dictionary:
+                    self.ui.alignUtteranceAct.setEnabled(True)
+                    self.ui.transcribeUtteranceAct.setEnabled(True)
+                    self.ui.segmentUtteranceAct.setEnabled(True)
+                elif not isinstance(self.acoustic_model, AcousticModel):
+                    self.ui.transcribeUtteranceAct.setEnabled(True)
         if len(selection) > 1:
             self.ui.mergeUtterancesAct.setEnabled(True)
         else:
@@ -801,6 +826,8 @@ class MainWindow(QtWidgets.QMainWindow):
         self.ui.openPreferencesAct.triggered.connect(self.open_options)
         self.ui.openCorpusManagerAct.triggered.connect(self.open_corpus_manager)
         self.ui.loadAcousticModelAct.triggered.connect(self.change_acoustic_model)
+        self.ui.kaldiVadAct.triggered.connect(self.change_vad)
+        self.ui.speechbrainVadAct.triggered.connect(self.change_vad)
         self.ui.loadLanguageModelAct.triggered.connect(self.change_language_model)
         self.ui.loadIvectorExtractorAct.triggered.connect(self.change_ivector_extractor)
         self.ui.loadDictionaryAct.triggered.connect(self.change_dictionary)
@@ -870,6 +897,8 @@ class MainWindow(QtWidgets.QMainWindow):
             self.begin_reset_ivectors
         )
         self.ui.alignUtteranceAct.triggered.connect(self.begin_utterance_alignment)
+        self.ui.transcribeUtteranceAct.triggered.connect(self.begin_utterance_transcription)
+        self.ui.trimUtteranceAct.triggered.connect(self.begin_utterance_trimming)
         self.ui.segmentUtteranceAct.triggered.connect(self.begin_utterance_segmentation)
         self.ui.evaluateAlignmentsAct.triggered.connect(self.begin_alignment_evaluation)
         self.ui.selectMappingFileAct.triggered.connect(self.change_custom_mapping)
@@ -918,6 +947,9 @@ class MainWindow(QtWidgets.QMainWindow):
         self.acoustic_action_group = QtGui.QActionGroup(self)
         self.acoustic_action_group.setExclusive(True)
 
+        self.langauge_action_group = QtGui.QActionGroup(self)
+        self.langauge_action_group.setExclusive(True)
+
         self.g2p_action_group = QtGui.QActionGroup(self)
         self.g2p_action_group.setExclusive(True)
 
@@ -1032,6 +1064,25 @@
         )
 
     def refresh_model_actions(self):
+        self.ui.menuLanguage.clear()
+        for lang in sorted(Language, key=lambda x: x.display_name):
+            a = QtGui.QAction(lang.display_name, parent=self)
+            a.setCheckable(True)
+            if lang.name == self.settings.value(self.settings.LANGUAGE):
+                a.setChecked(True)
+            self.langauge_action_group.addAction(a)
+            a.triggered.connect(self.change_language)
+            self.ui.menuLanguage.addAction(a)
+        if not FOUND_SPEECHBRAIN:
+            self.ui.speechbrainVadAct.setChecked(False)
+            self.ui.speechbrainVadAct.setEnabled(False)
+            self.settings.setValue(self.settings.VAD_MODEL, "kaldi")
+            self.ui.kaldiVadAct.setChecked(True)
+        else:
+            self.ui.speechbrainVadAct.setEnabled(True)
+            if self.settings.value(self.settings.VAD_MODEL) == "speechbrain":
+                self.ui.speechbrainVadAct.setChecked(True)
+
         self.ui.menuDownload_acoustic_model.clear()
         self.ui.menuDownload_G2P_model.clear()
         self.ui.menuDownload_language_model.clear()
@@ -1168,11 +1219,10 @@
                 .first()
             )
             if m is None:
-                session.add(
-                    anchor.db.IvectorExtractor(
-                        name="speechbrain", path="speechbrain", available_locally=True
-                    )
+                m = anchor.db.IvectorExtractor(
+                    name="speechbrain", path="speechbrain", available_locally=True
                 )
+                session.add(m)
                 session.flush()
                 session.commit()
             a = QtGui.QAction(text="speechbrain", parent=self)
@@ -1180,6 +1230,24 @@
             a.triggered.connect(self.change_ivector_extractor)
             self.ui.ivectorExtractorMenu.addAction(a)
             self.ivector_action_group.addAction(a)
+            for m_name, found in [("speechbrain", FOUND_SPEECHBRAIN), ("whisper", FOUND_WHISPERX)]:
+                if not found:
+                    continue
+                m = (
+                    session.query(anchor.db.AcousticModel)
+                    .filter(anchor.db.AcousticModel.path == m_name)
+                    .first()
+                )
+                if m is None:
+                    m = anchor.db.AcousticModel(name=m_name, path=m_name, available_locally=True)
+                    session.add(m)
+                    session.flush()
+                    session.commit()
+                a = QtGui.QAction(text=m_name, parent=self)
+                a.setData(m.id)
+                a.triggered.connect(self.change_acoustic_model)
+                self.ui.acousticModelMenu.addAction(a)
+                self.acoustic_action_group.addAction(a)
 
             for m in (
                 session.query(anchor.db.IvectorExtractor)
@@ -1411,7 +1479,10 @@
     def begin_alignment(self):
         self.enableMfaActions(False)
         self.alignment_worker.set_params(
-            self.corpus_model.corpus, self.acoustic_model, self.ui.alignmentWidget.parameters()
+            self.corpus_model.corpus,
+            self.acoustic_model,
+            verify_transcripts=False,
+            parameters=self.ui.alignmentWidget.parameters(),
         )
         self.alignment_worker.start()
         self.set_application_state("loading")
@@ -1420,7 +1491,10 @@
     def begin_verify_transcripts(self):
         self.enableMfaActions(False)
         self.alignment_worker.set_params(
-            self.corpus_model.corpus, self.acoustic_model, self.ui.alignmentWidget.parameters()
+            self.corpus_model.corpus,
+            self.acoustic_model,
+            verify_transcripts=True,
+            parameters=self.ui.alignmentWidget.parameters(),
         )
         self.alignment_worker.start()
         self.set_application_state("loading")
@@ -1475,6 +1549,12 @@
         self.set_application_state("loading")
         self.ui.loadingScreen.setCorpusName("Performing alignment...")
 
+    def begin_utterance_transcription(self, utterance_id: int = None):
+        if not utterance_id:
+            utterance_id = self.selection_model.current_utterance_id
+        self.transcribe_utterance_worker.set_params(self.corpus_model, utterance_id)
+        self.transcribe_utterance_worker.start()
+
     def begin_utterance_segmentation(self):
         if self.selection_model.current_utterance_id is None:
             return
@@ -1483,6 +1563,14 @@
         )
         self.segment_utterance_worker.start()
 
+    def begin_utterance_trimming(self):
+        if self.selection_model.current_utterance_id is None:
+            return
+        self.trim_utterance_worker.set_params(
+            self.corpus_model, self.selection_model.current_utterance_id
+        )
+        self.trim_utterance_worker.start()
+
     def begin_alignment_evaluation(self):
         self.enableMfaActions(False)
         with sqlalchemy.orm.Session(self.db_engine) as session:
@@ -1588,11 +1676,23 @@
         self.check_actions()
         self.set_application_state("loaded")
 
+    def finalize_utterance_transcription(self, data):
+        utterance_id, transcription = data
+
+        utt = self.file_utterances_model.get_utterance(utterance_id)
+        utt.transcription_text = transcription
+        self.file_utterances_model.utterancesReady.emit()
+
     def finalize_segmentation(self, data):
         original_utterance_id, split_data = data
         self.file_utterances_model.split_vad_utterance(original_utterance_id, split_data)
         self.ensure_utterance_panel_visible()
 
+    def finalize_trimming(self, data):
+        original_utterance_id, begin, end = data
+        self.file_utterances_model.update_utterance_times(original_utterance_id, begin, end)
+        self.ui.utteranceDetailWidget.plot_widget.refresh_text_grid()
+
     def finalize_saving(self):
         self.check_actions()
 
@@ -1914,6 +2014,9 @@
         self.ui.alignUtteranceAct.setIcon(
             QtGui.QIcon.fromTheme(QtGui.QIcon.ThemeIcon.FormatTextUnderline)
         )
+        self.ui.trimUtteranceAct.setIcon(
+            QtGui.QIcon.fromTheme(QtGui.QIcon.ThemeIcon.FormatTextUnderline)
+        )
         self.ui.cancelCorpusLoadAct.setIcon(
             QtGui.QIcon.fromTheme(QtGui.QIcon.ThemeIcon.ProcessStop)
         )
@@ -1961,6 +2064,31 @@
         )
         self.download_worker.start()
 
+    def change_language(self):
+        from montreal_forced_aligner.transcription.models import MfaFasterWhisperPipeline
+
+        self.settings.setValue(self.settings.LANGUAGE, self.sender().text().lower())
+        if isinstance(self.acoustic_model, MfaFasterWhisperPipeline):
+            self.acoustic_model.set_language(self.sender().text().lower())
+
+    def change_vad(self):
+        self.settings.setValue(self.settings.VAD_MODEL, self.sender().text().lower())
+        self.load_vad()
+
+    def load_vad(self):
+        if self.settings.value(self.settings.VAD_MODEL) == "speechbrain":
+            model_dir = os.path.join(config.TEMPORARY_DIRECTORY, "models", "VAD")
+            os.makedirs(model_dir, exist_ok=True)
+            run_opts = None
+            if self.settings.value(self.settings.CUDA):
+                run_opts = {"device": "cuda"}
+            self.vad_model = MfaVAD.from_hparams(
+                source="speechbrain/vad-crdnn-libriparty", savedir=model_dir, run_opts=run_opts
+            )
+        else:
+            self.vad_model = None
+        self.trim_utterance_worker.set_vad_model(self.vad_model)
+
     def download_acoustic_model(self):
         self.download_worker.set_params(
             self.db_string, "acoustic", self.sender().text(), self.model_manager
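`MfaVAD.from_hparams` follows speechbrain's `Pretrained.from_hparams` convention, with `run_opts` selecting the device. A rough sketch of the equivalent stock speechbrain usage (illustrative only; MfaVAD wraps this model for MFA's pipeline and may expose a different interface):

# Illustrative sketch: load speechbrain's CRDNN VAD and extract speech boundaries.
from speechbrain.inference.VAD import VAD  # speechbrain.pretrained.VAD on older versions

vad = VAD.from_hparams(
    source="speechbrain/vad-crdnn-libriparty",
    savedir="pretrained_models/vad-crdnn-libriparty",
    run_opts={"device": "cuda"},  # omit to stay on CPU
)
boundaries = vad.get_speech_segments("utterance.wav")  # (begin, end) times in seconds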
@@ -2502,6 +2630,8 @@ class OptionsDialog(QtWidgets.QDialog):
         self.ui.cudaCheckBox.setChecked(self.settings.value(self.settings.CUDA))
         if config.GITHUB_TOKEN is not None:
             self.ui.githubTokenEdit.setText(config.GITHUB_TOKEN)
+        if config.HF_TOKEN is not None:
+            self.ui.hfTokenEdit.setText(config.HF_TOKEN)
 
         self.ui.autoloadLastUsedCorpusCheckBox.setChecked(
             self.settings.value(self.settings.AUTOLOAD)
@@ -2634,11 +2764,16 @@
         config.NUM_JOBS = self.ui.numJobsEdit.value()
         config.USE_MP = self.ui.useMpCheckBox.isChecked()
         config.GITHUB_TOKEN = self.ui.githubTokenEdit.text()
+        config.HF_TOKEN = self.ui.hfTokenEdit.text()
         config.GLOBAL_CONFIG.current_profile.num_jobs = config.NUM_JOBS
         config.GLOBAL_CONFIG.current_profile.use_mp = config.USE_MP
         config.GLOBAL_CONFIG.current_profile.github_token = config.GITHUB_TOKEN
+        config.GLOBAL_CONFIG.current_profile.hf_token = config.HF_TOKEN
         config.GLOBAL_CONFIG.save()
 
+        self.settings.setValue(self.settings.GITHUB_TOKEN, self.ui.githubTokenEdit.text())
+        self.settings.setValue(self.settings.HF_TOKEN, self.ui.hfTokenEdit.text())
+
         self.settings.setValue(
             self.settings.SPEC_DYNAMIC_RANGE, int(self.ui.dynamicRangeEdit.value())
         )
anchor/models.py CHANGED
@@ -6,6 +6,7 @@ import os
 import re
 import subprocess
 import typing
+import unicodedata
 from threading import Lock
 from typing import Any, Optional, Union
 
@@ -23,12 +24,14 @@ from montreal_forced_aligner.corpus.acoustic_corpus import (
     AcousticCorpusWithPronunciations,
 )
 from montreal_forced_aligner.data import PhoneType, WordType, WorkflowType
-from montreal_forced_aligner.db import CorpusWorkflow, File, Phone, Speaker, Utterance, Word
-from montreal_forced_aligner.dictionary.mixins import (
-    DEFAULT_CLITIC_MARKERS,
-    DEFAULT_COMPOUND_MARKERS,
-    DEFAULT_PUNCTUATION,
-    DEFAULT_WORD_BREAK_MARKERS,
+from montreal_forced_aligner.db import (
+    CorpusWorkflow,
+    File,
+    Grapheme,
+    Phone,
+    Speaker,
+    Utterance,
+    Word,
 )
 from montreal_forced_aligner.g2p.generator import PyniniValidator
 from montreal_forced_aligner.models import (
@@ -51,23 +54,6 @@ if typing.TYPE_CHECKING:
 logger = logging.getLogger("anchor")
 
 
-WORD_BREAK_SET = "".join(
-    sorted(
-        set(
-            DEFAULT_WORD_BREAK_MARKERS
-            + DEFAULT_PUNCTUATION
-            + DEFAULT_CLITIC_MARKERS
-            + DEFAULT_COMPOUND_MARKERS
-        )
-    )
-)
-
-if "-" in WORD_BREAK_SET:
-    WORD_BREAK_SET = "" + WORD_BREAK_SET.replace("-", "")
-
-WORD_BREAK_REGEX_SET = rf"[\s{WORD_BREAK_SET}]"
-
-
 # noinspection PyUnresolvedReferences
 @dataclass(slots=True)
 class TextFilterQuery:
@@ -75,6 +61,7 @@ class TextFilterQuery:
     regex: bool = False
     word: bool = False
    case_sensitive: bool = False
+    graphemes: typing.Collection[str] = None
 
    @property
    def search_text(self):
@@ -83,6 +70,11 @@
         return self.text
 
     def generate_expression(self, posix=False):
+        word_symbols = r"\w"
+        if self.graphemes:
+            dash_prefix = "-" if "-" in self.graphemes else ""
+            graphemes = "".join([x for x in self.graphemes if x != "-"])
+            word_symbols = rf"[{dash_prefix}\w{graphemes}]"
         text = self.text
         if not self.case_sensitive:
             text = text.lower()
@@ -97,11 +89,10 @@
         if not text.endswith(word_break_set):
             text += word_break_set
         if posix:
-            text = text.replace(r"\b", r"\y")
             if text.startswith(r"\b"):
-                text = rf"((?<={WORD_BREAK_REGEX_SET})|(?<=^))" + text[2:]
+                text = rf"((?<!{word_symbols})|(?<=^))" + text[2:]
             if text.endswith(r"\b"):
-                text = text[:-2] + rf"((?={WORD_BREAK_REGEX_SET})|(?=$))"
+                text = text[:-2] + rf"((?!{word_symbols})|(?=$))"
         if not self.case_sensitive:
             text = "(?i)" + text
         return text
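The net effect: instead of a fixed module-level word-break class, word boundaries are now derived from the dictionary's own grapheme set (with `-` hoisted to the front of the character class so it cannot form a range), and `\b` is rewritten into lookarounds that the PostgreSQL regex flavor can evaluate. A minimal illustration with a hypothetical grapheme set, using Python's re, where the same construction works:

# Minimal sketch: with "'" treated as a word symbol, a plain \b would split
# "qu'il" after "qu", while the lookaround form keeps the clitic attached.
import re

graphemes = {"'"}  # hypothetical dictionary grapheme set
word_symbols = rf"[\w{''.join(graphemes)}]"
pattern = rf"((?<!{word_symbols})|(?<=^))qu'il((?!{word_symbols})|(?=$))"

assert re.search(pattern, "dit qu'il part")   # whole-word match
assert not re.search(pattern, "qu'ils")       # trailing word symbol blocks it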
@@ -385,7 +376,7 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
         if isinstance(utterance, int):
             if utterance not in self.reversed_indices:
                 return
-            utterance = self.reversed_indices[utterance]
+            utterance = self.utterances[self.reversed_indices[utterance]]
         old_speaker_id = utterance.speaker_id
         if old_speaker_id == speaker_id:
             return
@@ -395,10 +386,18 @@ class FileUtterancesModel(QtCore.QAbstractListModel):
         self.corpus_model.set_speaker_modified(old_speaker_id)
 
     def update_utterance_times(
-        self, utterance: Utterance, begin: Optional[float] = None, end: Optional[float] = None
+        self,
+        utterance: typing.Union[Utterance, int],
+        begin: Optional[float] = None,
+        end: Optional[float] = None,
     ):
         if not self.corpus_model.editable:
             return
+        if isinstance(utterance, int):
+            if utterance not in self.reversed_indices:
+                return
+            utterance = self.utterances[self.reversed_indices[utterance]]
+
         if utterance.begin == begin and utterance.end == end:
             return
         self.addCommand.emit(undo.UpdateUtteranceTimesCommand(utterance, begin, end, self))
@@ -578,7 +577,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
     resetView = QtCore.Signal()
     viewChanged = QtCore.Signal(object, object)
     selectionAudioChanged = QtCore.Signal(object)
-    currentUtteranceChanged = QtCore.Signal()
+    currentUtteranceChanged = QtCore.Signal(object)
     speakerRequested = QtCore.Signal(object)
 
     spectrogramReady = QtCore.Signal()
@@ -636,13 +635,22 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
             y = self.model().y[begin_samp:end_samp, self.selected_channel]
         else:
             y = self.model().y[begin_samp:end_samp]
-        spectrogram_worker = workers.SpectrogramWorker(
-            y,
-            self.model().file.sound_file.sample_rate,
-            self.min_time,
-            self.max_time,
-            self.selected_channel,
-        )
+        if self.settings.value(self.settings.SPECTRAL_FEATURES) == "mfcc":
+            spectrogram_worker = workers.MfccWorker(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+            )
+        else:
+            spectrogram_worker = workers.SpectrogramWorker(
+                y,
+                self.model().file.sound_file.sample_rate,
+                self.min_time,
+                self.max_time,
+                self.selected_channel,
+            )
         spectrogram_worker.signals.result.connect(self.finalize_loading_spectrogram)
         self.thread_pool.start(spectrogram_worker)
 
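SPECTRAL_FEATURES is a new settings toggle: the detail view can now render MFCC features instead of a spectrogram. Roughly, the two feature views correspond to the following (a librosa-based sketch for illustration; Anchor's workers compute these differently, and the file name is hypothetical):

# Illustrative sketch of the two feature types, with librosa as a stand-in.
import librosa
import numpy as np

y, sr = librosa.load("utterance.wav", sr=None)
log_spec = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)  # spectrogram view
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)                       # MFCC view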
@@ -865,7 +873,7 @@ class FileSelectionModel(QtCore.QItemSelectionModel):
     def model(self) -> FileUtterancesModel:
         return super().model()
 
-    def set_view_times(self, begin, end):
+    def set_view_times(self, begin, end, new_file=False):
         begin = max(begin, 0)
         end = min(end, self.model().file.duration)
         if (begin, end) == (self.min_time, self.max_time):
@@ -882,7 +890,8 @@
             and not self.min_time <= self.selected_max_time <= self.max_time
         ):
             self.selected_max_time = None
-        self.view_change_timer.start()
+        if not new_file:
+            self.view_change_timer.start()
 
     def send_selection_update(self):
         self.viewChanged.emit(self.min_time, self.max_time)
@@ -902,7 +911,7 @@
         else:
             self.finalize_set_new_file()
         self.speakerRequested.emit(speaker_id)
-        self.set_view_times(begin, end)
+        self.set_view_times(begin, end, new_file=True)
 
     def finalize_set_new_file(self):
         if self.requested_utterance_id is None:
@@ -930,16 +939,18 @@
             return
         flags = QtCore.QItemSelectionModel.SelectionFlag.Rows
         flags |= QtCore.QItemSelectionModel.SelectionFlag.Select
+        current_index = None
         for u in utterances:
             if u.id not in self.model().reversed_indices:
                 continue
+            current_index = u.id
             row = self.model().reversed_indices[u.id]
 
             index = self.model().index(row, 0)
             if not index.isValid():
                 return
             self.select(index, flags)
-        self.currentUtteranceChanged.emit()
+        self.currentUtteranceChanged.emit(current_index)
 
     def update_select(self, utterance_id: int, deselect=False, reset=False):
         if reset and [x.id for x in self.selected_utterances()] == [utterance_id]:
@@ -961,7 +972,7 @@
         self.select(index, flags)
         if not deselect:
             self.select_audio(self.model().utterances[row].begin, self.model().utterances[row].end)
-        self.currentUtteranceChanged.emit()
+        self.currentUtteranceChanged.emit(utterance_id)
 
 
 class CorpusSelectionModel(QtCore.QItemSelectionModel):
@@ -992,6 +1003,7 @@ class CorpusSelectionModel(QtCore.QItemSelectionModel):
         # self.selectionChanged.connect(self.update_selection_audio)
         # self.model().newResults.connect(self.check_selection)
         self.model().unlockCorpus.connect(self.fileChanged.emit)
+        self.model().layoutChanged.connect(self._update_selection)
 
     def set_current_utterance(self, utterance_id):
         self.current_utterance_id = utterance_id
@@ -1078,8 +1090,10 @@
         if not index.isValid():
             return
         m = self.model()
-        self.current_utterance_id = m._indices[index.row()]
-        self.currentUtteranceChanged.emit()
+        try:
+            self.current_utterance_id = m._indices[index.row()]
+        except IndexError:
+            self.current_utterance_id = None
 
     def selected_utterances(self):
         current_utterance = self.current_utterance_id
@@ -1225,6 +1239,7 @@ class DictionaryTableModel(TableModel):
         self.word_sets = {}
         self.speaker_mapping = {}
         self.phones = []
+        self.graphemes = []
         self.reference_phone_set = set()
         self.custom_mapping = {}
 
@@ -1240,7 +1255,7 @@ class DictionaryTableModel(TableModel):
         except KeyError:
             return True
         if dictionary_id is not None and self.word_sets[dictionary_id]:
-            return word.lower() in self.word_sets[dictionary_id]
+            return unicodedata.normalize("NFKC", word.lower()) in self.word_sets[dictionary_id]
         return True
 
     def lookup_word(self, word: str) -> None:
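The lookup now folds the query word to NFKC before the membership test, so transcriptions that render identically but differ in code points (composed vs. decomposed accents, compatibility forms) resolve to the same dictionary entry, assuming the word sets themselves are stored normalized. A minimal illustration:

# Minimal sketch: visually identical strings can differ in code points;
# NFKC folds them to a single canonical, compatible form.
import unicodedata

decomposed = "cafe\u0301"      # "e" followed by a combining acute accent
composed = "caf\u00e9"         # precomposed "é"
assert decomposed != composed  # different code points, same rendering
assert unicodedata.normalize("NFKC", decomposed) == composed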
@@ -1270,6 +1285,19 @@
         if self.corpus_model.corpus.position_dependent_phones:
             phones = sorted(set(x.rsplit("_", maxsplit=1)[0] for x in phones))
         self.phones = phones
+        specials = self.corpus_model.corpus.specials_set
+        specials.update(
+            [
+                "#0",
+                "<space>",
+            ]
+        )
+        self.graphemes = [
+            x
+            for x, in self.corpus_model.session.query(Grapheme.grapheme).filter(
+                ~Grapheme.grapheme.in_(specials)
+            )
+        ]
 
     def flags(
         self, index: Union[QtCore.QModelIndex, QtCore.QPersistentModelIndex]
@@ -1339,7 +1367,7 @@
                 continue
             existing_pronunciations.add(self._data[r][2])
         candidates = self.g2p_generator.rewriter(word)
-        for c in candidates:
+        for c, _ in candidates:
             if c in existing_pronunciations:
                 continue
             pronunciation = c
@@ -1969,6 +1997,8 @@ class CorpusModel(TableModel):
     filesSaved = QtCore.Signal()
     dictionarySaved = QtCore.Signal()
     selectionRequested = QtCore.Signal(object)
+    transcribeRequested = QtCore.Signal(object)
+    alignRequested = QtCore.Signal(object)
     requestFileView = QtCore.Signal(object)
     utteranceTextUpdated = QtCore.Signal(object, object)
     refreshUtteranceText = QtCore.Signal(object, object)
@@ -2061,6 +2091,7 @@ class CorpusModel(TableModel):
         self.has_per_speaker_transcribed_alignments = False
         self.has_transcript_verification_alignments = False
         self.latest_alignment_workflow = None
+        self.language = None
 
     def update_latest_alignment_workflow(self):
         with self.corpus.session() as session: