Anchor-annotator 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
anchor/widgets.py CHANGED
@@ -25,6 +25,8 @@ from anchor.models import (
25
25
  CorpusSelectionModel,
26
26
  DiarizationModel,
27
27
  DictionaryTableModel,
28
+ FileSelectionModel,
29
+ FileUtterancesModel,
28
30
  OovModel,
29
31
  SpeakerModel,
30
32
  TextFilterQuery,
@@ -61,7 +63,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
61
63
  self.max_time = None
62
64
  self.start_load_time = None
63
65
  self.min_time = None
64
- self.corpus_model = None
65
66
  self.selection_model = None
66
67
  self.timer = QtCore.QTimer(self)
67
68
  self.timer.setInterval(1)
@@ -78,7 +79,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
78
79
  self._audio_output.setDevice(self.devices.defaultAudioOutput())
79
80
  self.setAudioOutput(self._audio_output)
80
81
  self.playbackStateChanged.connect(self.reset_position)
81
- self.mediaStatusChanged.connect(self.update_load)
82
82
  self.fade_in_anim = QtCore.QPropertyAnimation(self._audio_output, b"volume")
83
83
  self.fade_in_anim.setDuration(10)
84
84
  self.fade_in_anim.setStartValue(0.1)
@@ -95,11 +95,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
95
95
  self.fade_out_anim.finished.connect(super().pause)
96
96
  self.file_path = None
97
97
 
98
- def update_load(self, state):
99
- if state == self.MediaStatus.LoadedMedia:
100
- self.reset_position()
101
- self.audioReady.emit(True)
102
-
103
98
  def handle_error(self, *args):
104
99
  print("ERROR")
105
100
  print(args)
@@ -118,12 +113,22 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
118
113
  self.fade_in_anim.start()
119
114
 
120
115
  def startTime(self):
121
- if self.selection_model.selected_min_time is not None:
116
+ if (
117
+ self.selection_model.selected_min_time is not None
118
+ and self.selection_model.min_time
119
+ <= self.selection_model.selected_min_time
120
+ <= self.selection_model.max_time
121
+ ):
122
122
  return self.selection_model.selected_min_time
123
123
  return self.selection_model.min_time
124
124
 
125
125
  def maxTime(self):
126
- if self.selection_model.selected_max_time is not None:
126
+ if (
127
+ self.selection_model.selected_max_time is not None
128
+ and self.selection_model.min_time
129
+ <= self.selection_model.selected_max_time
130
+ <= self.selection_model.max_time
131
+ ):
127
132
  return self.selection_model.selected_max_time
128
133
  return self.selection_model.max_time
129
134
 
@@ -149,14 +154,10 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
149
154
  break
150
155
  self._audio_output.setDevice(o)
151
156
 
152
- def set_corpus_models(
153
- self, corpus_model: Optional[CorpusModel], selection_model: Optional[CorpusSelectionModel]
154
- ):
155
- self.corpus_model = corpus_model
156
- self.selection_model = selection_model
157
- if corpus_model is None:
157
+ def set_models(self, selection_model: Optional[FileSelectionModel]):
158
+ if selection_model is None:
158
159
  return
159
- # self.selection_model.fileAboutToChange.connect(self.unload_file)
160
+ self.selection_model = selection_model
160
161
  self.selection_model.fileChanged.connect(self.loadNewFile)
161
162
  self.selection_model.viewChanged.connect(self.update_times)
162
163
  self.selection_model.selectionAudioChanged.connect(self.update_selection_times)
@@ -187,29 +188,27 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
187
188
  self.setCurrentTime(self.startTime())
188
189
 
189
190
  def update_times(self):
190
- if (
191
- self.playbackState() == QtMultimedia.QMediaPlayer.PlaybackState.StoppedState
192
- or self.currentTime() < self.startTime()
193
- or self.currentTime() > self.maxTime()
194
- ):
191
+ if self.currentTime() < self.startTime() or self.currentTime() > self.maxTime():
192
+ self.stop()
193
+ if self.playbackState() != QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
195
194
  self.setCurrentTime(self.startTime())
196
195
 
197
196
  def loadNewFile(self, *args):
198
197
  self.audioReady.emit(False)
199
198
  self.stop()
200
199
  try:
201
- new_file = self.selection_model.current_file.sound_file.sound_file_path
200
+ new_file = self.selection_model.model().file.sound_file.sound_file_path
202
201
  except Exception:
203
202
  self.setSource(QtCore.QUrl())
204
203
  return
205
204
  if (
206
205
  self.selection_model.max_time is None
207
- or self.selection_model.current_file is None
208
- or self.selection_model.current_file.duration is None
206
+ or self.selection_model.model().file is None
207
+ or self.selection_model.model().file.duration is None
209
208
  ):
210
209
  self.setSource(QtCore.QUrl())
211
210
  return
212
- self.channels = self.selection_model.current_file.num_channels
211
+ self.channels = self.selection_model.model().file.num_channels
213
212
  self.setSource(f"file:///{new_file}")
214
213
  self.setPosition(0)
215
214
  self.audioReady.emit(True)
@@ -218,19 +217,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
218
217
  pos = self.position()
219
218
  return pos / 1000
220
219
 
221
- def setMaxTime(self, max_time):
222
- if max_time is None:
223
- return
224
- self.max_time = max_time * 1000
225
-
226
- def setMinTime(
227
- self, min_time
228
- ): # Positions for MediaPlayer are in milliseconds, no SR required
229
- if min_time is None:
230
- min_time = 0
231
- self.min_time = int(min_time * 1000)
232
- self.setCurrentTime(min_time)
233
-
234
220
  def setCurrentTime(self, time):
235
221
  if time is None:
236
222
  time = 0
@@ -245,7 +231,7 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
245
231
  self.stop()
246
232
  self.setSource(
247
233
  QtCore.QUrl.fromLocalFile(
248
- self.selection_model.current_file.sound_file.sound_file_path
234
+ self.selection_model.model().file.sound_file.sound_file_path
249
235
  )
250
236
  )
251
237
  self.play()
@@ -793,6 +779,7 @@ class UtteranceDetailWidget(QtWidgets.QWidget): # pragma: no cover
793
779
  self.settings = AnchorSettings()
794
780
  self.setAttribute(QtCore.Qt.WidgetAttribute.WA_StyledBackground, True)
795
781
  self.corpus_model = None
782
+ self.file_model = None
796
783
  self.selection_model = None
797
784
  self.dictionary_model = None
798
785
  self.plot_widget = UtteranceView(self)
@@ -829,24 +816,28 @@ class UtteranceDetailWidget(QtWidgets.QWidget): # pragma: no cover
829
816
  def set_models(
830
817
  self,
831
818
  corpus_model: CorpusModel,
832
- selection_model: CorpusSelectionModel,
819
+ file_model: FileUtterancesModel,
820
+ selection_model: FileSelectionModel,
833
821
  dictionary_model: DictionaryTableModel,
834
822
  ):
835
823
  self.corpus_model = corpus_model
824
+ self.file_model = file_model
836
825
  self.selection_model = selection_model
837
826
  self.dictionary_model = dictionary_model
838
827
  self.corpus_model.textFilterChanged.connect(self.plot_widget.set_search_term)
839
828
  self.selection_model.viewChanged.connect(self.update_to_slider)
840
829
  self.selection_model.fileChanged.connect(self.update_to_slider)
841
- self.plot_widget.set_models(corpus_model, selection_model, self.dictionary_model)
830
+ self.plot_widget.set_models(
831
+ corpus_model, file_model, selection_model, self.dictionary_model
832
+ )
842
833
 
843
834
  def update_to_slider(self):
844
835
  with QtCore.QSignalBlocker(self.scroll_bar):
845
- if self.selection_model.current_file is None or self.selection_model.min_time is None:
836
+ if self.selection_model.model().file is None or self.selection_model.min_time is None:
846
837
  return
847
838
  if (
848
839
  self.selection_model.min_time == 0
849
- and self.selection_model.max_time == self.selection_model.current_file.duration
840
+ and self.selection_model.max_time == self.selection_model.model().file.duration
850
841
  ):
851
842
  self.scroll_bar.setPageStep(10)
852
843
  self.scroll_bar.setEnabled(False)
@@ -854,7 +845,7 @@ class UtteranceDetailWidget(QtWidgets.QWidget): # pragma: no cover
854
845
  self.pan_right_button.setEnabled(False)
855
846
  self.scroll_bar.setMaximum(0)
856
847
  return
857
- duration_ms = int(self.selection_model.current_file.duration * 1000)
848
+ duration_ms = int(self.selection_model.model().file.duration * 1000)
858
849
  begin = self.selection_model.min_time * 1000
859
850
  end = self.selection_model.max_time * 1000
860
851
  window_size_ms = int(end - begin)
anchor/workers.py CHANGED
@@ -19,7 +19,8 @@ import dataclassy
19
19
  import librosa
20
20
  import numpy as np
21
21
  import psycopg2.errors
22
- import resampy
22
+ import scipy
23
+ import scipy.signal
23
24
  import soundfile
24
25
  import sqlalchemy
25
26
  import tqdm
@@ -28,6 +29,7 @@ from _kalpy.feat import compute_pitch
28
29
  from _kalpy.ivector import Plda, ivector_normalize_length
29
30
  from _kalpy.matrix import DoubleVector, FloatVector
30
31
  from kalpy.feat.pitch import PitchComputer
32
+ from line_profiler_pycharm import profile
31
33
  from montreal_forced_aligner import config
32
34
  from montreal_forced_aligner.alignment import PretrainedAligner
33
35
  from montreal_forced_aligner.config import IVECTOR_DIMENSION, XVECTOR_DIMENSION
@@ -3081,6 +3083,7 @@ class SpeakerTierWorker(FunctionWorker): # pragma: no cover
3081
3083
  super().__init__("Generating speaker tier", *args)
3082
3084
  self.query_alignment = False
3083
3085
  self.session = None
3086
+ self.file_id = None
3084
3087
 
3085
3088
  def set_params(self, file_id):
3086
3089
  with self.lock:
@@ -3090,21 +3093,31 @@ class SpeakerTierWorker(FunctionWorker): # pragma: no cover
3090
3093
  if self.session is None:
3091
3094
  return
3092
3095
  self.stopped.clear()
3093
- with self.lock:
3094
- utterances = self.session.query(Utterance).options(
3095
- joinedload(Utterance.speaker, innerjoin=True),
3096
+ with self.lock, self.session() as session:
3097
+ show_phones = (
3098
+ self.settings.value(self.settings.TIER_ALIGNED_PHONES_VISIBLE)
3099
+ or self.settings.value(self.settings.TIER_TRANSCRIBED_PHONES_VISIBLE)
3100
+ or self.settings.value(self.settings.TIER_REFERENCE_PHONES_VISIBLE)
3096
3101
  )
3102
+ show_words = self.settings.value(
3103
+ self.settings.TIER_ALIGNED_WORDS_VISIBLE
3104
+ ) or self.settings.value(self.settings.TIER_TRANSCRIBED_WORDS_VISIBLE)
3105
+ utterances = session.query(Utterance)
3097
3106
  if self.query_alignment:
3098
- utterances = utterances.options(
3099
- selectinload(Utterance.phone_intervals).options(
3100
- joinedload(PhoneInterval.phone, innerjoin=True),
3101
- joinedload(PhoneInterval.workflow, innerjoin=True),
3102
- ),
3103
- selectinload(Utterance.word_intervals).options(
3104
- joinedload(WordInterval.word, innerjoin=True),
3105
- joinedload(WordInterval.workflow, innerjoin=True),
3106
- ),
3107
- )
3107
+ if show_phones:
3108
+ utterances = utterances.options(
3109
+ selectinload(Utterance.phone_intervals).options(
3110
+ joinedload(PhoneInterval.phone, innerjoin=True),
3111
+ joinedload(PhoneInterval.workflow, innerjoin=True),
3112
+ )
3113
+ )
3114
+ if show_words:
3115
+ utterances = utterances.options(
3116
+ selectinload(Utterance.word_intervals).options(
3117
+ joinedload(WordInterval.word, innerjoin=True),
3118
+ joinedload(WordInterval.workflow, innerjoin=True),
3119
+ ),
3120
+ )
3108
3121
  utterances = utterances.filter(Utterance.file_id == self.file_id).order_by(
3109
3122
  Utterance.begin
3110
3123
  )
@@ -3138,6 +3151,7 @@ class SpectrogramWorker(FunctionWorker): # pragma: no cover
3138
3151
  self.end = end
3139
3152
  self.channel = channel
3140
3153
 
3154
+ @profile
3141
3155
  def run(self):
3142
3156
  self.stopped.clear()
3143
3157
  dynamic_range = self.settings.value(self.settings.SPEC_DYNAMIC_RANGE)
@@ -3146,12 +3160,18 @@ class SpectrogramWorker(FunctionWorker): # pragma: no cover
3146
3160
  window_size = self.settings.value(self.settings.SPEC_WINDOW_SIZE)
3147
3161
  pre_emph_coeff = self.settings.value(self.settings.SPEC_PREEMPH)
3148
3162
  max_freq = self.settings.value(self.settings.SPEC_MAX_FREQ)
3163
+ if self.y.shape[0] == 0:
3164
+ return
3165
+ duration = self.y.shape[0] / self.sample_rate
3166
+ if duration > 30:
3167
+ return
3149
3168
  with self.lock:
3150
- if self.y.shape[0] == 0:
3151
- return
3152
3169
  max_sr = 2 * max_freq
3153
3170
  if self.sample_rate > max_sr:
3154
- self.y = resampy.resample(self.y, self.sample_rate, max_sr)
3171
+ self.y = scipy.signal.resample(
3172
+ self.y, int(self.y.shape[0] * max_sr / self.sample_rate)
3173
+ )
3174
+ # self.y = resampy.resample(self.y, self.sample_rate, max_sr, filter='kaiser_fast')
3155
3175
  self.sample_rate = max_sr
3156
3176
  self.y = librosa.effects.preemphasis(self.y, coef=pre_emph_coeff)
3157
3177
  if self.stopped.is_set():
@@ -3225,6 +3245,10 @@ class PitchWorker(FunctionWorker): # pragma: no cover
3225
3245
  max_f0=self.max_f0,
3226
3246
  penalty_factor=self.penalty_factor,
3227
3247
  delta_pitch=self.delta_pitch,
3248
+ add_pov_feature=True,
3249
+ add_normalized_log_pitch=False,
3250
+ add_delta_pitch=False,
3251
+ add_raw_log_pitch=True,
3228
3252
  )
3229
3253
 
3230
3254
  def run(self):
@@ -3232,6 +3256,8 @@ class PitchWorker(FunctionWorker): # pragma: no cover
3232
3256
  with self.lock:
3233
3257
  if self.y.shape[0] == 0:
3234
3258
  return
3259
+ if self.end - self.begin < 0.1:
3260
+ return
3235
3261
  pitch_track = compute_pitch(
3236
3262
  self.y, self.pitch_computer.extraction_opts, self.pitch_computer.process_opts
3237
3263
  ).numpy()
@@ -1,21 +0,0 @@
1
- anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
3
- anchor/_version.py,sha256=IMl2Pr_Sy4LVRKy_Sm4CdwUl1Gryous6ncL96EMYsnM,411
4
- anchor/command_line.py,sha256=xvuCWaPWNVZTg5Ic28qbOYsOLaFbodhBsoZHKJSBazs,482
5
- anchor/db.py,sha256=ef4lO6HtCKoxC9CorIc0ZbPxKpjHa576a0ZIBOWNU9E,4956
6
- anchor/main.py,sha256=GOol2yC_57qrJ-uTtvISGAlrZ5cMojcMq9puUVohojc,113324
7
- anchor/models.py,sha256=lWXlKzH9xGhdNbFgob7XZy2CGYZXAPoiIIP8Dmhqt-o,75130
8
- anchor/plot.py,sha256=CUAcUsPpX9Ja4PINTQN08gfuT_x27bK2kIkkAyH69-A,106870
9
- anchor/resources_rc.py,sha256=sQ6GvAK3NTVR5bvgR9jCWDeepSYOhEph2mg1ECxnMOs,3560262
10
- anchor/settings.py,sha256=SJ9-5xjThJp3-zl99OBLWLSXZmsyUU1JNsgGWHlkJS8,46649
11
- anchor/ui_error_dialog.py,sha256=c_QS0s1VaJEV9AhcrQZQyWHHpUPudWjJY1NI7Ytipio,3832
12
- anchor/ui_main_window.py,sha256=aEABdKi1Eb1c2MKUsbCbufEp1lKSLFNsC9TPX244UPI,64618
13
- anchor/ui_preferences.py,sha256=uer2Xzyq26j-5wwbIKKcK8YEe2w7OFJPXfWSkKcPWhI,40146
14
- anchor/undo.py,sha256=rVus-7HC9wPIiab3dUxIeNGK7jWOMSVmDvCFEwU-408,33163
15
- anchor/widgets.py,sha256=CkFsF1Iuck79lQSnszouLNt_MOPMp35zpL0dzQR2l1o,135702
16
- anchor/workers.py,sha256=0kytaQYryib3hm0qDFuZpnLIfLC-HeaZY6zV6PYTxr4,169699
17
- Anchor_annotator-0.1.0.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
18
- Anchor_annotator-0.1.0.dist-info/METADATA,sha256=zS_ndUVMaJxv4bBGpZgHQcwTjQSTqLh1KYYWag9h5Ds,1500
19
- Anchor_annotator-0.1.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
20
- Anchor_annotator-0.1.0.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
21
- Anchor_annotator-0.1.0.dist-info/RECORD,,