Anchor-annotator 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/METADATA +1 -1
- Anchor_annotator-0.2.0.dist-info/RECORD +21 -0
- anchor/_version.py +2 -2
- anchor/main.py +44 -22
- anchor/models.py +821 -468
- anchor/plot.py +428 -399
- anchor/undo.py +173 -128
- anchor/widgets.py +35 -44
- anchor/workers.py +43 -17
- Anchor_annotator-0.1.0.dist-info/RECORD +0 -21
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/LICENSE +0 -0
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/WHEEL +0 -0
- {Anchor_annotator-0.1.0.dist-info → Anchor_annotator-0.2.0.dist-info}/top_level.txt +0 -0
anchor/widgets.py
CHANGED
@@ -25,6 +25,8 @@ from anchor.models import (
|
|
25
25
|
CorpusSelectionModel,
|
26
26
|
DiarizationModel,
|
27
27
|
DictionaryTableModel,
|
28
|
+
FileSelectionModel,
|
29
|
+
FileUtterancesModel,
|
28
30
|
OovModel,
|
29
31
|
SpeakerModel,
|
30
32
|
TextFilterQuery,
|
@@ -61,7 +63,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
61
63
|
self.max_time = None
|
62
64
|
self.start_load_time = None
|
63
65
|
self.min_time = None
|
64
|
-
self.corpus_model = None
|
65
66
|
self.selection_model = None
|
66
67
|
self.timer = QtCore.QTimer(self)
|
67
68
|
self.timer.setInterval(1)
|
@@ -78,7 +79,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
78
79
|
self._audio_output.setDevice(self.devices.defaultAudioOutput())
|
79
80
|
self.setAudioOutput(self._audio_output)
|
80
81
|
self.playbackStateChanged.connect(self.reset_position)
|
81
|
-
self.mediaStatusChanged.connect(self.update_load)
|
82
82
|
self.fade_in_anim = QtCore.QPropertyAnimation(self._audio_output, b"volume")
|
83
83
|
self.fade_in_anim.setDuration(10)
|
84
84
|
self.fade_in_anim.setStartValue(0.1)
|
@@ -95,11 +95,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
95
95
|
self.fade_out_anim.finished.connect(super().pause)
|
96
96
|
self.file_path = None
|
97
97
|
|
98
|
-
def update_load(self, state):
|
99
|
-
if state == self.MediaStatus.LoadedMedia:
|
100
|
-
self.reset_position()
|
101
|
-
self.audioReady.emit(True)
|
102
|
-
|
103
98
|
def handle_error(self, *args):
|
104
99
|
print("ERROR")
|
105
100
|
print(args)
|
@@ -118,12 +113,22 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
118
113
|
self.fade_in_anim.start()
|
119
114
|
|
120
115
|
def startTime(self):
|
121
|
-
if
|
116
|
+
if (
|
117
|
+
self.selection_model.selected_min_time is not None
|
118
|
+
and self.selection_model.min_time
|
119
|
+
<= self.selection_model.selected_min_time
|
120
|
+
<= self.selection_model.max_time
|
121
|
+
):
|
122
122
|
return self.selection_model.selected_min_time
|
123
123
|
return self.selection_model.min_time
|
124
124
|
|
125
125
|
def maxTime(self):
|
126
|
-
if
|
126
|
+
if (
|
127
|
+
self.selection_model.selected_max_time is not None
|
128
|
+
and self.selection_model.min_time
|
129
|
+
<= self.selection_model.selected_max_time
|
130
|
+
<= self.selection_model.max_time
|
131
|
+
):
|
127
132
|
return self.selection_model.selected_max_time
|
128
133
|
return self.selection_model.max_time
|
129
134
|
|
@@ -149,14 +154,10 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
149
154
|
break
|
150
155
|
self._audio_output.setDevice(o)
|
151
156
|
|
152
|
-
def
|
153
|
-
|
154
|
-
):
|
155
|
-
self.corpus_model = corpus_model
|
156
|
-
self.selection_model = selection_model
|
157
|
-
if corpus_model is None:
|
157
|
+
def set_models(self, selection_model: Optional[FileSelectionModel]):
|
158
|
+
if selection_model is None:
|
158
159
|
return
|
159
|
-
|
160
|
+
self.selection_model = selection_model
|
160
161
|
self.selection_model.fileChanged.connect(self.loadNewFile)
|
161
162
|
self.selection_model.viewChanged.connect(self.update_times)
|
162
163
|
self.selection_model.selectionAudioChanged.connect(self.update_selection_times)
|
@@ -187,29 +188,27 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
187
188
|
self.setCurrentTime(self.startTime())
|
188
189
|
|
189
190
|
def update_times(self):
|
190
|
-
if (
|
191
|
-
self.
|
192
|
-
|
193
|
-
or self.currentTime() > self.maxTime()
|
194
|
-
):
|
191
|
+
if self.currentTime() < self.startTime() or self.currentTime() > self.maxTime():
|
192
|
+
self.stop()
|
193
|
+
if self.playbackState() != QtMultimedia.QMediaPlayer.PlaybackState.PlayingState:
|
195
194
|
self.setCurrentTime(self.startTime())
|
196
195
|
|
197
196
|
def loadNewFile(self, *args):
|
198
197
|
self.audioReady.emit(False)
|
199
198
|
self.stop()
|
200
199
|
try:
|
201
|
-
new_file = self.selection_model.
|
200
|
+
new_file = self.selection_model.model().file.sound_file.sound_file_path
|
202
201
|
except Exception:
|
203
202
|
self.setSource(QtCore.QUrl())
|
204
203
|
return
|
205
204
|
if (
|
206
205
|
self.selection_model.max_time is None
|
207
|
-
or self.selection_model.
|
208
|
-
or self.selection_model.
|
206
|
+
or self.selection_model.model().file is None
|
207
|
+
or self.selection_model.model().file.duration is None
|
209
208
|
):
|
210
209
|
self.setSource(QtCore.QUrl())
|
211
210
|
return
|
212
|
-
self.channels = self.selection_model.
|
211
|
+
self.channels = self.selection_model.model().file.num_channels
|
213
212
|
self.setSource(f"file:///{new_file}")
|
214
213
|
self.setPosition(0)
|
215
214
|
self.audioReady.emit(True)
|
@@ -218,19 +217,6 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
218
217
|
pos = self.position()
|
219
218
|
return pos / 1000
|
220
219
|
|
221
|
-
def setMaxTime(self, max_time):
|
222
|
-
if max_time is None:
|
223
|
-
return
|
224
|
-
self.max_time = max_time * 1000
|
225
|
-
|
226
|
-
def setMinTime(
|
227
|
-
self, min_time
|
228
|
-
): # Positions for MediaPlayer are in milliseconds, no SR required
|
229
|
-
if min_time is None:
|
230
|
-
min_time = 0
|
231
|
-
self.min_time = int(min_time * 1000)
|
232
|
-
self.setCurrentTime(min_time)
|
233
|
-
|
234
220
|
def setCurrentTime(self, time):
|
235
221
|
if time is None:
|
236
222
|
time = 0
|
@@ -245,7 +231,7 @@ class MediaPlayer(QtMultimedia.QMediaPlayer): # pragma: no cover
|
|
245
231
|
self.stop()
|
246
232
|
self.setSource(
|
247
233
|
QtCore.QUrl.fromLocalFile(
|
248
|
-
self.selection_model.
|
234
|
+
self.selection_model.model().file.sound_file.sound_file_path
|
249
235
|
)
|
250
236
|
)
|
251
237
|
self.play()
|
@@ -793,6 +779,7 @@ class UtteranceDetailWidget(QtWidgets.QWidget): # pragma: no cover
|
|
793
779
|
self.settings = AnchorSettings()
|
794
780
|
self.setAttribute(QtCore.Qt.WidgetAttribute.WA_StyledBackground, True)
|
795
781
|
self.corpus_model = None
|
782
|
+
self.file_model = None
|
796
783
|
self.selection_model = None
|
797
784
|
self.dictionary_model = None
|
798
785
|
self.plot_widget = UtteranceView(self)
|
@@ -829,24 +816,28 @@ class UtteranceDetailWidget(QtWidgets.QWidget): # pragma: no cover
|
|
829
816
|
def set_models(
|
830
817
|
self,
|
831
818
|
corpus_model: CorpusModel,
|
832
|
-
|
819
|
+
file_model: FileUtterancesModel,
|
820
|
+
selection_model: FileSelectionModel,
|
833
821
|
dictionary_model: DictionaryTableModel,
|
834
822
|
):
|
835
823
|
self.corpus_model = corpus_model
|
824
|
+
self.file_model = file_model
|
836
825
|
self.selection_model = selection_model
|
837
826
|
self.dictionary_model = dictionary_model
|
838
827
|
self.corpus_model.textFilterChanged.connect(self.plot_widget.set_search_term)
|
839
828
|
self.selection_model.viewChanged.connect(self.update_to_slider)
|
840
829
|
self.selection_model.fileChanged.connect(self.update_to_slider)
|
841
|
-
self.plot_widget.set_models(
|
830
|
+
self.plot_widget.set_models(
|
831
|
+
corpus_model, file_model, selection_model, self.dictionary_model
|
832
|
+
)
|
842
833
|
|
843
834
|
def update_to_slider(self):
|
844
835
|
with QtCore.QSignalBlocker(self.scroll_bar):
|
845
|
-
if self.selection_model.
|
836
|
+
if self.selection_model.model().file is None or self.selection_model.min_time is None:
|
846
837
|
return
|
847
838
|
if (
|
848
839
|
self.selection_model.min_time == 0
|
849
|
-
and self.selection_model.max_time == self.selection_model.
|
840
|
+
and self.selection_model.max_time == self.selection_model.model().file.duration
|
850
841
|
):
|
851
842
|
self.scroll_bar.setPageStep(10)
|
852
843
|
self.scroll_bar.setEnabled(False)
|
@@ -854,7 +845,7 @@ class UtteranceDetailWidget(QtWidgets.QWidget): # pragma: no cover
|
|
854
845
|
self.pan_right_button.setEnabled(False)
|
855
846
|
self.scroll_bar.setMaximum(0)
|
856
847
|
return
|
857
|
-
duration_ms = int(self.selection_model.
|
848
|
+
duration_ms = int(self.selection_model.model().file.duration * 1000)
|
858
849
|
begin = self.selection_model.min_time * 1000
|
859
850
|
end = self.selection_model.max_time * 1000
|
860
851
|
window_size_ms = int(end - begin)
|
anchor/workers.py
CHANGED
@@ -19,7 +19,8 @@ import dataclassy
|
|
19
19
|
import librosa
|
20
20
|
import numpy as np
|
21
21
|
import psycopg2.errors
|
22
|
-
import
|
22
|
+
import scipy
|
23
|
+
import scipy.signal
|
23
24
|
import soundfile
|
24
25
|
import sqlalchemy
|
25
26
|
import tqdm
|
@@ -28,6 +29,7 @@ from _kalpy.feat import compute_pitch
|
|
28
29
|
from _kalpy.ivector import Plda, ivector_normalize_length
|
29
30
|
from _kalpy.matrix import DoubleVector, FloatVector
|
30
31
|
from kalpy.feat.pitch import PitchComputer
|
32
|
+
from line_profiler_pycharm import profile
|
31
33
|
from montreal_forced_aligner import config
|
32
34
|
from montreal_forced_aligner.alignment import PretrainedAligner
|
33
35
|
from montreal_forced_aligner.config import IVECTOR_DIMENSION, XVECTOR_DIMENSION
|
@@ -3081,6 +3083,7 @@ class SpeakerTierWorker(FunctionWorker): # pragma: no cover
|
|
3081
3083
|
super().__init__("Generating speaker tier", *args)
|
3082
3084
|
self.query_alignment = False
|
3083
3085
|
self.session = None
|
3086
|
+
self.file_id = None
|
3084
3087
|
|
3085
3088
|
def set_params(self, file_id):
|
3086
3089
|
with self.lock:
|
@@ -3090,21 +3093,31 @@ class SpeakerTierWorker(FunctionWorker): # pragma: no cover
|
|
3090
3093
|
if self.session is None:
|
3091
3094
|
return
|
3092
3095
|
self.stopped.clear()
|
3093
|
-
with self.lock:
|
3094
|
-
|
3095
|
-
|
3096
|
+
with self.lock, self.session() as session:
|
3097
|
+
show_phones = (
|
3098
|
+
self.settings.value(self.settings.TIER_ALIGNED_PHONES_VISIBLE)
|
3099
|
+
or self.settings.value(self.settings.TIER_TRANSCRIBED_PHONES_VISIBLE)
|
3100
|
+
or self.settings.value(self.settings.TIER_REFERENCE_PHONES_VISIBLE)
|
3096
3101
|
)
|
3102
|
+
show_words = self.settings.value(
|
3103
|
+
self.settings.TIER_ALIGNED_WORDS_VISIBLE
|
3104
|
+
) or self.settings.value(self.settings.TIER_TRANSCRIBED_WORDS_VISIBLE)
|
3105
|
+
utterances = session.query(Utterance)
|
3097
3106
|
if self.query_alignment:
|
3098
|
-
|
3099
|
-
|
3100
|
-
|
3101
|
-
|
3102
|
-
|
3103
|
-
|
3104
|
-
|
3105
|
-
|
3106
|
-
|
3107
|
-
|
3107
|
+
if show_phones:
|
3108
|
+
utterances = utterances.options(
|
3109
|
+
selectinload(Utterance.phone_intervals).options(
|
3110
|
+
joinedload(PhoneInterval.phone, innerjoin=True),
|
3111
|
+
joinedload(PhoneInterval.workflow, innerjoin=True),
|
3112
|
+
)
|
3113
|
+
)
|
3114
|
+
if show_words:
|
3115
|
+
utterances = utterances.options(
|
3116
|
+
selectinload(Utterance.word_intervals).options(
|
3117
|
+
joinedload(WordInterval.word, innerjoin=True),
|
3118
|
+
joinedload(WordInterval.workflow, innerjoin=True),
|
3119
|
+
),
|
3120
|
+
)
|
3108
3121
|
utterances = utterances.filter(Utterance.file_id == self.file_id).order_by(
|
3109
3122
|
Utterance.begin
|
3110
3123
|
)
|
@@ -3138,6 +3151,7 @@ class SpectrogramWorker(FunctionWorker): # pragma: no cover
|
|
3138
3151
|
self.end = end
|
3139
3152
|
self.channel = channel
|
3140
3153
|
|
3154
|
+
@profile
|
3141
3155
|
def run(self):
|
3142
3156
|
self.stopped.clear()
|
3143
3157
|
dynamic_range = self.settings.value(self.settings.SPEC_DYNAMIC_RANGE)
|
@@ -3146,12 +3160,18 @@ class SpectrogramWorker(FunctionWorker): # pragma: no cover
|
|
3146
3160
|
window_size = self.settings.value(self.settings.SPEC_WINDOW_SIZE)
|
3147
3161
|
pre_emph_coeff = self.settings.value(self.settings.SPEC_PREEMPH)
|
3148
3162
|
max_freq = self.settings.value(self.settings.SPEC_MAX_FREQ)
|
3163
|
+
if self.y.shape[0] == 0:
|
3164
|
+
return
|
3165
|
+
duration = self.y.shape[0] / self.sample_rate
|
3166
|
+
if duration > 30:
|
3167
|
+
return
|
3149
3168
|
with self.lock:
|
3150
|
-
if self.y.shape[0] == 0:
|
3151
|
-
return
|
3152
3169
|
max_sr = 2 * max_freq
|
3153
3170
|
if self.sample_rate > max_sr:
|
3154
|
-
self.y =
|
3171
|
+
self.y = scipy.signal.resample(
|
3172
|
+
self.y, int(self.y.shape[0] * max_sr / self.sample_rate)
|
3173
|
+
)
|
3174
|
+
# self.y = resampy.resample(self.y, self.sample_rate, max_sr, filter='kaiser_fast')
|
3155
3175
|
self.sample_rate = max_sr
|
3156
3176
|
self.y = librosa.effects.preemphasis(self.y, coef=pre_emph_coeff)
|
3157
3177
|
if self.stopped.is_set():
|
@@ -3225,6 +3245,10 @@ class PitchWorker(FunctionWorker): # pragma: no cover
|
|
3225
3245
|
max_f0=self.max_f0,
|
3226
3246
|
penalty_factor=self.penalty_factor,
|
3227
3247
|
delta_pitch=self.delta_pitch,
|
3248
|
+
add_pov_feature=True,
|
3249
|
+
add_normalized_log_pitch=False,
|
3250
|
+
add_delta_pitch=False,
|
3251
|
+
add_raw_log_pitch=True,
|
3228
3252
|
)
|
3229
3253
|
|
3230
3254
|
def run(self):
|
@@ -3232,6 +3256,8 @@ class PitchWorker(FunctionWorker): # pragma: no cover
|
|
3232
3256
|
with self.lock:
|
3233
3257
|
if self.y.shape[0] == 0:
|
3234
3258
|
return
|
3259
|
+
if self.end - self.begin < 0.1:
|
3260
|
+
return
|
3235
3261
|
pitch_track = compute_pitch(
|
3236
3262
|
self.y, self.pitch_computer.extraction_opts, self.pitch_computer.process_opts
|
3237
3263
|
).numpy()
|
@@ -1,21 +0,0 @@
|
|
1
|
-
anchor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
anchor/__main__.py,sha256=5ufG8lcx2x1am-04xI991AG7saJd24dxPw5JzjmB878,45
|
3
|
-
anchor/_version.py,sha256=IMl2Pr_Sy4LVRKy_Sm4CdwUl1Gryous6ncL96EMYsnM,411
|
4
|
-
anchor/command_line.py,sha256=xvuCWaPWNVZTg5Ic28qbOYsOLaFbodhBsoZHKJSBazs,482
|
5
|
-
anchor/db.py,sha256=ef4lO6HtCKoxC9CorIc0ZbPxKpjHa576a0ZIBOWNU9E,4956
|
6
|
-
anchor/main.py,sha256=GOol2yC_57qrJ-uTtvISGAlrZ5cMojcMq9puUVohojc,113324
|
7
|
-
anchor/models.py,sha256=lWXlKzH9xGhdNbFgob7XZy2CGYZXAPoiIIP8Dmhqt-o,75130
|
8
|
-
anchor/plot.py,sha256=CUAcUsPpX9Ja4PINTQN08gfuT_x27bK2kIkkAyH69-A,106870
|
9
|
-
anchor/resources_rc.py,sha256=sQ6GvAK3NTVR5bvgR9jCWDeepSYOhEph2mg1ECxnMOs,3560262
|
10
|
-
anchor/settings.py,sha256=SJ9-5xjThJp3-zl99OBLWLSXZmsyUU1JNsgGWHlkJS8,46649
|
11
|
-
anchor/ui_error_dialog.py,sha256=c_QS0s1VaJEV9AhcrQZQyWHHpUPudWjJY1NI7Ytipio,3832
|
12
|
-
anchor/ui_main_window.py,sha256=aEABdKi1Eb1c2MKUsbCbufEp1lKSLFNsC9TPX244UPI,64618
|
13
|
-
anchor/ui_preferences.py,sha256=uer2Xzyq26j-5wwbIKKcK8YEe2w7OFJPXfWSkKcPWhI,40146
|
14
|
-
anchor/undo.py,sha256=rVus-7HC9wPIiab3dUxIeNGK7jWOMSVmDvCFEwU-408,33163
|
15
|
-
anchor/widgets.py,sha256=CkFsF1Iuck79lQSnszouLNt_MOPMp35zpL0dzQR2l1o,135702
|
16
|
-
anchor/workers.py,sha256=0kytaQYryib3hm0qDFuZpnLIfLC-HeaZY6zV6PYTxr4,169699
|
17
|
-
Anchor_annotator-0.1.0.dist-info/LICENSE,sha256=C0oIsblENEgWQ7XMNdYoXyXsIA5wa3YF0I9lK3H7A1s,1076
|
18
|
-
Anchor_annotator-0.1.0.dist-info/METADATA,sha256=zS_ndUVMaJxv4bBGpZgHQcwTjQSTqLh1KYYWag9h5Ds,1500
|
19
|
-
Anchor_annotator-0.1.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
20
|
-
Anchor_annotator-0.1.0.dist-info/top_level.txt,sha256=wX6ZKxImGRZKFQjs3f6XYw_TfbAp6Xs3SmbLfLbFAJ0,7
|
21
|
-
Anchor_annotator-0.1.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|