nkululeko 0.92.2__py3-none-any.whl → 0.93.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,8 +6,11 @@ import numpy as np
6
6
  from pyannote.audio import Pipeline
7
7
  import torch
8
8
 
9
+ import audiofile
10
+
9
11
  from nkululeko.feature_extractor import FeatureExtractor
10
12
  import nkululeko.glob_conf as glob_conf
13
+ from nkululeko.utils.files import concat_files
11
14
  from nkululeko.utils.util import Util
12
15
 
13
16
 
@@ -20,7 +23,7 @@ class SIDPredictor:
20
23
  def __init__(self, df):
21
24
  self.df = df
22
25
  self.util = Util("sidPredictor")
23
- hf_token = self.util.config_val("Model", "hf_token", None)
26
+ hf_token = self.util.config_val("MODEL", "hf_token", None)
24
27
  if hf_token is None:
25
28
  self.util.error(
26
29
  "speaker id prediction needs huggingface token: [MODEL][hf_token]"
@@ -29,20 +32,45 @@ class SIDPredictor:
29
32
  "pyannote/speaker-diarization-3.1",
30
33
  use_auth_token=hf_token,
31
34
  )
32
- device = self.util.config_val("Model", "device", "cpu")
35
+ device = self.util.config_val("MODEL", "device", "cpu")
33
36
  self.pipeline.to(torch.device(device))
34
37
 
35
38
  def predict(self, split_selection):
36
39
  self.util.debug(f"estimating speaker id for {split_selection} samples")
37
40
  return_df = self.df.copy()
38
- # @todo
39
41
  # 1) concat all audio files
42
+ tmp_file = "tmp.wav"
43
+ concat_files(return_df.index, tmp_file)
40
44
  # 2) get segmentations with pyannote
41
- # 3) map pyannote segments with orginal ones and assign speaker id
45
+ sname = "pyannotation"
46
+ if self.util.exist_pickle(sname):
47
+ annotation = self.util.from_pickle(sname)
48
+ else:
49
+ annotation = self.pipeline(tmp_file)
50
+ self.util.to_pickle(annotation, sname)
42
51
 
52
+ speakers, starts, ends = [], [], []
53
+ # print the result
54
+ for turn, _, speaker in annotation.itertracks(yield_label=True):
55
+ start = turn.start
56
+ end = turn.end
57
+ speakers.append(speaker)
58
+ starts.append(start)
59
+ ends.append(end)
60
+ # 3) map pyannote segments with orginal ones and assign speaker id
61
+ target_speakers = []
62
+ position = 0
63
+ for idx, (file, start, end) in enumerate(return_df.index.to_list()):
64
+ seg_start = start.total_seconds()
65
+ seg_end = end.total_seconds()
66
+ # file_duration = audiofile.duration(file)
67
+ seg_duration = seg_end - seg_start
68
+ offset = position + seg_start + seg_duration / 2
69
+ l = [i < offset for i in starts]
70
+ r = [i for i, x in enumerate(l) if x]
71
+ s_index = r.pop()
72
+ # self.util.debug(f"offset: {offset}, speaker = {speakers[s_index]}")
73
+ position += seg_duration
74
+ target_speakers.append(speakers[s_index])
75
+ return_df["speaker"] = target_speakers
43
76
  return return_df
44
-
45
- def concat_files(self, df):
46
- pass
47
- # todo
48
- # please use https://audeering.github.io/audiofile/usage.html#read-a-file
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.92.2"
1
+ VERSION="0.93.0"
2
2
  SAMPLING_RATE = 16000
@@ -53,7 +53,7 @@ class Dataset_CSV(Dataset):
53
53
  if audformat.index_type(df.index) == "segmented":
54
54
  file_index = (
55
55
  df.index.levels[0]
56
- .map(lambda x: root + "/" + audio_path + "/" + x)
56
+ .map(lambda x: os.path.join(root, audio_path, x))
57
57
  .values
58
58
  )
59
59
  df = df.set_index(df.index.set_levels(file_index, level="file"))
@@ -62,20 +62,20 @@ class Dataset_CSV(Dataset):
62
62
  df = pd.DataFrame(df)
63
63
  df = df.set_index(
64
64
  df.index.to_series().apply(
65
- lambda x: root + "/" + audio_path + "/" + x
65
+ lambda x: os.path.join(root, audio_path, x)
66
66
  )
67
67
  )
68
68
  else: # absolute path is True
69
69
  if audformat.index_type(df.index) == "segmented":
70
70
  file_index = (
71
- df.index.levels[0].map(lambda x: audio_path + "/" + x).values
71
+ df.index.levels[0].map(lambda x: os.path.join(audio_path, x)).values
72
72
  )
73
73
  df = df.set_index(df.index.set_levels(file_index, level="file"))
74
74
  else:
75
75
  if not isinstance(df, pd.DataFrame):
76
76
  df = pd.DataFrame(df)
77
77
  df = df.set_index(
78
- df.index.to_series().apply(lambda x: audio_path + "/" + x)
78
+ df.index.to_series().apply(lambda x: os.path.join(audio_path, x))
79
79
  )
80
80
 
81
81
  self.df = df
nkululeko/experiment.py CHANGED
@@ -422,9 +422,7 @@ class Experiment:
422
422
  return df_ret
423
423
 
424
424
  def autopredict(self):
425
- """
426
- Predict labels for samples with existing models and add to the dataframe.
427
- """
425
+ """Predict labels for samples with existing models and add to the dataframe."""
428
426
  sample_selection = self.util.config_val("PREDICT", "split", "all")
429
427
  if sample_selection == "all":
430
428
  df = pd.concat([self.df_train, self.df_test])
@@ -1,4 +1,4 @@
1
- """ feats_wav2vec2.py
1
+ """feats_wav2vec2.py.
2
2
  feat_types example = [wav2vec2-large-robust-ft-swbd-300h,
3
3
  wav2vec2-xls-r-2b, wav2vec2-large, wav2vec2-large-xlsr-53, wav2vec2-base]
4
4
 
@@ -11,12 +11,13 @@ import os
11
11
  import pandas as pd
12
12
  import torch
13
13
  import torchaudio
14
- import transformers
15
14
  from tqdm import tqdm
16
- from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
15
+ import transformers
16
+ from transformers import Wav2Vec2FeatureExtractor
17
+ from transformers import Wav2Vec2Model
17
18
 
18
- import nkululeko.glob_conf as glob_conf
19
19
  from nkululeko.feat_extract.featureset import Featureset
20
+ import nkululeko.glob_conf as glob_conf
20
21
 
21
22
 
22
23
  class Wav2vec2(Featureset):
@@ -8,10 +8,11 @@ import pandas as pd
8
8
  import torch
9
9
  import torchaudio
10
10
  from tqdm import tqdm
11
- from transformers import Wav2Vec2FeatureExtractor, WavLMModel
11
+ from transformers import Wav2Vec2FeatureExtractor
12
+ from transformers import WavLMModel
12
13
 
13
- import nkululeko.glob_conf as glob_conf
14
14
  from nkululeko.feat_extract.featureset import Featureset
15
+ import nkululeko.glob_conf as glob_conf
15
16
 
16
17
 
17
18
  class Wavlm(Featureset):
nkululeko/utils/files.py CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
- # copied from librosa.util.files.py
3
+ # find_files copied from librosa.util.files.py
4
4
 
5
5
  """Utility functions for dealing with files"""
6
6
  from __future__ import annotations
@@ -8,11 +8,22 @@ from __future__ import annotations
8
8
  import glob
9
9
  import os
10
10
  from pathlib import Path
11
- from typing import Any, List, Optional, Set, Union
11
+ from typing import Any
12
+ from typing import List
13
+ from typing import Optional
14
+ from typing import Set
15
+ from typing import Union
16
+
17
+ import numpy as np
18
+ from tqdm import tqdm
19
+
20
+ import audiofile
21
+
12
22
 
13
23
  # add new function here
14
24
  __all__ = [
15
25
  "find_files",
26
+ "concat_files",
16
27
  ]
17
28
 
18
29
 
@@ -143,3 +154,16 @@ def __get_files(dir_name: Union[str, os.PathLike[Any]], extensions: Set[str]):
143
154
  myfiles |= set(glob.glob(globstr))
144
155
 
145
156
  return myfiles
157
+
158
+
159
def concat_files(index, outfile_path):
    """Concatenate all audio segments of ``index`` into a single file.

    Args:
        index: Index whose entries are ``(file, start, end)`` tuples, with
            ``start`` and ``end`` providing ``total_seconds()``.
        outfile_path: Path of the audio file to write.

    The segments are written back to back in index order. The output uses
    the sampling rate of the last file read (16000 if the index is empty).
    """
    chunks = []
    sr = 16000
    # NOTE(review): sr is overwritten per file; this presumably assumes all
    # files share one sampling rate -- confirm.
    for file, start, end in tqdm(index.to_list()):
        signal, sr = audiofile.read(
            file,
            offset=start.total_seconds(),
            duration=(end - start).total_seconds(),
        )
        chunks.append(signal)
    # Join once at the end instead of re-copying the growing buffer on every
    # iteration; the last axis is the sample axis for mono and multi-channel.
    buffer = np.concatenate(chunks, axis=-1) if chunks else np.asarray([])
    audiofile.write(outfile_path, buffer, sr)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.92.2
3
+ Version: 0.93.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -355,6 +355,10 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
355
355
  Changelog
356
356
  =========
357
357
 
358
+ Version 0.93.0
359
+ --------------
360
+ * integrated pyannote for speaker prediction for predict module
361
+
358
362
  Version 0.92.2
359
363
  --------------
360
364
  * added some output to automatic speaker id
@@ -365,7 +369,7 @@ Version 0.92.1
365
369
 
366
370
  Version 0.92.0
367
371
  --------------
368
- * added first version of automatic speaker prediction/segmentation
372
+ * added first version of automatic speaker prediction for segment module
369
373
 
370
374
  Version 0.91.3
371
375
  --------------
@@ -2,13 +2,13 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
3
3
  nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
4
4
  nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
5
- nkululeko/constants.py,sha256=HBBuhT6kpIHhRMiSBkU07cszGO7kO2A8qTYrN6zH9rw,39
5
+ nkululeko/constants.py,sha256=YWEAJOJi8kWTdeRVHLVLQH66lH0czOfFjt-jbpnGmwY,39
6
6
  nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
7
7
  nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
8
8
  nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
9
9
  nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
10
10
  nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
11
- nkululeko/experiment.py,sha256=h3DS-k6vk5juXa3HJXI7Z4vvnNspO4qj5SJ1o1Z3PIk,31860
11
+ nkululeko/experiment.py,sha256=uU_8WR8JuUD50lgcl_K_BBQYmHMbuwAniWft8bGHuDU,31842
12
12
  nkululeko/explore.py,sha256=Y5lPPychnI-7fyP8zvwVb9P09fvprbUPOofOppuABYQ,3658
13
13
  nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
14
14
  nkululeko/feature_extractor.py,sha256=UnspIWz3XrNhKnBBhWZkH2bHvD-sROtrQVqB1JvkUyw,4088
@@ -43,14 +43,14 @@ nkululeko/autopredict/ap_gender.py,sha256=b6oTqHKVwOnYh4YlKbuMflssS4HJqs_c1ayusa
43
43
  nkululeko/autopredict/ap_mos.py,sha256=e4hmgb0Yf1_AbC5P0CqXJIvufjhbTrqmI5goARxrY0Y,1107
44
44
  nkululeko/autopredict/ap_pesq.py,sha256=mRt3Loucaoy4vJxwfuxUt0fP88bMGvkmrLCEpKEXWp0,1140
45
45
  nkululeko/autopredict/ap_sdr.py,sha256=VQ2UkxOO3ipqYNNjFwKgEaGCk8IzLI5lX_2tZFLIvTY,1188
46
- nkululeko/autopredict/ap_sid.py,sha256=87LXMHzJ8jt2q9dUtPJd_nJi_XOcFoqpbva-BT4UJN0,1393
46
+ nkululeko/autopredict/ap_sid.py,sha256=mCxf2DUOPUlDdnVwCeljFJtCXM4uum1poZQ9RrwHHM8,2641
47
47
  nkululeko/autopredict/ap_snr.py,sha256=AiTU8-7CMEeowmYkMO19lw1HCb1yTXC6KeulNf8gOqw,1110
48
48
  nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwpcnA,1187
49
49
  nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
50
50
  nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
51
51
  nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
52
  nkululeko/data/dataset.py,sha256=Hz2IOsdcESG-P3aP7r4d1xj_gIP6fyGCYOwukoQ7SM8,29321
53
- nkululeko/data/dataset_csv.py,sha256=mb7FpHOmJHxpt1QYVBKveyqJN3MUt30TRfwwJw0TT5c,4816
53
+ nkululeko/data/dataset_csv.py,sha256=p2b4eS5R2Q5zdOIc56NRRU2PTFXSRt0qrdHGafHkWKo,4830
54
54
  nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
55
  nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
56
56
  nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
@@ -71,8 +71,8 @@ nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDc
71
71
  nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
72
72
  nkululeko/feat_extract/feats_squim.py,sha256=yJifsp9kj9iJjW_UAKr3LlvVhX5rv7el4bepn0wN2a8,4578
73
73
  nkululeko/feat_extract/feats_trill.py,sha256=TUCrh5xbfnHD2gzb9mlkMSV4aK6YXazMqsh5xJ5yzUI,3188
74
- nkululeko/feat_extract/feats_wav2vec2.py,sha256=lINWb2rBLXuMzNKV8gKsTke8wuXIF1X4jOu-GMB3aPg,5272
75
- nkululeko/feat_extract/feats_wavlm.py,sha256=kTuxFnymBMYP3t9yAQJjRQ5ul4AiS0O8NXq3z6B9AYs,4731
74
+ nkululeko/feat_extract/feats_wav2vec2.py,sha256=WYB9XlRzgDi8cGSKzhV5jahA0GZ_SiWgaQ25IcEemto,5296
75
+ nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
76
76
  nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
77
77
  nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
78
78
  nkululeko/feat_extract/feinberg_praat.py,sha256=bgzWtQkKbgcygrzwAxDXosui1rcc38qhWuJq9GLr0z8,21308
@@ -109,12 +109,12 @@ nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xe
109
109
  nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILAx3rp5do,4216
110
110
  nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
111
111
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
- nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
112
+ nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
113
113
  nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
114
114
  nkululeko/utils/util.py,sha256=XFZdhCc_LM4EmoZ5tKKaBCQLXclcNmvHwhfT_CXB98c,16723
115
- nkululeko-0.92.2.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
116
- nkululeko-0.92.2.dist-info/METADATA,sha256=pwdxFGECc-W2WdmnXxgJz6Jy3CbvwzeHASfu7WxFK7g,41832
117
- nkululeko-0.92.2.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
118
- nkululeko-0.92.2.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
119
- nkululeko-0.92.2.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
120
- nkululeko-0.92.2.dist-info/RECORD,,
115
+ nkululeko-0.93.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
116
+ nkululeko-0.93.0.dist-info/METADATA,sha256=3q74htqBYa_dfgtZzah5SPDwjG3o2c9sfGBlJK9sfI4,41933
117
+ nkululeko-0.93.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
118
+ nkululeko-0.93.0.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
119
+ nkululeko-0.93.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
120
+ nkululeko-0.93.0.dist-info/RECORD,,