nkululeko 0.93.7__py3-none-any.whl → 0.93.8__py3-none-any.whl

This diff shows the differences between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.93.7"
1
+ VERSION="0.93.8"
2
2
  SAMPLING_RATE = 16000
@@ -18,6 +18,10 @@ class ImportSet(Featureset):
18
18
  def extract(self):
19
19
  """Import the features."""
20
20
  self.util.debug(f"importing features for {self.name}")
21
+ # import_files_append: set to True (the default) to combine multiple imported tables row-wise; otherwise they are combined column-wise
22
+ import_files_append = eval(
23
+ self.util.config_val("FEATS", "import_files_append", "True")
24
+ )
21
25
  try:
22
26
  feat_import_files = self.util.config_val("FEATS", "import_file", False)
23
27
  feat_import_files = ast.literal_eval(feat_import_files)
@@ -38,7 +42,10 @@ class ImportSet(Featureset):
38
42
  df = audformat.utils.read_csv(feat_import_file)
39
43
  df = self.util.make_segmented_index(df)
40
44
  df = df[df.index.isin(self.data_df.index)]
41
- feat_df = pd.concat([feat_df, df])
45
+ if import_files_append:
46
+ feat_df = pd.concat([feat_df, df], axis=0)
47
+ else:
48
+ feat_df = pd.concat([feat_df, df], axis=1)
42
49
  if feat_df.shape[0] == 0:
43
50
  self.util.error(f"Imported features for data set {self.name} not found!")
44
51
  # and assign to be the "official" feature set
@@ -3,15 +3,16 @@ import os
3
3
 
4
4
  import audiofile as af
5
5
  import pandas as pd
6
- import tensorflow as tf
7
- import tensorflow_hub as hub
6
+
7
+ # import tensorflow as tf
8
+ # import tensorflow_hub as hub
8
9
  from tqdm import tqdm
9
10
 
10
11
  import nkululeko.glob_conf as glob_conf
11
12
  from nkululeko.feat_extract.featureset import Featureset
12
13
 
13
14
  # Import TF 2.X and make sure we're running eager.
14
- assert tf.executing_eagerly()
15
+ # assert tf.executing_eagerly()
15
16
 
16
17
 
17
18
  class TRILLset(Featureset):
@@ -39,7 +40,7 @@ class TRILLset(Featureset):
39
40
  "trill.model",
40
41
  "https://tfhub.dev/google/nonsemantic-speech-benchmark/trill/3",
41
42
  )
42
- self.model = hub.load(model_path)
43
+ # self.model = hub.load(model_path)
43
44
  self.feats_type = feats_type
44
45
 
45
46
  def extract(self):
nkululeko/plots.py CHANGED
@@ -242,84 +242,100 @@ class Plots:
242
242
 
243
243
  def _plot2cont_cat(self, df, cont1, cont2, cat, ylab):
244
244
  """Plot relation of two continuous distributions with one categorical."""
245
+ plot_df = df[[cont1, cont2, cat]].copy()
245
246
  if cont2 == "class_label":
246
- df.rename(columns={cont2: self.target})
247
+ plot_df = plot_df.rename(columns={cont2: self.target})
247
248
  cont2 = self.target
248
249
  if cont1 == "class_label":
249
- df.rename(columns={cont1: self.target})
250
+ plot_df = plot_df.rename(columns={cont1: self.target})
250
251
  cont1 = self.target
251
252
  if cat == "class_label":
252
- df.rename(columns={cat: self.target})
253
+ plot_df = plot_df.rename(columns={cat: self.target})
253
254
  cat = self.target
254
- pearson = stats.pearsonr(df[cont1], df[cont2])
255
+ pearson = stats.pearsonr(plot_df[cont1], plot_df[cont2])
255
256
  # trunc to three digits
256
257
  pearson = int(pearson[0] * 1000) / 1000
257
258
  pearson_string = f"PCC: {pearson}"
258
259
  ccc_string = ""
259
260
  if self.with_ccc:
260
- ccc_val = ccc(df[cont1], df[cont2])
261
+ ccc_val = ccc(plot_df[cont1], plot_df[cont2])
261
262
  ccc_val = int(ccc_val * 1000) / 1000
262
263
  ccc_string = f"CCC: {ccc_val}"
263
- ax = sns.lmplot(data=df, x=cont1, y=cont2, hue=cat)
264
- caption = f"{ylab} {df.shape[0]}. {pearson_string} {ccc_string}"
264
+ ax = sns.lmplot(data=plot_df, x=cont1, y=cont2, hue=cat)
265
+ caption = f"{ylab} {plot_df.shape[0]}. {pearson_string} {ccc_string}"
265
266
  ax.figure.suptitle(caption)
266
267
  return ax, caption
267
268
 
268
269
  def _plot2cont(self, df, col1, col2, ylab):
269
270
  """Plot relation of two continuous distributions."""
271
+ plot_df = df[[col1, col2]].copy()
270
272
  # rename "class_label" to the original target
271
273
  if col2 == "class_label":
272
- df.rename(columns={col2: self.target})
274
+ plot_df = plot_df.rename(columns={col2: self.target})
273
275
  col2 = self.target
274
276
  if col1 == "class_label":
275
- df.rename(columns={col1: self.target})
277
+ plot_df = plot_df.rename(columns={col1: self.target})
276
278
  col1 = self.target
277
- pearson = stats.pearsonr(df[col1], df[col2])
279
+ pearson = stats.pearsonr(plot_df[col1], plot_df[col2])
278
280
  # trunc to three digits
279
281
  pearson = int(pearson[0] * 1000) / 1000
280
282
  pearson_string = f"PCC: {pearson}"
281
283
  ccc_string = ""
282
284
  if self.with_ccc:
283
- ccc_val = ccc(df[col1], df[col2])
285
+ ccc_val = ccc(plot_df[col1], plot_df[col2])
284
286
  ccc_val = int(ccc_val * 1000) / 1000
285
287
  ccc_string = f"CCC: {ccc_val}"
286
- ax = sns.lmplot(data=df, x=col1, y=col2)
287
- caption = f"{ylab} {df.shape[0]}. {pearson_string} {ccc_string}"
288
+ ax = sns.lmplot(data=plot_df, x=col1, y=col2)
289
+ caption = f"{ylab} {plot_df.shape[0]}. {pearson_string} {ccc_string}"
288
290
  ax.figure.suptitle(caption)
289
291
  return ax, caption
290
292
 
291
293
  def plotcatcont(self, df, cat_col, cont_col, xlab, ylab):
292
294
  """Plot relation of categorical distribution with continuous."""
293
295
  # rename "class_label" to the original target
296
+ plot_df = df[[cat_col, cont_col]].copy()
294
297
  if cat_col == "class_label":
295
- df.rename(columns={cat_col: self.target})
298
+ plot_df = plot_df.rename(columns={cat_col: self.target})
296
299
  cat_col = self.target
297
300
  dist_type = self.util.config_val("EXPL", "dist_type", "kde")
298
- cats, cat_str, es = su.get_effect_size(df, cat_col, cont_col)
301
+ cats, cat_str, es = su.get_effect_size(plot_df, cat_col, cont_col)
299
302
  model_type = self.util.get_model_type()
300
303
  if dist_type == "hist" and model_type != "tree":
301
- ax = sns.histplot(df, x=cont_col, hue=cat_col, kde=True)
302
- caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
304
+ ax = sns.histplot(plot_df, x=cont_col, hue=cat_col, kde=True)
305
+ caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({cats}):" f" {es}"
303
306
  ax.set_title(caption)
304
307
  ax.set_xlabel(f"{cont_col}")
305
308
  ax.set_ylabel(f"number of {ylab}")
306
309
  else:
307
310
  ax = sns.displot(
308
- df, x=cont_col, hue=cat_col, kind="kde", fill=True, warn_singular=False
311
+ plot_df,
312
+ x=cont_col,
313
+ hue=cat_col,
314
+ kind="kde",
315
+ fill=True,
316
+ warn_singular=False,
309
317
  )
310
318
  ax.set(xlabel=f"{cont_col}")
311
- caption = f"{ylab} {df.shape[0]}. {cat_str} ({cats}):" f" {es}"
319
+ caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({cats}):" f" {es}"
312
320
  ax.figure.suptitle(caption)
313
321
  return ax, caption
314
322
 
315
323
  def _plot2cat(self, df, col1, col2, xlab, ylab):
316
324
  """Plot relation of 2 categorical distributions."""
317
- crosstab = pd.crosstab(index=df[col1], columns=df[col2])
325
+ plot_df = df[[col1, col2]].copy()
326
+ # rename "class_label" to the original target
327
+ if col2 == "class_label":
328
+ plot_df = plot_df.rename(columns={col2: self.target})
329
+ col2 = self.target
330
+ if col1 == "class_label":
331
+ plot_df = plot_df.rename(columns={col1: self.target})
332
+ col1 = self.target
333
+ crosstab = pd.crosstab(index=plot_df[col1], columns=plot_df[col2])
318
334
  res_pval = stats.chi2_contingency(crosstab)
319
335
  res_pval = int(res_pval[1] * 1000) / 1000
320
- caption = f"{ylab} {df.shape[0]}. P-val chi2: {res_pval}"
336
+ caption = f"{ylab} {plot_df.shape[0]}. P-val chi2: {res_pval}"
321
337
  ax = (
322
- df.groupby(col1, observed=False)[col2]
338
+ plot_df.groupby(col1, observed=False)[col2]
323
339
  .value_counts()
324
340
  .unstack()
325
341
  .plot(kind="bar", stacked=True, title=caption, rot=0)
@@ -3,32 +3,28 @@ import glob
3
3
  import json
4
4
  import math
5
5
 
6
+ # import os
7
+ from confidence_intervals import evaluate_with_conf_int
6
8
  import matplotlib.pyplot as plt
7
9
  import numpy as np
10
+ from scipy.special import softmax
11
+ from scipy.stats import entropy
12
+ from scipy.stats import pearsonr
13
+ from sklearn.metrics import ConfusionMatrixDisplay
14
+ from sklearn.metrics import RocCurveDisplay
15
+ from sklearn.metrics import auc
16
+ from sklearn.metrics import classification_report
17
+ from sklearn.metrics import confusion_matrix
18
+ from sklearn.metrics import r2_score
19
+ from sklearn.metrics import roc_auc_score
20
+ from sklearn.metrics import roc_curve
8
21
 
9
22
  # from torch import is_tensor
10
- from audmetric import (
11
- accuracy,
12
- concordance_cc,
13
- mean_absolute_error,
14
- mean_squared_error,
15
- unweighted_average_recall,
16
- )
17
-
18
- # import os
19
- from confidence_intervals import evaluate_with_conf_int
20
- from scipy.special import softmax
21
- from scipy.stats import entropy, pearsonr
22
- from sklearn.metrics import (
23
- ConfusionMatrixDisplay,
24
- RocCurveDisplay,
25
- auc,
26
- classification_report,
27
- confusion_matrix,
28
- r2_score,
29
- roc_auc_score,
30
- roc_curve,
31
- )
23
+ from audmetric import accuracy
24
+ from audmetric import concordance_cc
25
+ from audmetric import mean_absolute_error
26
+ from audmetric import mean_squared_error
27
+ from audmetric import unweighted_average_recall
32
28
 
33
29
  import nkululeko.glob_conf as glob_conf
34
30
  from nkululeko.plots import Plots
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.93.7
3
+ Version: 0.93.8
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -32,8 +32,6 @@ Requires-Dist: scikit_learn
32
32
  Requires-Dist: scipy
33
33
  Requires-Dist: seaborn
34
34
  Requires-Dist: sounddevice
35
- Requires-Dist: tensorflow
36
- Requires-Dist: tensorflow_hub
37
35
  Requires-Dist: torch
38
36
  Requires-Dist: torchvision
39
37
  Requires-Dist: transformers
@@ -355,6 +353,11 @@ If you use it, please mention the Nkululeko paper:
355
353
  Changelog
356
354
  =========
357
355
 
356
+ Version 0.93.8
357
+ --------------
358
+ * fixed bugs in plot
359
+ * added import_files_append=False
360
+
358
361
  Version 0.93.7
359
362
  --------------
360
363
  * added a safety to remove nan values after mapping
@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
3
3
  nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
4
4
  nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
5
- nkululeko/constants.py,sha256=YYzv168mmioG2Q0PtPCW_hXkhHgUwKjFGEItE1RU22g,39
5
+ nkululeko/constants.py,sha256=CK6zAyfTJ7tNTdh3Js3fEqv5jrLD4c_UfCI4ZOK7bcE,39
6
6
  nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
7
7
  nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
8
8
  nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
@@ -20,7 +20,7 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
20
20
  nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
21
21
  nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
22
22
  nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
23
- nkululeko/plots.py,sha256=emn2NpZyOGlC8pw0NdAGRbUkPzZzbOKFO50HZ7vJG40,25275
23
+ nkululeko/plots.py,sha256=zHWZ8Ns_0SLOEdbDVulObpRPoXRw_qqPXJv2dM08EeE,26049
24
24
  nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
25
25
  nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
26
26
  nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
@@ -60,7 +60,7 @@ nkululeko/feat_extract/feats_auddim.py,sha256=CGLp_aYhudfwoU5522vjrvjPxfZcyw593A
60
60
  nkululeko/feat_extract/feats_audmodel.py,sha256=OsZyB1rdcG0Fai2gAwBlbuubmWor1_-P4IDkZLqgPKE,3161
61
61
  nkululeko/feat_extract/feats_clap.py,sha256=1tttpfm2SJmQgYm2u8eUVpDiDOpWdKqFChpY3ZZokNs,3395
62
62
  nkululeko/feat_extract/feats_hubert.py,sha256=F3vrPCkx8EimJjFWYCZ7Yg9uo1G3NjYt4UKrGIUev8k,5172
63
- nkululeko/feat_extract/feats_import.py,sha256=skqXq5oLJLyQbY1jlsg3VDoUt93yI1OLwbtcc44AQo4,1627
63
+ nkululeko/feat_extract/feats_import.py,sha256=u9pTRbxZF2AbQDo4bhyqW6tlH3boS3KHpqW5Gh-Uy1U,2006
64
64
  nkululeko/feat_extract/feats_mld.py,sha256=5aRoYiGDm5ApoFntxAMQYPjEelXHHRBHZcAJR9dxaeI,1945
65
65
  nkululeko/feat_extract/feats_mos.py,sha256=3UXCKe86F49yHpZMQnLfDWXx9XdmlXHOy8efoa3WaOk,4138
66
66
  nkululeko/feat_extract/feats_opensmile.py,sha256=BLj5sUaBPz7vLPfNlt9LdQurSypmViqgSpPK-6aXGhQ,4029
@@ -70,7 +70,7 @@ nkululeko/feat_extract/feats_snr.py,sha256=5uEm10d89TQPf0s-CuVpQ3ftc0bLEeuB8aGuu
70
70
  nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDcPRxaiH-Qn8,3621
71
71
  nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
72
72
  nkululeko/feat_extract/feats_squim.py,sha256=yJifsp9kj9iJjW_UAKr3LlvVhX5rv7el4bepn0wN2a8,4578
73
- nkululeko/feat_extract/feats_trill.py,sha256=TUCrh5xbfnHD2gzb9mlkMSV4aK6YXazMqsh5xJ5yzUI,3188
73
+ nkululeko/feat_extract/feats_trill.py,sha256=JgyUQ8ihIL5PlUpxjchlbC9547GI0SyUwkFEquya85Q,3197
74
74
  nkululeko/feat_extract/feats_wav2vec2.py,sha256=WYB9XlRzgDi8cGSKzhV5jahA0GZ_SiWgaQ25IcEemto,5296
75
75
  nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
76
76
  nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
@@ -102,7 +102,7 @@ nkululeko/reporting/defines.py,sha256=0vh-Tlx4fAPpk1o6mP_4x3EkIoqzYMr38IZnj-JM5z
102
102
  nkululeko/reporting/latex_writer.py,sha256=NGwSIfd4nfslDkNUOSZSdqY_VDLA8634thyhe-vj1bY,1824
103
103
  nkululeko/reporting/report.py,sha256=bYN8B66gg3IWHAyfd6uIVjpYKy3rOI6aEwgfXU0LSAY,1006
104
104
  nkululeko/reporting/report_item.py,sha256=AqKD40AlZpRuHLbggn5PkH6ctGJwh9rGNBNgOvgUODg,534
105
- nkululeko/reporting/reporter.py,sha256=4OlYZAParkfJKO_aAyxqVpLc21zxZ-jDhtJKIMeUssI,20151
105
+ nkululeko/reporting/reporter.py,sha256=nDQsj0xl3_a1Rn3CAAdbWIenodDIT72LsYFNzCaKtmg,20384
106
106
  nkululeko/reporting/result.py,sha256=G63a2tHCwHhM6NBJgYzsWKWJm4Yu3r4hsCHA2Km7eHU,1073
107
107
  nkululeko/segmenting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
108
  nkululeko/segmenting/seg_inaspeechsegmenter.py,sha256=b3t0zdpJYofKWMyKRMtMMX91xeR-k8d5pbnNaQHcsOE,1902
@@ -112,9 +112,9 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
112
  nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
113
113
  nkululeko/utils/stats.py,sha256=vCRzhCR0Gx5SiJyAGbj1TIto8ocGz58CM5Pr3LltagA,2948
114
114
  nkululeko/utils/util.py,sha256=wFDslqxpCVDwi6LBakIFDDy1kYsxt5G7ykE38CocmtA,16880
115
- nkululeko-0.93.7.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
116
- nkululeko-0.93.7.dist-info/METADATA,sha256=tUTllLqcYKVJXgGOkPb2vcXBUcBKqjl03rKExtV4zeE,42447
117
- nkululeko-0.93.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
118
- nkululeko-0.93.7.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
119
- nkululeko-0.93.7.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
120
- nkululeko-0.93.7.dist-info/RECORD,,
115
+ nkululeko-0.93.8.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
116
+ nkululeko-0.93.8.dist-info/METADATA,sha256=QbeZSf2hWV3iClfliA5r6us-ZE6tHeluJffh5dqyKXA,42477
117
+ nkululeko-0.93.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
118
+ nkululeko-0.93.8.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
119
+ nkululeko-0.93.8.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
120
+ nkululeko-0.93.8.dist-info/RECORD,,