nkululeko 0.77.13__py3-none-any.whl → 0.78.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.77.13"
1
+ VERSION="0.78.0"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -577,6 +577,9 @@ class Experiment:
577
577
  scale_feats,
578
578
  )
579
579
  self.feats_train, self.feats_test = self.scaler_feats.scale()
580
+ # store versions
581
+ self.util.save_to_store(self.feats_train, "feats_train_scaled")
582
+ self.util.save_to_store(self.feats_test, "feats_test_scaled")
580
583
 
581
584
  def init_runmanager(self):
582
585
  """Initialize the manager object for the runs."""
nkululeko/plots.py CHANGED
@@ -379,51 +379,115 @@ class Plots:
379
379
  )
380
380
 
381
381
  def scatter_plot(self, feats, label_df, label, dimred_type):
382
+ dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
382
383
  fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
383
384
  sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
384
- filename = (
385
- f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}"
386
- )
385
+ filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
387
386
  filename = f"{fig_dir}{filename}.{self.format}"
388
387
  self.util.debug(f"computing {dimred_type}, this might take a while...")
389
388
  data = None
390
389
  labels = label_df[label]
391
390
  if dimred_type == "tsne":
392
- data = self.getTsne(feats)
393
- elif dimred_type == "umap":
394
- import umap
391
+ data = self.getTsne(feats, dim_num)
392
+ else:
393
+ if dimred_type == "umap":
394
+ import umap
395
+
396
+ y = umap.UMAP(
397
+ n_neighbors=10,
398
+ random_state=0,
399
+ n_components=dim_num,
400
+ ).fit_transform(feats.values)
401
+ elif dimred_type == "pca":
402
+ from sklearn.decomposition import PCA
403
+ from sklearn.preprocessing import StandardScaler
395
404
 
396
- y_umap = umap.UMAP(
397
- n_neighbors=10,
398
- random_state=0,
399
- ).fit_transform(feats.values)
405
+ scaler = StandardScaler()
406
+ pca = PCA(n_components=dim_num)
407
+ y = pca.fit_transform(scaler.fit_transform(feats.values))
408
+ else:
409
+ self.util.error(
410
+ f"no such dimensionality reduction function: {dimred_type}"
411
+ )
412
+ if dim_num == 2:
413
+ columns = ["Dim_1", "Dim_2"]
414
+ elif dim_num == 3:
415
+ columns = ["Dim_1", "Dim_2", "Dim_3"]
416
+ else:
417
+ self.util.error(f"wrong dimension number: {dim_num}")
400
418
  data = pd.DataFrame(
401
- y_umap,
419
+ y,
402
420
  feats.index,
403
- columns=["Dim_1", "Dim_2"],
421
+ columns=columns,
404
422
  )
405
- elif dimred_type == "pca":
406
- from sklearn.decomposition import PCA
407
- from sklearn.preprocessing import StandardScaler
408
423
 
409
- scaler = StandardScaler()
410
- pca = PCA(n_components=2)
411
- y_pca = pca.fit_transform(scaler.fit_transform(feats.values))
412
- data = pd.DataFrame(
413
- y_pca,
414
- feats.index,
415
- columns=["Dim_1", "Dim_2"],
424
+ if dim_num == 2:
425
+ plot_data = np.vstack((data.T, labels)).T
426
+ plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
427
+ # plt.tight_layout()
428
+ ax = (
429
+ sns.FacetGrid(plot_df, hue="label", height=6)
430
+ .map(plt.scatter, "Dim_1", "Dim_2")
431
+ .add_legend()
432
+ )
433
+ elif dim_num == 3:
434
+ from mpl_toolkits.mplot3d import Axes3D
435
+ from sklearn.preprocessing import LabelEncoder
436
+
437
+ le = LabelEncoder()
438
+
439
+ labels_e = le.fit_transform(labels)
440
+ plot_data = np.vstack((data.T, labels_e)).T
441
+ plot_df = pd.DataFrame(
442
+ data=plot_data, columns=("Dim_1", "Dim_2", "Dim_3", "label")
416
443
  )
444
+ # plt.tight_layout()
445
+ # axes instance
446
+ fig = plt.figure(figsize=(6, 6))
447
+ ax = Axes3D(fig, auto_add_to_figure=False)
448
+ fig.add_axes(ax)
449
+ # get colormap from seaborn
450
+ # cmap = ListedColormap(sns.color_palette("hsv", 256).as_hex())
451
+ color_dict = {
452
+ 0: "red",
453
+ 1: "blue",
454
+ 2: "green",
455
+ 3: "yellow",
456
+ 4: "purple",
457
+ 5: "#ff69b4",
458
+ 6: "black",
459
+ 7: "cyan",
460
+ 8: "magenta",
461
+ 9: "#faebd7",
462
+ 10: "#2e8b57",
463
+ 11: "#eeefff",
464
+ 12: "#da70d6",
465
+ 13: "#ff7f50",
466
+ 14: "#cd853f",
467
+ 15: "#bc8f8f",
468
+ 16: "#5f9ea0",
469
+ 17: "#daa520",
470
+ }
471
+ # plot
472
+ # make the numbers bigger so they can be used as distinguishable colors
473
+ labels_ex = [color_dict[xi] for xi in labels_e]
474
+ sc = ax.scatter(
475
+ plot_df.Dim_1,
476
+ plot_df.Dim_2,
477
+ plot_df.Dim_3,
478
+ s=40,
479
+ c=labels_ex,
480
+ marker="o",
481
+ # cmap=cmap,
482
+ alpha=1,
483
+ )
484
+ ax.set_xlabel("Dim_1")
485
+ ax.set_ylabel("Dim_2")
486
+ ax.set_zlabel("Dim_3")
487
+ # legend
488
+ plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
417
489
  else:
418
- self.util.error(f"no such dimensionality reduction function: {dimred_type}")
419
- plot_data = np.vstack((data.T, labels)).T
420
- plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
421
- # plt.tight_layout()
422
- ax = (
423
- sns.FacetGrid(plot_df, hue="label", height=6)
424
- .map(plt.scatter, "Dim_1", "Dim_2")
425
- .add_legend()
426
- )
490
+ self.util.error(f"wrong dimension number: {dim_num}")
427
491
  fig = ax.figure
428
492
  plt.savefig(filename)
429
493
  fig.clear()
@@ -437,35 +501,10 @@ class Plots:
437
501
  )
438
502
  )
439
503
 
440
- def plotTsne(self, feats, labels, filename, perplexity=30, learning_rate=200):
441
- """Make a TSNE plot to see whether features are useful for classification"""
442
- fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
443
- filename = f"{fig_dir}{filename}.{self.format}"
444
- self.util.debug(f"plotting tsne to {filename}, this might take a while...")
445
- model = TSNE(
446
- n_components=2,
447
- random_state=0,
448
- perplexity=perplexity,
449
- learning_rate=learning_rate,
450
- )
451
- tsne_data = model.fit_transform(feats)
452
- tsne_data_labs = np.vstack((tsne_data.T, labels)).T
453
- tsne_df = pd.DataFrame(data=tsne_data_labs, columns=("Dim_1", "Dim_2", "label"))
454
- # plt.tight_layout()
455
- ax = (
456
- sns.FacetGrid(tsne_df, hue="label", height=6)
457
- .map(plt.scatter, "Dim_1", "Dim_2")
458
- .add_legend()
459
- )
460
- fig = ax.figure
461
- plt.savefig(filename)
462
- fig.clear()
463
- plt.close(fig)
464
-
465
- def getTsne(self, feats, perplexity=30, learning_rate=200):
504
+ def getTsne(self, feats, dim_num, perplexity=30, learning_rate=200):
466
505
  """Make a TSNE plot to see whether features are useful for classification"""
467
506
  model = TSNE(
468
- n_components=2,
507
+ n_components=dim_num,
469
508
  random_state=0,
470
509
  perplexity=perplexity,
471
510
  learning_rate=learning_rate,
nkululeko/scaler.py CHANGED
@@ -95,10 +95,12 @@ class Scaler:
95
95
  b2 = np.quantile(self.feats_train[c], 0.66)
96
96
  feats_bin_train[c] = self._bin(self.feats_train[c].values, b1, b2).values
97
97
  feats_bin_test[c] = self._bin(self.feats_test[c].values, b1, b2).values
98
+ self.feats_train = feats_bin_train
99
+ self.feats_test = feats_bin_test
98
100
 
99
101
  def _bin(self, series, b1, b2):
100
102
  bins = [-1000000, b1, b2, 1000000]
101
- labels = ["low", "middle", "high"]
103
+ labels = [0, 0.5, 1]
102
104
  result = np.digitize(series, bins) - 1
103
105
  result = pd.Series(result)
104
106
  for i, l in enumerate(labels):
nkululeko/utils/util.py CHANGED
@@ -307,7 +307,13 @@ class Util:
307
307
  elif format == "csv":
308
308
  return audformat.utils.read_csv(name)
309
309
  else:
310
- self.error(f"unkown store format: {format}")
310
+ self.error(f"unknown store format: {format}")
311
+
312
+ def save_to_store(self, df, name):
313
+ store = self.get_path("store")
314
+ store_format = self.config_val("FEATS", "store_format", "pkl")
315
+ storage = f"{store}{name}.{store_format}"
316
+ self.write_store(df, storage, store_format)
311
317
 
312
318
  def copy_flags(self, df_source, df_target):
313
319
  if hasattr(df_source, "is_labeled"):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nkululeko
3
- Version: 0.77.13
3
+ Version: 0.78.0
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -313,6 +313,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
313
313
  Changelog
314
314
  =========
315
315
 
316
+ Version 0.78.0
317
+ --------------
318
+ * added 3-d scatter plots
319
+ * removed epoch-plots if epoch_num=1
320
+
321
+ Version 0.77.14
322
+ --------------
323
+ * fixed bug that prevented bin scaling from working
324
+
316
325
  Version 0.77.13
317
326
  --------------
318
327
  * added bins scaler
@@ -2,10 +2,10 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
2
2
  nkululeko/aug_train.py,sha256=uUCqtAGhT6P1dyrPpwT409wEhxfzsxpCiq_v_zdmfOI,3103
3
3
  nkululeko/augment.py,sha256=sHWG4Jmb4BjnaaXXnRYMP7Jkk0qlaZ2ohsArP8uW_d8,3003
4
4
  nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
5
- nkululeko/constants.py,sha256=3c07uj-DuO1BAfszagYr4S2y4QcHzfmPYXiRjCaZ3C4,40
5
+ nkululeko/constants.py,sha256=svWlKgLGI6BGzQ_UC7BqFvjh7pXevsQxvV9VqPg-n2s,39
6
6
  nkululeko/demo.py,sha256=FFR8qHMCY8kKKRWDTa8xA7A8mWeYalRKYNtV5rjGg44,1915
7
7
  nkululeko/demo_predictor.py,sha256=j4ySWWcIxW7ZAIBH86m9BfRFokzrUNkRD6fpsvAQGTw,2629
8
- nkululeko/experiment.py,sha256=g-GdcvcWyQKj70VutyFPuzSxmsD-Tk_-vY-5FM7dszs,28644
8
+ nkululeko/experiment.py,sha256=znrMTpGqNeBQWD0F7sUHzQyRjCxsRH9t3V_8NhAtzrg,28823
9
9
  nkululeko/explore.py,sha256=pfciOL66e0T4Bk0RTkwDyE6pK_baSUdjMo2Ybpst3L4,2202
10
10
  nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
11
11
  nkululeko/feature_extractor.py,sha256=tKv1b1-o7xNMgBavTR8VY8_H5HKoJEnnosS-KcjmOEU,7281
@@ -15,13 +15,13 @@ nkululeko/glob_conf.py,sha256=6MZe83QCgHD_zB79Sl9m6UJQASwfqJlyb-1nqrQ_6Iw,381
15
15
  nkululeko/modelrunner.py,sha256=TQW08f72-GjBEIFTE3_8B8qMCWvTJUqJ1fveygmYnXI,9278
16
16
  nkululeko/multidb.py,sha256=5F62YhUKwi__a_fhcovyOCOcydTH48nM3JJ3oZ47Tjg,3852
17
17
  nkululeko/nkululeko.py,sha256=Rm6L9iQx7KWO7jJW1pdwMgYfRruapfSoYE9TE-MaD2s,1851
18
- nkululeko/plots.py,sha256=tW9VlhD1K6FRM15rtYYJiHO7YFhbJpOXVy8SHu7UrqI,21687
18
+ nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
19
19
  nkululeko/predict.py,sha256=dRXX-sQVESa7cNi_56S6UkUOa_pV1g_K4xYtYVM1SJs,1876
20
20
  nkululeko/reporter.py,sha256=wrtWbU_UKDwhKQNMi7Q_Ix5N_UTzTagRwZikgUGk1c8,11606
21
21
  nkululeko/resample.py,sha256=C2S3aOTwlx5iYah_hs0JARHBC8Cq4Z5cH_mnDMb5RKk,2185
22
22
  nkululeko/result.py,sha256=HeiOrrqf9W2yxMryN33zgEmQejNWRxNmm589AYt6-eM,499
23
23
  nkululeko/runmanager.py,sha256=WoZ4cRyHV7kAhtt-55RAAj6D3iXqEvvEMm6Ll4YkV2E,7192
24
- nkululeko/scaler.py,sha256=gA4ziPMx_qIubtk97v-vrqzlP54tbXvxpna5g63GQCg,4031
24
+ nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
25
25
  nkululeko/segment.py,sha256=DfJYZsCEH41gwKyjpMgv8kWUzfVkmC0wWTbgHOL4i4g,4787
26
26
  nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
27
27
  nkululeko/test.py,sha256=cRtOn_d3Fh2kZmnT4nnQeGzZTRtpr5jRhowykOwunME,1421
@@ -99,9 +99,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
99
99
  nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
100
  nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
101
101
  nkululeko/utils/stats.py,sha256=29otJpUp1VqbtDKmlLkPPzBmVfTFiHZ70rUdR4860rM,2788
102
- nkululeko/utils/util.py,sha256=xYLHCWcKYF-DlSYYDGHd9EXnvZPB_bYdE6BXCq5Twgo,11985
103
- nkululeko-0.77.13.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
104
- nkululeko-0.77.13.dist-info/METADATA,sha256=CU3Dp3UQziQ5j4cYGbwG7lRhe_Acfj7ThrYlSY04lG0,31461
105
- nkululeko-0.77.13.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
106
- nkululeko-0.77.13.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
107
- nkululeko-0.77.13.dist-info/RECORD,,
102
+ nkululeko/utils/util.py,sha256=Ibwco798iDhmamO7SjVStfxOl-DNBdzkKl2HniYgoNE,12238
103
+ nkululeko-0.78.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
104
+ nkululeko-0.78.0.dist-info/METADATA,sha256=Vq5Rc0-yJHX1IDRVElWFgoTqCwVyqfS6QSXoVKMJeOI,31629
105
+ nkululeko-0.78.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
106
+ nkululeko-0.78.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
107
+ nkululeko-0.78.0.dist-info/RECORD,,