nkululeko 0.77.13__py3-none-any.whl → 0.78.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +3 -0
- nkululeko/plots.py +97 -58
- nkululeko/scaler.py +3 -1
- nkululeko/utils/util.py +7 -1
- {nkululeko-0.77.13.dist-info → nkululeko-0.78.0.dist-info}/METADATA +10 -1
- {nkululeko-0.77.13.dist-info → nkululeko-0.78.0.dist-info}/RECORD +10 -10
- {nkululeko-0.77.13.dist-info → nkululeko-0.78.0.dist-info}/LICENSE +0 -0
- {nkululeko-0.77.13.dist-info → nkululeko-0.78.0.dist-info}/WHEEL +0 -0
- {nkululeko-0.77.13.dist-info → nkululeko-0.78.0.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.
|
1
|
+
VERSION="0.78.0"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -577,6 +577,9 @@ class Experiment:
|
|
577
577
|
scale_feats,
|
578
578
|
)
|
579
579
|
self.feats_train, self.feats_test = self.scaler_feats.scale()
|
580
|
+
# store versions
|
581
|
+
self.util.save_to_store(self.feats_train, "feats_train_scaled")
|
582
|
+
self.util.save_to_store(self.feats_test, "feats_test_scaled")
|
580
583
|
|
581
584
|
def init_runmanager(self):
|
582
585
|
"""Initialize the manager object for the runs."""
|
nkululeko/plots.py
CHANGED
@@ -379,51 +379,115 @@ class Plots:
|
|
379
379
|
)
|
380
380
|
|
381
381
|
def scatter_plot(self, feats, label_df, label, dimred_type):
|
382
|
+
dim_num = int(self.util.config_val("EXPL", "scatter.dim", 2))
|
382
383
|
fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
|
383
384
|
sample_selection = self.util.config_val("EXPL", "sample_selection", "all")
|
384
|
-
filename = (
|
385
|
-
f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}"
|
386
|
-
)
|
385
|
+
filename = f"{label}_{self.util.get_feattype_name()}_{sample_selection}_{dimred_type}_{str(dim_num)}d"
|
387
386
|
filename = f"{fig_dir}{filename}.{self.format}"
|
388
387
|
self.util.debug(f"computing {dimred_type}, this might take a while...")
|
389
388
|
data = None
|
390
389
|
labels = label_df[label]
|
391
390
|
if dimred_type == "tsne":
|
392
|
-
data = self.getTsne(feats)
|
393
|
-
|
394
|
-
|
391
|
+
data = self.getTsne(feats, dim_num)
|
392
|
+
else:
|
393
|
+
if dimred_type == "umap":
|
394
|
+
import umap
|
395
|
+
|
396
|
+
y = umap.UMAP(
|
397
|
+
n_neighbors=10,
|
398
|
+
random_state=0,
|
399
|
+
n_components=dim_num,
|
400
|
+
).fit_transform(feats.values)
|
401
|
+
elif dimred_type == "pca":
|
402
|
+
from sklearn.decomposition import PCA
|
403
|
+
from sklearn.preprocessing import StandardScaler
|
395
404
|
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
405
|
+
scaler = StandardScaler()
|
406
|
+
pca = PCA(n_components=dim_num)
|
407
|
+
y = pca.fit_transform(scaler.fit_transform(feats.values))
|
408
|
+
else:
|
409
|
+
self.util.error(
|
410
|
+
f"no such dimensionality reduction function: {dimred_type}"
|
411
|
+
)
|
412
|
+
if dim_num == 2:
|
413
|
+
columns = ["Dim_1", "Dim_2"]
|
414
|
+
elif dim_num == 3:
|
415
|
+
columns = ["Dim_1", "Dim_2", "Dim_3"]
|
416
|
+
else:
|
417
|
+
self.util.error(f"wrong dimension number: {dim_num}")
|
400
418
|
data = pd.DataFrame(
|
401
|
-
|
419
|
+
y,
|
402
420
|
feats.index,
|
403
|
-
columns=
|
421
|
+
columns=columns,
|
404
422
|
)
|
405
|
-
elif dimred_type == "pca":
|
406
|
-
from sklearn.decomposition import PCA
|
407
|
-
from sklearn.preprocessing import StandardScaler
|
408
423
|
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
424
|
+
if dim_num == 2:
|
425
|
+
plot_data = np.vstack((data.T, labels)).T
|
426
|
+
plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
|
427
|
+
# plt.tight_layout()
|
428
|
+
ax = (
|
429
|
+
sns.FacetGrid(plot_df, hue="label", height=6)
|
430
|
+
.map(plt.scatter, "Dim_1", "Dim_2")
|
431
|
+
.add_legend()
|
432
|
+
)
|
433
|
+
elif dim_num == 3:
|
434
|
+
from mpl_toolkits.mplot3d import Axes3D
|
435
|
+
from sklearn.preprocessing import LabelEncoder
|
436
|
+
|
437
|
+
le = LabelEncoder()
|
438
|
+
|
439
|
+
labels_e = le.fit_transform(labels)
|
440
|
+
plot_data = np.vstack((data.T, labels_e)).T
|
441
|
+
plot_df = pd.DataFrame(
|
442
|
+
data=plot_data, columns=("Dim_1", "Dim_2", "Dim_3", "label")
|
416
443
|
)
|
444
|
+
# plt.tight_layout()
|
445
|
+
# axes instance
|
446
|
+
fig = plt.figure(figsize=(6, 6))
|
447
|
+
ax = Axes3D(fig, auto_add_to_figure=False)
|
448
|
+
fig.add_axes(ax)
|
449
|
+
# get colormap from seaborn
|
450
|
+
# cmap = ListedColormap(sns.color_palette("hsv", 256).as_hex())
|
451
|
+
color_dict = {
|
452
|
+
0: "red",
|
453
|
+
1: "blue",
|
454
|
+
2: "green",
|
455
|
+
3: "yellow",
|
456
|
+
4: "purple",
|
457
|
+
5: "#ff69b4",
|
458
|
+
6: "black",
|
459
|
+
7: "cyan",
|
460
|
+
8: "magenta",
|
461
|
+
9: "#faebd7",
|
462
|
+
10: "#2e8b57",
|
463
|
+
11: "#eeefff",
|
464
|
+
12: "#da70d6",
|
465
|
+
13: "#ff7f50",
|
466
|
+
14: "#cd853f",
|
467
|
+
15: "#bc8f8f",
|
468
|
+
16: "#5f9ea0",
|
469
|
+
17: "#daa520",
|
470
|
+
}
|
471
|
+
# plot
|
472
|
+
# make the numbers bigger so they can be used as distinguishable colors
|
473
|
+
labels_ex = [color_dict[xi] for xi in labels_e]
|
474
|
+
sc = ax.scatter(
|
475
|
+
plot_df.Dim_1,
|
476
|
+
plot_df.Dim_2,
|
477
|
+
plot_df.Dim_3,
|
478
|
+
s=40,
|
479
|
+
c=labels_ex,
|
480
|
+
marker="o",
|
481
|
+
# cmap=cmap,
|
482
|
+
alpha=1,
|
483
|
+
)
|
484
|
+
ax.set_xlabel("Dim_1")
|
485
|
+
ax.set_ylabel("Dim_2")
|
486
|
+
ax.set_zlabel("Dim_3")
|
487
|
+
# legend
|
488
|
+
plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)
|
417
489
|
else:
|
418
|
-
self.util.error(f"
|
419
|
-
plot_data = np.vstack((data.T, labels)).T
|
420
|
-
plot_df = pd.DataFrame(data=plot_data, columns=("Dim_1", "Dim_2", "label"))
|
421
|
-
# plt.tight_layout()
|
422
|
-
ax = (
|
423
|
-
sns.FacetGrid(plot_df, hue="label", height=6)
|
424
|
-
.map(plt.scatter, "Dim_1", "Dim_2")
|
425
|
-
.add_legend()
|
426
|
-
)
|
490
|
+
self.util.error(f"wrong dimension number: {dim_num}")
|
427
491
|
fig = ax.figure
|
428
492
|
plt.savefig(filename)
|
429
493
|
fig.clear()
|
@@ -437,35 +501,10 @@ class Plots:
|
|
437
501
|
)
|
438
502
|
)
|
439
503
|
|
440
|
-
def
|
441
|
-
"""Make a TSNE plot to see whether features are useful for classification"""
|
442
|
-
fig_dir = self.util.get_path("fig_dir") + "../" # one up because of the runs
|
443
|
-
filename = f"{fig_dir}{filename}.{self.format}"
|
444
|
-
self.util.debug(f"plotting tsne to {filename}, this might take a while...")
|
445
|
-
model = TSNE(
|
446
|
-
n_components=2,
|
447
|
-
random_state=0,
|
448
|
-
perplexity=perplexity,
|
449
|
-
learning_rate=learning_rate,
|
450
|
-
)
|
451
|
-
tsne_data = model.fit_transform(feats)
|
452
|
-
tsne_data_labs = np.vstack((tsne_data.T, labels)).T
|
453
|
-
tsne_df = pd.DataFrame(data=tsne_data_labs, columns=("Dim_1", "Dim_2", "label"))
|
454
|
-
# plt.tight_layout()
|
455
|
-
ax = (
|
456
|
-
sns.FacetGrid(tsne_df, hue="label", height=6)
|
457
|
-
.map(plt.scatter, "Dim_1", "Dim_2")
|
458
|
-
.add_legend()
|
459
|
-
)
|
460
|
-
fig = ax.figure
|
461
|
-
plt.savefig(filename)
|
462
|
-
fig.clear()
|
463
|
-
plt.close(fig)
|
464
|
-
|
465
|
-
def getTsne(self, feats, perplexity=30, learning_rate=200):
|
504
|
+
def getTsne(self, feats, dim_num, perplexity=30, learning_rate=200):
|
466
505
|
"""Make a TSNE plot to see whether features are useful for classification"""
|
467
506
|
model = TSNE(
|
468
|
-
n_components=
|
507
|
+
n_components=dim_num,
|
469
508
|
random_state=0,
|
470
509
|
perplexity=perplexity,
|
471
510
|
learning_rate=learning_rate,
|
nkululeko/scaler.py
CHANGED
@@ -95,10 +95,12 @@ class Scaler:
|
|
95
95
|
b2 = np.quantile(self.feats_train[c], 0.66)
|
96
96
|
feats_bin_train[c] = self._bin(self.feats_train[c].values, b1, b2).values
|
97
97
|
feats_bin_test[c] = self._bin(self.feats_test[c].values, b1, b2).values
|
98
|
+
self.feats_train = feats_bin_train
|
99
|
+
self.feats_test = feats_bin_test
|
98
100
|
|
99
101
|
def _bin(self, series, b1, b2):
|
100
102
|
bins = [-1000000, b1, b2, 1000000]
|
101
|
-
labels = [
|
103
|
+
labels = [0, 0.5, 1]
|
102
104
|
result = np.digitize(series, bins) - 1
|
103
105
|
result = pd.Series(result)
|
104
106
|
for i, l in enumerate(labels):
|
nkululeko/utils/util.py
CHANGED
@@ -307,7 +307,13 @@ class Util:
|
|
307
307
|
elif format == "csv":
|
308
308
|
return audformat.utils.read_csv(name)
|
309
309
|
else:
|
310
|
-
self.error(f"
|
310
|
+
self.error(f"unknown store format: {format}")
|
311
|
+
|
312
|
+
def save_to_store(self, df, name):
|
313
|
+
store = self.get_path("store")
|
314
|
+
store_format = self.config_val("FEATS", "store_format", "pkl")
|
315
|
+
storage = f"{store}{name}.{store_format}"
|
316
|
+
self.write_store(df, storage, store_format)
|
311
317
|
|
312
318
|
def copy_flags(self, df_source, df_target):
|
313
319
|
if hasattr(df_source, "is_labeled"):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: nkululeko
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.78.0
|
4
4
|
Summary: Machine learning audio prediction experiments based on templates
|
5
5
|
Home-page: https://github.com/felixbur/nkululeko
|
6
6
|
Author: Felix Burkhardt
|
@@ -313,6 +313,15 @@ F. Burkhardt, Johannes Wagner, Hagen Wierstorf, Florian Eyben and Björn Schulle
|
|
313
313
|
Changelog
|
314
314
|
=========
|
315
315
|
|
316
|
+
Version 0.78.0
|
317
|
+
--------------
|
318
|
+
* added 3-d scatter plots
|
319
|
+
* removed epoch-plots if epoch_num=1
|
320
|
+
|
321
|
+
Version 0.77.14
|
322
|
+
--------------
|
323
|
+
* fixed bug preventing bin scaling to work
|
324
|
+
|
316
325
|
Version 0.77.13
|
317
326
|
--------------
|
318
327
|
* added bins scaler
|
@@ -2,10 +2,10 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
|
|
2
2
|
nkululeko/aug_train.py,sha256=uUCqtAGhT6P1dyrPpwT409wEhxfzsxpCiq_v_zdmfOI,3103
|
3
3
|
nkululeko/augment.py,sha256=sHWG4Jmb4BjnaaXXnRYMP7Jkk0qlaZ2ohsArP8uW_d8,3003
|
4
4
|
nkululeko/cacheddataset.py,sha256=lIJ6hUo5LoxSrzXtWV8mzwO7wRtUETWnOQ4ws2XfL1E,969
|
5
|
-
nkululeko/constants.py,sha256=
|
5
|
+
nkululeko/constants.py,sha256=svWlKgLGI6BGzQ_UC7BqFvjh7pXevsQxvV9VqPg-n2s,39
|
6
6
|
nkululeko/demo.py,sha256=FFR8qHMCY8kKKRWDTa8xA7A8mWeYalRKYNtV5rjGg44,1915
|
7
7
|
nkululeko/demo_predictor.py,sha256=j4ySWWcIxW7ZAIBH86m9BfRFokzrUNkRD6fpsvAQGTw,2629
|
8
|
-
nkululeko/experiment.py,sha256=
|
8
|
+
nkululeko/experiment.py,sha256=znrMTpGqNeBQWD0F7sUHzQyRjCxsRH9t3V_8NhAtzrg,28823
|
9
9
|
nkululeko/explore.py,sha256=pfciOL66e0T4Bk0RTkwDyE6pK_baSUdjMo2Ybpst3L4,2202
|
10
10
|
nkululeko/export.py,sha256=mHeEAAmtZuxdyebLlbSzPrHSi9OMgJHbk35d3DTxRBc,4632
|
11
11
|
nkululeko/feature_extractor.py,sha256=tKv1b1-o7xNMgBavTR8VY8_H5HKoJEnnosS-KcjmOEU,7281
|
@@ -15,13 +15,13 @@ nkululeko/glob_conf.py,sha256=6MZe83QCgHD_zB79Sl9m6UJQASwfqJlyb-1nqrQ_6Iw,381
|
|
15
15
|
nkululeko/modelrunner.py,sha256=TQW08f72-GjBEIFTE3_8B8qMCWvTJUqJ1fveygmYnXI,9278
|
16
16
|
nkululeko/multidb.py,sha256=5F62YhUKwi__a_fhcovyOCOcydTH48nM3JJ3oZ47Tjg,3852
|
17
17
|
nkululeko/nkululeko.py,sha256=Rm6L9iQx7KWO7jJW1pdwMgYfRruapfSoYE9TE-MaD2s,1851
|
18
|
-
nkululeko/plots.py,sha256=
|
18
|
+
nkululeko/plots.py,sha256=K88ZRPFGX_r03BT742H06Dde20xZYdltv7dxjgUiAFA,23025
|
19
19
|
nkululeko/predict.py,sha256=dRXX-sQVESa7cNi_56S6UkUOa_pV1g_K4xYtYVM1SJs,1876
|
20
20
|
nkululeko/reporter.py,sha256=wrtWbU_UKDwhKQNMi7Q_Ix5N_UTzTagRwZikgUGk1c8,11606
|
21
21
|
nkululeko/resample.py,sha256=C2S3aOTwlx5iYah_hs0JARHBC8Cq4Z5cH_mnDMb5RKk,2185
|
22
22
|
nkululeko/result.py,sha256=HeiOrrqf9W2yxMryN33zgEmQejNWRxNmm589AYt6-eM,499
|
23
23
|
nkululeko/runmanager.py,sha256=WoZ4cRyHV7kAhtt-55RAAj6D3iXqEvvEMm6Ll4YkV2E,7192
|
24
|
-
nkululeko/scaler.py,sha256=
|
24
|
+
nkululeko/scaler.py,sha256=4nkIqoajkIkuTPK0Z02ifMN_awl6fP_i-GBYdoGYgGM,4101
|
25
25
|
nkululeko/segment.py,sha256=DfJYZsCEH41gwKyjpMgv8kWUzfVkmC0wWTbgHOL4i4g,4787
|
26
26
|
nkululeko/syllable_nuclei.py,sha256=Sky-C__MeUDaxqHnDl2TGLLYOYvsahD35TUjWGeG31k,10047
|
27
27
|
nkululeko/test.py,sha256=cRtOn_d3Fh2kZmnT4nnQeGzZTRtpr5jRhowykOwunME,1421
|
@@ -99,9 +99,9 @@ nkululeko/segmenting/seg_silero.py,sha256=lLytS38KzARS17omwv8VBw-zz60RVSXGSvZ5Ev
|
|
99
99
|
nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
100
100
|
nkululeko/utils/files.py,sha256=UiGAtZRWYjHSvlmPaTMtzyNNGE6qaLaxQkybctS7iRM,4021
|
101
101
|
nkululeko/utils/stats.py,sha256=29otJpUp1VqbtDKmlLkPPzBmVfTFiHZ70rUdR4860rM,2788
|
102
|
-
nkululeko/utils/util.py,sha256=
|
103
|
-
nkululeko-0.
|
104
|
-
nkululeko-0.
|
105
|
-
nkululeko-0.
|
106
|
-
nkululeko-0.
|
107
|
-
nkululeko-0.
|
102
|
+
nkululeko/utils/util.py,sha256=Ibwco798iDhmamO7SjVStfxOl-DNBdzkKl2HniYgoNE,12238
|
103
|
+
nkululeko-0.78.0.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
104
|
+
nkululeko-0.78.0.dist-info/METADATA,sha256=Vq5Rc0-yJHX1IDRVElWFgoTqCwVyqfS6QSXoVKMJeOI,31629
|
105
|
+
nkululeko-0.78.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
106
|
+
nkululeko-0.78.0.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
|
107
|
+
nkululeko-0.78.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|