nkululeko 0.93.12__py3-none-any.whl → 0.93.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/constants.py +1 -1
- nkululeko/data/dataset.py +29 -58
- nkululeko/data/dataset_csv.py +5 -4
- nkululeko/feat_extract/feats_analyser.py +4 -2
- nkululeko/feat_extract/feats_wav2vec2.py +1 -0
- nkululeko/plots.py +16 -4
- nkululeko/utils/stats.py +11 -9
- nkululeko/utils/util.py +15 -0
- nkululeko-0.93.14.dist-info/METADATA +39 -0
- {nkululeko-0.93.12.dist-info → nkululeko-0.93.14.dist-info}/RECORD +14 -14
- nkululeko-0.93.12.dist-info/METADATA +0 -1491
- {nkululeko-0.93.12.dist-info → nkululeko-0.93.14.dist-info}/LICENSE +0 -0
- {nkululeko-0.93.12.dist-info → nkululeko-0.93.14.dist-info}/WHEEL +0 -0
- {nkululeko-0.93.12.dist-info → nkululeko-0.93.14.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.93.12.dist-info → nkululeko-0.93.14.dist-info}/top_level.txt +0 -0
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
-VERSION="0.93.12"
+VERSION="0.93.14"
 SAMPLING_RATE = 16000
nkululeko/data/dataset.py
CHANGED
@@ -34,11 +34,9 @@ class Dataset:
 self.plot = Plots()
 self.limit = int(self.util.config_val_data(self.name, "limit", 0))
 self.target_tables_append = eval(
-self.util.config_val_data(
-    self.name, "target_tables_append", "False")
+self.util.config_val_data(self.name, "target_tables_append", "False")
 )
-self.start_fresh = eval(
-    self.util.config_val("DATA", "no_reuse", "False"))
+self.start_fresh = eval(self.util.config_val("DATA", "no_reuse", "False"))
 self.is_labeled, self.got_speaker, self.got_gender, self.got_age = (
 False,
 False,
@@ -72,8 +70,7 @@ class Dataset:
 try:
 self.db = audformat.Database.load(root)
 except FileNotFoundError:
-self.util.error(
-    f"{self.name}: no audformat database found at {root}")
+self.util.error(f"{self.name}: no audformat database found at {root}")
 return root

 def _check_cols(self, df):
@@ -95,8 +92,7 @@ class Dataset:
 )
 self.util.debug(r_string)
 if glob_conf.report.initial:
-glob_conf.report.add_item(ReportItem(
-    "Data", "Load report", r_string))
+glob_conf.report.add_item(ReportItem("Data", "Load report", r_string))
 glob_conf.report.initial = False

 def load(self):
@@ -107,8 +103,7 @@ class Dataset:
 store_file = f"{store}{self.name}.{store_format}"
 self.root = self._load_db()
 if not self.start_fresh and os.path.isfile(store_file):
-self.util.debug(
-    f"{self.name}: reusing previously stored file {store_file}")
+self.util.debug(f"{self.name}: reusing previously stored file {store_file}")
 self.df = self.util.get_store(store_file, store_format)
 self.is_labeled = self.target in self.df
 self.got_gender = "gender" in self.df
@@ -123,12 +118,10 @@ class Dataset:
 # map the audio file paths
 self.db.map_files(lambda x: os.path.join(self.root, x))
 # the dataframes (potentially more than one) with at least the file names
-df_files = self.util.config_val_data(
-    self.name, "files_tables", "['files']")
+df_files = self.util.config_val_data(self.name, "files_tables", "['files']")
 df_files_tables = ast.literal_eval(df_files)
 # The label for the target column
-self.col_label = self.util.config_val_data(
-    self.name, "label", self.target)
+self.col_label = self.util.config_val_data(self.name, "label", self.target)
 (
 df,
 self.is_labeled,
@@ -164,8 +157,7 @@ class Dataset:
 self.got_age = got_age2 or self.got_age
 if audformat.is_filewise_index(df_target.index):
 try:
-df_target = df_target.loc[df.index.get_level_values(
-    "file")]
+df_target = df_target.loc[df.index.get_level_values("file")]
 df_target = df_target.set_index(df.index)
 except KeyError:
 # just a try...
@@ -214,8 +206,7 @@ class Dataset:
 end = self.df.index.get_level_values(2)
 self.df["duration"] = (end - start).total_seconds()
 elif self.df.duration.dtype == "timedelta64[ns]":
-self.df["duration"] = self.df["duration"].map(
-    lambda x: x.total_seconds())
+self.df["duration"] = self.df["duration"].map(lambda x: x.total_seconds())
 # Perform some filtering if desired
 required = self.util.config_val_data(self.name, "required", False)
 if required:
@@ -245,18 +236,15 @@ class Dataset:
 res.append(abs(n - max))
 return res

-reverse = eval(self.util.config_val_data(
-    self.name, "reverse", "False"))
+reverse = eval(self.util.config_val_data(self.name, "reverse", "False"))
 if reverse:
-max = eval(self.util.config_val_data(
-    self.name, "reverse.max", "False"))
+max = eval(self.util.config_val_data(self.name, "reverse.max", "False"))
 if max:
 max = float(max)
 else:
 max = self.df[self.target].values.max()
 self.util.debug(f"reversing target numbers with max values: {max}")
-self.df[self.target] = reverse_array(
-    self.df[self.target].values, max)
+self.df[self.target] = reverse_array(self.df[self.target].values, max)

 # check if the target variable should be scaled (z-transformed)
 scale = self.util.config_val_data(self.name, "scale", False)
@@ -329,15 +317,13 @@ class Dataset:
 pass
 try:
 # also it might be possible that the age is part of the speaker description
-df_local["age"] = db[table]["speaker"].get(
-    map="age").astype(int)
+df_local["age"] = db[table]["speaker"].get(map="age").astype(int)
 got_age = True
 except (ValueError, audformat.errors.BadKeyError):
 pass
 try:
 # same for the target, e.g. "age"
-df_local[self.target] = db[table]["speaker"].get(
-    map=self.target)
+df_local[self.target] = db[table]["speaker"].get(map=self.target)
 is_labeled = True
 except (ValueError, audformat.core.errors.BadKeyError):
 pass
@@ -398,10 +384,8 @@ class Dataset:
 testdf = self.db.tables[self.target + ".test"].df
 traindf = self.db.tables[self.target + ".train"].df
 # use only the train and test samples that were not perhaps filtered out by an earlier processing step
-self.df_test = self.df.loc[self.df.index.intersection(
-    testdf.index)]
-self.df_train = self.df.loc[self.df.index.intersection(
-    traindf.index)]
+self.df_test = self.df.loc[self.df.index.intersection(testdf.index)]
+self.df_train = self.df.loc[self.df.index.intersection(traindf.index)]
 elif split_strategy == "train":
 self.df_train = self.df
 self.df_test = pd.DataFrame()
@@ -424,23 +408,18 @@ class Dataset:
 if entry_train_tables:
 train_tables = ast.literal_eval(entry_train_tables)
 for train_table in train_tables:
-traindf = pd.concat(
-    [traindf, self.db.tables[train_table].df])
+traindf = pd.concat([traindf, self.db.tables[train_table].df])
 # use only the train and test samples that were not perhaps filtered out by an earlier processing step
 # testdf.index.map(lambda x: os.path.join(self.root, x))
 # testdf.index = testdf.index.to_series().apply(lambda x: self.root+x)
 testdf = testdf.set_index(
-audformat.utils.to_segmented_index(
-    testdf.index, allow_nat=False)
+audformat.utils.to_segmented_index(testdf.index, allow_nat=False)
 )
 traindf = traindf.set_index(
-audformat.utils.to_segmented_index(
-    traindf.index, allow_nat=False)
+audformat.utils.to_segmented_index(traindf.index, allow_nat=False)
 )
-self.df_test = self.df.loc[self.df.index.intersection(
-    testdf.index)]
-self.df_train = self.df.loc[self.df.index.intersection(
-    traindf.index)]
+self.df_test = self.df.loc[self.df.index.intersection(testdf.index)]
+self.df_train = self.df.loc[self.df.index.intersection(traindf.index)]
 # it might be necessary to copy the target values
 try:
 self.df_test[self.target] = testdf[self.target]
@@ -467,12 +446,10 @@ class Dataset:
 self.util.error(f"unknown split strategy: {split_strategy}")

 # check if train or test set should be ignored
-as_test = eval(self.util.config_val_data(
-    self.name, "as_test", "False"))
+as_test = eval(self.util.config_val_data(self.name, "as_test", "False"))
 if as_test:
 self.df_train = pd.DataFrame()
-as_train = eval(self.util.config_val_data(
-    self.name, "as_train", "False"))
+as_train = eval(self.util.config_val_data(self.name, "as_train", "False"))
 if as_train:
 self.df_test = pd.DataFrame()

@@ -503,8 +480,7 @@ class Dataset:

 seed = 42
 k = 30
-test_size = int(self.util.config_val_data(
-    self.name, "test_size", 20)) / 100.0
+test_size = int(self.util.config_val_data(self.name, "test_size", 20)) / 100.0
 df = self.df
 # split target
 targets = df[self.target].to_numpy()
@@ -520,8 +496,7 @@ class Dataset:
 stratif_vars = self.util.config_val("DATA", "balance", False)
 stratif_vars_array = {}
 if not stratif_vars:
-self.util.error(
-    "balanced split needs stratif_vars to stratify the splits")
+self.util.error("balanced split needs stratif_vars to stratify the splits")
 else:
 stratif_vars = ast.literal_eval(stratif_vars)
 for stratif_var in stratif_vars.keys():
@@ -530,8 +505,7 @@ class Dataset:
 continue
 else:
 data = df[stratif_var].to_numpy()
-bins = self.util.config_val(
-    "DATA", f"{stratif_var}_bins", False)
+bins = self.util.config_val("DATA", f"{stratif_var}_bins", False)
 if bins:
 data = binning(data, nbins=int(bins))
 stratif_vars_array[stratif_var] = data
@@ -582,8 +556,7 @@ class Dataset:

 def split_speakers(self):
 """One way to split train and eval sets: Specify percentage of evaluation speakers"""
-test_percent = int(self.util.config_val_data(
-    self.name, "test_size", 20))
+test_percent = int(self.util.config_val_data(self.name, "test_size", 20))
 df = self.df
 s_num = df.speaker.nunique()
 test_num = int(s_num * (test_percent / 100))
@@ -602,8 +575,7 @@ class Dataset:

 def random_split(self):
 """One way to split train and eval sets: Specify percentage of random samples"""
-test_percent = int(self.util.config_val_data(
-    self.name, "test_size", 20))
+test_percent = int(self.util.config_val_data(self.name, "test_size", 20))
 df = self.df
 s_num = len(df)
 test_num = int(s_num * (test_percent / 100))
@@ -707,8 +679,7 @@ class Dataset:
 if df.empty:
 return
 if self.check_continuous_classification():
-self.util.debug(
-    f"{self.name}: binning continuous variable to categories")
+self.util.debug(f"{self.name}: binning continuous variable to categories")
 cat_vals = self.util.continuous_to_categorical(df[self.target])
 df[self.target] = cat_vals.values
 labels = ast.literal_eval(glob_conf.config["DATA"]["labels"])
nkululeko/data/dataset_csv.py
CHANGED
@@ -3,19 +3,20 @@ import ast
 import os
 import os.path

-import audformat.utils
 import pandas as pd

-import
+import audformat.utils
+
 from nkululeko.data.dataset import Dataset
+import nkululeko.glob_conf as glob_conf
 from nkululeko.reporting.report_item import ReportItem


 class Dataset_CSV(Dataset):
-"""Class to represent datasets stored as a csv file"""
+"""Class to represent datasets stored as a csv file."""

 def load(self):
-"""Load the dataframe with files, speakers and task labels"""
+"""Load the dataframe with files, speakers and task labels."""
 self.util.debug(f"loading {self.name}")
 self.got_target, self.got_speaker, self.got_gender = False, False, False
 data_file = self.util.config_val_data(self.name, "", "")
nkululeko/feat_extract/feats_analyser.py
CHANGED
@@ -4,8 +4,10 @@ import ast
 import matplotlib.pyplot as plt
 import pandas as pd
 from sklearn.inspection import permutation_importance
-from sklearn.linear_model import LinearRegression
-from sklearn.
+from sklearn.linear_model import LinearRegression
+from sklearn.linear_model import LogisticRegression
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeRegressor

 import nkululeko.glob_conf as glob_conf
 from nkululeko.plots import Plots
nkululeko/feat_extract/feats_wav2vec2.py
CHANGED
@@ -49,6 +49,7 @@ class Wav2vec2(Featureset):
 hidden_layer = int(self.util.config_val("FEATS", "wav2vec2.layer", "0"))
 config.num_hidden_layers = layer_num - hidden_layer
 self.util.debug(f"using hidden layer #{config.num_hidden_layers}")
+
 self.processor = Wav2Vec2FeatureExtractor.from_pretrained(model_path)
 self.model = Wav2Vec2Model.from_pretrained(model_path, config=config).to(
 self.device
nkululeko/plots.py
CHANGED
@@ -24,8 +24,10 @@ class Plots:
 self.format = self.util.config_val("PLOT", "format", "png")
 self.target = self.util.config_val("DATA", "target", "emotion")
 self.with_ccc = eval(self.util.config_val("PLOT", "ccc", "False"))
+self.type_s = "samples"

 def plot_distributions_speaker(self, df):
+self.type_s = "speaker"
 df_speakers = pd.DataFrame()
 pd.options.mode.chained_assignment = None # default='warn'
 for s in df.speaker.unique():
@@ -297,12 +299,22 @@
 if cat_col == "class_label":
 plot_df = plot_df.rename(columns={cat_col: self.target})
 cat_col = self.target
+elif cont_col == "class_label":
+plot_df = plot_df.rename(columns={cont_col: self.target})
+cont_col = self.target
 dist_type = self.util.config_val("EXPL", "dist_type", "kde")
-
+max_cat, cat_str, effect_results = su.get_effect_size(
+    plot_df, cat_col, cont_col
+)
+self.util.debug(effect_results)
+self.util.print_results_to_store(
+    f"cohens-d_{self.type_s}", str(effect_results) + "\n"
+)
+es = effect_results[max_cat]
 model_type = self.util.get_model_type()
 if dist_type == "hist" and model_type != "tree":
 ax = sns.histplot(plot_df, x=cont_col, hue=cat_col, kde=True)
-caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({
+caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({max_cat}):" f" {es}"
 ax.set_title(caption)
 ax.set_xlabel(f"{cont_col}")
 ax.set_ylabel(f"number of {ylab}")
@@ -316,7 +328,7 @@
 warn_singular=False,
 )
 ax.set(xlabel=f"{cont_col}")
-caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({
+caption = f"{ylab} {plot_df.shape[0]}. {cat_str} ({max_cat}):" f" {es}"
 ax.figure.suptitle(caption)
 return ax, caption

@@ -327,7 +339,7 @@
 if col2 == "class_label":
 plot_df = plot_df.rename(columns={col2: self.target})
 col2 = self.target
-
+elif col1 == "class_label":
 plot_df = plot_df.rename(columns={col1: self.target})
 col1 = self.target
 crosstab = pd.crosstab(index=plot_df[col1], columns=plot_df[col2])
nkululeko/utils/stats.py
CHANGED
@@ -1,7 +1,8 @@
-import math
 from itertools import combinations
+import math

 import numpy as np
+import pandas as pd


 def check_na(a):
@@ -14,9 +15,8 @@ def check_na(a):
 return a


-def cohen_d(d1, d2):
-"""
-Compute Cohen's d from two distributions of real valued arrays.
+def cohen_d(d1: np.array, d2: np.array) -> float:
+"""Compute Cohen's d from two distributions of real valued arrays.

 Args:
 d1: one array
@@ -50,7 +50,9 @@ def all_combinations(items_list):
 return result


-def get_effect_size(df, target, variable):
+def get_effect_size(
+    df: pd.DataFrame, target: str, variable: str
+) -> tuple[str, str, dict]:
 """Get the effect size as Cohen's D.

 Effect size is computed from a real numbered variable on a categorical target.
@@ -68,10 +70,10 @@ def get_effect_size(df, target, variable):
 for c in categories:
 cats[c] = df[df[target] == c][variable].values
 combos = all_combinations(categories)
-results = {}
+results = {categories[0]: 0}
 if len(categories) == 1:
 cat_s = cohens_D_to_string(0)
-return categories[0], cat_s,
+return categories[0], cat_s, results
 else:
 for combo in combos:
 one = combo[0]
@@ -79,10 +81,10 @@
 results[f"{one}-{other}"] = cohen_d(cats[one], cats[other])
 max_cat = max(results, key=results.get)
 cat_s = cohens_D_to_string(float(results[max_cat]))
-return max_cat, cat_s, results
+return max_cat, cat_s, results


-def cohens_D_to_string(val):
+def cohens_D_to_string(val: float) -> str:
 if val < 0.2:
 rval = "no effect"
 elif val < 0.2:
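Note on the typed signature introduced above: Cohen's d is the difference of two sample means scaled by their pooled standard deviation. The snippet below is a self-contained illustration of that textbook formula, not the actual body of nkululeko/utils/stats.py, and all names in it are placeholders.

```python
import numpy as np

def cohen_d_sketch(d1: np.ndarray, d2: np.ndarray) -> float:
    """Textbook Cohen's d: absolute mean difference divided by the pooled standard deviation."""
    n1, n2 = len(d1), len(d2)
    # pool the unbiased (ddof=1) sample variances of both groups
    pooled_var = ((n1 - 1) * np.var(d1, ddof=1) + (n2 - 1) * np.var(d2, ddof=1)) / (n1 + n2 - 2)
    return float(abs(d1.mean() - d2.mean()) / np.sqrt(pooled_var))

# hypothetical per-class values of a continuous variable (e.g. sample duration)
print(cohen_d_sketch(np.array([1.2, 1.4, 1.1, 1.3]), np.array([1.9, 2.1, 2.0, 1.8])))
```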
nkululeko/utils/util.py
CHANGED
@@ -160,6 +160,21 @@ class Util:
 pred_name = self.get_model_description()
 return f"{results_dir}/pred_{target}_{pred_name}.csv"

+def print_results_to_store(self, name: str, contents: str) -> str:
+    """Write contents to a result file.
+
+    Args:
+        name (str): the (sub) name of the file_
+
+    Returns:
+        str: The path to the file
+    """
+    results_dir = self.get_path("res_dir")
+    pred_name = self.get_model_description()
+    path = os.path.join(results_dir, f"{name}_{pred_name}.txt")
+    with open(path, "a") as f:
+        f.write(contents)
+
 def is_categorical(self, pd_series):
 """Check if a dataframe column is categorical."""
 return pd_series.dtype.name == "object" or isinstance(
nkululeko-0.93.14.dist-info/METADATA
ADDED
@@ -0,0 +1,39 @@
+Metadata-Version: 2.2
+Name: nkululeko
+Version: 0.93.14
+Summary: Machine learning audio prediction experiments based on templates
+Home-page: https://github.com/felixbur/nkululeko
+Author: Felix Burkhardt
+Author-email: fxburk@gmail.com
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Development Status :: 3 - Alpha
+Classifier: Topic :: Scientific/Engineering
+Requires-Python: >=3.9
+License-File: LICENSE
+Requires-Dist: audeer
+Requires-Dist: audformat
+Requires-Dist: audinterface
+Requires-Dist: audiofile
+Requires-Dist: audiomentations
+Requires-Dist: audmetric
+Requires-Dist: audonnx
+Requires-Dist: confidence_intervals
+Requires-Dist: datasets
+Requires-Dist: imageio
+Requires-Dist: matplotlib
+Requires-Dist: numpy
+Requires-Dist: opensmile
+Requires-Dist: pandas
+Requires-Dist: praat-parselmouth
+Requires-Dist: scikit_learn
+Requires-Dist: scipy
+Requires-Dist: seaborn
+Requires-Dist: sounddevice
+Requires-Dist: torch
+Requires-Dist: torchvision
+Requires-Dist: transformers
+Requires-Dist: umap-learn
+Requires-Dist: xgboost
+Requires-Dist: pylatex
{nkululeko-0.93.12.dist-info → nkululeko-0.93.14.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ nkululeko/__init__.py,sha256=62f8HiEzJ8rG2QlTFJXUCMpvuH3fKI33DoJSj33mscc,63
 nkululeko/aug_train.py,sha256=FoMbBrfyOZd4QAw7oIHl3X6-UpsqAKWVDIolCA7qOWs,3196
 nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
 nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
-nkululeko/constants.py,sha256=
+nkululeko/constants.py,sha256=o5ER1luWQ6hCEUmTnLGYzK-uGjv9VCnzzDYq2KIxo0o,40
 nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
 nkululeko/demo.py,sha256=4Yzhg6pCPBYPGJrP7JX2TysVosl_R1llpVDKc2P_gUA,4955
 nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
@@ -20,7 +20,7 @@ nkululeko/modelrunner.py,sha256=lJy-xM4QfDDWeL0dLTE_VIb4sYrnd_Z_yJRK3wwohQA,1119
 nkululeko/multidb.py,sha256=sO6OwJn8sn1-C-ig3thsIL8QMWHdV9SnJhDodKjeKrI,6876
 nkululeko/nkuluflag.py,sha256=PGWSmZz-PiiHLgcZJAoGOI_Y-sZDVI1ksB8p5r7riWM,3725
 nkululeko/nkululeko.py,sha256=M7baIq2nAoi6dEoBL4ATEuqAs5U1fvl_hyqAl5DybAQ,2040
-nkululeko/plots.py,sha256=
+nkululeko/plots.py,sha256=jutO1nC7EMXGEPXCivVGhgrk3I0WrYrvIWyClm7ASaE,26440
 nkululeko/predict.py,sha256=MLnHEyFmSiHLLs-HDczag8Vu3zKF5T1rXLKdZZJ6py8,2083
 nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
 nkululeko/runmanager.py,sha256=AswmORVUkCIH0gTx6zEyufvFATQBS8C5TXo2erSNdVg,7611
@@ -49,12 +49,12 @@ nkululeko/autopredict/ap_stoi.py,sha256=UEQg1ZV0meAsxgdWB8ieRs9GPXHqArmsaOyCGRwp
 nkululeko/autopredict/ap_valence.py,sha256=WrW4Ltqi_odW49_4QEVKkfnrcztLIVZ4cXIEHu4dBN8,1026
 nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
 nkululeko/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-nkululeko/data/dataset.py,sha256=
-nkululeko/data/dataset_csv.py,sha256=
+nkululeko/data/dataset.py,sha256=G6RFK2msSVHxpzDm8gZSAD4GK6ieMS5fTbqVS-NOFuY,30081
+nkululeko/data/dataset_csv.py,sha256=AIbtB6pGk5BSQGIgfokZ7tEGFjmuOq5w2XumRSimVWs,4833
 nkululeko/feat_extract/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/feat_extract/feats_agender.py,sha256=onfAQ6-xx_mFMJXEF1IX8cHBmGtGeX6weJmxbkfh1_o,3184
 nkululeko/feat_extract/feats_agender_agender.py,sha256=_YQv1qw--3uQfnyTQDCwlmPRnrhdMhgXbYK2yQtseW0,3464
-nkululeko/feat_extract/feats_analyser.py,sha256=
+nkululeko/feat_extract/feats_analyser.py,sha256=txuIEgO4uprle35RzBczvZm5Hc7iUl2p9oBEfdrvg_I,13506
 nkululeko/feat_extract/feats_ast.py,sha256=w62xEoLiFtU-rj6SXkqXAktmoFaXcAcAWpUyEjp8JWo,4652
 nkululeko/feat_extract/feats_auddim.py,sha256=CGLp_aYhudfwoU5522vjrvjPxfZcyw593A8xLjYefV8,3134
 nkululeko/feat_extract/feats_audmodel.py,sha256=OsZyB1rdcG0Fai2gAwBlbuubmWor1_-P4IDkZLqgPKE,3161
@@ -71,7 +71,7 @@ nkululeko/feat_extract/feats_spectra.py,sha256=6WhFUpB0WTutg7OFMlAw9lSwVU5OBYCDc
 nkululeko/feat_extract/feats_spkrec.py,sha256=o_6bdU4lIkj64S5Kdjf1iyuo1VASeYxE4XdxV94a8gE,4732
 nkululeko/feat_extract/feats_squim.py,sha256=yJifsp9kj9iJjW_UAKr3LlvVhX5rv7el4bepn0wN2a8,4578
 nkululeko/feat_extract/feats_trill.py,sha256=JgyUQ8ihIL5PlUpxjchlbC9547GI0SyUwkFEquya85Q,3197
-nkululeko/feat_extract/feats_wav2vec2.py,sha256=
+nkululeko/feat_extract/feats_wav2vec2.py,sha256=q1QzMD3KbhF2SOmxdwI7CiViRmhlFRyghxN_6SmUc0E,5297
 nkululeko/feat_extract/feats_wavlm.py,sha256=O9cfc39VF5aPJRRATKb37pHT4W11i2cu5O1mY9LOjIA,4755
 nkululeko/feat_extract/feats_whisper.py,sha256=n3ESZtva7wshs8E8diBlQYa9xCH_P0UY1DncSrxz-FY,4508
 nkululeko/feat_extract/featureset.py,sha256=clcBv9rzBRW-bfw7JC_FYTjU5uUS-c0UE1XtQLYYRiE,1615
@@ -110,11 +110,11 @@ nkululeko/segmenting/seg_pyannote.py,sha256=6IPbgjnGOz9juzEKDTZN3PSipX4t6Mz-DILA
 nkululeko/segmenting/seg_silero.py,sha256=ulodnvtRq5MLHDxy_RmAK4tJg6h1d-mPq-uCPFkGVKg,4258
 nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
-nkululeko/utils/stats.py,sha256=
-nkululeko/utils/util.py,sha256=
-nkululeko-0.93.
-nkululeko-0.93.
-nkululeko-0.93.
-nkululeko-0.93.
-nkululeko-0.93.
-nkululeko-0.93.
+nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
+nkululeko/utils/util.py,sha256=J_dmqkOVAW63Q7IFUBj0BgygKzMXA0nORxY62-o8z_g,17360
+nkululeko-0.93.14.dist-info/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.93.14.dist-info/METADATA,sha256=2cqjRLPed00dMPGG8SDMHG9k0w1gx0bItfrYsGk4rR4,1148
+nkululeko-0.93.14.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+nkululeko-0.93.14.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+nkululeko-0.93.14.dist-info/top_level.txt,sha256=DPFNNSHPjUeVKj44dVANAjuVGRCC3MusJ08lc2a8xFA,10
+nkululeko-0.93.14.dist-info/RECORD,,