nkululeko 0.95.7__py3-none-any.whl → 0.95.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nkululeko/autopredict/ap_translate.py +39 -0
- nkululeko/autopredict/google_translator.py +63 -0
- nkululeko/constants.py +1 -1
- nkululeko/experiment.py +5 -0
- nkululeko/plots.py +8 -2
- nkululeko/utils/util.py +5 -2
- {nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/METADATA +1 -1
- {nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/RECORD +12 -10
- {nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/WHEEL +0 -0
- {nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/entry_points.txt +0 -0
- {nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/licenses/LICENSE +0 -0
- {nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
"""A translator for text.
|
2
|
+
|
3
|
+
Currently based on google translate.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from nkululeko.utils.util import Util
|
7
|
+
|
8
|
+
|
9
|
+
class TextTranslator:
    """Add a translated copy of the text to a dataframe.

    The translation itself is delegated to the googletrans-based
    ``GoogleTranslator`` backend; this class only wires it to the
    configuration and merges the result back into the data.
    """

    def __init__(self, df, util=None):
        """Set up the translator backend.

        :param df: dataframe holding the samples to translate.
        :param util: shared ``Util`` instance; when omitted, a standalone
            one named ``"translator"`` is created so the configuration
            can still be read.
        """
        self.df = df
        self.util = Util("translator") if util is None else util

        # Target language comes from the [PREDICT] config section.
        self.language = self.util.config_val("PREDICT", "target_language", "en")

        # Imported here rather than at module level -- presumably so that
        # googletrans is only required when translation is actually used.
        from nkululeko.autopredict.google_translator import GoogleTranslator

        self.translator = GoogleTranslator(language=self.language, util=self.util)

    def predict(self, split_selection):
        """Translate the text and return it as an additional column.

        :param split_selection: name of the sample selection; only used
            for the debug message.
        :return: a copy of the dataframe with one extra column, named
            after the target language, holding the translations.
        """
        self.util.debug(f"translating text for {split_selection} samples")
        translated = self.translator.translate_index(self.df)
        # Work on a copy so the dataframe handed to the constructor
        # stays unmodified.
        result = self.df.copy()
        result[self.language] = translated[self.language].values
        return result
|
@@ -0,0 +1,63 @@
|
|
1
|
+
import os
|
2
|
+
|
3
|
+
import pandas as pd
|
4
|
+
import torch
|
5
|
+
from tqdm import tqdm
|
6
|
+
|
7
|
+
import asyncio
|
8
|
+
from googletrans import Translator
|
9
|
+
|
10
|
+
import audeer
|
11
|
+
import audiofile
|
12
|
+
|
13
|
+
from nkululeko.utils.util import Util
|
14
|
+
|
15
|
+
import httpx
|
16
|
+
|
17
|
+
class GoogleTranslator:
    """Translate the ``text`` column of a segmented dataframe with googletrans.

    Each translated segment is cached as a small JSON file below the
    experiment's cache directory, so repeated runs do not query the
    translation service again.
    """

    def __init__(self, language="en", util=None):
        """Store the target language and the utility object.

        :param language: target language code passed to googletrans.
        :param util: shared ``Util`` instance; when omitted, a standalone
            one named ``"translator"`` is created, because
            ``translate_index`` needs it for cache paths and JSON I/O.
        """
        self.language = language
        self.util = util if util is not None else Util("translator")

    async def translate_text(self, text):
        """Translate a single string into the configured target language.

        :param text: the text to translate.
        :return: the translated text.
        """
        async with Translator() as translator:
            # Use the configured language; it also names the result column.
            result = await translator.translate(text, dest=self.language)
        return result.text

    def _cache_file_name(self, file, seg_index):
        """Return the cache file name for one segment of ``file``.

        The target language is part of the name so that switching languages
        never returns a translation cached for another one, and the parts
        are separated so that e.g. ("a1", 0) and ("a", 10) cannot collide.
        """
        # NOTE(review): only the base name is used, so equally named files
        # in different directories still share cache entries -- confirm
        # whether that can happen for the databases in use.
        stem = os.path.splitext(os.path.basename(file))[0]
        return f"{stem}_{seg_index}_{self.language}.json"

    def translate_index(self, df: pd.DataFrame) -> pd.DataFrame:
        """Translate the ``text`` column of the given dataframe.

        :param df: dataframe with a ``text`` column whose index entries
            start with (file, start, end); start and end must offer
            ``total_seconds()``.
        :return: DataFrame with a single column, named after the target
            language, holding the translations on the original index.
        :rtype: pd.DataFrame
        """
        file_name = ""
        seg_index = 0
        translations = []
        translator_cache = audeer.mkdir(
            audeer.path(self.util.get_path("cache"), "translations")
        )
        for idx, row in tqdm(df.iterrows(), total=len(df)):
            file, start, end = idx[:3]
            if file != file_name:
                # A new file starts: segments are numbered per file.
                file_name = file
                seg_index = 0
            cache_path = audeer.path(
                translator_cache, self._cache_file_name(file, seg_index)
            )
            if os.path.isfile(cache_path):
                translation = self.util.read_json(cache_path)["translation"]
            else:
                translation = asyncio.run(self.translate_text(row["text"]))
                self.util.save_json(
                    cache_path,
                    {
                        "translation": translation,
                        "language": self.language,
                        "file": file,
                        "start": start.total_seconds(),
                        "end": end.total_seconds(),
                    },
                )
            translations.append(translation)
            seg_index += 1

        return pd.DataFrame({self.language: translations}, index=df.index)
|
nkululeko/constants.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
|
-
VERSION="0.95.7"
|
1
|
+
VERSION="0.95.9"
|
2
2
|
SAMPLING_RATE = 16000
|
nkululeko/experiment.py
CHANGED
@@ -574,6 +574,11 @@ class Experiment:
|
|
574
574
|
|
575
575
|
predictor = TextPredictor(df, self.util)
|
576
576
|
df = predictor.predict(sample_selection)
|
577
|
+
elif target == "translation":
|
578
|
+
from nkululeko.autopredict.ap_translate import TextTranslator
|
579
|
+
|
580
|
+
predictor = TextTranslator(df, self.util)
|
581
|
+
df = predictor.predict(sample_selection)
|
577
582
|
elif target == "arousal":
|
578
583
|
from nkululeko.autopredict.ap_arousal import ArousalPredictor
|
579
584
|
|
nkululeko/plots.py
CHANGED
@@ -28,7 +28,10 @@ class Plots:
|
|
28
28
|
self.with_ccc = eval(self.util.config_val("PLOT", "ccc", "False"))
|
29
29
|
self.type_s = "samples"
|
30
30
|
|
31
|
-
def plot_distributions_speaker(self, df):
|
31
|
+
def plot_distributions_speaker(self, df: pd.DataFrame):
|
32
|
+
if df.empty:
|
33
|
+
self.util.warn("plot_distributions_speaker: empty DataFrame, nothing to plot")
|
34
|
+
return
|
32
35
|
self.type_s = "speaker"
|
33
36
|
df_speakers = pd.DataFrame()
|
34
37
|
pd.options.mode.chained_assignment = None # default='warn'
|
@@ -87,7 +90,10 @@ class Plots:
|
|
87
90
|
|
88
91
|
self.plot_distributions(df_speakers, type_s="speakers")
|
89
92
|
|
90
|
-
def plot_distributions(self, df, type_s="samples"):
|
93
|
+
def plot_distributions(self, df: pd.DataFrame, type_s: str = "samples"):
|
94
|
+
if df.empty:
|
95
|
+
self.util.warn("plot_distributions: empty DataFrame, nothing to plot")
|
96
|
+
return
|
91
97
|
class_label, df = self._check_binning("class_label", df)
|
92
98
|
value_counts_conf = self.util.config_val("EXPL", "value_counts", False)
|
93
99
|
if not isinstance(value_counts_conf, str):
|
nkululeko/utils/util.py
CHANGED
@@ -189,8 +189,11 @@ class Util:
|
|
189
189
|
|
190
190
|
def is_categorical(self, pd_series):
|
191
191
|
"""Check if a dataframe column is categorical."""
|
192
|
-
return
|
193
|
-
pd_series.dtype
|
192
|
+
return (
|
193
|
+
pd_series.dtype.name == "object"
|
194
|
+
or pd_series.dtype.name == "bool"
|
195
|
+
or isinstance(pd_series.dtype, pd.CategoricalDtype)
|
196
|
+
or isinstance(pd_series.dtype, pd.BooleanDtype)
|
194
197
|
)
|
195
198
|
|
196
199
|
def get_name(self):
|
@@ -4,13 +4,13 @@ nkululeko/aug_train.py,sha256=wpiHCJ7zsW38kumg3ypwXZe2HQrhUblAnv7P2QeJnAc,3525
|
|
4
4
|
nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
|
5
5
|
nkululeko/balance.py,sha256=r7opXbrqAipm2euPPaOmLlA5J10p2bHQgO5kWk2x9ro,8702
|
6
6
|
nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
|
7
|
-
nkululeko/constants.py,sha256=
|
7
|
+
nkululeko/constants.py,sha256=t_C_hQqVC1idXJB6HHr1m7ZtCYC5JVvqhYrVLRhzwIw,39
|
8
8
|
nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
|
9
9
|
nkululeko/demo.py,sha256=tu7Al2l5MCLVegkDC-NE2wcuc_YE7NRbgOlPW3yhGEs,4940
|
10
10
|
nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
|
11
11
|
nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
|
12
12
|
nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
|
13
|
-
nkululeko/experiment.py,sha256=
|
13
|
+
nkululeko/experiment.py,sha256=TG9G9kSETT_R8d92aRKMMsb0HRGyM_GBFHBsU9A6ppw,38633
|
14
14
|
nkululeko/explore.py,sha256=PjNcLuPdvWqCqYXUvGhd0hBijIhzdyi3ED1RF6o5Gjk,4212
|
15
15
|
nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
|
16
16
|
nkululeko/feature_extractor.py,sha256=CsKmBoxwNClRGu20ox_eCxMG4u_1OH8Y83FYw7GfUwA,4230
|
@@ -24,7 +24,7 @@ nkululeko/nkuluflag.py,sha256=_83LqLr2bSHjnVJuPeSAHCIyuiIbRxgpFKW6CwanWFM,3728
|
|
24
24
|
nkululeko/nkululeko.py,sha256=6ALPMMIz6l0O3IRaP0q4b59ZUxpfzNqLQUqZMf5t3Zo,1976
|
25
25
|
nkululeko/optim.py,sha256=Pn_02irXYJJmNG1yWA9GImHirpbXXywV61MalZb2wVA,1658
|
26
26
|
nkululeko/optimizationrunner.py,sha256=UfWU_gOPaHUVjvYaw3AoF9HoDGYxIjbCyTGmi1PVu3s,44283
|
27
|
-
nkululeko/plots.py,sha256=
|
27
|
+
nkululeko/plots.py,sha256=DnTJHmz50vphnTiazCy2J6k0wP0-MRWir7gj7i_WKXM,27808
|
28
28
|
nkululeko/predict.py,sha256=PWv1Pc39lrxqqIWrYszVk5SL37dDL93CHgcruItNID8,2211
|
29
29
|
nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
|
30
30
|
nkululeko/runmanager.py,sha256=YtGQP0UyyQTKkilncB1XYM-T8oatzGcZEOcj5SorjJw,8902
|
@@ -52,8 +52,10 @@ nkululeko/autopredict/ap_sid.py,sha256=b_JwVWlqcwdC7acU9Q7mExuOJKUn6qdlmQTm8pmmp
|
|
52
52
|
nkululeko/autopredict/ap_snr.py,sha256=cjc0pUsCN_RacTw1UBR2cGY9t_um48f2cjo3QJDn7bw,1111
|
53
53
|
nkululeko/autopredict/ap_stoi.py,sha256=csv9qCcRmieHAhypszqGoGt9r3biM8IYPgcTwp9GIFM,1188
|
54
54
|
nkululeko/autopredict/ap_text.py,sha256=zaz9qIg90-ghZhBe1ka0HoUnap6s6RyopUKoCpttHOU,1333
|
55
|
+
nkululeko/autopredict/ap_translate.py,sha256=3yxNQmysAB3GP84YjFg_9Wc5yz0iXZXxrL5VZpEyyiI,1138
|
55
56
|
nkululeko/autopredict/ap_valence.py,sha256=9S06SpO_zXKSpkf0InHYYXZcD9HDGoCJ6UPkn__eBAg,1027
|
56
57
|
nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
|
58
|
+
nkululeko/autopredict/google_translator.py,sha256=at5v7vWbP9UwXDVqVCqai4SKz-XuazpeeFsptSGrBUU,2114
|
57
59
|
nkululeko/autopredict/whisper_transcriber.py,sha256=DWDvpRaV5KmUF18ojPEvxnVXm_h_nWyY-TfW2Ngd5N8,2941
|
58
60
|
nkululeko/autopredict/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
61
|
nkululeko/autopredict/tests/test_whisper_transcriber.py,sha256=ilas6j3OUvq_xnQCRZgytQCtyrpNU6tvG5a8kPvVKBQ,5085
|
@@ -133,10 +135,10 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
133
135
|
nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
|
134
136
|
nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
|
135
137
|
nkululeko/utils/unzip.py,sha256=G68f5120TjwACZC3bQcneMniddnwubPbBdMc2L5KBOo,1206
|
136
|
-
nkululeko/utils/util.py,sha256=
|
137
|
-
nkululeko-0.95.
|
138
|
-
nkululeko-0.95.
|
139
|
-
nkululeko-0.95.
|
140
|
-
nkululeko-0.95.
|
141
|
-
nkululeko-0.95.
|
142
|
-
nkululeko-0.95.
|
138
|
+
nkululeko/utils/util.py,sha256=s7Hd7Ju1r3_WCw8gLD9YK4O6k3S_WhFcN2-XZBSctSM,18705
|
139
|
+
nkululeko-0.95.9.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
|
140
|
+
nkululeko-0.95.9.dist-info/METADATA,sha256=WhITXnJHYD5GhyATjEb7kJhmMecWRu-BeMBw7pSWNdc,21998
|
141
|
+
nkululeko-0.95.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
142
|
+
nkululeko-0.95.9.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
|
143
|
+
nkululeko-0.95.9.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
|
144
|
+
nkululeko-0.95.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|