PyPI - nkululeko - Versions diffs - 0.95.7__py3-none-any.whl → 0.95.9__py3-none-any.whl - Mend

nkululeko 0.95.7__py3-none-any.whl → 0.95.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

nkululeko/autopredict/ap_translate.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""A translator for text.
+Currently based on google translate.
+"""
+from nkululeko.utils.util import Util
+class TextTranslator:
+    """Autopredict step that adds a translation column to a dataframe.
+    The translation itself is delegated to GoogleTranslator; the target
+    language is read from the PREDICT section (key target_language, default "en").
+    """
+    def __init__(self, df, util=None):
+        """Keep the dataframe, resolve the util object and set up the translator.
+        :param df: dataframe whose text should be translated.
+        :param util: Util instance to use; a new Util("translator") is created if None.
+        """
+        self.df = df
+        # without a util from the caller, build an own one so that the config
+        # and the other utilities are reachable from the autopredict module
+        self.util = Util("translator") if util is None else util
+        self.language = self.util.config_val("PREDICT", "target_language", "en")
+        # imported inside the constructor, not at module level
+        from nkululeko.autopredict.google_translator import GoogleTranslator
+        self.translator = GoogleTranslator(language=self.language, util=self.util)
+    def predict(self, split_selection):
+        """Return a copy of the dataframe with the translations added.
+        :param split_selection: name of the sample selection, only used for the debug message.
+        :return: copy of self.df with an extra column named after the target language.
+        """
+        self.util.debug(f"translating text for {split_selection} samples")
+        translated = self.translator.translate_index(self.df)
+        result = self.df.copy()
+        # assign by position (.values), not by index alignment
+        result[self.language] = translated[self.language].values
+        return result

nkululeko/autopredict/google_translator.py ADDED Viewed

@@ -0,0 +1,63 @@
+import os
+import pandas as pd
+import torch
+from tqdm import tqdm
+import asyncio
+from googletrans import Translator
+import audeer
+import audiofile
+from nkululeko.utils.util import Util
+import httpx
+class GoogleTranslator:
+    """Translate text with Google Translate (via googletrans) and cache the results as JSON."""
+    def __init__(self, language="en", util=None):
+        """Store the target language and the utility object.
+        :param language: target language code, passed to googletrans as ``dest``.
+        :param util: Util instance used for paths and JSON I/O; a new
+            Util("translator") is created if None.
+        """
+        self.language = language
+        # fall back to an own Util (as TextTranslator does); with util=None
+        # translate_index would otherwise fail on self.util.get_path
+        self.util = util if util is not None else Util("translator")
+    async def translate_text(self, text):
+        """Translate one text into the target language.
+        :param text: the text to translate.
+        :return: the translated text.
+        """
+        async with Translator() as translator:
+            # dest was hard-coded to "en", which ignored the configured language
+            # although the result column is named after self.language
+            result = await translator.translate(text, dest=self.language)
+            return result.text
+    def translate_index(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Translate the ``text`` column of the given dataframe.
+        Each translation is cached as a JSON file below the "cache" path, so
+        repeated runs do not query the service again.
+        :param df: DataFrame with a ``text`` column, indexed by tuples of
+            (file, start, end); start and end must offer ``total_seconds()``
+            (presumably timedeltas).
+        :return: DataFrame with one column named after the target language,
+            sharing the index of ``df``.
+        :rtype: pd.DataFrame
+        """
+        file_name = ""
+        seg_index = 0
+        translations = []
+        translator_cache = audeer.mkdir(
+            audeer.path(self.util.get_path("cache"), "translations"))
+        for idx, row in tqdm(df.iterrows(), total=len(df)):
+            file = idx[0]
+            start = idx[1]
+            end = idx[2]
+            # segments are numbered per file; restart the count on a new file
+            if file != file_name:
+                file_name = file
+                seg_index = 0
+            # the language is part of the cache key, otherwise changing the
+            # target language would return stale translations
+            cache_name = f"{audeer.basename_wo_ext(file)}{seg_index}_{self.language}"
+            cache_path = audeer.path(translator_cache, cache_name + ".json")
+            if os.path.isfile(cache_path):
+                translation = self.util.read_json(cache_path)["translation"]
+            else:
+                text = row['text']
+                translation = asyncio.run(self.translate_text(text))
+                self.util.save_json(cache_path,
+                                {"translation": translation,
+                                 "file": file,
+                                 "start": start.total_seconds(),
+                                 "end": end.total_seconds()})
+            translations.append(translation)
+            seg_index += 1
+        return pd.DataFrame({self.language: translations}, index=df.index)

nkululeko/constants.py CHANGED Viewed

@@ -1,2 +1,2 @@
-VERSION="0.95.7"
+VERSION="0.95.9"
 SAMPLING_RATE = 16000

nkululeko/experiment.py CHANGED Viewed

@@ -574,6 +574,11 @@ class Experiment:
                 predictor = TextPredictor(df, self.util)
                 df = predictor.predict(sample_selection)
+            elif target == "translation":
+                from nkululeko.autopredict.ap_translate import TextTranslator
+                predictor = TextTranslator(df, self.util)
+                df = predictor.predict(sample_selection)
             elif target == "arousal":
                 from nkululeko.autopredict.ap_arousal import ArousalPredictor

nkululeko/plots.py CHANGED Viewed

@@ -28,7 +28,10 @@ class Plots:
         self.with_ccc = eval(self.util.config_val("PLOT", "ccc", "False"))
         self.type_s = "samples"
-    def plot_distributions_speaker(self, df):
+    def plot_distributions_speaker(self, df: pd.DataFrame):
+        if df.empty:
+            self.util.warn("plot_distributions_speaker: empty DataFrame, nothing to plot")
+            return
         self.type_s = "speaker"
         df_speakers = pd.DataFrame()
         pd.options.mode.chained_assignment = None  # default='warn'
@@ -87,7 +90,10 @@ class Plots:
         self.plot_distributions(df_speakers, type_s="speakers")
-    def plot_distributions(self, df, type_s="samples"):
+    def plot_distributions(self, df: pd.DataFrame, type_s: str = "samples"):
+        if df.empty:
+            self.util.warn("plot_distributions: empty DataFrame, nothing to plot")
+            return
         class_label, df = self._check_binning("class_label", df)
         value_counts_conf = self.util.config_val("EXPL", "value_counts", False)
         if not isinstance(value_counts_conf, str):

nkululeko/utils/util.py CHANGED Viewed

@@ -189,8 +189,11 @@ class Util:
     def is_categorical(self, pd_series):
         """Check if a dataframe column is categorical."""
-        return pd_series.dtype.name == "object" or isinstance(
-            pd_series.dtype, pd.CategoricalDtype
+        return (
+            pd_series.dtype.name == "object"
+            or pd_series.dtype.name == "bool"
+            or isinstance(pd_series.dtype, pd.CategoricalDtype)
+            or isinstance(pd_series.dtype, pd.BooleanDtype)
         )
     def get_name(self):

{nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nkululeko
-Version: 0.95.7
+Version: 0.95.9
 Summary: Machine learning audio prediction experiments based on templates
 Home-page: https://github.com/felixbur/nkululeko
 Author: Felix Burkhardt

{nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/RECORD RENAMED Viewed

@@ -4,13 +4,13 @@ nkululeko/aug_train.py,sha256=wpiHCJ7zsW38kumg3ypwXZe2HQrhUblAnv7P2QeJnAc,3525
 nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
 nkululeko/balance.py,sha256=r7opXbrqAipm2euPPaOmLlA5J10p2bHQgO5kWk2x9ro,8702
 nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
-nkululeko/constants.py,sha256=6jfPRCrnqqRsGqz83bT34_5gPBbTiIAsnhzVWUrKXl4,39
+nkululeko/constants.py,sha256=t_C_hQqVC1idXJB6HHr1m7ZtCYC5JVvqhYrVLRhzwIw,39
 nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
 nkululeko/demo.py,sha256=tu7Al2l5MCLVegkDC-NE2wcuc_YE7NRbgOlPW3yhGEs,4940
 nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
 nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
 nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
-nkululeko/experiment.py,sha256=BAc220lktt_tvifl-m-ZIPO7Nwi-HzDBNyTfjPDbQkE,38397
+nkululeko/experiment.py,sha256=TG9G9kSETT_R8d92aRKMMsb0HRGyM_GBFHBsU9A6ppw,38633
 nkululeko/explore.py,sha256=PjNcLuPdvWqCqYXUvGhd0hBijIhzdyi3ED1RF6o5Gjk,4212
 nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
 nkululeko/feature_extractor.py,sha256=CsKmBoxwNClRGu20ox_eCxMG4u_1OH8Y83FYw7GfUwA,4230
@@ -24,7 +24,7 @@ nkululeko/nkuluflag.py,sha256=_83LqLr2bSHjnVJuPeSAHCIyuiIbRxgpFKW6CwanWFM,3728
 nkululeko/nkululeko.py,sha256=6ALPMMIz6l0O3IRaP0q4b59ZUxpfzNqLQUqZMf5t3Zo,1976
 nkululeko/optim.py,sha256=Pn_02irXYJJmNG1yWA9GImHirpbXXywV61MalZb2wVA,1658
 nkululeko/optimizationrunner.py,sha256=UfWU_gOPaHUVjvYaw3AoF9HoDGYxIjbCyTGmi1PVu3s,44283
-nkululeko/plots.py,sha256=rVkOGWB7yLkZ1dGg_MXeKhPOtiquiYIyCam4KYOdJQY,27519
+nkululeko/plots.py,sha256=DnTJHmz50vphnTiazCy2J6k0wP0-MRWir7gj7i_WKXM,27808
 nkululeko/predict.py,sha256=PWv1Pc39lrxqqIWrYszVk5SL37dDL93CHgcruItNID8,2211
 nkululeko/resample.py,sha256=rn3-M1A-iwVGibfQNGyeYNa7briD24lIN9Szq_1uTJo,5194
 nkululeko/runmanager.py,sha256=YtGQP0UyyQTKkilncB1XYM-T8oatzGcZEOcj5SorjJw,8902
@@ -52,8 +52,10 @@ nkululeko/autopredict/ap_sid.py,sha256=b_JwVWlqcwdC7acU9Q7mExuOJKUn6qdlmQTm8pmmp
 nkululeko/autopredict/ap_snr.py,sha256=cjc0pUsCN_RacTw1UBR2cGY9t_um48f2cjo3QJDn7bw,1111
 nkululeko/autopredict/ap_stoi.py,sha256=csv9qCcRmieHAhypszqGoGt9r3biM8IYPgcTwp9GIFM,1188
 nkululeko/autopredict/ap_text.py,sha256=zaz9qIg90-ghZhBe1ka0HoUnap6s6RyopUKoCpttHOU,1333
+nkululeko/autopredict/ap_translate.py,sha256=3yxNQmysAB3GP84YjFg_9Wc5yz0iXZXxrL5VZpEyyiI,1138
 nkululeko/autopredict/ap_valence.py,sha256=9S06SpO_zXKSpkf0InHYYXZcD9HDGoCJ6UPkn__eBAg,1027
 nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
+nkululeko/autopredict/google_translator.py,sha256=at5v7vWbP9UwXDVqVCqai4SKz-XuazpeeFsptSGrBUU,2114
 nkululeko/autopredict/whisper_transcriber.py,sha256=DWDvpRaV5KmUF18ojPEvxnVXm_h_nWyY-TfW2Ngd5N8,2941
 nkululeko/autopredict/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/autopredict/tests/test_whisper_transcriber.py,sha256=ilas6j3OUvq_xnQCRZgytQCtyrpNU6tvG5a8kPvVKBQ,5085
@@ -133,10 +135,10 @@ nkululeko/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
 nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
 nkululeko/utils/unzip.py,sha256=G68f5120TjwACZC3bQcneMniddnwubPbBdMc2L5KBOo,1206
-nkululeko/utils/util.py,sha256=yHgzfj-8ncgCvyrrrH_NDWCh6VmhAqVYY6Vlgyg-c6E,18585
-nkululeko-0.95.7.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
-nkululeko-0.95.7.dist-info/METADATA,sha256=s_XLh9XUEm_NRApCwnUc8QKkRHWqlva7yxY8Jce0vSI,21998
-nkululeko-0.95.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nkululeko-0.95.7.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
-nkululeko-0.95.7.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
-nkululeko-0.95.7.dist-info/RECORD,,
+nkululeko/utils/util.py,sha256=s7Hd7Ju1r3_WCw8gLD9YK4O6k3S_WhFcN2-XZBSctSM,18705
+nkululeko-0.95.9.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
+nkululeko-0.95.9.dist-info/METADATA,sha256=WhITXnJHYD5GhyATjEb7kJhmMecWRu-BeMBw7pSWNdc,21998
+nkululeko-0.95.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nkululeko-0.95.9.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
+nkululeko-0.95.9.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
+nkululeko-0.95.9.dist-info/RECORD,,

{nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{nkululeko-0.95.7.dist-info → nkululeko-0.95.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

nkululeko 0.95.7__py3-none-any.whl → 0.95.9__py3-none-any.whl

nkululeko 0.95.7__py3-none-any.whl → 0.95.9__py3-none-any.whl