nkululeko 0.95.8__py3-none-any.whl → 0.95.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ """A translator for text.
2
+
3
+ Currently based on google translate.
4
+ """
5
+
6
+ from nkululeko.utils.util import Util
7
+
8
+
9
class TextTranslator:
    """Autopredict component that adds a translated-text column.

    The translation work is delegated to the Google-translate based
    ``GoogleTranslator``; the target language is read from the
    ``target_language`` option of the ``PREDICT`` section (default ``en``).
    """

    def __init__(self, df, util=None):
        """Store the data and set up the translator.

        :param df: DataFrame handed on to the translator in ``predict``.
        :param util: Optional utility object; a fresh ``Util("translator")``
            is created when none is given.
        """
        self.df = df
        # Without a caller-supplied util we build our own, so that the
        # configuration and the other helpers stay reachable from within
        # the autopredict module.
        self.util = util if util is not None else Util("translator")

        self.language = self.util.config_val("PREDICT", "target_language", "en")
        # Imported here rather than at module level, so the translator
        # module is only loaded once a TextTranslator is constructed.
        from nkululeko.autopredict.google_translator import GoogleTranslator

        self.translator = GoogleTranslator(language=self.language, util=self.util)

    def predict(self, split_selection):
        """Return a copy of the data with a column holding the translations.

        :param split_selection: Name of the sample selection; only used for
            the debug message.
        :return: Copy of ``self.df`` with an extra column named after the
            target language.
        """
        self.util.debug(f"translating text for {split_selection} samples")
        translated = self.translator.translate_index(self.df)
        result = self.df.copy()
        # Assign by position (.values), not by index alignment.
        result[self.language] = translated[self.language].values
        return result
@@ -0,0 +1,63 @@
1
+ import os
2
+
3
+ import pandas as pd
4
+ import torch
5
+ from tqdm import tqdm
6
+
7
+ import asyncio
8
+ from googletrans import Translator
9
+
10
+ import audeer
11
+ import audiofile
12
+
13
+ from nkululeko.utils.util import Util
14
+
15
+ import httpx
16
+
17
class GoogleTranslator:
    """Translate text with Google Translate via the ``googletrans`` package.

    Translations are cached as one JSON file per segment below the
    ``translations`` folder of the experiment cache.
    """

    def __init__(self, language="en", util=None):
        """Remember the target language and the utility object.

        :param language: Target language code handed to ``googletrans``.
        :param util: Utility object; ``translate_index`` uses its
            ``get_path``, ``read_json`` and ``save_json`` methods.
        """
        self.language = language
        self.util = util

    async def translate_text(self, text):
        """Translate ``text`` into the configured target language.

        :param text: The text to translate.
        :return: The translated text.
        """
        async with Translator() as translator:
            # Use the configured target language; this was hard-coded to
            # "en" before, so other target languages were silently ignored.
            result = await translator.translate(text, dest=self.language)
            return result.text

    def translate_index(self, df: pd.DataFrame) -> pd.DataFrame:
        """Translate the ``text`` column of the given dataframe.

        Each row is translated on its own. Results are cached on disk under
        a name built from the file's base name and a running segment counter
        that restarts whenever the file changes.

        :param df: DataFrame with a ``text`` column whose index entries are
            (file, start, end) tuples; ``start`` and ``end`` must provide
            ``total_seconds()``.
        :return: DataFrame with one column, named after the target language,
            holding the translations and indexed like ``df``.
        :rtype: pd.DataFrame
        """
        translations = []
        translator_cache = audeer.mkdir(
            audeer.path(self.util.get_path("cache"), "translations")
        )
        file_name = ""
        seg_index = 0
        for idx, row in tqdm(df.iterrows(), total=len(df)):
            file, start, end = idx[0], idx[1], idx[2]
            if file != file_name:
                # a new file starts: restart the per-file segment counter
                file_name = file
                seg_index = 0
            cache_name = audeer.basename_wo_ext(file) + str(seg_index)
            cache_path = audeer.path(translator_cache, cache_name + ".json")

            cache_hit = False
            if os.path.isfile(cache_path):
                cached = self.util.read_json(cache_path)
                # Cache files without a "language" entry were written when
                # the target was always "en"; only reuse an entry that
                # matches the current target language.
                # NOTE(review): assumes read_json returns a dict.
                if cached.get("language", "en") == self.language:
                    translation = cached["translation"]
                    cache_hit = True
            if not cache_hit:
                translation = asyncio.run(self.translate_text(row["text"]))
                self.util.save_json(
                    cache_path,
                    {
                        "translation": translation,
                        "file": file,
                        "start": start.total_seconds(),
                        "end": end.total_seconds(),
                        "language": self.language,
                    },
                )
            translations.append(translation)
            seg_index += 1

        return pd.DataFrame({self.language: translations}, index=df.index)
nkululeko/constants.py CHANGED
@@ -1,2 +1,2 @@
1
- VERSION="0.95.8"
1
+ VERSION="0.95.9"
2
2
  SAMPLING_RATE = 16000
nkululeko/experiment.py CHANGED
@@ -574,6 +574,11 @@ class Experiment:
574
574
 
575
575
  predictor = TextPredictor(df, self.util)
576
576
  df = predictor.predict(sample_selection)
577
+ elif target == "translation":
578
+ from nkululeko.autopredict.ap_translate import TextTranslator
579
+
580
+ predictor = TextTranslator(df, self.util)
581
+ df = predictor.predict(sample_selection)
577
582
  elif target == "arousal":
578
583
  from nkululeko.autopredict.ap_arousal import ArousalPredictor
579
584
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nkululeko
3
- Version: 0.95.8
3
+ Version: 0.95.9
4
4
  Summary: Machine learning audio prediction experiments based on templates
5
5
  Home-page: https://github.com/felixbur/nkululeko
6
6
  Author: Felix Burkhardt
@@ -4,13 +4,13 @@ nkululeko/aug_train.py,sha256=wpiHCJ7zsW38kumg3ypwXZe2HQrhUblAnv7P2QeJnAc,3525
4
4
  nkululeko/augment.py,sha256=3RzaxB3gRxovgJVjHXi0glprW01J7RaHhUkqotW2T3U,2955
5
5
  nkululeko/balance.py,sha256=r7opXbrqAipm2euPPaOmLlA5J10p2bHQgO5kWk2x9ro,8702
6
6
  nkululeko/cacheddataset.py,sha256=XFpWZmbJRg0pvhnIgYf0TkclxllD-Fctu-Ol0PF_00c,969
7
- nkululeko/constants.py,sha256=9cVRluLqakiiCBPe3kAeJizsgpn2LUbgdAs0Y9scIEM,39
7
+ nkululeko/constants.py,sha256=t_C_hQqVC1idXJB6HHr1m7ZtCYC5JVvqhYrVLRhzwIw,39
8
8
  nkululeko/demo-ft.py,sha256=iD9Pzp9QjyAv31q1cDZ75vPez7Ve8A4Cfukv5yfZdrQ,770
9
9
  nkululeko/demo.py,sha256=tu7Al2l5MCLVegkDC-NE2wcuc_YE7NRbgOlPW3yhGEs,4940
10
10
  nkululeko/demo_feats.py,sha256=BvZjeNFTlERIRlq34OHM4Z96jdDQAhB01BGQAUcX9dM,2026
11
11
  nkululeko/demo_predictor.py,sha256=lDF-xOxRdEAclOmbepAYg-BQXQdGkHfq2n74PTIoop8,4872
12
12
  nkululeko/ensemble.py,sha256=71V-rre61H3J4sh7lu-OTo4I2_g7mm_rQxwW1ARDHgY,12782
13
- nkululeko/experiment.py,sha256=BAc220lktt_tvifl-m-ZIPO7Nwi-HzDBNyTfjPDbQkE,38397
13
+ nkululeko/experiment.py,sha256=TG9G9kSETT_R8d92aRKMMsb0HRGyM_GBFHBsU9A6ppw,38633
14
14
  nkululeko/explore.py,sha256=PjNcLuPdvWqCqYXUvGhd0hBijIhzdyi3ED1RF6o5Gjk,4212
15
15
  nkululeko/export.py,sha256=U-V4acxtuL6qKt6oAsVcM5TTeWogYUJ3GU-lA6rq6d4,4336
16
16
  nkululeko/feature_extractor.py,sha256=CsKmBoxwNClRGu20ox_eCxMG4u_1OH8Y83FYw7GfUwA,4230
@@ -52,8 +52,10 @@ nkululeko/autopredict/ap_sid.py,sha256=b_JwVWlqcwdC7acU9Q7mExuOJKUn6qdlmQTm8pmmp
52
52
  nkululeko/autopredict/ap_snr.py,sha256=cjc0pUsCN_RacTw1UBR2cGY9t_um48f2cjo3QJDn7bw,1111
53
53
  nkululeko/autopredict/ap_stoi.py,sha256=csv9qCcRmieHAhypszqGoGt9r3biM8IYPgcTwp9GIFM,1188
54
54
  nkululeko/autopredict/ap_text.py,sha256=zaz9qIg90-ghZhBe1ka0HoUnap6s6RyopUKoCpttHOU,1333
55
+ nkululeko/autopredict/ap_translate.py,sha256=3yxNQmysAB3GP84YjFg_9Wc5yz0iXZXxrL5VZpEyyiI,1138
55
56
  nkululeko/autopredict/ap_valence.py,sha256=9S06SpO_zXKSpkf0InHYYXZcD9HDGoCJ6UPkn__eBAg,1027
56
57
  nkululeko/autopredict/estimate_snr.py,sha256=1k9-XadABudnsNOeFZD_Fg0E64-GUQVS7JEp82MLQS4,4995
58
+ nkululeko/autopredict/google_translator.py,sha256=at5v7vWbP9UwXDVqVCqai4SKz-XuazpeeFsptSGrBUU,2114
57
59
  nkululeko/autopredict/whisper_transcriber.py,sha256=DWDvpRaV5KmUF18ojPEvxnVXm_h_nWyY-TfW2Ngd5N8,2941
58
60
  nkululeko/autopredict/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
61
  nkululeko/autopredict/tests/test_whisper_transcriber.py,sha256=ilas6j3OUvq_xnQCRZgytQCtyrpNU6tvG5a8kPvVKBQ,5085
@@ -134,9 +136,9 @@ nkululeko/utils/files.py,sha256=SrrYaU7AB80MZHiV1jcB0h_zigvYLYgSVNTXV4ao38g,4593
134
136
  nkululeko/utils/stats.py,sha256=3Fyx8q8BSKYmiufT6OkRug9RATWmGrr9BaX_y8jziWo,3074
135
137
  nkululeko/utils/unzip.py,sha256=G68f5120TjwACZC3bQcneMniddnwubPbBdMc2L5KBOo,1206
136
138
  nkululeko/utils/util.py,sha256=s7Hd7Ju1r3_WCw8gLD9YK4O6k3S_WhFcN2-XZBSctSM,18705
137
- nkululeko-0.95.8.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
138
- nkululeko-0.95.8.dist-info/METADATA,sha256=ye2EpYXgbMMRD1L1KnUqjbFO5W7--glJKj-1yBmHCX8,21998
139
- nkululeko-0.95.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
140
- nkululeko-0.95.8.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
141
- nkululeko-0.95.8.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
142
- nkululeko-0.95.8.dist-info/RECORD,,
139
+ nkululeko-0.95.9.dist-info/licenses/LICENSE,sha256=0zGP5B_W35yAcGfHPS18Q2B8UhvLRY3dQq1MhpsJU_U,1076
140
+ nkululeko-0.95.9.dist-info/METADATA,sha256=WhITXnJHYD5GhyATjEb7kJhmMecWRu-BeMBw7pSWNdc,21998
141
+ nkululeko-0.95.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
142
+ nkululeko-0.95.9.dist-info/entry_points.txt,sha256=lNTkFEdh6Kjo5o95ZAWf_0Lq-4ztGoAoMVSDuPtuyS0,442
143
+ nkululeko-0.95.9.dist-info/top_level.txt,sha256=bf1k1YKkqcXemNX_cUgoyKqQ3_GVErPqAY-53J36jkM,19
144
+ nkululeko-0.95.9.dist-info/RECORD,,