py2ls 0.1.4.6__py3-none-any.whl → 0.1.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/config +1 -0
- py2ls/ips.py +581 -118
- py2ls/netfinder.py +452 -128
- py2ls/translator.py +172 -121
- {py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/METADATA +1 -1
- {py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/RECORD +7 -7
- {py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/WHEEL +1 -1
py2ls/translator.py
CHANGED
@@ -13,6 +13,7 @@ import numpy as np
 from nltk.tokenize import sent_tokenize
 from itertools import pairwise
 from tqdm import tqdm
+from fuzzywuzzy import fuzz,process
 
 
 def split_by_sent_n(text,n=10):
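The only import this hunk adds is fuzzywuzzy. For context, a minimal sketch of the scorer calls the new code relies on (assuming fuzzywuzzy is installed; this snippet is not part of the diff):

from fuzzywuzzy import fuzz, process

print(fuzz.WRatio("deutsch", "Deutsch"))           # weighted ratio, 0-100
print(fuzz.partial_ratio("zh", "zh-cn"))           # best partial alignment
print(fuzz.ratio("en", "eng"))                     # plain similarity ratio (note: lowercase `ratio`)
print(process.extract("en", ["en", "de", "fr"]))   # ranked (candidate, score) pairs

Note that fuzzywuzzy exposes `fuzz.ratio`, not `fuzz.Ratio`; the `fuzz.Ratio` branches in the `strcmp` added below would raise AttributeError, but they are unreachable because `'Ratio' in scorer.lower()` (like `'W' in scorer.lower()`) can never be true after lowercasing, so in practice only the partial_ratio and default WRatio branches execute.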
@@ -58,106 +59,173 @@ def get_lang_code_iso639():
     return lang_code_iso639
 
 # get_lang_code_iso639()
- … (old lines 61-160: 100 removed lines not rendered in this diff view)
+
+def detect_lang(text, output='lang',verbose=False):
+    lang_code_iso639={'Abkhazian': 'ab',
+                      'Afar': 'aa',
+                      'Afrikaans': 'af',
+                      'Akan': 'ak',
+                      'Albanian': 'sq',
+                      'Amharic': 'am',
+                      'Arabic': 'ar',
+                      'Armenian': 'hy',
+                      'Assamese': 'as',
+                      # 'Avaric': 'av',
+                      'Aymara': 'ay',
+                      'Azerbaijani': 'az',
+                      'Bashkir': 'ba',
+                      'Basque': 'eu',
+                      'Belarusian': 'be',
+                      'Bislama': 'bi',
+                      'Breton': 'br',
+                      'Burmese': 'my',
+                      'Catalan, Valencian': 'ca',
+                      'Chamorro': 'ch',
+                      'Chichewa, Chewa, Nyanja': 'ny',
+                      'Chinese': 'zh',
+                      'Corsican': 'co',
+                      'Cree': 'cr',
+                      'Croatian': 'hr',
+                      'Danish': 'da',
+                      'Dutch, Flemish': 'nl',
+                      'Dzongkha': 'dz',
+                      'English': 'en',
+                      'Finnish': 'fi',
+                      'French': 'fr',
+                      'Galician': 'gl',
+                      'Georgian': 'ka',
+                      'German': 'de',
+                      'Greek, Modern (1453–)': 'el',
+                      'Gujarati': 'gu',
+                      'Hausa': 'ha',
+                      'Hebrew': 'he',
+                      'Hindi': 'hi',
+                      'Hungarian': 'hu',
+                      'Icelandic': 'is',
+                      'Italian': 'it',
+                      'Kikuyu, Gikuyu': 'ki',
+                      'Korean': 'ko',
+                      'Kurdish': 'ku',
+                      'Latin': 'la',
+                      'Limburgan, Limburger, Limburgish': 'li',
+                      'Luba-Katanga': 'lu',
+                      'Macedonian': 'mk',
+                      'Malay': 'ms',
+                      'Nauru': 'na',
+                      'North Ndebele': 'nd',
+                      'Nepali': 'ne',
+                      'Norwegian': 'no',
+                      'Norwegian Nynorsk': 'nn',
+                      'Sichuan Yi, Nuosu': 'ii',
+                      'Occitan': 'oc',
+                      'Ojibwa': 'oj',
+                      'Oriya': 'or',
+                      'Ossetian, Ossetic': 'os',
+                      'Persian': 'fa',
+                      'Punjabi, Panjabi': 'pa',
+                      'Quechua': 'qu',
+                      'Romanian, Moldavian, Moldovan': 'ro',
+                      'Russian': 'ru',
+                      'Samoan': 'sm',
+                      'Sanskrit': 'sa',
+                      'Serbian': 'sr',
+                      'Shona': 'sn',
+                      'Sinhala, Sinhalese': 'si',
+                      'Slovenian': 'sl',
+                      'Somali': 'so',
+                      'Sundanese': 'su',
+                      'Swahili': 'sw',
+                      'Swati': 'ss',
+                      'Tajik': 'tg',
+                      'Tamil': 'ta',
+                      'Telugu': 'te',
+                      'Thai': 'th',
+                      'Tibetan': 'bo',
+                      'Tigrinya': 'ti',
+                      'Tonga (Tonga Islands)': 'to',
+                      'Tsonga': 'ts',
+                      'Twi': 'tw',
+                      'Ukrainian': 'uk',
+                      'Urdu': 'ur',
+                      'Uzbek': 'uz',
+                      'Venda': 've',
+                      'Vietnamese': 'vi',
+                      'Volapük': 'vo',
+                      'Welsh': 'cy',
+                      'Wolof': 'wo',
+                      'Xhosa': 'xh',
+                      'Yiddish': 'yi',
+                      'Yoruba': 'yo',
+                      'Zulu': 'zu'}
+    l_lang,l_code = [],[]
+    [[l_lang.append(v),l_code.append(k)] for v,k in lang_code_iso639.items()]
+    try:
+        if is_text(text):
+            code_detect=detect(text)
+            if 'c' in output.lower(): # return code
+                return l_code[strcmp(code_detect,l_code, verbose=verbose)[1]]
+            else:
+                return l_lang[strcmp(code_detect,l_code, verbose=verbose)[1]]
+        else:
+            print(f"{text} is not supported")
+            return 'no'
+    except:
+        return 'no'
+
+def is_text(s):
+    has_alpha = any(char.isalpha() for char in s)
+    has_non_alpha = any(not char.isalpha() for char in s)
+    # no_special = not re.search(r'[^A-Za-z0-9\s]', s)
+    return has_alpha and has_non_alpha
+
+def strcmp(search_term, candidates, ignore_case=True, verbose=True, scorer='WR'):
+    """
+    Compares a search term with a list of candidate strings and finds the best match based on similarity score.
+
+    Parameters:
+    search_term (str): The term to be searched for.
+    candidates (list of str): A list of candidate strings to compare against the search term.
+    ignore_case (bool): If True, the comparison ignores case differences.
+    verbose (bool): If True, prints the similarity score and the best match.
+
+    Returns:
+    tuple: A tuple containing the best match and its index in the candidates list.
+    """
+    def to_lower(s, ignore_case=True):
+        # Converts a string or list of strings to lowercase if ignore_case is True.
+        if ignore_case:
+            if isinstance(s, str):
+                return s.lower()
+            elif isinstance(s, list):
+                return [elem.lower() for elem in s]
+        return s
+    str1_,str2_ = to_lower(search_term, ignore_case),to_lower(candidates, ignore_case)
+    if isinstance(str2_, list):
+        if 'part' in scorer.lower():
+            similarity_scores = [fuzz.partial_ratio(str1_, word) for word in str2_]
+        elif 'W' in scorer.lower():
+            similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
+        elif 'Ratio' in scorer.lower():
+            similarity_scores = [fuzz.Ratio(str1_, word) for word in str2_]
+        else:
+            similarity_scores = [fuzz.WRatio(str1_, word) for word in str2_]
+        best_match_index = similarity_scores.index(max(similarity_scores))
+        best_match_score = similarity_scores[best_match_index]
+    else:
+        best_match_index = 0
+        if 'part' in scorer.lower():
+            best_match_score = fuzz.partial_ratio(str1_, str2_)
+        elif 'W' in scorer.lower():
+            best_match_score = fuzz.WRatio(str1_, str2_)
+        elif 'Ratio' in scorer.lower():
+            best_match_score = fuzz.Ratio(str1_, str2_)
+        else:
+            best_match_score = fuzz.WRatio(str1_, str2_)
+    if verbose:
+        print(f"\nbest_match is: {candidates[best_match_index],best_match_score}")
+        best_match = process.extract(search_term, candidates)
+        print(f"建议: {best_match}")
+    return candidates[best_match_index], best_match_index
 
 
 def methods(idx=0):
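A hedged usage sketch of the three helpers added above (detect_lang, is_text, strcmp); it assumes langdetect's detect is already imported at the top of translator.py, which this diff does not show:

# hypothetical calls, not part of the package
print(detect_lang("Guten Morgen, wie geht es dir?", output='lang'))  # likely 'German'
print(detect_lang("Guten Morgen, wie geht es dir?", output='code'))  # likely 'de'

# strcmp returns (best_candidate, index); detect_lang uses the index to map
# the detected ISO code back to a language name.
best, idx = strcmp("zh-cn", ["zh", "en", "de"], verbose=False)
print(best, idx)  # expected: ('zh', 0)

When verbose=True, strcmp additionally prints ranked suggestions from process.extract (the 建议 prefix in its output is Chinese for "suggestion").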
@@ -231,23 +299,6 @@ def get_language_code(language, translator="google"):
 # print(f"Google Translate Language Code for '{language}': {google_lang_code}")
 # print(f"DeepL Translator Language Code for '{language}': {deepl_lang_code}")
 
-def detect_language(text):
-    """
-    Detect the language of the given text.
-    """
-    if len(text.strip()) < 3:
-        print("Error: Input text is too short for language detection.")
-        return "english"
-    else:
-        lang_code = detect(text)
-        detected_language=search_iso639_fullname(lang_code)
-        print(detected_language)
-        return detected_language
-
-
-# text_to_detect = "Bonjour, comment ça va?"
-# detected_language = detect_language(text_to_detect)
-# print("Detected language:", detected_language)
 
 def load_docx(filename):
     """
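The removed detect_language is superseded by the new detect_lang. A migration sketch (hypothetical call, not from the diff):

# before 0.1.4.8:
# detected = detect_language("Bonjour, comment ça va?")  # printed and returned the full language name
# from 0.1.4.8:
detected = detect_lang("Bonjour, comment ça va?")         # returns e.g. 'French'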
@@ -438,13 +489,13 @@ def translate(
     text=merge_text(text)
     text = replace_text(text)
     if lang_src is None:
-        lang_src =
+        lang_src = detect_lang(text)
     try:
         if len(text) > limit:
             n=auto_chunk_size(text)
             text_segments = split_by_sent_n(text,n)
             translations = ""
-            for segment in tqdm(text_segments,desc='is translating'):
+            for segment in tqdm(text_segments,desc='is translating', colour="green"):
                 segment = replace_text(merge_text(segment))
                 translated_segment = translate_segment(text=segment, lang=lang, lang_src=lang_src, method=method, user_agent=user_agent,service_urls=service_urls, verbose=verbose,error_verbose=error_verbose
                 )
@@ -479,7 +530,7 @@ def translate_segment(
     text_clean = filter_errors(text)
     text_clean = replace_text(text_clean)
     if lang_src is None:
-        lang_src =
+        lang_src = detect_lang(text_clean)
     try:
         lang_src = get_language_code(lang_src, 'google')
         lang_tgt = get_language_code(lang, 'google')
@@ -547,7 +598,7 @@ def translate_with_retry(
             raise RuntimeError(f"Error using {service_url}: {e}")
 
     if lang_src is None:
-        lang_src =
+        lang_src = detect_lang(text)
     lang_src = get_language_code(language=lang_src)
     lang = get_language_code(language=lang)
     print(f"lang:{lang},lang_src:{lang_src}")
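The three hunks above all route source-language detection through the new detect_lang. The translate() hunk also shows the chunking pattern for long inputs: split the text into n-sentence segments and translate each under a green tqdm bar. A self-contained sketch of that loop, with translate_one standing in (hypothetically) for translate_segment:

from tqdm import tqdm
from nltk.tokenize import sent_tokenize

def translate_chunks(text, n=10, translate_one=lambda s: s):
    # split into chunks of n sentences, mirroring split_by_sent_n(text, n)
    sents = sent_tokenize(text)
    chunks = [" ".join(sents[i:i+n]) for i in range(0, len(sents), n)]
    translations = ""
    for chunk in tqdm(chunks, desc='is translating', colour="green"):
        translations += translate_one(chunk)
    return translations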
{py2ls-0.1.4.6.dist-info → py2ls-0.1.4.8.dist-info}/RECORD
CHANGED
@@ -2,7 +2,7 @@ py2ls/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
 py2ls/.git/COMMIT_EDITMSG,sha256=5xj-jWMbrdOc9m7gSn-UcsAQ9FMNvWSbLWSsrOUIO5w,7
 py2ls/.git/FETCH_HEAD,sha256=1FfG9FtKEzbthC4ygl5ci0pnEm7ZaF3ZY7njNqkjz2I,100
 py2ls/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
-py2ls/.git/config,sha256=
+py2ls/.git/config,sha256=CL7WR7jU8VRchJwRooCBkXUMvuRoPdf3FWIBAOlap1c,378
 py2ls/.git/description,sha256=ZzMxc0Ca26m45Twn1DDnOHqin5VHEZ9uOTBrScIXSjE,16
 py2ls/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
 py2ls/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
@@ -100,12 +100,12 @@ py2ls/brain_atlas.py,sha256=w1o5EelRjq89zuFJUNSz4Da8HnTCwAwDAZ4NU4a-bAY,5486
 py2ls/correlators.py,sha256=RbOaJIPLCHJtUm5SFi_4dCJ7VFUPWR0PErfK3K26ad4,18243
 py2ls/dbhandler.py,sha256=i9dNrpHyx0oIaFieHI4X4tsrCdN-aFxudPTDOgy9Ppo,3574
 py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
-py2ls/ips.py,sha256=
-py2ls/netfinder.py,sha256=
+py2ls/ips.py,sha256=XBKhLvg613WL18wgm5pc4xwe-EI6MOGlCp4AZCQlW60,141873
+py2ls/netfinder.py,sha256=ZsLWGYMeRuGvxj2nqE0Z8ANoaVl18Necfw0HQfh2q7I,45548
 py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
 py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
-py2ls/translator.py,sha256=
+py2ls/translator.py,sha256=OEYljlmICGb9pO06tMWJCoRfZTHBJ8g-tNkF_zLlzgg,34118
 py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
-py2ls-0.1.4.
-py2ls-0.1.4.
-py2ls-0.1.4.
+py2ls-0.1.4.8.dist-info/METADATA,sha256=LlicemgTJqMy8o5i8Nu8jT7Pp1g-Q3BF-zLc5Wnrr3I,17943
+py2ls-0.1.4.8.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+py2ls-0.1.4.8.dist-info/RECORD,,