semantic-compressor 1.4__tar.gz → 1.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {semantic_compressor-1.4/semantic_compressor.egg-info → semantic_compressor-1.6}/PKG-INFO +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/semantic.py +14 -7
- {semantic_compressor-1.4 → semantic_compressor-1.6}/pyproject.toml +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.6/semantic_compressor.egg-info}/PKG-INFO +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.6}/setup.py +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.6}/LICENSE +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/README.md +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/__init__.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/__init__.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/base.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/basic.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/regex.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/embedding_model.onnx +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/en_stopwords.pkl +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/lid.176.ftz +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/pt_stopwords.pkl +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/SOURCES.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/dependency_links.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/requires.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/top_level.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.6}/setup.cfg +0 -0
@@ -13,10 +13,6 @@ from collections import Counter
|
|
13
13
|
import onnxruntime as ort
|
14
14
|
import nltk
|
15
15
|
|
16
|
-
# Inicializando os stemmers
|
17
|
-
stemmer_english = PorterStemmer()
|
18
|
-
stemmer_portuguese = RSLPStemmer()
|
19
|
-
|
20
16
|
tokenizer = RegexTokenizer()
|
21
17
|
nltk_data_path = str(importlib.resources.files('compressor').joinpath('resources/nltk_data'))
|
22
18
|
|
@@ -24,6 +20,10 @@ os.environ['NLTK_DATA'] = nltk_data_path
|
|
24
20
|
|
25
21
|
nltk.download('rslp')
|
26
22
|
|
23
|
+
# Inicializando os stemmers
|
24
|
+
stemmer_english = PorterStemmer()
|
25
|
+
stemmer_portuguese = RSLPStemmer()
|
26
|
+
|
27
27
|
english_stopwords_path = str(importlib.resources.files('compressor').joinpath('resources/en_stopwords.pkl'))
|
28
28
|
portuguese_stopwords_path = str(importlib.resources.files('compressor').joinpath('resources/pt_stopwords.pkl'))
|
29
29
|
fasttext_model_path = str(importlib.resources.files('compressor').joinpath('resources/lid.176.ftz'))
|
@@ -250,11 +250,18 @@ def stem_text(text, lang='en'):
|
|
250
250
|
|
251
251
|
return stemmed_text
|
252
252
|
|
253
|
-
def correct_spelling(
|
253
|
+
def correct_spelling(sentence, detected_lang="pt"):
|
254
254
|
spell = SpellChecker(language=detected_lang)
|
255
|
-
words =
|
255
|
+
words = sentence.split()
|
256
256
|
fixed = [spell.correction(word) for word in words]
|
257
|
-
|
257
|
+
|
258
|
+
final_words = []
|
259
|
+
|
260
|
+
# Interpolate original words with fixed words (each word could be "None" in "fixed" when no correction is needed)
|
261
|
+
for original, fixed_word in zip(words, fixed):
|
262
|
+
final_words.append(fixed_word if fixed_word is not None else original)
|
263
|
+
|
264
|
+
return " ".join(final_words)
|
258
265
|
|
259
266
|
def find_needle_in_haystack(
|
260
267
|
*, haystack: str, needle: str, block_size = 300,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/embedding_model.onnx
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/requires.txt
RENAMED
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|