semantic-compressor 1.4__tar.gz → 1.5__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {semantic_compressor-1.4/semantic_compressor.egg-info → semantic_compressor-1.5}/PKG-INFO +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/semantic.py +10 -3
- {semantic_compressor-1.4 → semantic_compressor-1.5}/pyproject.toml +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.5/semantic_compressor.egg-info}/PKG-INFO +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.5}/setup.py +1 -1
- {semantic_compressor-1.4 → semantic_compressor-1.5}/LICENSE +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/README.md +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/__init__.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/minbpe/__init__.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/minbpe/base.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/minbpe/basic.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/minbpe/regex.py +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/resources/embedding_model.onnx +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/resources/en_stopwords.pkl +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/resources/lid.176.ftz +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/resources/pt_stopwords.pkl +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/SOURCES.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/dependency_links.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/requires.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/top_level.txt +0 -0
- {semantic_compressor-1.4 → semantic_compressor-1.5}/setup.cfg +0 -0
@@ -250,11 +250,18 @@ def stem_text(text, lang='en'):
|
|
250
250
|
|
251
251
|
return stemmed_text
|
252
252
|
|
253
|
-
def correct_spelling(
|
253
|
+
def correct_spelling(sentence, detected_lang="pt"):
|
254
254
|
spell = SpellChecker(language=detected_lang)
|
255
|
-
words =
|
255
|
+
words = sentence.split()
|
256
256
|
fixed = [spell.correction(word) for word in words]
|
257
|
-
|
257
|
+
|
258
|
+
final_words = []
|
259
|
+
|
260
|
+
# Merge the original words with the corrected ones: keep the original word wherever "fixed" holds None (i.e. the spell checker returned no correction)
|
261
|
+
for original, fixed_word in zip(words, fixed):
|
262
|
+
final_words.append(fixed_word if fixed_word is not None else original)
|
263
|
+
|
264
|
+
return " ".join(final_words)
|
258
265
|
|
259
266
|
def find_needle_in_haystack(
|
260
267
|
*, haystack: str, needle: str, block_size = 300,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.5}/compressor/resources/embedding_model.onnx
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/requires.txt
RENAMED
File without changes
|
{semantic_compressor-1.4 → semantic_compressor-1.5}/semantic_compressor.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|