semantic-compressor: semantic_compressor-1.4-py3-none-any.whl → semantic_compressor-1.5-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- compressor/semantic.py +10 -3
- {semantic_compressor-1.4.dist-info → semantic_compressor-1.5.dist-info}/METADATA +1 -1
- {semantic_compressor-1.4.dist-info → semantic_compressor-1.5.dist-info}/RECORD +6 -6
- {semantic_compressor-1.4.dist-info → semantic_compressor-1.5.dist-info}/LICENSE +0 -0
- {semantic_compressor-1.4.dist-info → semantic_compressor-1.5.dist-info}/WHEEL +0 -0
- {semantic_compressor-1.4.dist-info → semantic_compressor-1.5.dist-info}/top_level.txt +0 -0
compressor/semantic.py
CHANGED
@@ -250,11 +250,18 @@ def stem_text(text, lang='en'):
|
|
250
250
|
|
251
251
|
return stemmed_text
|
252
252
|
|
253
|
-
def correct_spelling(
|
253
|
+
def correct_spelling(sentence, detected_lang="pt"):
|
254
254
|
spell = SpellChecker(language=detected_lang)
|
255
|
-
words =
|
255
|
+
words = sentence.split()
|
256
256
|
fixed = [spell.correction(word) for word in words]
|
257
|
-
|
257
|
+
|
258
|
+
final_words = []
|
259
|
+
|
260
|
+
# Merge the corrected words with the originals: an entry in `fixed` is None when the spell checker returned no correction for that word, so the original word is kept in that case
|
261
|
+
for original, fixed_word in zip(words, fixed):
|
262
|
+
final_words.append(fixed_word if fixed_word is not None else original)
|
263
|
+
|
264
|
+
return " ".join(final_words)
|
258
265
|
|
259
266
|
def find_needle_in_haystack(
|
260
267
|
*, haystack: str, needle: str, block_size = 300,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
compressor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
compressor/semantic.py,sha256=
|
2
|
+
compressor/semantic.py,sha256=6TbAxHx69RkQNuAqdrnL91oTMkclwH2H20BylvecwZI,13985
|
3
3
|
compressor/minbpe/__init__.py,sha256=wZ1z2QKkncvGgiZDBc91AP5m7-M-MVenPStKbS6xylE,95
|
4
4
|
compressor/minbpe/base.py,sha256=tTKag04RRFnc4ppoieBbDV0V6thzi_ZvZTlhOYIoY7Q,6881
|
5
5
|
compressor/minbpe/basic.py,sha256=0kD4tU8l2MZegfPaHMfDo5CnaSzb9i1v9tDBy6GwMbg,2883
|
@@ -8,8 +8,8 @@ compressor/resources/embedding_model.onnx,sha256=uLBbAfCGEJTwR1yyiK0bMDroruLr6W5
|
|
8
8
|
compressor/resources/en_stopwords.pkl,sha256=Q2PyGQnphPUs_jxN9NMSqp2EQjYv4b4oMJY2aMYvbSY,1310
|
9
9
|
compressor/resources/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
|
10
10
|
compressor/resources/pt_stopwords.pkl,sha256=-9bJaxJWjeOFxLHLT9D-rI3XTzGC0iLJfMiwBDnkCYI,1716
|
11
|
-
semantic_compressor-1.
|
12
|
-
semantic_compressor-1.
|
13
|
-
semantic_compressor-1.
|
14
|
-
semantic_compressor-1.
|
15
|
-
semantic_compressor-1.
|
11
|
+
semantic_compressor-1.5.dist-info/LICENSE,sha256=DFRihXonZ3qVRaTrzuXNaDI_-h2jyT2SqWqjtTDHfqI,1067
|
12
|
+
semantic_compressor-1.5.dist-info/METADATA,sha256=nnp2cnhzAa6SNLd2niPtvPe5aJdI2kDHkiKghhHsUcE,6178
|
13
|
+
semantic_compressor-1.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
14
|
+
semantic_compressor-1.5.dist-info/top_level.txt,sha256=qb2SlKrEmMrQDVrhwxu3Wr7U6JupPXtDGrJpIQr8xSc,11
|
15
|
+
semantic_compressor-1.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|