semantic-compressor 1.4__py3-none-any.whl → 1.5__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
compressor/semantic.py CHANGED
@@ -250,11 +250,18 @@ def stem_text(text, lang='en'):
250
250
 
251
251
  return stemmed_text
252
252
 
253
- def correct_spelling(frase, detected_lang="pt"):
253
+ def correct_spelling(sentence, detected_lang="pt"):
254
254
  spell = SpellChecker(language=detected_lang)
255
- words = frase.split()
255
+ words = sentence.split()
256
256
  fixed = [spell.correction(word) for word in words]
257
- return " ".join(fixed)
257
+
258
+ final_words = []
259
+
260
+ # Merge original words with corrected words (an entry in "fixed" is None when the spell checker finds no correction candidate, so the original word is kept)
261
+ for original, fixed_word in zip(words, fixed):
262
+ final_words.append(fixed_word if fixed_word is not None else original)
263
+
264
+ return " ".join(final_words)
258
265
 
259
266
  def find_needle_in_haystack(
260
267
  *, haystack: str, needle: str, block_size = 300,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantic_compressor
3
- Version: 1.4
3
+ Version: 1.5
4
4
  Author: Carlo Moro
5
5
  Author-email: Carlo Moro <cnmoro@gmail.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1,5 +1,5 @@
1
1
  compressor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- compressor/semantic.py,sha256=OxqzVCAnICKD3W_P3SAe4JbJt-PyOs5VVR-go8taZVI,13701
2
+ compressor/semantic.py,sha256=6TbAxHx69RkQNuAqdrnL91oTMkclwH2H20BylvecwZI,13985
3
3
  compressor/minbpe/__init__.py,sha256=wZ1z2QKkncvGgiZDBc91AP5m7-M-MVenPStKbS6xylE,95
4
4
  compressor/minbpe/base.py,sha256=tTKag04RRFnc4ppoieBbDV0V6thzi_ZvZTlhOYIoY7Q,6881
5
5
  compressor/minbpe/basic.py,sha256=0kD4tU8l2MZegfPaHMfDo5CnaSzb9i1v9tDBy6GwMbg,2883
@@ -8,8 +8,8 @@ compressor/resources/embedding_model.onnx,sha256=uLBbAfCGEJTwR1yyiK0bMDroruLr6W5
8
8
  compressor/resources/en_stopwords.pkl,sha256=Q2PyGQnphPUs_jxN9NMSqp2EQjYv4b4oMJY2aMYvbSY,1310
9
9
  compressor/resources/lid.176.ftz,sha256=jzRyz-hzintgmejpmcPL-uDc0VaWqsfXc4qAOdtgPoM,938013
10
10
  compressor/resources/pt_stopwords.pkl,sha256=-9bJaxJWjeOFxLHLT9D-rI3XTzGC0iLJfMiwBDnkCYI,1716
11
- semantic_compressor-1.4.dist-info/LICENSE,sha256=DFRihXonZ3qVRaTrzuXNaDI_-h2jyT2SqWqjtTDHfqI,1067
12
- semantic_compressor-1.4.dist-info/METADATA,sha256=BEKlYCs7nYakGXQzbC_8_Gz-MKSAXzSp01pAD0HjIS0,6178
13
- semantic_compressor-1.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
14
- semantic_compressor-1.4.dist-info/top_level.txt,sha256=qb2SlKrEmMrQDVrhwxu3Wr7U6JupPXtDGrJpIQr8xSc,11
15
- semantic_compressor-1.4.dist-info/RECORD,,
11
+ semantic_compressor-1.5.dist-info/LICENSE,sha256=DFRihXonZ3qVRaTrzuXNaDI_-h2jyT2SqWqjtTDHfqI,1067
12
+ semantic_compressor-1.5.dist-info/METADATA,sha256=nnp2cnhzAa6SNLd2niPtvPe5aJdI2kDHkiKghhHsUcE,6178
13
+ semantic_compressor-1.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
14
+ semantic_compressor-1.5.dist-info/top_level.txt,sha256=qb2SlKrEmMrQDVrhwxu3Wr7U6JupPXtDGrJpIQr8xSc,11
15
+ semantic_compressor-1.5.dist-info/RECORD,,