semantic-compressor 1.4__tar.gz → 1.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (21) hide show
  1. {semantic_compressor-1.4/semantic_compressor.egg-info → semantic_compressor-1.6}/PKG-INFO +1 -1
  2. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/semantic.py +14 -7
  3. {semantic_compressor-1.4 → semantic_compressor-1.6}/pyproject.toml +1 -1
  4. {semantic_compressor-1.4 → semantic_compressor-1.6/semantic_compressor.egg-info}/PKG-INFO +1 -1
  5. {semantic_compressor-1.4 → semantic_compressor-1.6}/setup.py +1 -1
  6. {semantic_compressor-1.4 → semantic_compressor-1.6}/LICENSE +0 -0
  7. {semantic_compressor-1.4 → semantic_compressor-1.6}/README.md +0 -0
  8. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/__init__.py +0 -0
  9. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/__init__.py +0 -0
  10. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/base.py +0 -0
  11. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/basic.py +0 -0
  12. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/minbpe/regex.py +0 -0
  13. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/embedding_model.onnx +0 -0
  14. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/en_stopwords.pkl +0 -0
  15. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/lid.176.ftz +0 -0
  16. {semantic_compressor-1.4 → semantic_compressor-1.6}/compressor/resources/pt_stopwords.pkl +0 -0
  17. {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/SOURCES.txt +0 -0
  18. {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/dependency_links.txt +0 -0
  19. {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/requires.txt +0 -0
  20. {semantic_compressor-1.4 → semantic_compressor-1.6}/semantic_compressor.egg-info/top_level.txt +0 -0
  21. {semantic_compressor-1.4 → semantic_compressor-1.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantic_compressor
3
- Version: 1.4
3
+ Version: 1.6
4
4
  Author: Carlo Moro
5
5
  Author-email: Carlo Moro <cnmoro@gmail.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -13,10 +13,6 @@ from collections import Counter
13
13
  import onnxruntime as ort
14
14
  import nltk
15
15
 
16
- # Inicializando os stemmers
17
- stemmer_english = PorterStemmer()
18
- stemmer_portuguese = RSLPStemmer()
19
-
20
16
  tokenizer = RegexTokenizer()
21
17
  nltk_data_path = str(importlib.resources.files('compressor').joinpath('resources/nltk_data'))
22
18
 
@@ -24,6 +20,10 @@ os.environ['NLTK_DATA'] = nltk_data_path
24
20
 
25
21
  nltk.download('rslp')
26
22
 
23
+ # Inicializando os stemmers
24
+ stemmer_english = PorterStemmer()
25
+ stemmer_portuguese = RSLPStemmer()
26
+
27
27
  english_stopwords_path = str(importlib.resources.files('compressor').joinpath('resources/en_stopwords.pkl'))
28
28
  portuguese_stopwords_path = str(importlib.resources.files('compressor').joinpath('resources/pt_stopwords.pkl'))
29
29
  fasttext_model_path = str(importlib.resources.files('compressor').joinpath('resources/lid.176.ftz'))
@@ -250,11 +250,18 @@ def stem_text(text, lang='en'):
250
250
 
251
251
  return stemmed_text
252
252
 
253
- def correct_spelling(frase, detected_lang="pt"):
253
+ def correct_spelling(sentence, detected_lang="pt"):
254
254
  spell = SpellChecker(language=detected_lang)
255
- words = frase.split()
255
+ words = sentence.split()
256
256
  fixed = [spell.correction(word) for word in words]
257
- return " ".join(fixed)
257
+
258
+ final_words = []
259
+
260
+ # Merge the original words with the corrected ones: spell.correction() may return None when it finds no candidate for a word, in which case the original word is kept
261
+ for original, fixed_word in zip(words, fixed):
262
+ final_words.append(fixed_word if fixed_word is not None else original)
263
+
264
+ return " ".join(final_words)
258
265
 
259
266
  def find_needle_in_haystack(
260
267
  *, haystack: str, needle: str, block_size = 300,
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "semantic_compressor"
7
- version = "1.4"
7
+ version = "1.6"
8
8
  authors = [
9
9
  { name="Carlo Moro", email="cnmoro@gmail.com" },
10
10
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantic_compressor
3
- Version: 1.4
3
+ Version: 1.6
4
4
  Author: Carlo Moro
5
5
  Author-email: Carlo Moro <cnmoro@gmail.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='semantic_compressor',
5
- version='1.4',
5
+ version='1.6',
6
6
  author='Carlo Moro',
7
7
  author_email='cnmoro@gmail.com',
8
8
  description="Semantic text compression",