SqueakyCleanText 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {SqueakyCleanText-0.2.0/SqueakyCleanText.egg-info → SqueakyCleanText-0.2.1}/PKG-INFO +1 -3
  2. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/README.md +0 -2
  3. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1/SqueakyCleanText.egg-info}/PKG-INFO +1 -3
  4. SqueakyCleanText-0.2.1/SqueakyCleanText.egg-info/requires.txt +19 -0
  5. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/config.py +1 -2
  6. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/ner.py +1 -1
  7. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/setup.py +10 -10
  8. SqueakyCleanText-0.2.0/SqueakyCleanText.egg-info/requires.txt +0 -19
  9. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/LICENSE +0 -0
  10. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/MANIFEST.in +0 -0
  11. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/SqueakyCleanText.egg-info/SOURCES.txt +0 -0
  12. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/SqueakyCleanText.egg-info/dependency_links.txt +0 -0
  13. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/SqueakyCleanText.egg-info/entry_points.txt +0 -0
  14. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/SqueakyCleanText.egg-info/top_level.txt +0 -0
  15. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/__init__.py +0 -0
  16. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/scripts/__init__.py +0 -0
  17. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/scripts/download_nltk_stopwords.py +0 -0
  18. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/sct.py +0 -0
  19. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/__init__.py +0 -0
  20. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/constants.py +0 -0
  21. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/contact.py +0 -0
  22. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/datetime.py +0 -0
  23. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/normtext.py +0 -0
  24. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/resources.py +0 -0
  25. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/special.py +0 -0
  26. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/sct/utils/stopwords.py +0 -0
  27. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/setup.cfg +0 -0
  28. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/tests/__init__.py +0 -0
  29. {SqueakyCleanText-0.2.0 → SqueakyCleanText-0.2.1}/tests/test_sct.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: SqueakyCleanText
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: A comprehensive text cleaning and preprocessing pipeline.
5
5
  Home-page: https://github.com/rhnfzl/SqueakyCleanText
6
6
  Author: Rehan Fazal
@@ -53,8 +53,6 @@ SqueakyCleanText offers functionality to streamline this process, ensuring that
53
53
  Depending on a single model for Named Entity Recognition is not ideal, as there is a high chance it might skip an entity altogether. Also, combining language-specific NER models makes the detection more specific to the text and reduces the chance of missing an entity.
54
54
  The package NER model has the chunking mechanism which helps to do the NER process even if the text is longer than the model token size.
55
55
 
56
- Important : Model
57
-
58
56
  By automating these text cleaning steps, SqueakyCleanText ensures your data is prepared efficiently and effectively, saving time and improving model performance.
59
57
 
60
58
  ## Installation
@@ -28,8 +28,6 @@ SqueakyCleanText offers functionality to streamline this process, ensuring that
28
28
  Depending on a single model for Named Entity Recognition is not ideal, as there is a high chance it might skip an entity altogether. Also, combining language-specific NER models makes the detection more specific to the text and reduces the chance of missing an entity.
29
29
  The package NER model has the chunking mechanism which helps to do the NER process even if the text is longer than the model token size.
30
30
 
31
- Important : Model
32
-
33
31
  By automating these text cleaning steps, SqueakyCleanText ensures your data is prepared efficiently and effectively, saving time and improving model performance.
34
32
 
35
33
  ## Installation
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: SqueakyCleanText
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: A comprehensive text cleaning and preprocessing pipeline.
5
5
  Home-page: https://github.com/rhnfzl/SqueakyCleanText
6
6
  Author: Rehan Fazal
@@ -53,8 +53,6 @@ SqueakyCleanText offers functionality to streamline this process, ensuring that
53
53
  Depending on a single model for Named Entity Recognition is not ideal, as there is a high chance it might skip an entity altogether. Also, combining language-specific NER models makes the detection more specific to the text and reduces the chance of missing an entity.
54
54
  The package NER model has the chunking mechanism which helps to do the NER process even if the text is longer than the model token size.
55
55
 
56
- Important : Model
57
-
58
56
  By automating these text cleaning steps, SqueakyCleanText ensures your data is prepared efficiently and effectively, saving time and improving model performance.
59
57
 
60
58
  ## Installation
@@ -0,0 +1,19 @@
1
+ lingua-language-detector>=2.0.0
2
+ nltk>=3.8
3
+ emoji>=2.8
4
+ ftfy>=6.1
5
+ Unidecode>=1.3
6
+ beautifulsoup4>=4.12
7
+ transformers>=4.30
8
+ torch>=2.0.0
9
+ presidio_anonymizer>=2.2.355
10
+
11
+ [dev]
12
+ hypothesis==6.82.7
13
+ faker==20.1.0
14
+ flake8==6.1.0
15
+ pytest==7.5.0
16
+
17
+ [test]
18
+ coverage==7.3.1
19
+ pytest-cov==4.1.0
@@ -52,5 +52,4 @@ LANGUAGE = None
52
52
  NER_MODELS_LIST = ["FacebookAI/xlm-roberta-large-finetuned-conll03-english",
53
53
  "FacebookAI/xlm-roberta-large-finetuned-conll02-dutch",
54
54
  "FacebookAI/xlm-roberta-large-finetuned-conll03-german",
55
- "FacebookAI/xlm-roberta-large-finetuned-conll03-spanish",
56
- "Babelscape/wikineural-multilingual-ner"]
55
+ "FacebookAI/xlm-roberta-large-finetuned-conll02-spanish",
@@ -36,7 +36,7 @@ class GeneralNER:
36
36
  model_name = ["FacebookAI/xlm-roberta-large-finetuned-conll03-english",
37
37
  "FacebookAI/xlm-roberta-large-finetuned-conll02-dutch",
38
38
  "FacebookAI/xlm-roberta-large-finetuned-conll03-german",
39
- "FacebookAI/xlm-roberta-large-finetuned-conll03-spanish",
39
+ "FacebookAI/xlm-roberta-large-finetuned-conll02-spanish",
40
40
  "Babelscape/wikineural-multilingual-ner"]
41
41
 
42
42
  english_model_name = model_name[0]
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='SqueakyCleanText',
5
- version='0.2.0',
5
+ version='0.2.1',
6
6
  author='Rehan Fazal',
7
7
  description='A comprehensive text cleaning and preprocessing pipeline.',
8
8
  long_description=open('README.md', encoding='utf-8').read(),
@@ -11,15 +11,15 @@ setup(
11
11
  license='MIT',
12
12
  packages=find_packages(),
13
13
  install_requires=[
14
- 'lingua-language-detector>=2.0.0,<2.1',
15
- 'nltk>=3.8,<3.9',
16
- 'emoji>=2.8,<2.9',
17
- 'ftfy>=6.1,<6.2',
18
- 'Unidecode>=1.3,<1.4',
19
- 'beautifulsoup4>=4.12,<4.13',
20
- 'transformers>=4.30,<4.31',
21
- 'torch>=2.0,<2.1',
22
- 'presidio_anonymizer>=2.2.355,<2.3',
14
+ 'lingua-language-detector>=2.0.0',
15
+ 'nltk>=3.8',
16
+ 'emoji>=2.8',
17
+ 'ftfy>=6.1',
18
+ 'Unidecode>=1.3',
19
+ 'beautifulsoup4>=4.12',
20
+ 'transformers>=4.30',
21
+ 'torch>=2.0.0',
22
+ 'presidio_anonymizer>=2.2.355',
23
23
  ],
24
24
  extras_require={
25
25
  'dev': [
@@ -1,19 +0,0 @@
1
- lingua-language-detector<2.1,>=2.0.0
2
- nltk<3.9,>=3.8
3
- emoji<2.9,>=2.8
4
- ftfy<6.2,>=6.1
5
- Unidecode<1.4,>=1.3
6
- beautifulsoup4<4.13,>=4.12
7
- transformers<4.31,>=4.30
8
- torch<2.1,>=2.0
9
- presidio_anonymizer<2.3,>=2.2.355
10
-
11
- [dev]
12
- hypothesis==6.82.7
13
- faker==20.1.0
14
- flake8==6.1.0
15
- pytest==7.5.0
16
-
17
- [test]
18
- coverage==7.3.1
19
- pytest-cov==4.1.0