SinaTools-0.1.3-py2.py3-none-any.whl → SinaTools-0.1.7-py2.py3-none-any.whl

This diff shows the contents of publicly available package versions as published to their respective public registries and is provided for informational purposes only.
Files changed (132)
  1. {SinaTools-0.1.3.dist-info → SinaTools-0.1.7.dist-info}/METADATA +14 -20
  2. SinaTools-0.1.7.dist-info/RECORD +101 -0
  3. SinaTools-0.1.7.dist-info/entry_points.txt +18 -0
  4. SinaTools-0.1.7.dist-info/top_level.txt +1 -0
  5. {nlptools → sinatools}/CLI/DataDownload/download_files.py +9 -9
  6. {nlptools → sinatools}/CLI/morphology/ALMA_multi_word.py +10 -20
  7. sinatools/CLI/morphology/morph_analyzer.py +80 -0
  8. nlptools/CLI/arabiner/bin/infer2.py → sinatools/CLI/ner/corpus_entity_extractor.py +5 -9
  9. nlptools/CLI/arabiner/bin/infer.py → sinatools/CLI/ner/entity_extractor.py +4 -8
  10. {nlptools → sinatools}/CLI/salma/salma_tools.py +8 -8
  11. {nlptools → sinatools}/CLI/utils/arStrip.py +10 -21
  12. sinatools/CLI/utils/corpus_tokenizer.py +50 -0
  13. {nlptools → sinatools}/CLI/utils/implication.py +9 -9
  14. {nlptools → sinatools}/CLI/utils/jaccard.py +10 -10
  15. sinatools/CLI/utils/remove_latin.py +34 -0
  16. sinatools/CLI/utils/remove_punctuation.py +42 -0
  17. {nlptools → sinatools}/CLI/utils/sentence_tokenizer.py +9 -22
  18. {nlptools → sinatools}/CLI/utils/text_transliteration.py +10 -17
  19. {nlptools → sinatools}/DataDownload/downloader.py +10 -10
  20. sinatools/VERSION +1 -0
  21. {nlptools → sinatools}/__init__.py +1 -1
  22. {nlptools → sinatools}/morphology/ALMA_multi_word.py +4 -5
  23. {nlptools → sinatools}/morphology/__init__.py +4 -14
  24. sinatools/morphology/morph_analyzer.py +172 -0
  25. sinatools/ner/__init__.py +12 -0
  26. nlptools/arabiner/bin/infer.py → sinatools/ner/entity_extractor.py +9 -8
  27. {nlptools → sinatools}/salma/__init__.py +2 -2
  28. {nlptools → sinatools}/salma/settings.py +1 -1
  29. {nlptools → sinatools}/salma/views.py +12 -12
  30. {nlptools → sinatools}/salma/wsd.py +2 -2
  31. {nlptools/morphology → sinatools/utils}/charsets.py +1 -3
  32. {nlptools → sinatools}/utils/implication.py +10 -10
  33. {nlptools → sinatools}/utils/jaccard.py +2 -2
  34. {nlptools → sinatools}/utils/parser.py +18 -21
  35. {nlptools → sinatools}/utils/text_transliteration.py +1 -1
  36. nlptools/utils/corpus_tokenizer.py → sinatools/utils/tokenizer.py +58 -5
  37. {nlptools/morphology → sinatools/utils}/tokenizers_words.py +3 -6
  38. SinaTools-0.1.3.dist-info/RECORD +0 -122
  39. SinaTools-0.1.3.dist-info/entry_points.txt +0 -18
  40. SinaTools-0.1.3.dist-info/top_level.txt +0 -1
  41. nlptools/CLI/morphology/morph_analyzer.py +0 -91
  42. nlptools/CLI/utils/corpus_tokenizer.py +0 -74
  43. nlptools/CLI/utils/latin_remove.py +0 -51
  44. nlptools/CLI/utils/remove_Punc.py +0 -53
  45. nlptools/VERSION +0 -1
  46. nlptools/arabiner/bin/__init__.py +0 -14
  47. nlptools/arabiner/bin/eval.py +0 -87
  48. nlptools/arabiner/bin/process.py +0 -140
  49. nlptools/arabiner/bin/train.py +0 -221
  50. nlptools/arabiner/data/__init__.py +0 -1
  51. nlptools/arabiner/data/datasets.py +0 -146
  52. nlptools/arabiner/data/transforms.py +0 -118
  53. nlptools/arabiner/nn/BaseModel.py +0 -22
  54. nlptools/arabiner/nn/BertNestedTagger.py +0 -34
  55. nlptools/arabiner/nn/BertSeqTagger.py +0 -17
  56. nlptools/arabiner/nn/__init__.py +0 -3
  57. nlptools/arabiner/trainers/BaseTrainer.py +0 -117
  58. nlptools/arabiner/trainers/BertNestedTrainer.py +0 -203
  59. nlptools/arabiner/trainers/BertTrainer.py +0 -163
  60. nlptools/arabiner/trainers/__init__.py +0 -3
  61. nlptools/arabiner/utils/__init__.py +0 -0
  62. nlptools/arabiner/utils/data.py +0 -124
  63. nlptools/arabiner/utils/helpers.py +0 -151
  64. nlptools/arabiner/utils/metrics.py +0 -69
  65. nlptools/morphology/morph_analyzer.py +0 -170
  66. nlptools/morphology/settings.py +0 -8
  67. nlptools/utils/__init__.py +0 -0
  68. nlptools/utils/sentence_tokenizer.py +0 -53
  69. {SinaTools-0.1.3.data/data/nlptools → SinaTools-0.1.7.data/data/sinatools}/environment.yml +0 -0
  70. {SinaTools-0.1.3.dist-info → SinaTools-0.1.7.dist-info}/AUTHORS.rst +0 -0
  71. {SinaTools-0.1.3.dist-info → SinaTools-0.1.7.dist-info}/LICENSE +0 -0
  72. {SinaTools-0.1.3.dist-info → SinaTools-0.1.7.dist-info}/WHEEL +0 -0
  73. {nlptools → sinatools}/CLI/utils/__init__.py +0 -0
  74. {nlptools → sinatools}/DataDownload/__init__.py +0 -0
  75. {nlptools → sinatools}/arabert/__init__.py +0 -0
  76. {nlptools → sinatools}/arabert/arabert/__init__.py +0 -0
  77. {nlptools → sinatools}/arabert/arabert/create_classification_data.py +0 -0
  78. {nlptools → sinatools}/arabert/arabert/create_pretraining_data.py +0 -0
  79. {nlptools → sinatools}/arabert/arabert/extract_features.py +0 -0
  80. {nlptools → sinatools}/arabert/arabert/lamb_optimizer.py +0 -0
  81. {nlptools → sinatools}/arabert/arabert/modeling.py +0 -0
  82. {nlptools → sinatools}/arabert/arabert/optimization.py +0 -0
  83. {nlptools → sinatools}/arabert/arabert/run_classifier.py +0 -0
  84. {nlptools → sinatools}/arabert/arabert/run_pretraining.py +0 -0
  85. {nlptools → sinatools}/arabert/arabert/run_squad.py +0 -0
  86. {nlptools → sinatools}/arabert/arabert/tokenization.py +0 -0
  87. {nlptools → sinatools}/arabert/araelectra/__init__.py +0 -0
  88. {nlptools → sinatools}/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -0
  89. {nlptools → sinatools}/arabert/araelectra/build_pretraining_dataset.py +0 -0
  90. {nlptools → sinatools}/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -0
  91. {nlptools → sinatools}/arabert/araelectra/configure_finetuning.py +0 -0
  92. {nlptools → sinatools}/arabert/araelectra/configure_pretraining.py +0 -0
  93. {nlptools → sinatools}/arabert/araelectra/finetune/__init__.py +0 -0
  94. {nlptools → sinatools}/arabert/araelectra/finetune/feature_spec.py +0 -0
  95. {nlptools → sinatools}/arabert/araelectra/finetune/preprocessing.py +0 -0
  96. {nlptools → sinatools}/arabert/araelectra/finetune/scorer.py +0 -0
  97. {nlptools → sinatools}/arabert/araelectra/finetune/task.py +0 -0
  98. {nlptools → sinatools}/arabert/araelectra/finetune/task_builder.py +0 -0
  99. {nlptools → sinatools}/arabert/araelectra/flops_computation.py +0 -0
  100. {nlptools → sinatools}/arabert/araelectra/model/__init__.py +0 -0
  101. {nlptools → sinatools}/arabert/araelectra/model/modeling.py +0 -0
  102. {nlptools → sinatools}/arabert/araelectra/model/optimization.py +0 -0
  103. {nlptools → sinatools}/arabert/araelectra/model/tokenization.py +0 -0
  104. {nlptools → sinatools}/arabert/araelectra/pretrain/__init__.py +0 -0
  105. {nlptools → sinatools}/arabert/araelectra/pretrain/pretrain_data.py +0 -0
  106. {nlptools → sinatools}/arabert/araelectra/pretrain/pretrain_helpers.py +0 -0
  107. {nlptools → sinatools}/arabert/araelectra/run_finetuning.py +0 -0
  108. {nlptools → sinatools}/arabert/araelectra/run_pretraining.py +0 -0
  109. {nlptools → sinatools}/arabert/araelectra/util/__init__.py +0 -0
  110. {nlptools → sinatools}/arabert/araelectra/util/training_utils.py +0 -0
  111. {nlptools → sinatools}/arabert/araelectra/util/utils.py +0 -0
  112. {nlptools → sinatools}/arabert/aragpt2/__init__.py +0 -0
  113. {nlptools → sinatools}/arabert/aragpt2/create_pretraining_data.py +0 -0
  114. {nlptools → sinatools}/arabert/aragpt2/gpt2/__init__.py +0 -0
  115. {nlptools → sinatools}/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -0
  116. {nlptools → sinatools}/arabert/aragpt2/gpt2/optimization.py +0 -0
  117. {nlptools → sinatools}/arabert/aragpt2/gpt2/run_pretraining.py +0 -0
  118. {nlptools → sinatools}/arabert/aragpt2/grover/__init__.py +0 -0
  119. {nlptools → sinatools}/arabert/aragpt2/grover/dataloader.py +0 -0
  120. {nlptools → sinatools}/arabert/aragpt2/grover/modeling.py +0 -0
  121. {nlptools → sinatools}/arabert/aragpt2/grover/modeling_gpt2.py +0 -0
  122. {nlptools → sinatools}/arabert/aragpt2/grover/optimization_adafactor.py +0 -0
  123. {nlptools → sinatools}/arabert/aragpt2/grover/train_tpu.py +0 -0
  124. {nlptools → sinatools}/arabert/aragpt2/grover/utils.py +0 -0
  125. {nlptools → sinatools}/arabert/aragpt2/train_bpe_tokenizer.py +0 -0
  126. {nlptools → sinatools}/arabert/preprocess.py +0 -0
  127. {nlptools → sinatools}/environment.yml +0 -0
  128. {nlptools → sinatools}/install_env.py +0 -0
  129. /nlptools/nlptools.py → /sinatools/sinatools.py +0 -0
  130. {nlptools/arabiner → sinatools/utils}/__init__.py +0 -0
  131. {nlptools → sinatools}/utils/readfile.py +0 -0
  132. {nlptools → sinatools}/utils/utils.py +0 -0
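
The listing above amounts to a wholesale rename of the package namespace from nlptools to sinatools, together with the removal of the bundled arabiner training code (items 46–64) and some CLI reshuffling. Below is a minimal migration sketch for downstream code, assuming the importable module paths mirror the renamed file paths one-to-one and that sentence/corpus tokenization now lives in sinatools/utils/tokenizer.py (item 36); the exact public API is not confirmed by this diff.

# Hypothetical import migration from SinaTools 0.1.3 to 0.1.7.
# Old (0.1.3) module paths lived under the nlptools package:
#   from nlptools.DataDownload import downloader
#   from nlptools.utils import sentence_tokenizer
# New (0.1.7) paths are assumed to follow the renamed files listed above:
from sinatools.DataDownload import downloader   # assumed: mirrors sinatools/DataDownload/downloader.py
from sinatools.utils import tokenizer           # assumed: new home of the sentence/corpus tokenizers

print(downloader.__file__)
print(tokenizer.__file__)
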
nlptools/utils/sentence_tokenizer.py (deleted)
@@ -1,53 +0,0 @@
-def remove_empty_values(sentences):
-    return [value for value in sentences if value != '']
-
-
-def sent_tokenize(text, dot=True, new_line=True, question_mark=True, exclamation_mark=True):
-    """
-    This method tokenizes a text into a set of sentences based on the selected separators, including the dot, new line, question mark, and exclamation mark.
-
-    Args:
-        text (:obj:`str`): Arabic text to be tokenized.
-        dot (:obj:`str`): flag to split text based on Dot (default is True).
-        new_line (:obj:`str`): flag to split text based on new_line (default is True).
-        question_mark (:obj:`str`): flag to split text based on question_mark (default is True).
-        exclamation_mark (:obj:`str`): flag to split text based on exclamation_mark (default is True).
-
-    Returns:
-        :obj:`list`: list of sentences.
-
-    **Example:**
-
-    .. highlight:: python
-    .. code-block:: python
-
-        from nlptools.utils import sentence_tokenizer
-        sentences = sentence_tokenizer.sent_tokenize("مختبر سينا لحوسبة اللغة والذكاء الإصطناعي. في جامعة بيرزيت.", dot=True, new_line=True, question_mark=True, exclamation_mark=True)
-        print(sentences)
-
-        #output
-        ['مختبر سينا لحوسبة اللغة والذكاء الإصطناعي.', 'في جامعة بيرزيت.']
-    """
-    separators = []
-    split_text = [text]
-    if new_line==True:
-        separators.append('\n')
-    if dot==True:
-        separators.append('.')
-    if question_mark==True:
-        separators.append('?')
-        separators.append('؟')
-    if exclamation_mark==True:
-        separators.append('!')
-
-    for sep in separators:
-        new_split_text = []
-        for part in split_text:
-            tokens = part.split(sep)
-            tokens_with_separator = [token + sep for token in tokens[:-1]]
-            tokens_with_separator.append(tokens[-1].strip())
-            new_split_text.extend(tokens_with_separator)
-        split_text = new_split_text
-
-    split_text = remove_empty_values(split_text)
-    return split_text
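
For reference, the deleted sent_tokenize splits on each enabled separator ('\n', '.', '?', '؟', '!') while keeping the separator attached to the sentence that precedes it, then drops empty fragments. The following standalone sketch reproduces that keep-the-delimiter split; it is illustrative only and not part of either package version, and unlike the removed code it strips whitespace from every fragment rather than only the last one.

# Re-implementation sketch of the removed sentence-splitting logic.
def split_keep_separators(text, separators=('\n', '.', '?', '؟', '!')):
    parts = [text]
    for sep in separators:
        next_parts = []
        for part in parts:
            pieces = part.split(sep)
            # re-attach the separator to every piece except the last one
            next_parts.extend(piece.strip() + sep for piece in pieces[:-1])
            next_parts.append(pieces[-1].strip())
        parts = next_parts
    return [p for p in parts if p]  # drop empty fragments

print(split_keep_separators("مختبر سينا لحوسبة اللغة والذكاء الإصطناعي. في جامعة بيرزيت."))
# ['مختبر سينا لحوسبة اللغة والذكاء الإصطناعي.', 'في جامعة بيرزيت.']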