SinaTools 0.1.26__tar.gz → 0.1.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. SinaTools-0.1.27/PKG-INFO +50 -0
  2. SinaTools-0.1.27/README.rst +39 -0
  3. SinaTools-0.1.27/SinaTools.egg-info/PKG-INFO +50 -0
  4. {SinaTools-0.1.26 → SinaTools-0.1.27}/SinaTools.egg-info/SOURCES.txt +2 -1
  5. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/DataDownload/download_files.py +3 -3
  6. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/jaccard.py +2 -2
  7. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/DataDownload/downloader.py +3 -3
  8. SinaTools-0.1.27/sinatools/VERSION +1 -0
  9. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/__init__.py +6 -1
  10. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/entity_extractor.py +1 -1
  11. SinaTools-0.1.27/sinatools/ner/relation_extractor.py +201 -0
  12. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/synonyms/__init__.py +2 -2
  13. SinaTools-0.1.27/sinatools/utils/similarity.py +149 -0
  14. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/wsd/__init__.py +1 -1
  15. SinaTools-0.1.26/PKG-INFO +0 -20
  16. SinaTools-0.1.26/README.rst +0 -9
  17. SinaTools-0.1.26/SinaTools.egg-info/PKG-INFO +0 -20
  18. SinaTools-0.1.26/sinatools/VERSION +0 -1
  19. SinaTools-0.1.26/sinatools/utils/jaccard.py +0 -247
  20. {SinaTools-0.1.26 → SinaTools-0.1.27}/AUTHORS.rst +0 -0
  21. {SinaTools-0.1.26 → SinaTools-0.1.27}/CONTRIBUTING.rst +0 -0
  22. {SinaTools-0.1.26 → SinaTools-0.1.27}/LICENSE +0 -0
  23. {SinaTools-0.1.26 → SinaTools-0.1.27}/MANIFEST.in +0 -0
  24. {SinaTools-0.1.26 → SinaTools-0.1.27}/SinaTools.egg-info/dependency_links.txt +0 -0
  25. {SinaTools-0.1.26 → SinaTools-0.1.27}/SinaTools.egg-info/entry_points.txt +0 -0
  26. {SinaTools-0.1.26 → SinaTools-0.1.27}/SinaTools.egg-info/not-zip-safe +0 -0
  27. {SinaTools-0.1.26 → SinaTools-0.1.27}/SinaTools.egg-info/requires.txt +0 -0
  28. {SinaTools-0.1.26 → SinaTools-0.1.27}/SinaTools.egg-info/top_level.txt +0 -0
  29. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/Makefile +0 -0
  30. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/_images/download.png +0 -0
  31. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/_static/download.png +0 -0
  32. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/_static/file.png +0 -0
  33. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/_static/minus.png +0 -0
  34. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/_static/plus.png +0 -0
  35. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_images/SinaLogo.jpg +0 -0
  36. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_images/download.png +0 -0
  37. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_static/SinaLogo.jpg +0 -0
  38. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_static/download.png +0 -0
  39. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_static/file.png +0 -0
  40. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_static/minus.png +0 -0
  41. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/build/html/_static/plus.png +0 -0
  42. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/make.bat +0 -0
  43. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/License.rst +0 -0
  44. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/Overview.rst +0 -0
  45. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/_static/SinaLogo.jpg +0 -0
  46. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/_static/download.png +0 -0
  47. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/about.rst +0 -0
  48. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/DataDownload/downloader.rst +0 -0
  49. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/DataDownload.rst +0 -0
  50. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/arabiner/bin/infer.rst +0 -0
  51. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/arabiner.rst +0 -0
  52. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/morphology/morph_analyzer.rst +0 -0
  53. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/morphology.rst +0 -0
  54. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/salma/views.rst +0 -0
  55. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/salma.rst +0 -0
  56. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils/corpus_tokenizer.rst +0 -0
  57. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils/implication.rst +0 -0
  58. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils/jaccard.rst +0 -0
  59. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils/parser.rst +0 -0
  60. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils/sentence_tokenizer.rst +0 -0
  61. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils/text_transliteration.rst +0 -0
  62. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api/utils.rst +0 -0
  63. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/api.rst +0 -0
  64. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/authors.rst +0 -0
  65. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/DataDownload/download_files.rst +0 -0
  66. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/DataDownload/get_appdatadir.rst +0 -0
  67. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/DataDownload.rst +0 -0
  68. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/arabiner/infer.rst +0 -0
  69. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/arabiner.rst +0 -0
  70. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/morphology/ALMA_multi_word.rst +0 -0
  71. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/morphology/morph_analyzer.rst +0 -0
  72. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/morphology.rst +0 -0
  73. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/salma/salma_tools.rst +0 -0
  74. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/salma.rst +0 -0
  75. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/arStrip.rst +0 -0
  76. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/corpus_tokenizer.rst +0 -0
  77. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/implication.rst +0 -0
  78. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/jaccard.rst +0 -0
  79. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/latin_remove.rst +0 -0
  80. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/remove_punc.rst +0 -0
  81. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/sentence_tokenizer.rst +0 -0
  82. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils/text_transliteration.rst +0 -0
  83. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools/utils.rst +0 -0
  84. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/cli_tools.rst +0 -0
  85. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/conf.py +0 -0
  86. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/index.rst +0 -0
  87. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/installation.rst +0 -0
  88. {SinaTools-0.1.26 → SinaTools-0.1.27}/docs/source/readme.rst +0 -0
  89. {SinaTools-0.1.26 → SinaTools-0.1.27}/setup.cfg +0 -0
  90. {SinaTools-0.1.26 → SinaTools-0.1.27}/setup.py +0 -0
  91. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/morphology/ALMA_multi_word.py +0 -0
  92. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/morphology/morph_analyzer.py +0 -0
  93. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/ner/corpus_entity_extractor.py +0 -0
  94. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/ner/entity_extractor.py +0 -0
  95. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/__init__.py +0 -0
  96. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/arStrip.py +0 -0
  97. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/corpus_tokenizer.py +0 -0
  98. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/implication.py +0 -0
  99. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/remove_latin.py +0 -0
  100. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/remove_punctuation.py +0 -0
  101. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/sentence_tokenizer.py +0 -0
  102. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/text_dublication_detector.py +0 -0
  103. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/CLI/utils/text_transliteration.py +0 -0
  104. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/DataDownload/__init__.py +0 -0
  105. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/__init__.py +0 -0
  106. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/__init__.py +0 -0
  107. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/__init__.py +0 -0
  108. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/create_classification_data.py +0 -0
  109. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/create_pretraining_data.py +0 -0
  110. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/extract_features.py +0 -0
  111. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/lamb_optimizer.py +0 -0
  112. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/modeling.py +0 -0
  113. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/optimization.py +0 -0
  114. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/run_classifier.py +0 -0
  115. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/run_pretraining.py +0 -0
  116. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/run_squad.py +0 -0
  117. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/arabert/tokenization.py +0 -0
  118. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/__init__.py +0 -0
  119. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -0
  120. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/build_pretraining_dataset.py +0 -0
  121. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -0
  122. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/configure_finetuning.py +0 -0
  123. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/configure_pretraining.py +0 -0
  124. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/finetune/__init__.py +0 -0
  125. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/finetune/feature_spec.py +0 -0
  126. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/finetune/preprocessing.py +0 -0
  127. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/finetune/scorer.py +0 -0
  128. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/finetune/task.py +0 -0
  129. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/finetune/task_builder.py +0 -0
  130. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/flops_computation.py +0 -0
  131. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/model/__init__.py +0 -0
  132. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/model/modeling.py +0 -0
  133. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/model/optimization.py +0 -0
  134. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/model/tokenization.py +0 -0
  135. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/pretrain/__init__.py +0 -0
  136. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/pretrain/pretrain_data.py +0 -0
  137. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/pretrain/pretrain_helpers.py +0 -0
  138. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/run_finetuning.py +0 -0
  139. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/run_pretraining.py +0 -0
  140. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/util/__init__.py +0 -0
  141. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/util/training_utils.py +0 -0
  142. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/araelectra/util/utils.py +0 -0
  143. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/__init__.py +0 -0
  144. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/create_pretraining_data.py +0 -0
  145. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/gpt2/__init__.py +0 -0
  146. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -0
  147. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/gpt2/optimization.py +0 -0
  148. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/gpt2/run_pretraining.py +0 -0
  149. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/__init__.py +0 -0
  150. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/dataloader.py +0 -0
  151. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/modeling.py +0 -0
  152. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/modeling_gpt2.py +0 -0
  153. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/optimization_adafactor.py +0 -0
  154. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/train_tpu.py +0 -0
  155. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/grover/utils.py +0 -0
  156. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/aragpt2/train_bpe_tokenizer.py +0 -0
  157. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/arabert/preprocess.py +0 -0
  158. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/environment.yml +0 -0
  159. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/install_env.py +0 -0
  160. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/morphology/ALMA_multi_word.py +0 -0
  161. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/morphology/__init__.py +0 -0
  162. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/morphology/morph_analyzer.py +0 -0
  163. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/data/__init__.py +0 -0
  164. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/data/datasets.py +0 -0
  165. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/data/transforms.py +0 -0
  166. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/data_format.py +0 -0
  167. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/datasets.py +0 -0
  168. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/helpers.py +0 -0
  169. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/metrics.py +0 -0
  170. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/nn/BaseModel.py +0 -0
  171. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/nn/BertNestedTagger.py +0 -0
  172. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/nn/BertSeqTagger.py +0 -0
  173. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/nn/__init__.py +0 -0
  174. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/trainers/BaseTrainer.py +0 -0
  175. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/trainers/BertNestedTrainer.py +0 -0
  176. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/trainers/BertTrainer.py +0 -0
  177. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/trainers/__init__.py +0 -0
  178. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/ner/transforms.py +0 -0
  179. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/semantic_relatedness/__init__.py +0 -0
  180. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/semantic_relatedness/compute_relatedness.py +0 -0
  181. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/sinatools.py +0 -0
  182. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/synonyms/synonyms_generator.py +0 -0
  183. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/__init__.py +0 -0
  184. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/charsets.py +0 -0
  185. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/implication.py +0 -0
  186. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/parser.py +0 -0
  187. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/readfile.py +0 -0
  188. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/text_dublication_detector.py +0 -0
  189. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/text_transliteration.py +0 -0
  190. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/tokenizer.py +0 -0
  191. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/utils/tokenizers_words.py +0 -0
  192. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/wsd/disambiguator.py +0 -0
  193. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/wsd/settings.py +0 -0
  194. {SinaTools-0.1.26 → SinaTools-0.1.27}/sinatools/wsd/wsd.py +0 -0
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.1
2
+ Name: SinaTools
3
+ Version: 0.1.27
4
+ Summary: Open-source Python toolkit for Arabic Natural Language Understanding, allowing people to integrate it in their system workflow.
5
+ Home-page: https://github.com/SinaLab/sinatools
6
+ License: MIT license
7
+ Description: SinaTools
8
+ ======================
9
+ Open Source Toolkit for Arabic NLP and NLU developed by [SinaLab](http://sina.birzeit.edu/) at Birzeit University. SinaTools is available through Python APIs, command lines, colabs, and online demos.
10
+
11
+ See the full list of [Available Packages](https://sina.birzeit.edu/sinatools/), which include: (1) [Morphology Tagging](https://sina.birzeit.edu/sinatools/index.html#morph), (2) [Named Entity Recognition (NER)](https://sina.birzeit.edu/sinatools/index.html#ner), (3) [Word Sense Disambiguation (WSD)](https://sina.birzeit.edu/sinatools/index.html#wsd), (4) [Semantic Relatedness](https://sina.birzeit.edu/sinatools/index.html#sr), (5) [Synonymy Extraction and Evaluation](https://sina.birzeit.edu/sinatools/index.html#se), (6) [Relation Extraction](https://sina.birzeit.edu/sinatools/index.html), (7) [Utilities](https://sina.birzeit.edu/sinatools/index.html#u) (diacritic-based word matching, Jaccard similarity, parser, tokenizers, corpora processing, transliteration, etc.).
12
+
13
+ See [Demo Pages](https://sina.birzeit.edu/sinatools/).
14
+
15
+ See the [benchmarking](https://www.jarrar.info/publications/HJK24.pdf), which shows that SinaTools outperformed all related toolkits.
16
+
17
+ Installation
18
+ --------
19
+ To install SinaTools, ensure you are using Python version 3.10.8, then clone the [GitHub](https://github.com/SinaLab/sinatools) repository.
20
+
21
+ Alternatively, you can execute the following command:
22
+
23
+ ```bash
24
+ pip install sinatools
25
+ ```
26
+
27
+ Installing Models and Data Files
28
+ --------
29
+ Some modules in SinaTools require some data files and fine-tuned models to be downloaded. To download these models, please consult the [DataDownload](https://sina.birzeit.edu/sinatools/documentation/cli_tools/DataDownload/DataDownload.html).
30
+
31
+ Documentation
32
+ --------
33
+ For more information, please refer to the [main page](https://sina.birzeit.edu/sinatools) or the [online documentation](https://sina.birzeit.edu/sinatools/documentation).
34
+
35
+ Citation
36
+ -------
37
+ Tymaa Hammouda, Mustafa Jarrar, Mohammed Khalilia: [SinaTools: Open Source Toolkit for Arabic Natural Language Understanding](http://www.jarrar.info/publications/HJK24.pdf). In Proceedings of the 2024 AI in Computational Linguistics (ACLing 2024), Procedia Computer Science, Dubai. ELSEVIER.
38
+
39
+ License
40
+ --------
41
+ SinaTools is available under the MIT License. See the [LICENSE](https://github.com/SinaLab/sinatools/blob/main/LICENSE) file for more information.
42
+
43
+ Reporting Issues
44
+ --------
45
+ To report any issues or bugs, please contact us at "sina.institute.bzu@gmail.com" or visit [SinaTools Issues](https://github.com/SinaLab/sinatools/issues).
46
+
47
+
48
+ Keywords: sinatools
49
+ Platform: UNKNOWN
50
+ Description-Content-Type: text/markdown
@@ -0,0 +1,39 @@
1
+ SinaTools
2
+ ======================
3
+ Open Source Toolkit for Arabic NLP and NLU developed by [SinaLab](http://sina.birzeit.edu/) at Birzeit University. SinaTools is available through Python APIs, command lines, colabs, and online demos.
4
+
5
+ See the full list of [Available Packages](https://sina.birzeit.edu/sinatools/), which include: (1) [Morphology Tagging](https://sina.birzeit.edu/sinatools/index.html#morph), (2) [Named Entity Recognition (NER)](https://sina.birzeit.edu/sinatools/index.html#ner), (3) [Word Sense Disambiguation (WSD)](https://sina.birzeit.edu/sinatools/index.html#wsd), (4) [Semantic Relatedness](https://sina.birzeit.edu/sinatools/index.html#sr), (5) [Synonymy Extraction and Evaluation](https://sina.birzeit.edu/sinatools/index.html#se), (6) [Relation Extraction](https://sina.birzeit.edu/sinatools/index.html), (7) [Utilities](https://sina.birzeit.edu/sinatools/index.html#u) (diacritic-based word matching, Jaccard similarity, parser, tokenizers, corpora processing, transliteration, etc.).
6
+
7
+ See [Demo Pages](https://sina.birzeit.edu/sinatools/).
8
+
9
+ See the [benchmarking](https://www.jarrar.info/publications/HJK24.pdf), which shows that SinaTools outperformed all related toolkits.
10
+
11
+ Installation
12
+ --------
13
+ To install SinaTools, ensure you are using Python version 3.10.8, then clone the [GitHub](https://github.com/SinaLab/sinatools) repository.
14
+
15
+ Alternatively, you can execute the following command:
16
+
17
+ ```bash
18
+ pip install sinatools
19
+ ```
20
+
21
+ Installing Models and Data Files
22
+ --------
23
+ Some modules in SinaTools require some data files and fine-tuned models to be downloaded. To download these models, please consult the [DataDownload](https://sina.birzeit.edu/sinatools/documentation/cli_tools/DataDownload/DataDownload.html).
24
+
25
+ Documentation
26
+ --------
27
+ For more information, please refer to the [main page](https://sina.birzeit.edu/sinatools) or the [online documentation](https://sina.birzeit.edu/sinatools/documentation).
28
+
29
+ Citation
30
+ -------
31
+ Tymaa Hammouda, Mustafa Jarrar, Mohammed Khalilia: [SinaTools: Open Source Toolkit for Arabic Natural Language Understanding](http://www.jarrar.info/publications/HJK24.pdf). In Proceedings of the 2024 AI in Computational Linguistics (ACLing 2024), Procedia Computer Science, Dubai. ELSEVIER.
32
+
33
+ License
34
+ --------
35
+ SinaTools is available under the MIT License. See the [LICENSE](https://github.com/SinaLab/sinatools/blob/main/LICENSE) file for more information.
36
+
37
+ Reporting Issues
38
+ --------
39
+ To report any issues or bugs, please contact us at "sina.institute.bzu@gmail.com" or visit [SinaTools Issues](https://github.com/SinaLab/sinatools/issues).
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.1
2
+ Name: SinaTools
3
+ Version: 0.1.27
4
+ Summary: Open-source Python toolkit for Arabic Natural Language Understanding, allowing people to integrate it in their system workflow.
5
+ Home-page: https://github.com/SinaLab/sinatools
6
+ License: MIT license
7
+ Description: SinaTools
8
+ ======================
9
+ Open Source Toolkit for Arabic NLP and NLU developed by [SinaLab](http://sina.birzeit.edu/) at Birzeit University. SinaTools is available through Python APIs, command lines, colabs, and online demos.
10
+
11
+ See the full list of [Available Packages](https://sina.birzeit.edu/sinatools/), which include: (1) [Morphology Tagging](https://sina.birzeit.edu/sinatools/index.html#morph), (2) [Named Entity Recognition (NER)](https://sina.birzeit.edu/sinatools/index.html#ner), (3) [Word Sense Disambiguation (WSD)](https://sina.birzeit.edu/sinatools/index.html#wsd), (4) [Semantic Relatedness](https://sina.birzeit.edu/sinatools/index.html#sr), (5) [Synonymy Extraction and Evaluation](https://sina.birzeit.edu/sinatools/index.html#se), (6) [Relation Extraction](https://sina.birzeit.edu/sinatools/index.html), (7) [Utilities](https://sina.birzeit.edu/sinatools/index.html#u) (diacritic-based word matching, Jaccard similarity, parser, tokenizers, corpora processing, transliteration, etc.).
12
+
13
+ See [Demo Pages](https://sina.birzeit.edu/sinatools/).
14
+
15
+ See the [benchmarking](https://www.jarrar.info/publications/HJK24.pdf), which shows that SinaTools outperformed all related toolkits.
16
+
17
+ Installation
18
+ --------
19
+ To install SinaTools, ensure you are using Python version 3.10.8, then clone the [GitHub](https://github.com/SinaLab/sinatools) repository.
20
+
21
+ Alternatively, you can execute the following command:
22
+
23
+ ```bash
24
+ pip install sinatools
25
+ ```
26
+
27
+ Installing Models and Data Files
28
+ --------
29
+ Some modules in SinaTools require some data files and fine-tuned models to be downloaded. To download these models, please consult the [DataDownload](https://sina.birzeit.edu/sinatools/documentation/cli_tools/DataDownload/DataDownload.html).
30
+
31
+ Documentation
32
+ --------
33
+ For more information, please refer to the [main page](https://sina.birzeit.edu/sinatools) or the [online documentation](https://sina.birzeit.edu/sinatools/documentation).
34
+
35
+ Citation
36
+ -------
37
+ Tymaa Hammouda, Mustafa Jarrar, Mohammed Khalilia: [SinaTools: Open Source Toolkit for Arabic Natural Language Understanding](http://www.jarrar.info/publications/HJK24.pdf). In Proceedings of the 2024 AI in Computational Linguistics (ACLing 2024), Procedia Computer Science, Dubai. ELSEVIER.
38
+
39
+ License
40
+ --------
41
+ SinaTools is available under the MIT License. See the [LICENSE](https://github.com/SinaLab/sinatools/blob/main/LICENSE) file for more information.
42
+
43
+ Reporting Issues
44
+ --------
45
+ To report any issues or bugs, please contact us at "sina.institute.bzu@gmail.com" or visit [SinaTools Issues](https://github.com/SinaLab/sinatools/issues).
46
+
47
+
48
+ Keywords: sinatools
49
+ Platform: UNKNOWN
50
+ Description-Content-Type: text/markdown
@@ -155,6 +155,7 @@ sinatools/ner/datasets.py
155
155
  sinatools/ner/entity_extractor.py
156
156
  sinatools/ner/helpers.py
157
157
  sinatools/ner/metrics.py
158
+ sinatools/ner/relation_extractor.py
158
159
  sinatools/ner/transforms.py
159
160
  sinatools/ner/data/__init__.py
160
161
  sinatools/ner/data/datasets.py
@@ -174,9 +175,9 @@ sinatools/synonyms/synonyms_generator.py
174
175
  sinatools/utils/__init__.py
175
176
  sinatools/utils/charsets.py
176
177
  sinatools/utils/implication.py
177
- sinatools/utils/jaccard.py
178
178
  sinatools/utils/parser.py
179
179
  sinatools/utils/readfile.py
180
+ sinatools/utils/similarity.py
180
181
  sinatools/utils/text_dublication_detector.py
181
182
  sinatools/utils/text_transliteration.py
182
183
  sinatools/utils/tokenizer.py
@@ -56,14 +56,14 @@ def main():
56
56
  download_file(urls["ner"])
57
57
  download_file(urls["wsd_model"])
58
58
  download_file(urls["wsd_tokenizer"])
59
- download_file(urls["glosses_dic"])
59
+ download_file(urls["one_gram"])
60
60
  download_file(urls["five_grams"])
61
61
  download_file(urls["four_grams"])
62
62
  download_file(urls["three_grams"])
63
63
  download_file(urls["two_grams"])
64
64
  elif file == "synonyms":
65
- download_file(urls["synonyms_level2"])
66
- download_file(urls["synonyms_level3"])
65
+ download_file(urls["graph_l2"])
66
+ download_file(urls["graph_l3"])
67
67
  else:
68
68
  url = urls[file]
69
69
  download_file(url)
@@ -46,7 +46,7 @@ Examples:
46
46
  """
47
47
 
48
48
  import argparse
49
- from sinatools.utils.jaccard import jaccard
49
+ from sinatools.utils.similarity import get_jaccard
50
50
  from sinatools.utils.readfile import read_file
51
51
 
52
52
 
@@ -76,7 +76,7 @@ def main():
76
76
  print("Either --file1 and --file2 arguments or both --set1 and --set2 arguments must be provided.")
77
77
  return
78
78
 
79
- similarity = jaccard(args.delimiter, set1, set2, args.selection, args.ignoreAllDiacriticsButNotShadda, args.ignoreShaddaDiacritic)
79
+ similarity = get_jaccard(args.delimiter, set1, set2, args.selection, args.ignoreAllDiacriticsButNotShadda, args.ignoreShaddaDiacritic)
80
80
 
81
81
  print("Jaccard Result:", similarity)
82
82
 
@@ -10,13 +10,13 @@ urls = {
10
10
  'ner': 'https://sina.birzeit.edu/Wj27012000.tar.gz',
11
11
  'wsd_model': 'https://sina.birzeit.edu/bert-base-arabertv02_22_May_2021_00h_allglosses_unused01.zip',
12
12
  'wsd_tokenizer': 'https://sina.birzeit.edu/bert-base-arabertv02.zip',
13
- 'glosses_dic': 'https://sina.birzeit.edu/glosses_dic.pickle',
13
+ 'one_gram': 'https://sina.birzeit.edu/one_gram.pickle',
14
14
  'five_grams': 'https://sina.birzeit.edu/five_grams.pickle',
15
15
  'four_grams':'https://sina.birzeit.edu/four_grams.pickle',
16
16
  'three_grams':'https://sina.birzeit.edu/three_grams.pickle',
17
17
  'two_grams':'https://sina.birzeit.edu/two_grams.pickle',
18
- 'synonyms_level2':'https://sina.birzeit.edu/synonyms_level2.pkl',
19
- 'synonyms_level3':'https://sina.birzeit.edu/synonyms_level3.pkl'
18
+ 'graph_l2':'https://sina.birzeit.edu/graph_l2.pkl',
19
+ 'graph_l3':'https://sina.birzeit.edu/graph_l3.pkl'
20
20
  }
21
21
 
22
22
  def get_appdatadir():
@@ -0,0 +1 @@
1
+ 0.1.27
@@ -7,6 +7,8 @@ import torch
7
7
  import pickle
8
8
  import json
9
9
  from argparse import Namespace
10
+ from transformers import pipeline
11
+ #from transformers import AutoModelForSequenceClassification
10
12
 
11
13
  tagger = None
12
14
  tag_vocab = None
@@ -35,4 +37,7 @@ if torch.cuda.is_available():
35
37
 
36
38
  train_config.trainer_config["kwargs"]["model"] = model
37
39
  tagger = load_object(train_config.trainer_config["fn"], train_config.trainer_config["kwargs"])
38
- tagger.load(os.path.join(model_path,"checkpoints"))
40
+ tagger.load(os.path.join(model_path,"checkpoints"))
41
+
42
+ pipe = pipeline("sentiment-analysis", model= os.path.join(path, "best_model"), return_all_scores =True, max_length=128, truncation=True)
43
+ #pipe = AutoModelForSequenceClassification.from_pretrained(os.path.join(path, "best_model"))
@@ -27,7 +27,7 @@ def convert_nested_to_flat(nested_tags):
27
27
 
28
28
  return flat_tags
29
29
 
30
- def extract(text, ner_method):
30
+ def extract(text, ner_method="nested"):
31
31
 
32
32
  dataset, token_vocab = text2segments(text)
33
33
 
@@ -0,0 +1,201 @@
1
+ import torch
2
+ import json
3
+ from urllib.request import Request, urlopen
4
+ from sinatools.ner.entity_extractor import extract
5
+ from . import pipe
6
+
7
+
8
+ # ============================ Extract entities and their types ========================
9
def jsons_to_list_of_lists(json_list):
    """Convert a list of token records into ``[token, tags]`` pairs.

    Args:
        json_list: Iterable of mappings that each provide a ``'token'`` and
            a ``'tags'`` entry.

    Returns:
        A list with one two-element list ``[token, tags]`` per record, in
        the same order as the input.

    Raises:
        KeyError: If a record lacks ``'token'`` or ``'tags'``.
    """
    pairs = []
    for record in json_list:
        pairs.append([record['token'], record['tags']])
    return pairs
11
+
12
def entities_and_types(sentence):
    """Map each entity mention found in ``sentence`` to its entity type.

    The sentence is tagged with ``extract``, the per-token tags are merged
    into entity spans by ``distill_entities``, and the first two fields of
    every span (mention text, type) become a key/value pair.

    Args:
        sentence: Text passed unchanged to ``extract``.

    Returns:
        dict of mention text -> entity type. Because the mention text is
        the key, a mention that occurs more than once keeps only the type
        of its last span.
    """
    tagged_tokens = jsons_to_list_of_lists(extract(sentence))
    return {span[0]: span[1] for span in distill_entities(tagged_tokens)}
23
+
24
def distill_entities(entities):
    """Merge per-token IOB tags into entity spans.

    Args:
        entities: List of ``[token, tags]`` pairs, where ``tags`` is a
            space-separated string of tags such as ``"B-ORG I-LOC"`` or
            ``"O"``. The list is handed to ``sortTags``, which rewrites the
            tag strings in place, so the caller's list is modified.

    Returns:
        A list of ``[text, type, start, end]`` entries sorted by ``start``,
        where ``text`` is the space-joined tokens of the span, ``type`` is
        the part of the tag after ``"B-"``/``"I-"``, and ``start``/``end``
        are token positions within ``entities``.
    """
    # Collected output spans.
    list_output = list()

    # Normalise the order of the tags on every token before merging.
    temp_entities = sortTags(entities)

    # One "open span" slot per tag position (nesting level); each slot is
    # [text, type, start, end] and an empty type means the slot is free.
    temp_list = list()

    # Start with a single free slot.
    temp_list.append(["", "", 0, 0])
    word_position = 0

    # Walk the tokens and convert their IOB tags into spans.
    for entity in temp_entities:
        # Index of the tag currently being processed on this token.
        counter_tag = 0
        # Process each tag of the token.
        for tag in str(entity[1]).split():
            # Grow the slot list when this token has more tags than there
            # are slots so far.
            if counter_tag >= len(temp_list):
                temp_list.append(["", "", 0, 0])

            # An "O" tag (on any token but the first) closes every open
            # span: flush the non-empty slots and reset all of them.
            if "O" == tag and word_position != 0:
                for j in range(0, len(temp_list)):
                    if temp_list[j][1] != "":
                        list_output.append([temp_list[j][0].strip(), temp_list[j][1], temp_list[j][2], temp_list[j][3]])
                    temp_list[j][0] = ""
                    temp_list[j][1] = ""
                    temp_list[j][2] = word_position
                    temp_list[j][3] = word_position
            # A well-formed "B-<type>" tag starts a new span in the slot
            # at this tag position.
            elif "O" != tag and len(tag.split("-")) == 2 and tag.split("-")[0] == "B":
                # Flush whatever span was still open in that slot, then
                # re-initialise the slot with this token and type.
                if temp_list[counter_tag][1] != "":
                    list_output.append([temp_list[counter_tag][0].strip(), temp_list[counter_tag][1], temp_list[counter_tag][2], temp_list[counter_tag][3]])
                temp_list[counter_tag][0] = str(entity[0]) + " "
                temp_list[counter_tag][1] = str(tag).split("-")[1]
                temp_list[counter_tag][2] = word_position
                temp_list[counter_tag][3] = word_position

            # A well-formed "I-<type>" tag (on any token but the first)
            # continues an open span of the same type.
            elif "O" != tag and len(tag.split("-")) == 2 and tag.split("-")[0] == "I" and word_position != 0:
                # Search the slots from this tag position onwards for an
                # open span of the same type that has not already been
                # extended at this token; slots passed over on the way
                # are flushed and reset.
                for j in range(counter_tag,len(temp_list)):
                    if temp_list[j][1] == tag[2:] and temp_list[j][3] != word_position:
                        temp_list[j][0] += str(entity[0]) + " "
                        temp_list[j][3] += 1
                        break
                    else:
                        if temp_list[j][1] != "":
                            list_output.append([temp_list[j][0].strip(), temp_list[j][1], temp_list[j][2], temp_list[j][3]])
                        temp_list[j][0] = ""
                        temp_list[j][1] = ""
                        temp_list[j][2] = word_position
                        temp_list[j][3] = word_position
            counter_tag += 1
        word_position += 1
    # Spans still open after the last token have not been emitted yet;
    # flush them now.
    for j in range(0, len(temp_list)):
        if temp_list[j][1] != "":
            list_output.append([temp_list[j][0].strip(), temp_list[j][1], temp_list[j][2], temp_list[j][3]])
    return sorted(list_output, key=lambda x: (x[2]))
95
+
96
def sortTags(entities):
    """Reorder the tags of every token so they line up with the previous token.

    Args:
        entities: List of ``[token, tags]`` pairs, ``tags`` being a
            space-separated tag string. Item ``[1]`` of every pair is
            overwritten with the reordered tag string.

    Returns:
        The same list object that was passed in, after the in-place update.
    """
    # Alias, not a copy: the writes below modify the caller's list.
    temp_entities = entities
    temp_counter = 0
    # For each token: first pad missing "I-" tags by comparing with the
    # previous token, then sort the tags and align them with the previous
    # token's tag order.
    for entity in temp_entities:
        tags = entity[1].split()
        for tag in tags:
            # The first token has no predecessor to compare with.
            if temp_counter != 0:
                # For an "I-" tag, count how many tags on this token and
                # on the previous token contain the same type string.
                if "I-" == tag[0:2]:
                    counter_of_this_tag = 0
                    counter_of_previous_tag = 0
                    for word in tags:
                        if tag.split("-")[1] in word:
                            counter_of_this_tag+=1
                    for word in temp_entities[temp_counter-1][1].split():
                        if tag.split("-")[1] in word:
                            counter_of_previous_tag+=1
                    # The previous token carries more tags of this type,
                    # so add one more "I-<type>" here. Note that `tags` is
                    # the list being iterated, so the appended tag is
                    # visited later by this same loop.
                    if counter_of_previous_tag > counter_of_this_tag:
                        tags.append("I-"+tag.split("-")[1])
        # Sort the tags ...
        tags.sort()
        # ... and reverse, giving descending string order ("O" before
        # "I-..." before "B-...").
        tags.reverse()
        # Align with the previous token (not possible for the first one).
        if temp_counter != 0:
            this_tags = tags
            # Read back from the list, so this is the previous token's
            # already-reordered tag string.
            previous_tags = temp_entities[temp_counter - 1][1].split()
            sorted_tags = list()

            # Only realign when neither token carries an "O" tag.
            if "O" not in this_tags and "O" not in previous_tags:
                index = 0
                # For each tag of the previous token, pull the first
                # matching "I-" tag of this token into the same position;
                # scanning stops at the first "B-" tag.
                for i in previous_tags:
                    j = 0
                    while this_tags and j < len(this_tags):
                        if this_tags[j][0:2] == "I-" and this_tags[j][2:] == i[2:]:
                            sorted_tags.insert(index, this_tags.pop(j))
                            break
                        elif this_tags[j][0:2] == "B-":
                            break
                        j += 1
                    index += 1
            # Tags that were not moved keep their order after the moved ones.
            sorted_tags += this_tags
            tags = sorted_tags
        str_tag = " "
        str_tag = str_tag.join(tags)
        str_tag = str_tag.strip()
        # Write the reordered tag string back into the input list.
        temp_entities[temp_counter][1] = str_tag
        temp_counter += 1
    return temp_entities
154
+
155
+ # ============= Prepare Templates and Catergorize Extracted Entities ================
156
# Arabic connective phrase for each relation category. relation_extraction()
# places it between the event mention and the argument mention when it
# builds the text handed to the classifier.
# (Rough English glosses, to be confirmed by an Arabic speaker:
# 'location' ~ "place of occurrence of", 'agent' ~ "one of those affected
# in", 'happened at' ~ "date of occurrence of".)
temp03={'location':'مكان حدوث','agent':'أحد المتأثرين في','happened at':'تاريخ حدوث'}
# Entity types grouped by the relation category they can fill; looked up
# through get_entity_category(). Types not listed here yield no category.
categories = {
    'agent': ['PERS', 'NORP', 'OCC', 'ORG'],
    'location': ['LOC', 'FAC', 'GPE'],
    'happened at': ['DATE', 'TIME']
}
162
+
163
def get_entity_category(entity_type, categories):
    """Return the first category whose type list contains ``entity_type``.

    Args:
        entity_type: Entity type label to look up.
        categories: Mapping of category name -> collection of type labels.

    Returns:
        The name of the first category (in the mapping's iteration order)
        that lists ``entity_type``, or ``None`` when no category does.
    """
    return next(
        (name for name, members in categories.items() if entity_type in members),
        None,
    )
168
+
169
+
170
+ # ============ Extract entities, their types and categorize them ===============
171
def relation_extraction(sentence):
    """List the relation between every event and every other entity in ``sentence``.

    Entities are obtained from ``entities_and_types``. Each entity typed
    ``'EVENT'`` is paired with each entity of any other type. When the
    argument's type belongs to a category that has a template in
    ``temp03``, a text of the form
    ``"[CLS] <sentence> [SEP] <event> <template> <argument>"`` is scored
    with ``pipe``; pairs whose argument type has no such category are
    skipped.

    Args:
        sentence: Input text.

    Returns:
        A list of strings ``"Event:<event>, Relation:<relation>,
        Argument:<argument>"``. ``<relation>`` is the category name when
        the score of the first entry returned by ``pipe`` exceeds 0.50 and
        ``"No relation"`` otherwise.
        NOTE(review): which label that first entry stands for depends on
        the model configuration - confirm against the model.
    """
    entities = entities_and_types(sentence)

    # Split the mentions into events and candidate arguments, keeping the
    # order in which they appear in the mapping.
    events = [name for name, kind in entities.items() if kind == 'EVENT']
    arguments = [(name, kind) for name, kind in entities.items() if kind != 'EVENT']

    output_list = []
    for event_entity in events:
        for arg_name, arg_type in arguments:
            category = get_entity_category(arg_type, categories)
            # No template for this argument type (category may be None).
            if category not in temp03:
                continue

            relation_sentence = f"[CLS] {sentence} [SEP] {event_entity} {temp03[category]} {arg_name}"
            score = pipe(relation_sentence)[0][0]['score']
            if score > 0.50:
                output_list.append(f"Event:{event_entity}, Relation:{category}, Argument:{arg_name}")
            else:
                output_list.append(f"Event:{event_entity}, Relation:No relation, Argument:{arg_name}")

    return output_list
@@ -3,7 +3,7 @@ from sinatools.DataDownload import downloader
3
3
  import os
4
4
 
5
5
  synonyms_level2_dict = {}
6
- level2_dict = 'synonyms_level2.pkl'
6
+ level2_dict = 'graph_l2.pkl'
7
7
  path = downloader.get_appdatadir()
8
8
  file_path = os.path.join(path, level2_dict)
9
9
  with open(file_path, 'rb') as f:
@@ -11,7 +11,7 @@ with open(file_path, 'rb') as f:
11
11
 
12
12
 
13
13
  synonyms_level3_dict = {}
14
- level3_dict = 'synonyms_level3.pkl'
14
+ level3_dict = 'graph_l3.pkl'
15
15
  path = downloader.get_appdatadir()
16
16
  file_path = os.path.join(path, level3_dict)
17
17
  with open(file_path, 'rb') as f:
@@ -0,0 +1,149 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from sinatools.utils.parser import arStrip
4
+ from sinatools.utils.implication import Implication
5
+ import argparse
6
+
7
def normalize_word(word: str, ignore_all_diacritics_but_not_shadda: bool=True, ignore_shadda_diacritic: bool=True) -> str:
    """Apply the requested ``arStrip`` passes to ``word``.

    Args:
        word: Text to normalize.
        ignore_all_diacritics_but_not_shadda: When true, run ``arStrip``
            with only its first two switches enabled.
        ignore_shadda_diacritic: When true, run ``arStrip`` with only its
            third switch enabled.

    Returns:
        The word after the enabled passes, applied in the order above;
        the word itself when both flags are false.

    NOTE(review): the parameter names suggest the first pass strips
    diacritics other than shadda and the second strips shadda - confirm
    against ``arStrip``'s signature.
    """
    strip_passes = (
        (ignore_all_diacritics_but_not_shadda, (True, True, False, False, False, False)),
        (ignore_shadda_diacritic, (False, False, True, False, False, False)),
    )
    for enabled, switches in strip_passes:
        if enabled:
            word = arStrip(word, *switches)
    return word
15
+
16
+
17
def get_preferred_word(word1, word2):
    """Choose which of two words to keep, based on their implication direction.

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        ``word1`` for direction 0 or 2 and ``word2`` for direction 1. For
        direction 3, ``word1`` if it ends with either of the two
        diacritic marks tested below, otherwise ``word2``. ``None`` for
        any other direction value.
    """
    direction = Implication(word1, word2).get_direction()

    if direction == 0 or direction == 2:
        return word1
    if direction == 1:
        return word2
    if direction == 3:
        ends_with_mark = word1.endswith(("َ", "ُ"))
        return word1 if ends_with_mark else word2
    return None
32
+
33
+
34
def get_non_preferred_word(word1, word2):
    """Choose which of two words to drop, or ``"#"`` when neither should be.

    Args:
        word1: First word.
        word2: Second word.

    Returns:
        ``"#"`` when the implication distance is not below 15, or when the
        direction is none of 0-3. Otherwise ``word1`` for direction 0 or
        1 and ``word2`` for direction 2. For direction 3, ``word2`` if
        ``word1`` ends with either of the two diacritic marks tested
        below, otherwise ``word1``.
    """
    implication = Implication(word1, word2)
    if not (implication.get_distance() < 15):
        return "#"

    direction = implication.get_direction()
    if direction in (0, 1):
        return word1
    if direction == 2:
        return word2
    if direction == 3:
        ends_with_mark = word1.endswith(("َ", "ُ"))
        return word2 if ends_with_mark else word1
    return "#"
48
+
49
def get_intersection(list1, list2, ignore_all_diacritics_but_not_shadda=False, ignore_shadda_diacritic=False):
    """Collect the words that the two lists have in common under implication matching.

    Args:
        list1: First collection of words; ``None``, ``''`` and ``' '``
            entries are dropped, the rest are converted to ``str`` and
            stripped of surrounding whitespace.
        list2: Second collection of words, cleaned the same way.
        ignore_all_diacritics_but_not_shadda: Forwarded to ``normalize_word``.
        ignore_shadda_diacritic: Forwarded to ``normalize_word``.

    Returns:
        A list holding, for every pair (one word from each list) whose
        implication direction is >= 0 and distance is < 15, the word
        chosen by ``get_preferred_word``, pruned afterwards with
        ``get_non_preferred_word``.
    """
    blanks = (None, ' ', '')
    left_words = [str(item).strip() for item in list1 if item not in blanks]
    right_words = [str(item).strip() for item in list2 if item not in blanks]

    matches = []
    for left_word in left_words:
        for right_word in right_words:
            word1 = normalize_word(left_word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)
            word2 = normalize_word(right_word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)

            implication = Implication(word1, word2)
            if implication.get_direction() >= 0 and implication.get_distance() < 15:
                matches.append(get_preferred_word(word1, word2))

    # Pairwise pruning. Entries are removed while the list is being
    # scanned and the inner index still advances after a removal, so the
    # element that shifts into the vacated slot is not compared in that
    # pass. NOTE(review): confirm whether that skip is intended.
    outer = 0
    while outer < len(matches):
        inner = outer + 1
        while inner < len(matches):
            rejected = get_non_preferred_word(matches[outer], matches[inner])
            if rejected != "#":
                matches.remove(rejected)
            inner += 1
        outer += 1

    return matches
79
+
80
+
81
+
82
def get_union(list1, list2, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic):
    """Merge two word lists and prune words judged redundant by implication.

    Args:
        list1: First collection of words; ``None``, ``''`` and ``' '``
            entries are dropped and the rest converted to ``str``.
        list2: Second collection of words, cleaned the same way.
        ignore_all_diacritics_but_not_shadda: Forwarded to ``normalize_word``.
        ignore_shadda_diacritic: Forwarded to ``normalize_word``.

    Returns:
        The normalized words of ``list1`` followed by those of ``list2``,
        minus the entries removed by the pairwise
        ``get_non_preferred_word`` pass.
    """
    blanks = (None, ' ', '')
    first_words = [str(item) for item in list1 if item not in blanks]
    second_words = [str(item) for item in list2 if item not in blanks]

    merged = [
        normalize_word(word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)
        for word in first_words + second_words
    ]

    # Pairwise pruning. Entries are removed while the list is being
    # scanned and the inner index still advances after a removal, so the
    # element that shifts into the vacated slot is not compared in that
    # pass. NOTE(review): confirm whether that skip is intended.
    outer = 0
    while outer < len(merged):
        inner = outer + 1
        while inner < len(merged):
            rejected = get_non_preferred_word(merged[outer], merged[inner])
            if rejected != "#":
                merged.remove(rejected)
            inner = inner + 1
        outer = outer + 1

    return merged
109
+
110
+
111
+
112
def get_jaccard_similarity(list1: list, list2: list, ignore_all_diacritics_but_not_shadda: bool, ignore_shadda_diacritic: bool) -> float:
    """Return the Jaccard similarity of two word lists.

    The similarity is ``len(intersection) / len(union)``, with both sets
    computed by ``get_intersection`` and ``get_union``.

    Args:
        list1: First collection of words.
        list2: Second collection of words.
        ignore_all_diacritics_but_not_shadda: Forwarded to the set helpers.
        ignore_shadda_diacritic: Forwarded to the set helpers.

    Returns:
        The ratio as a float, or ``0.0`` when the union is empty (both
        inputs empty or containing only blank entries).
    """
    intersection_list = get_intersection(list1, list2, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)

    union_list = get_union(list1, list2, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)

    # An empty union previously raised ZeroDivisionError; with nothing to
    # compare, report no similarity instead of crashing.
    if not union_list:
        return 0.0

    return float(len(intersection_list)) / float(len(union_list))
119
+
120
def get_jaccard(delimiter, str1, str2, selection, ignoreAllDiacriticsButNotShadda=True, ignoreShaddaDiacritic=True):
    """Split two delimited strings and compute the requested Jaccard result.

    Args:
        delimiter: Separator passed to ``str.split`` for both strings.
        str1: First delimited string.
        str2: Second delimited string.
        selection: One of ``"intersection"``, ``"union"``,
            ``"jaccardSimilarity"`` or ``"jaccardAll"``.
        ignoreAllDiacriticsButNotShadda: Forwarded to the set helpers.
        ignoreShaddaDiacritic: Forwarded to the set helpers.

    Returns:
        The intersection list, the union list, the similarity value, or
        for ``"jaccardAll"`` the list ``["intersection:", <list>,
        "union:", <list>, "similarity:", <value>]``. Errors are reported
        as strings rather than raised: ``'Invalid selection option'`` for
        an unknown selection, ``'Invalid input type'`` after an
        ``AttributeError`` (for example a non-string input), and
        ``'An error has occurred'`` after any other exception; the two
        exception cases also print the error message.
    """
    flags = (ignoreAllDiacriticsButNotShadda, ignoreShaddaDiacritic)

    try:
        first = str1.split(delimiter)
        second = str2.split(delimiter)

        if selection == "intersection":
            return get_intersection(first, second, *flags)
        if selection == "union":
            return get_union(first, second, *flags)
        if selection == "jaccardSimilarity":
            return get_jaccard_similarity(first, second, *flags)
        if selection == "jaccardAll":
            # Elements are evaluated left to right: intersection, union,
            # then similarity.
            return [
                "intersection:", get_intersection(first, second, *flags),
                "union:", get_union(first, second, *flags),
                "similarity:", get_jaccard_similarity(first, second, *flags),
            ]
        return 'Invalid selection option'

    except AttributeError as ae:
        print(f"Attribute error occurred: {str(ae)}")
        return 'Invalid input type'
    except Exception as e:
        print(f"Error occurred: {str(e)}")
        return 'An error has occurred'
@@ -4,7 +4,7 @@ from sinatools.DataDownload import downloader
4
4
  import os
5
5
 
6
6
  glosses_dic = {}
7
- filename = 'glosses_dic.pickle'
7
+ filename = 'one_gram.pickle'
8
8
  path =downloader.get_appdatadir()
9
9
  file_path = os.path.join(path, filename)
10
10
  with open(file_path, 'rb') as f:
SinaTools-0.1.26/PKG-INFO DELETED
@@ -1,20 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: SinaTools
3
- Version: 0.1.26
4
- Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
5
- Home-page: https://github.com/SinaLab/sinatools
6
- License: MIT license
7
- Description: SinaTools
8
- ---------
9
-
10
- Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
11
-
12
- Python APIs, command lines, colabs, and online demos.
13
-
14
- * Free software: MIT license
15
- * Documentation: https://sina.birzeit.edu/sinatools/
16
-
17
-
18
- Keywords: sinatools
19
- Platform: UNKNOWN
20
- Description-Content-Type: text/markdown
@@ -1,9 +0,0 @@
1
- SinaTools
2
- ---------
3
-
4
- Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
5
-
6
- Python APIs, command lines, colabs, and online demos.
7
-
8
- * Free software: MIT license
9
- * Documentation: https://sina.birzeit.edu/sinatools/