SinaTools 0.1.4__py2.py3-none-any.whl → 0.1.7__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. {SinaTools-0.1.4.dist-info → SinaTools-0.1.7.dist-info}/METADATA +14 -20
  2. SinaTools-0.1.7.dist-info/RECORD +101 -0
  3. SinaTools-0.1.7.dist-info/entry_points.txt +18 -0
  4. SinaTools-0.1.7.dist-info/top_level.txt +1 -0
  5. {nlptools → sinatools}/CLI/DataDownload/download_files.py +9 -9
  6. {nlptools → sinatools}/CLI/morphology/ALMA_multi_word.py +10 -20
  7. sinatools/CLI/morphology/morph_analyzer.py +80 -0
  8. nlptools/CLI/arabiner/bin/infer2.py → sinatools/CLI/ner/corpus_entity_extractor.py +5 -9
  9. nlptools/CLI/arabiner/bin/infer.py → sinatools/CLI/ner/entity_extractor.py +4 -8
  10. {nlptools → sinatools}/CLI/salma/salma_tools.py +8 -8
  11. {nlptools → sinatools}/CLI/utils/arStrip.py +10 -21
  12. sinatools/CLI/utils/corpus_tokenizer.py +50 -0
  13. {nlptools → sinatools}/CLI/utils/implication.py +9 -9
  14. {nlptools → sinatools}/CLI/utils/jaccard.py +10 -10
  15. sinatools/CLI/utils/remove_latin.py +34 -0
  16. sinatools/CLI/utils/remove_punctuation.py +42 -0
  17. {nlptools → sinatools}/CLI/utils/sentence_tokenizer.py +9 -22
  18. {nlptools → sinatools}/CLI/utils/text_transliteration.py +10 -17
  19. {nlptools → sinatools}/DataDownload/downloader.py +9 -9
  20. sinatools/VERSION +1 -0
  21. {nlptools → sinatools}/__init__.py +1 -1
  22. {nlptools → sinatools}/morphology/ALMA_multi_word.py +4 -5
  23. {nlptools → sinatools}/morphology/__init__.py +4 -14
  24. sinatools/morphology/morph_analyzer.py +172 -0
  25. sinatools/ner/__init__.py +12 -0
  26. nlptools/arabiner/bin/infer.py → sinatools/ner/entity_extractor.py +9 -8
  27. {nlptools → sinatools}/salma/__init__.py +2 -2
  28. {nlptools → sinatools}/salma/settings.py +1 -1
  29. {nlptools → sinatools}/salma/views.py +9 -9
  30. {nlptools → sinatools}/salma/wsd.py +2 -2
  31. {nlptools/morphology → sinatools/utils}/charsets.py +1 -3
  32. {nlptools → sinatools}/utils/implication.py +10 -10
  33. {nlptools → sinatools}/utils/jaccard.py +2 -2
  34. {nlptools → sinatools}/utils/parser.py +18 -21
  35. {nlptools → sinatools}/utils/text_transliteration.py +1 -1
  36. nlptools/utils/corpus_tokenizer.py → sinatools/utils/tokenizer.py +58 -5
  37. {nlptools/morphology → sinatools/utils}/tokenizers_words.py +3 -6
  38. SinaTools-0.1.4.dist-info/RECORD +0 -122
  39. SinaTools-0.1.4.dist-info/entry_points.txt +0 -18
  40. SinaTools-0.1.4.dist-info/top_level.txt +0 -1
  41. nlptools/CLI/morphology/morph_analyzer.py +0 -91
  42. nlptools/CLI/utils/corpus_tokenizer.py +0 -74
  43. nlptools/CLI/utils/latin_remove.py +0 -51
  44. nlptools/CLI/utils/remove_Punc.py +0 -53
  45. nlptools/VERSION +0 -1
  46. nlptools/arabiner/bin/__init__.py +0 -14
  47. nlptools/arabiner/bin/eval.py +0 -87
  48. nlptools/arabiner/bin/process.py +0 -140
  49. nlptools/arabiner/bin/train.py +0 -221
  50. nlptools/arabiner/data/__init__.py +0 -1
  51. nlptools/arabiner/data/datasets.py +0 -146
  52. nlptools/arabiner/data/transforms.py +0 -118
  53. nlptools/arabiner/nn/BaseModel.py +0 -22
  54. nlptools/arabiner/nn/BertNestedTagger.py +0 -34
  55. nlptools/arabiner/nn/BertSeqTagger.py +0 -17
  56. nlptools/arabiner/nn/__init__.py +0 -3
  57. nlptools/arabiner/trainers/BaseTrainer.py +0 -117
  58. nlptools/arabiner/trainers/BertNestedTrainer.py +0 -203
  59. nlptools/arabiner/trainers/BertTrainer.py +0 -163
  60. nlptools/arabiner/trainers/__init__.py +0 -3
  61. nlptools/arabiner/utils/__init__.py +0 -0
  62. nlptools/arabiner/utils/data.py +0 -124
  63. nlptools/arabiner/utils/helpers.py +0 -151
  64. nlptools/arabiner/utils/metrics.py +0 -69
  65. nlptools/morphology/morph_analyzer.py +0 -171
  66. nlptools/morphology/settings.py +0 -8
  67. nlptools/utils/__init__.py +0 -0
  68. nlptools/utils/sentence_tokenizer.py +0 -53
  69. {SinaTools-0.1.4.data/data/nlptools → SinaTools-0.1.7.data/data/sinatools}/environment.yml +0 -0
  70. {SinaTools-0.1.4.dist-info → SinaTools-0.1.7.dist-info}/AUTHORS.rst +0 -0
  71. {SinaTools-0.1.4.dist-info → SinaTools-0.1.7.dist-info}/LICENSE +0 -0
  72. {SinaTools-0.1.4.dist-info → SinaTools-0.1.7.dist-info}/WHEEL +0 -0
  73. {nlptools → sinatools}/CLI/utils/__init__.py +0 -0
  74. {nlptools → sinatools}/DataDownload/__init__.py +0 -0
  75. {nlptools → sinatools}/arabert/__init__.py +0 -0
  76. {nlptools → sinatools}/arabert/arabert/__init__.py +0 -0
  77. {nlptools → sinatools}/arabert/arabert/create_classification_data.py +0 -0
  78. {nlptools → sinatools}/arabert/arabert/create_pretraining_data.py +0 -0
  79. {nlptools → sinatools}/arabert/arabert/extract_features.py +0 -0
  80. {nlptools → sinatools}/arabert/arabert/lamb_optimizer.py +0 -0
  81. {nlptools → sinatools}/arabert/arabert/modeling.py +0 -0
  82. {nlptools → sinatools}/arabert/arabert/optimization.py +0 -0
  83. {nlptools → sinatools}/arabert/arabert/run_classifier.py +0 -0
  84. {nlptools → sinatools}/arabert/arabert/run_pretraining.py +0 -0
  85. {nlptools → sinatools}/arabert/arabert/run_squad.py +0 -0
  86. {nlptools → sinatools}/arabert/arabert/tokenization.py +0 -0
  87. {nlptools → sinatools}/arabert/araelectra/__init__.py +0 -0
  88. {nlptools → sinatools}/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -0
  89. {nlptools → sinatools}/arabert/araelectra/build_pretraining_dataset.py +0 -0
  90. {nlptools → sinatools}/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -0
  91. {nlptools → sinatools}/arabert/araelectra/configure_finetuning.py +0 -0
  92. {nlptools → sinatools}/arabert/araelectra/configure_pretraining.py +0 -0
  93. {nlptools → sinatools}/arabert/araelectra/finetune/__init__.py +0 -0
  94. {nlptools → sinatools}/arabert/araelectra/finetune/feature_spec.py +0 -0
  95. {nlptools → sinatools}/arabert/araelectra/finetune/preprocessing.py +0 -0
  96. {nlptools → sinatools}/arabert/araelectra/finetune/scorer.py +0 -0
  97. {nlptools → sinatools}/arabert/araelectra/finetune/task.py +0 -0
  98. {nlptools → sinatools}/arabert/araelectra/finetune/task_builder.py +0 -0
  99. {nlptools → sinatools}/arabert/araelectra/flops_computation.py +0 -0
  100. {nlptools → sinatools}/arabert/araelectra/model/__init__.py +0 -0
  101. {nlptools → sinatools}/arabert/araelectra/model/modeling.py +0 -0
  102. {nlptools → sinatools}/arabert/araelectra/model/optimization.py +0 -0
  103. {nlptools → sinatools}/arabert/araelectra/model/tokenization.py +0 -0
  104. {nlptools → sinatools}/arabert/araelectra/pretrain/__init__.py +0 -0
  105. {nlptools → sinatools}/arabert/araelectra/pretrain/pretrain_data.py +0 -0
  106. {nlptools → sinatools}/arabert/araelectra/pretrain/pretrain_helpers.py +0 -0
  107. {nlptools → sinatools}/arabert/araelectra/run_finetuning.py +0 -0
  108. {nlptools → sinatools}/arabert/araelectra/run_pretraining.py +0 -0
  109. {nlptools → sinatools}/arabert/araelectra/util/__init__.py +0 -0
  110. {nlptools → sinatools}/arabert/araelectra/util/training_utils.py +0 -0
  111. {nlptools → sinatools}/arabert/araelectra/util/utils.py +0 -0
  112. {nlptools → sinatools}/arabert/aragpt2/__init__.py +0 -0
  113. {nlptools → sinatools}/arabert/aragpt2/create_pretraining_data.py +0 -0
  114. {nlptools → sinatools}/arabert/aragpt2/gpt2/__init__.py +0 -0
  115. {nlptools → sinatools}/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -0
  116. {nlptools → sinatools}/arabert/aragpt2/gpt2/optimization.py +0 -0
  117. {nlptools → sinatools}/arabert/aragpt2/gpt2/run_pretraining.py +0 -0
  118. {nlptools → sinatools}/arabert/aragpt2/grover/__init__.py +0 -0
  119. {nlptools → sinatools}/arabert/aragpt2/grover/dataloader.py +0 -0
  120. {nlptools → sinatools}/arabert/aragpt2/grover/modeling.py +0 -0
  121. {nlptools → sinatools}/arabert/aragpt2/grover/modeling_gpt2.py +0 -0
  122. {nlptools → sinatools}/arabert/aragpt2/grover/optimization_adafactor.py +0 -0
  123. {nlptools → sinatools}/arabert/aragpt2/grover/train_tpu.py +0 -0
  124. {nlptools → sinatools}/arabert/aragpt2/grover/utils.py +0 -0
  125. {nlptools → sinatools}/arabert/aragpt2/train_bpe_tokenizer.py +0 -0
  126. {nlptools → sinatools}/arabert/preprocess.py +0 -0
  127. {nlptools → sinatools}/environment.yml +0 -0
  128. {nlptools → sinatools}/install_env.py +0 -0
  129. /nlptools/nlptools.py → /sinatools/sinatools.py +0 -0
  130. {nlptools/arabiner → sinatools/utils}/__init__.py +0 -0
  131. {nlptools → sinatools}/utils/readfile.py +0 -0
  132. {nlptools → sinatools}/utils/utils.py +0 -0
@@ -1,13 +1,12 @@
  Metadata-Version: 2.1
  Name: SinaTools
- Version: 0.1.4
- Summary: UNKNOWN
- Home-page: https://github.com/SinaLab/nlptools
- Author: UNKNOWN
- Author-email: UNKNOWN
+ Version: 0.1.7
+ Summary: A short description of your project
+ Home-page: https://github.com/SinaLab/sinatools
  License: MIT license
- Keywords: nlptools
+ Keywords: sinatools
  Platform: UNKNOWN
+ Description-Content-Type: text/markdown
  Requires-Dist: six
  Requires-Dist: farasapy
  Requires-Dist: tqdm
@@ -20,32 +19,27 @@ Requires-Dist: torchtext (==0.14.0)
  Requires-Dist: torchvision (==0.14.0)
  Requires-Dist: seqeval (==1.2.2)
  Requires-Dist: natsort (==7.1.1)
+ Requires-Dist: pandas (==1.2.4)

  ========
- nlptools
+ sinatools
  ========

+ .. image:: https://img.shields.io/pypi/v/sinatools.svg
+ :target: https://pypi.python.org/pypi/SinaTools

- .. image:: https://img.shields.io/pypi/v/nlptools.svg
- :target: https://pypi.python.org/pypi/SinaTools
-
- .. image:: https://img.shields.io/travis/sina_institute/nlptools.svg
- :target: https://travis-ci.com/sina_institute/SinaTools
-
- .. image:: https://readthedocs.org/projects/nlptools/badge/?version=latest
- :target: https://SinaTools.readthedocs.io/en/latest/?version=latest
- :alt: Documentation Status
-
-
+ .. image:: https://img.shields.io/travis/sina_institute/sinatools.svg
+ :target: https://travis-ci.com/sina_institute/SinaTools

+ .. image:: https://readthedocs.org/projects/sinatools/badge/?version=latest
+ :target: https://SinaTools.readthedocs.io/en/latest/?version=latest
+ :alt: Documentation Status

  Python Boilerplate contains all the boilerplate you need to create a Python package.

-
  * Free software: MIT license
  * Documentation: https://sina.birzeit.edu/sinatools/

-
  Credits
  -------

@@ -0,0 +1,101 @@
+ SinaTools-0.1.7.data/data/sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
+ sinatools/VERSION,sha256=Gmytzwl0rsvqV5jsEYdTXHSbWrOb2vARjvgA3N9TGwY,5
+ sinatools/__init__.py,sha256=bEosTU1o-FSpyytS6iVP_82BXHF2yHnzpJxPLYRbeII,135
+ sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
+ sinatools/install_env.py,sha256=EODeeE0ZzfM_rz33_JSIruX03Nc4ghyVOM5BHVhsZaQ,404
+ sinatools/sinatools.py,sha256=vR5AaF0iel21LvsdcqwheoBz0SIj9K9I_Ub8M8oA98Y,20
+ sinatools/CLI/DataDownload/download_files.py,sha256=17CtswdAT66KO7hw3o87RaWbM-BxdUpsheE6bysP3-c,2302
+ sinatools/CLI/morphology/ALMA_multi_word.py,sha256=ZImJ1vtcpSHydI1BjJmK3KcMJbGBZX16kO4L6rxvBvA,2086
+ sinatools/CLI/morphology/morph_analyzer.py,sha256=ieIM47QK9Nct3MtCS9uq3h2rZN5r4qNhsLmlVeE6wiE,3503
+ sinatools/CLI/ner/corpus_entity_extractor.py,sha256=jsxTQsR4i8ZwsWrX1XxkYUbLGygYKV7-pWDiubfaANE,3751
+ sinatools/CLI/ner/entity_extractor.py,sha256=BHAs2nGKL9npHUXj-6FDHQCuR2jidvFJX8yUkgQKxhc,4436
+ sinatools/CLI/salma/salma_tools.py,sha256=8IDMSXjpM2u8jXc6c5JcI_l2CmiwdCxsUBJVN1Rrfk0,1971
+ sinatools/CLI/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sinatools/CLI/utils/arStrip.py,sha256=pOMh9e4O-vBixbv2HM5qjlA8-qJH3Zf7DeJVekPrgjo,3252
+ sinatools/CLI/utils/corpus_tokenizer.py,sha256=nH0T4h6urr_0Qy6-wN3PquOtnwybj0REde5Ts_OE4U8,1650
+ sinatools/CLI/utils/implication.py,sha256=3vw526ZL0WR8LiIKbjYibTQWE_UeYvHThc1W9-BlbHg,3133
+ sinatools/CLI/utils/jaccard.py,sha256=1zSkEQevB-1D5xcT__qmrgB1s8CISU70wDMBteCKCSo,4601
+ sinatools/CLI/utils/remove_latin.py,sha256=dzRzRapmM4mJwS-rhNy9PYQKS-ONMsRBmN1ZcPfEBfE,848
+ sinatools/CLI/utils/remove_punctuation.py,sha256=vJAZlEn7WGftZAFVFYnddkRrxdJ_rMmKB9vFZkY-jN4,1097
+ sinatools/CLI/utils/sentence_tokenizer.py,sha256=Wli8eiDbWSd_Z8UKpu_JkaS8jImowa1vnRL0oYCSfqw,2823
+ sinatools/CLI/utils/text_transliteration.py,sha256=vz-3kxWf8pNYVCqNAtBAiA6u_efrS5NtWT-ofN1NX6I,2014
+ sinatools/DataDownload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sinatools/DataDownload/downloader.py,sha256=MbTPqqlg5vOTErxeVvdMn5k0TsYaG6kef2zHkeBLXlk,6480
+ sinatools/arabert/__init__.py,sha256=ely2PttjgSv7vKdzskuD1rtK_l_UOpmxJSz8isrveD0,16
+ sinatools/arabert/preprocess.py,sha256=qI0FsuMTOzdRlYGCtLrjpXgikNElUZPv9bnjaKDZKJ4,33024
+ sinatools/arabert/arabert/__init__.py,sha256=KbSAH-XqbRygn0y59m5-ZYOLXgpT1gSgE3F-qd4rKEc,627
+ sinatools/arabert/arabert/create_classification_data.py,sha256=BhemGNRbYz_Pun0Q5WerN2-9n-ILmU3tm4J-OlHw5-A,7678
+ sinatools/arabert/arabert/create_pretraining_data.py,sha256=2M-cF3CLHbQ0cdWrzFT6Frg1vVP4Y-CFoq8iEPyxgsE,18924
+ sinatools/arabert/arabert/extract_features.py,sha256=C1IzASrlX7u4_M2xdr_PjzWfTRZgklhUXA2WHKgQt-I,15585
+ sinatools/arabert/arabert/lamb_optimizer.py,sha256=uN3Dcx-6n2_OwepyymRrGrB4EcSkR8b2ZczZrOr7bpY,6263
+ sinatools/arabert/arabert/modeling.py,sha256=KliecCmA1pP3owg0mYge6On3IRHunMF5kMLuEwc0VLw,40896
+ sinatools/arabert/arabert/optimization.py,sha256=Wx0Js6Zsfc3iVw-_7Q1SCnxfP_qqbdTAyFD-vZSpOyk,8153
+ sinatools/arabert/arabert/run_classifier.py,sha256=AdVGyvidlmbEp12b-PauiBo6EmFLEO7tqeJKuLhK2DA,38777
+ sinatools/arabert/arabert/run_pretraining.py,sha256=yO16nKkHDfcYA2Zx7vv8KN4te6_1qFOzyVeDzFT-DQw,21894
+ sinatools/arabert/arabert/run_squad.py,sha256=PORxgiByP8L6vZqAFkqgHPJ_ZjAlqlg64gtkdLmDNns,53456
+ sinatools/arabert/arabert/tokenization.py,sha256=R6xkyCb8_vgeksXiLeqDvV5vOnLb1cPNsvfDij6YVFk,14132
+ sinatools/arabert/araelectra/__init__.py,sha256=ely2PttjgSv7vKdzskuD1rtK_l_UOpmxJSz8isrveD0,16
+ sinatools/arabert/araelectra/build_openwebtext_pretraining_dataset.py,sha256=pIo6VFT3XXOYroZaab3msZAP6XjCKu0KcrIZQA0Pj8U,3881
+ sinatools/arabert/araelectra/build_pretraining_dataset.py,sha256=Z8ZmKznaE_2SPDRoPYR1SDhjTN_NTpNCFFuhUkykwl8,9041
+ sinatools/arabert/araelectra/build_pretraining_dataset_single_file.py,sha256=W7HFr1XoO6bCDR7X7w-bOuwULFtTSjeKbJ2LHzzHf9k,3224
+ sinatools/arabert/araelectra/configure_finetuning.py,sha256=YfGLMdgN6Qqm357Mzy5UMjkuLPPWtBs7f4dA-DKE6JM,7768
+ sinatools/arabert/araelectra/configure_pretraining.py,sha256=oafQgu4WmVdxBcU5mSfXhPlvCk43CJwAWXC10Q58BlI,5801
+ sinatools/arabert/araelectra/flops_computation.py,sha256=krHTeuPH9xQu5ldprBOPJNlJRvC7fmmvXXqUjfWrzPE,9499
+ sinatools/arabert/araelectra/run_finetuning.py,sha256=JecbrSmGikBNyid4JKRZ49Rm5xFpt02WfgIIcs3TpcU,12976
+ sinatools/arabert/araelectra/run_pretraining.py,sha256=1K2aAFTY0p3iaLY0xkhTlm6v0B-Zun8SwEzz-K6RXM4,20665
+ sinatools/arabert/araelectra/finetune/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
+ sinatools/arabert/araelectra/finetune/feature_spec.py,sha256=cqNlBa2KK_G1-vkKm1EJUv6BoS3gesCUAHwVagZB6wM,1888
+ sinatools/arabert/araelectra/finetune/preprocessing.py,sha256=1mf7-IxknCRsobQZ-VV1zs4Cwt-mfOtoVxysDJa9LZ0,6657
+ sinatools/arabert/araelectra/finetune/scorer.py,sha256=PjRg0P5ANCtul2ute7ccq3mRCCoIAoCb-lVLlwd4rVY,1571
+ sinatools/arabert/araelectra/finetune/task.py,sha256=zM8M4PGSIrY2u6ytpmkQEXxG-jjoeN9wouEyVR23qeQ,1991
+ sinatools/arabert/araelectra/finetune/task_builder.py,sha256=Zsoiuw5M3Ca8QhaZVLVLZyWw09K5R75UeMuPmazMlHI,2768
+ sinatools/arabert/araelectra/model/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
+ sinatools/arabert/araelectra/model/modeling.py,sha256=5XLIutnmr-SFQOV_XntJ-U5evSCY-J2e9NjvlwVXKkk,40877
+ sinatools/arabert/araelectra/model/optimization.py,sha256=BCMb_C5hgBw7wC9ZR8AQ4lwoPopqLIcSiqcCrIjx9XU,7254
+ sinatools/arabert/araelectra/model/tokenization.py,sha256=9CkyPzs3L6OEPzN-7EWQDNQmW2mIJoZD4o1rn6xLdL4,11082
+ sinatools/arabert/araelectra/pretrain/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
+ sinatools/arabert/araelectra/pretrain/pretrain_data.py,sha256=NLgIcLAq1-MgtBNXYu_isDxnOY5k67SyADYy-8nzBok,5413
+ sinatools/arabert/araelectra/pretrain/pretrain_helpers.py,sha256=nFl7LEdxAU5kKwiodqJHzi-ty9jMFsCCNYOF__A69j8,9255
+ sinatools/arabert/araelectra/util/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
+ sinatools/arabert/araelectra/util/training_utils.py,sha256=7h_J1ljUWM0ynBcofEtjZWL_oAfZtTxEemQLkixgn-0,4142
+ sinatools/arabert/araelectra/util/utils.py,sha256=G0UAETUCZMlU9R9ASD9AXrWZeodWI1aZJEE9F-goaH4,2591
+ sinatools/arabert/aragpt2/__init__.py,sha256=aQkKhQwWaS61wYEeOdx682upeMWFPUjLxXSs7JM1sOA,18
+ sinatools/arabert/aragpt2/create_pretraining_data.py,sha256=fFa2_DAyTwc8L2IqQbshsh_Ia26nj1qtVLzC6DooSac,3105
+ sinatools/arabert/aragpt2/train_bpe_tokenizer.py,sha256=b-8zHQ02fLmZV4GfjnrPptwjpX259F41SlnWzBrflMA,1888
+ sinatools/arabert/aragpt2/gpt2/__init__.py,sha256=aQkKhQwWaS61wYEeOdx682upeMWFPUjLxXSs7JM1sOA,18
+ sinatools/arabert/aragpt2/gpt2/lamb_optimizer.py,sha256=uN3Dcx-6n2_OwepyymRrGrB4EcSkR8b2ZczZrOr7bpY,6263
+ sinatools/arabert/aragpt2/gpt2/optimization.py,sha256=iqh23cypRSRUt53wt2G5SbNNpJMwERM7gZAOKVh5l4U,8411
+ sinatools/arabert/aragpt2/gpt2/run_pretraining.py,sha256=4jjkUbvTO1DHoKJ89yKtlkkofcND_fyAunQ-mlnJhTM,13298
+ sinatools/arabert/aragpt2/grover/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sinatools/arabert/aragpt2/grover/dataloader.py,sha256=-FWPTjtsvweEE1WaWRHBXfOSbsGiUmnXT3qK7KJP8cM,6853
+ sinatools/arabert/aragpt2/grover/modeling.py,sha256=XcUvFwqRaxAwWiJstrH2FPBvDJe03pTWIyipdMfWj9g,38280
+ sinatools/arabert/aragpt2/grover/modeling_gpt2.py,sha256=WFpCWn1792yATFzt8rZ0rpWvExfbLzV2BqiEs7llFUw,51602
+ sinatools/arabert/aragpt2/grover/optimization_adafactor.py,sha256=1geOsCWuv5xxtSnKDz9a8aY5SVwZ1MGq-xVQDBg4Gpg,9765
+ sinatools/arabert/aragpt2/grover/train_tpu.py,sha256=qNgLI_j6-KYkTMJfVoFlh4NIKweY1aPz1XPDw6odld0,7102
+ sinatools/arabert/aragpt2/grover/utils.py,sha256=V5wMUxK03r5g_pb7R3_uGLOPqQJfbIB0VaJ8ZDM4XAo,8473
+ sinatools/morphology/ALMA_multi_word.py,sha256=GPM2-N7_5JIZwNdi1we6gBG0rh59AlGM0XWYxmEE7jY,1283
+ sinatools/morphology/__init__.py,sha256=avTxtexZELp1Fya6cBNFLyeYPB31OcmQOlT2L-uAQnI,1386
+ sinatools/morphology/morph_analyzer.py,sha256=tA78gWg6iaE_G1c2xqxZoXZWNbvHBJLrTSxPyir5Xn8,6941
+ sinatools/ner/__init__.py,sha256=8R8epTEyvpbreLYTrC5M5lctlzZrNr7T7B4KmENnB3I,341
+ sinatools/ner/entity_extractor.py,sha256=amVU6tXoAAL9NcadfJlx1qyEPlxBY8wRo5Tn-ZLOVIw,3236
+ sinatools/salma/__init__.py,sha256=_by3PsXetNjkxSyg24nF592T-21JEWhPXzMAPzwDOhQ,378
+ sinatools/salma/settings.py,sha256=b_AqTxVWALuGXnsMd9KhnnwIo9-JEoWOTekB-7_xJCU,1111
+ sinatools/salma/views.py,sha256=G5W5BSr770NapWz5j6hcuwInrR40JKG-LkzP1OpcYeA,18416
+ sinatools/salma/wsd.py,sha256=vCiiR5h3bjAOHi3yxxkh_7GUgBWKQf297aHbO4Z8CBk,4436
+ sinatools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sinatools/utils/charsets.py,sha256=rs82oZJqRqosZdTKXfFAJfJ5t4PxjMM_oAPsiWSWuwU,2817
+ sinatools/utils/implication.py,sha256=MsbI6S1LNY-fCxGMxFTuaV639r3QijkkdcfH48rvY7A,27804
+ sinatools/utils/jaccard.py,sha256=S7OgvaMqkN5HFgTZkKhMCNAuAnQ0LhRyXPN79jAzmKM,10113
+ sinatools/utils/parser.py,sha256=CPPtCrsbxUqsjhY5C9wTOgkAs6iw0k_WvMUxLEPM1IU,6168
+ sinatools/utils/readfile.py,sha256=xE4LEaCqXJIk9v37QUSSmWb-aY3UnCFUNb7uVdx3cpM,133
+ sinatools/utils/text_transliteration.py,sha256=NQoXrxI-h0UXnvVtDA3skNJduxIy0IW26r46N4tDxGk,8766
+ sinatools/utils/tokenizer.py,sha256=QHyrVqJA_On4rKxexiWR2ovq4pI1-u6iZkdhRbK9tew,6676
+ sinatools/utils/tokenizers_words.py,sha256=efNfOil9qDNVJ9yynk_8sqf65PsL-xtsHG7y2SZCkjQ,656
+ sinatools/utils/utils.py,sha256=vKkFOkYclMu8nXS_VZb6Kobx8QGKW9onXkkLCeiRb6g,32
+ SinaTools-0.1.7.dist-info/AUTHORS.rst,sha256=aTWeWlIdfLi56iLJfIUAwIrmqDcgxXKLji75_Fjzjyg,174
+ SinaTools-0.1.7.dist-info/LICENSE,sha256=uwsKYG4TayHXNANWdpfMN2lVW4dimxQjA_7vuCVhD70,1088
+ SinaTools-0.1.7.dist-info/METADATA,sha256=TWtbd8m_tSIStY0O0mLGnf5y5zR0Yk7PVFAkBOwqrTo,1569
+ SinaTools-0.1.7.dist-info/WHEEL,sha256=6T3TYZE4YFi2HTS1BeZHNXAi8N52OZT4O-dJ6-ome_4,116
+ SinaTools-0.1.7.dist-info/entry_points.txt,sha256=9uGvOGRicf-CsHMaFyQjq1odtr3RMeOvEfiZwpDQ9VU,926
+ SinaTools-0.1.7.dist-info/top_level.txt,sha256=8tNdPTeJKw3TQCaua8IJIx6N6WpgZZmVekf1OdBNJpE,10
+ SinaTools-0.1.7.dist-info/RECORD,,
@@ -0,0 +1,18 @@
+ [console_scripts]
+ alma_multi_word = sinatools.CLI.morphology.ALMA_multi_word:main
+ appdatadir = sinatools.CLI.DataDownload.get_appdatadir:main
+ arStrip = sinatools.CLI.utils.arStrip:main
+ arabi_ner = sinatools.CLI.ner.entity_extractor:main
+ arabi_ner2 = sinatools.CLI.ner.corpus_entity_extractor:main
+ corpus_tokenizer = sinatools.CLI.utils.corpus_tokenizer:main
+ download_files = sinatools.CLI.DataDownload.download_files:main
+ implication = sinatools.CLI.utils.implication:main
+ install_env = sinatools.install_env:main
+ jaccard_similarity = sinatools.CLI.utils.jaccard:main
+ morphology_analyzer = sinatools.CLI.morphology.morph_analyzer:main
+ remove_latin = sinatools.CLI.utils.remove_latin:main
+ remove_punctuation = sinatools.CLI.utils.remove_punctuation:main
+ salma = sinatools.CLI.salma.salma_tools:main
+ sentence_tokenizer = sinatools.CLI.utils.sentence_tokenizer:main
+ transliterate = sinatools.CLI.utils.text_transliteration:main
+
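Each line above maps a console command to a module-level main() function in the renamed sinatools package. As a quick post-install check, the registered scripts can be enumerated with the standard library; this is a minimal sketch that assumes a Python 3.8+ interpreter with the 0.1.7 wheel installed.

.. code-block:: python

    # List the console_scripts registered by the installed wheel (Python 3.8+).
    from importlib.metadata import entry_points

    eps = entry_points()
    # entry_points() returns a dict-like mapping before Python 3.10 and a selectable view afterwards.
    scripts = eps.select(group="console_scripts") if hasattr(eps, "select") else eps.get("console_scripts", [])
    for ep in scripts:
        if ep.value.startswith("sinatools."):
            print(ep.name, "->", ep.value)  # e.g. morphology_analyzer -> sinatools.CLI.morphology.morph_analyzer:main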
@@ -0,0 +1 @@
+ sinatools
@@ -2,17 +2,17 @@
  About:
  ------

- The sina_download_files tool is a command-line interface for downloading various NLP resources from pre-specified URLs. It is a part of the nlptools package and provides options to choose which files to download and to specify a download directory. The tool automatically handles file extraction for zip and tar.gz files.
+ The download_files tool is a command-line interface for downloading various NLP resources from pre-specified URLs. It is a part of the sinatools package and provides options to choose which files to download and to specify a download directory. The tool automatically handles file extraction for zip and tar.gz files.

  Usage:
  ------

- Below is the usage information that can be generated by running sina_download_files --help.
+ Below is the usage information that can be generated by running download_files --help.

  .. code-block:: none

  Usage:
- sina_download_files [OPTIONS]
+ download_files [OPTIONS]

  .. code-block:: none

@@ -26,7 +26,7 @@ Examples:

  .. code-block:: none

- sina_download_files -f morph ner
+ download_files -f morph ner
  This command will download only the `morph` and `ner` files to the default directory.

  Note:
@@ -42,10 +42,10 @@ Note:
  """

  import argparse
- from nlptools.DataDownload.downloader import download_file
- from nlptools.DataDownload.downloader import download_files
- from nlptools.DataDownload.downloader import get_appdatadir
- from nlptools.DataDownload.downloader import urls
+ from sinatools.DataDownload.downloader import download_file
+ from sinatools.DataDownload.downloader import download_files
+ from sinatools.DataDownload.downloader import get_appdatadir
+ from sinatools.DataDownload.downloader import urls


  def main():
@@ -68,4 +68,4 @@ def main():
  if __name__ == '__main__':
  main()

- #sina_download_files -f morph ner
+ #download_files -f morph ner
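The renamed CLI keeps the same four downloader imports, so the programmatic equivalent looks roughly like the sketch below. Only the import path is confirmed by this diff; the call signatures of the downloader functions are not shown here, so the argument-free calls are assumptions.

.. code-block:: python

    # Hedged sketch: the import path comes from the diff; the call signatures are assumed.
    from sinatools.DataDownload.downloader import get_appdatadir, urls

    print(get_appdatadir())  # assumed to take no arguments and return the default data directory
    print(list(urls))        # assumed: maps resource names such as 'morph' and 'ner' to download URLs
    # download_file / download_files handle the resource selection that the CLI's -f flag forwards,
    # but their exact signatures are not visible in this diff.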
@@ -1,16 +1,16 @@
  """
  About:
  ------
- The sina_alma_multi_word tool performs multi-word morphological analysis using SinaTools' `ALMA_multi_word` utility. Given a multi-word Arabic text input, it returns a detailed analysis in JSON format.
+ The alma_multi_word tool performs multi-word morphological analysis using SinaTools' `ALMA_multi_word` utility. Given a multi-word Arabic text input, it returns a detailed analysis in JSON format.

  Usage:
  ------
- Below is the usage information that can be generated by running sina_alma_multi_word --help.
+ Below is the usage information that can be generated by running alma_multi_word --help.

  .. code-block:: none

- sina_alma_multi_word --multi_word=MULTI_WORD_TEXT
- sina_alma_multi_word --file
+ alma_multi_word --multi_word=MULTI_WORD_TEXT
+ alma_multi_word --file

  Options:
  --------
@@ -27,25 +27,15 @@ Examples:

  .. code-block:: none

- sina_alma_multi_word --multi_word "Your multi-word text here"
- sina_alma_multi_word --file "path/to/your/file.txt"
-
- Note:
- -----
-
- .. code-block:: none
-
- - Ensure that the text input is appropriately encoded in UTF-8 or compatible formats.
- - The tool returns results in JSON format with proper indentation for better readability.
- - The quality and accuracy of the analysis depend on the underlying capabilities of the SinaTools' `ALMA_multi_word` utility.
- - The tool is specifically designed for multi-word input. For single-word morphological analysis, other specific utilities/tools might be more appropriate.
+ alma_multi_word --multi_word "Your multi-word text here"
+ alma_multi_word --file "path/to/your/file.txt"

  """

  import argparse
- from nlptools.morphology.ALMA_multi_word import ALMA_multi_word
+ from sinatools.morphology.ALMA_multi_word import ALMA_multi_word
  import json
- from nlptools.utils.readfile import read_file
+ from sinatools.utils.readfile import read_file

  def main():
  parser = argparse.ArgumentParser(description='Multi-Word Analysis using SinaTools')
@@ -71,5 +61,5 @@ def main():

  if __name__ == '__main__':
  main()
- #sina_alma_multi_word --multi_word "Your multi-word text here"
- #sina_alma_multi_word --file "path/to/your/file.txt"
+ #alma_multi_word --multi_word "Your multi-word text here"
+ #alma_multi_word --file "path/to/your/file.txt"
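For reference, the library call this CLI wraps can be used directly. The import path and the JSON-style output are confirmed by the hunks above; the exact call signature of ALMA_multi_word is not shown in this diff, so passing the multi-word expression as a single string is an assumption.

.. code-block:: python

    # Hedged sketch: direct use of the analyzer behind the alma_multi_word command.
    import json
    from sinatools.morphology.ALMA_multi_word import ALMA_multi_word

    result = ALMA_multi_word("عبد الله")  # assumed: takes the multi-word expression as one string
    print(json.dumps(result, ensure_ascii=False, indent=2))  # the CLI prints the analysis as indented JSON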
@@ -0,0 +1,80 @@
+ """
+ About:
+ ------
+ The morphology_analyzer command is designed to provide morphological analysis for Arabic text using the SinaTools morph_analyzer component. Users can specify the language and desired analysis task (lemmatization, part-of-speech tagging, or full morphological analysis), and flag.
+
+ Usage:
+ ------
+ Below is the usage information that can be generated by running morphology_analyzer --help.
+
+ .. code-block:: none
+
+ morphology_analyzer --text=TEXT [OPTIONS]
+ morphology_analyzer --file=FILE [OPTIONS]
+
+ Options:
+ --------
+
+ .. code-block:: none
+
+ --text TEXT
+ The text that needs to be morphologically analyzed.
+
+ --file FILE
+ File containing the text to be morphologically analyzed
+
+ --language LANGUAGE [default=MSA]
+ Specifies the language for the analysis. In the current version, MSA is only supported.
+
+ --task TASK [default=full]
+ Determines the specific type of morphological analysis to be performed. Available options are:
+ - lemmatization: the morphological solution includes only the lemma_id, lemma, token, and token frequency.
+ - pos: the morphological solution includes only the part-of-speech, token, and token frequency.
+ - root: the morphological solution includes only the root, token, and token frequency.
+ - full: the morphological solution includes the lemma_id, lemma, part-of-speech, root, token, and token frequency.
+ The default is full.
+
+ --flag FLAG [default=1]
+ The flag to filter the returned results. If the flag is `1`, the solution with the highest frequency will be returned. If the flag is `*`, all solutions will be returned, ordered descendingly, with the highest frequency solution first. The default flag if not specified is `1`.
+
+ Examples:
+ ---------
+
+ .. code-block:: none
+
+ morphology_analyzer --text "Your Arabic text here" --language MSA --task full --flag 1
+ morphology_analyzer --file "path/to/your/file.txt" --language MSA --task full --flag 1
+
+ """
+
+ import argparse
+ from sinatools.morphology.morph_analyzer import analyze
+ from sinatools.utils.readfile import read_file
+
+ def main():
+ parser = argparse.ArgumentParser(description='Morphological Analysis using SinaTools')
+
+ parser.add_argument('--text', type=str, help='Text to be morphologically analyzed')
+ parser.add_argument('--file', type=str, help='File containing the text to be morphologically analyzed')
+ parser.add_argument('--language', type=str, default='MSA', help='Language for analysis (default: MSA)')
+ parser.add_argument('--task', type=str, default='full', choices=['lemmatization', 'pos', 'root', 'full'], help='Task for the result filter [lemmatization, pos, root, full] (default: full)')
+ parser.add_argument('--flag', type=str, default='1', choices=['1','*'], help='The flag to filter the returned results')
+
+ args = parser.parse_args()
+
+ if args.text is None and args.file is None:
+ print("Error: Either --text or --file argument must be provided.")
+ return
+
+ # Get the input either from the --text argument or from the file specified in the --file argument
+ input_text = args.text if args.text else " ".join(read_file(args.file))
+
+ # Perform morphological analysis
+ results = analyze(input_text, args.language, args.task, args.flag)
+
+ # Print the results
+ for result in results:
+ print(result)
+
+ if __name__ == '__main__':
+ main()
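The new CLI already exposes the underlying library call, analyze(text, language, task, flag) from sinatools.morphology.morph_analyzer. A minimal programmatic sketch follows; the argument order and values mirror what the CLI passes, while the exact structure of each returned solution is not shown in this diff and is left unspecified.

.. code-block:: python

    # Minimal sketch of calling the analyzer directly, mirroring the CLI defaults.
    from sinatools.morphology.morph_analyzer import analyze

    results = analyze("ذهب الولد إلى المدرسة", "MSA", "full", "1")  # language, task, flag as in the CLI
    for solution in results:  # one morphological solution per item; its exact shape is not shown in this diff
        print(solution)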
@@ -1,8 +1,10 @@
  import os
  import csv
- from nlptools.utils.sentence_tokenizer import sent_tokenize
- from nlptools.morphology.tokenizers_words import simple_word_tokenize
+ from sinatools.utils.tokenizer import sentence_tokenizer
+ from sinatools.utils.tokenizers_words import simple_word_tokenize
  import pandas as pd
+ import argparse
+ from sinatools.ner.entity_extractor import ner

  """
  CSV NER Tagging Tool
@@ -14,12 +16,6 @@ Run the script with the following command:
  arabi_ner2 input.csv --text-columns "TextColumn1,TextColumn2" --additional-columns "Column3,Column4" --output-csv output.csv
  """

- import argparse
- import pandas as pd
- from nlptools.utils.sentence_tokenizer import sent_tokenize
- from nlptools.morphology.tokenizers_words import simple_word_tokenize
- from nlptools.arabiner.bin.infer import ner
-
  def infer(sentence):
  output = ner(sentence)
  return [word[1] for word in output]
@@ -39,7 +35,7 @@ def corpus_tokenizer(input_csv, output_csv, text_column, additional_columns, row
 
  df = pd.read_csv(input_csv)
  for index, row in df.iterrows():
- sentences = sent_tokenize(row[text_column], dot=True, new_line=True, question_mark=False, exclamation_mark=False)
+ sentences = sentence_tokenizer(row[text_column], dot=True, new_line=True, question_mark=False, exclamation_mark=False)
  for sentence_id, sentence in enumerate(sentences, start=1):
  words = simple_word_tokenize(sentence)
  global_sentence_id += 1
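The hunk above pins down the tokenization calls the renamed CSV tool relies on: sentence_tokenizer with explicit punctuation switches, followed by simple_word_tokenize per sentence. A small sketch of just that step, using the keyword arguments visible in the diff (the sample text is a placeholder):

.. code-block:: python

    # Sketch of the tokenization step used by corpus_entity_extractor, with the kwargs shown in the diff.
    from sinatools.utils.tokenizer import sentence_tokenizer
    from sinatools.utils.tokenizers_words import simple_word_tokenize

    text = "ذهب الولد إلى المدرسة. ثم عاد إلى البيت"
    sentences = sentence_tokenizer(text, dot=True, new_line=True, question_mark=False, exclamation_mark=False)
    for sentence_id, sentence in enumerate(sentences, start=1):
        words = simple_word_tokenize(sentence)
        print(sentence_id, words)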
@@ -45,9 +45,9 @@ Note:
  import argparse
  import json
  import pandas as pd
- from nlptools.arabiner.bin.infer import ner
- from nlptools.utils.corpus_tokenizer import corpus_tokenizer
- from nlptools.morphology.tokenizers_words import simple_word_tokenize
+ from sinatools.ner.entity_extractor import ner
+ from sinatools.utils.tokenizer import corpus_tokenizer
+ from sinatools.utils.tokenizers_words import simple_word_tokenize


  def infer(sentence):
@@ -110,8 +110,4 @@ if __name__ == '__main__':
  main()

  #arabi_ner --text "Your text here."
- #arabi_ner --dir /path/to/your/directory --output_csv output.csv
-
- #Each unique sentence in the CSV file is processed once by the infer function to get the NER tags for all the words in the sentence.
- #The current_word_position variable is used to keep track of the position within the list of NER tags returned by infer, ensuring that each word in the CSV file is assigned the correct NER tag.
- #The final CSV file will contain an additional column, NER tags, which contains the NER tag for each word in the Sentence column of the CSV file.
+ #arabi_ner --dir /path/to/your/directory --output_csv output.csv
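Both NER CLIs now wrap the same ner function from sinatools.ner.entity_extractor, and the infer helpers in these files show that each item of its output carries the tag at index 1. A minimal sketch follows; that index 0 holds the surface token is an assumption, since only index 1 is used in this diff.

.. code-block:: python

    # Hedged sketch of the tagging call that both NER CLIs build on.
    from sinatools.ner.entity_extractor import ner

    output = ner("ولد أحمد في القدس")   # one item per word in the sentence
    tags = [item[1] for item in output]  # confirmed by the diff: index 1 holds the NER tag
    print(tags)                          # index 0 is presumably the token itself (assumption)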
@@ -11,8 +11,8 @@ Below is the usage information that can be generated by running the command with

  .. code-block:: none

- sina_salma --text=TEXT
- sina_salma --file=INPUT_FILE
+ salma --text=TEXT
+ salma --file=INPUT_FILE

  Options:
  --------
@@ -27,8 +27,8 @@ Examples:
  ---------
  .. code-block:: none

- sina_salma --text "your Arabic sentence here"
- sina_salma --file "path/to/your/file.txt"
+ salma --text "your Arabic sentence here"
+ salma --file "path/to/your/file.txt"

  Note:
  -----
@@ -42,8 +42,8 @@ Note:

  import argparse
  import json
- from nlptools.salma.views import SALMA
- from nlptools.utils.readfile import read_file
+ from sinatools.salma.views import SALMA
+ from sinatools.utils.readfile import read_file

  def main():
  parser = argparse.ArgumentParser(description='Arabic text stripping tool using SinaTools')
@@ -64,5 +64,5 @@ def main():
  if __name__ == "__main__":
  main()

- #sina_salma --text "your Arabic sentence here"
- #sina_salma --file "path/to/your/file.txt"
+ #salma --text "your Arabic sentence here"
+ #salma --file "path/to/your/file.txt"
@@ -2,17 +2,17 @@

  About:
  ------
- The sina_arStrip tool offers functionality to strip various elements from Arabic text using the SinaTools' `arStrip` utility. It provides flexibility to selectively strip diacritics, small diacritics, shaddah, digits, alif, and special characters.
+ The arStrip command offers functionality to strip various elements from Arabic text using the SinaTools' `arStrip` utility. It provides flexibility to selectively strip diacritics, small diacritics, shaddah, digits, alif, and special characters.

  Usage:
  ------
- Below is the usage information that can be generated by running sina_arStrip --help.
+ Below is the usage information that can be generated by running arStrip --help.

  .. code-block:: none

  Usage:
- sina_arStrip --text=TEXT [OPTIONS]
- sina_arStrip --file "path/to/your/file.txt" [OPTIONS]
+ arStrip --text=TEXT [OPTIONS]
+ arStrip --file "path/to/your/file.txt" [OPTIONS]

  .. code-block:: none

@@ -43,27 +43,16 @@ Below is the usage information that can be generated by running sina_arStrip --h

  Examples:
  ---------
-
- .. code-block:: none
-
- sina_arStrip --text "مُختَبَر سينا لحوسبة اللغة!" --diacs=True --smallDiacs=False --shaddah=True --digit=False --alif=False --specialChars=False
-
- sina_arStrip --file "path/to/your/file.txt" --diacs=True --smallDiacs=False --shaddah=True --digit=False --alif=False --specialChars=False
-
- Note:
- -----
-
  .. code-block:: none

- - This tool is specific to Arabic text, as it focuses on Arabic linguistic elements.
- - Ensure that the text input is appropriately encoded in UTF-8 or compatible formats.
- - Stripping certain elements might change the meaning or readability of the text. Use it judiciously.
+ arStrip --text "مُختَبَر سينا لحوسبة اللغة!" --diacs=True --smallDiacs=False --shaddah=True --digit=False --alif=False --specialChars=False
+ arStrip --file "path/to/your/file.txt" --diacs=True --smallDiacs=False --shaddah=True --digit=False --alif=False --specialChars=False

  """

  import argparse
- from nlptools.utils.parser import arStrip
- from nlptools.utils.readfile import read_file
+ from sinatools.utils.parser import arStrip
+ from sinatools.utils.readfile import read_file

  def main():
  parser = argparse.ArgumentParser(description='Arabic text stripping tool using SinaTools')
@@ -95,5 +84,5 @@ def main():
  if __name__ == '__main__':
  main()

- #sina_arStrip --text "example text" --diacs=True
- #sina_arStrip --file "path/to/your/file.txt" --diacs=True
+ #arStrip --text "example text" --diacs=True
+ #arStrip --file "path/to/your/file.txt" --diacs=True
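For reference, the stripping function itself lives at sinatools.utils.parser.arStrip, as the import hunk confirms. The sketch below assumes that its keyword arguments mirror the CLI flags (diacs, smallDiacs, shaddah, digit, alif, specialChars); the actual signature is not shown in this diff.

.. code-block:: python

    # Hedged sketch of the library call behind the arStrip command.
    # Import path confirmed by the diff; keyword names assumed to mirror the CLI flags.
    from sinatools.utils.parser import arStrip

    stripped = arStrip("مُختَبَر سينا لحوسبة اللغة!",
                       diacs=True, smallDiacs=False, shaddah=True,
                       digit=False, alif=False, specialChars=False)
    print(stripped)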
@@ -0,0 +1,50 @@
+ """
+
+ About:
+ ------
+ The corpus_tokenizer command offers functionality to tokenize a corpus and write the results to a CSV file. It recursively searches through a specified directory for text files, tokenizes the content, and outputs the results, including various metadata, to a specified CSV file.
+
+ Usage:
+ -------
+ Below is the usage information that can be generated by running corpus_tokenizer --help.
+
+ .. code-block:: none
+
+ Usage:
+ corpus_tokenizer dir_path output_csv
+
+ .. code-block:: none
+ dir_path
+ The path to the directory containing the text files.
+
+ output_csv
+ The path to the output CSV file.
+
+ Examples:
+ ---------
+ .. code-block:: none
+ corpus_tokenizer --dir_path "/path/to/text/directory/of/files" --output_csv "outputFile.csv"
+ """
+
+ import argparse
+ from sinatools.utils.tokenizer import corpus_tokenizer
+
+ # Define the main function that will parse the arguments
+ def main():
+ # Create an ArgumentParser object
+ parser = argparse.ArgumentParser(description='Tokenize the corpus and write the results to a CSV file.')
+
+ # Add arguments to the parser
+ parser.add_argument('--dir_path', type=str, help='The path to the directory containing the text files.')
+ parser.add_argument('--output_csv', type=str, help='The path to the output CSV file.')
+
+ # Parse the command-line arguments
+ args = parser.parse_args()
+
+ # Call the corpus_tokenizer function with the parsed arguments
+ corpus_tokenizer(args.dir_path, args.output_csv)
+
+ # Call the main function when the script is executed
+ if __name__ == '__main__':
+ main()
+
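This CLI simply forwards two paths to corpus_tokenizer from sinatools.utils.tokenizer, so the programmatic call is a one-liner. The paths below are placeholders, and the diff does not show exactly which metadata columns end up in the CSV.

.. code-block:: python

    # Direct use of the function the corpus_tokenizer command wraps (paths are placeholders).
    from sinatools.utils.tokenizer import corpus_tokenizer

    corpus_tokenizer("/path/to/text/directory/of/files", "outputFile.csv")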
@@ -1,18 +1,18 @@
  """
  About:
  ------
- The sina_implication tool evaluates the implication between two words using the functionalities provided by the `Implication` class of SinaTools. This tool can be utilized to determine the relationship between two words and understand if one implies the other.
+ The implication tool evaluates the implication between two words using the functionalities provided by the `Implication` class of SinaTools. This tool can be utilized to determine the relationship between two words and understand if one implies the other.

  Usage:
  ------
- Below is the usage information that can be generated by running sina_implication --help.
+ Below is the usage information that can be generated by running implication --help.

  .. code-block:: none

  Usage:
- sina_implication --inputWord1=WORD1 --inputWord2=WORD2
+ implication --inputWord1=WORD1 --inputWord2=WORD2

- sina_implication --inputFile1=File1 --inputFile2=File2
+ implication --inputFile1=File1 --inputFile2=File2

  .. code-block:: none

@@ -33,9 +33,9 @@ Examples:

  .. code-block:: none

- sina_implication --inputWord1 "word1" --inputWord2 "word2"
+ implication --inputWord1 "word1" --inputWord2 "word2"

- sina_implication --file1 "path/to/your/file1.txt" --file2 "path/to/your/file2.txt"
+ implication --file1 "path/to/your/file1.txt" --file2 "path/to/your/file2.txt"

  Note:
  -----
@@ -47,7 +47,7 @@ Note:

  """
  import argparse
- from nlptools.utils.implication import Implication
+ from sinatools.utils.implication import Implication

  def read_file(file_path):
  with open(file_path, 'r', encoding='utf-8') as file:
@@ -86,7 +86,7 @@ def main():

  if __name__ == '__main__':
  main()
- # sina_implication --inputWord1 "word1" --inputWord2 "word2"
- # sina_implication --file1 "path/to/your/firstfile.txt" --file2 "path/to/your/secondfile.txt"
+ # implication --inputWord1 "word1" --inputWord2 "word2"
+ # implication --file1 "path/to/your/firstfile.txt" --file2 "path/to/your/secondfile.txt"

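Only the import path of the Implication class is confirmed by this hunk; how the two words are passed in and how the verdict is read back is not shown. The sketch below is therefore hypothetical beyond the import line.

.. code-block:: python

    # Heavily hedged sketch: everything after the import is an assumption for illustration only.
    from sinatools.utils.implication import Implication

    imp = Implication("ولد", "وَلَدٌ")  # hypothetical: assumes the constructor takes the two words to compare
    print(imp)                          # the method that returns the implication verdict is not shown in this diff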