SinaTools 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. SinaTools-0.1.1/AUTHORS.rst +13 -0
  2. SinaTools-0.1.1/CONTRIBUTING.rst +128 -0
  3. SinaTools-0.1.1/HISTORY.rst +8 -0
  4. SinaTools-0.1.1/LICENSE +22 -0
  5. SinaTools-0.1.1/MANIFEST.in +28 -0
  6. SinaTools-0.1.1/PKG-INFO +58 -0
  7. SinaTools-0.1.1/README.rst +37 -0
  8. SinaTools-0.1.1/SinaTools.egg-info/PKG-INFO +58 -0
  9. SinaTools-0.1.1/SinaTools.egg-info/SOURCES.txt +191 -0
  10. SinaTools-0.1.1/SinaTools.egg-info/dependency_links.txt +1 -0
  11. SinaTools-0.1.1/SinaTools.egg-info/entry_points.txt +18 -0
  12. SinaTools-0.1.1/SinaTools.egg-info/not-zip-safe +1 -0
  13. SinaTools-0.1.1/SinaTools.egg-info/requires.txt +12 -0
  14. SinaTools-0.1.1/SinaTools.egg-info/top_level.txt +1 -0
  15. SinaTools-0.1.1/docs/Makefile +20 -0
  16. SinaTools-0.1.1/docs/build/_images/download.png +0 -0
  17. SinaTools-0.1.1/docs/build/_static/download.png +0 -0
  18. SinaTools-0.1.1/docs/build/_static/file.png +0 -0
  19. SinaTools-0.1.1/docs/build/_static/minus.png +0 -0
  20. SinaTools-0.1.1/docs/build/_static/plus.png +0 -0
  21. SinaTools-0.1.1/docs/build/html/_images/SinaLogo.jpg +0 -0
  22. SinaTools-0.1.1/docs/build/html/_images/download.png +0 -0
  23. SinaTools-0.1.1/docs/build/html/_static/SinaLogo.jpg +0 -0
  24. SinaTools-0.1.1/docs/build/html/_static/download.png +0 -0
  25. SinaTools-0.1.1/docs/build/html/_static/file.png +0 -0
  26. SinaTools-0.1.1/docs/build/html/_static/minus.png +0 -0
  27. SinaTools-0.1.1/docs/build/html/_static/plus.png +0 -0
  28. SinaTools-0.1.1/docs/make.bat +35 -0
  29. SinaTools-0.1.1/docs/source/License.rst +24 -0
  30. SinaTools-0.1.1/docs/source/Overview.rst +15 -0
  31. SinaTools-0.1.1/docs/source/_static/SinaLogo.jpg +0 -0
  32. SinaTools-0.1.1/docs/source/_static/download.png +0 -0
  33. SinaTools-0.1.1/docs/source/about.rst +13 -0
  34. SinaTools-0.1.1/docs/source/api/DataDownload/downloader.rst +7 -0
  35. SinaTools-0.1.1/docs/source/api/DataDownload.rst +15 -0
  36. SinaTools-0.1.1/docs/source/api/arabiner/bin/infer.rst +12 -0
  37. SinaTools-0.1.1/docs/source/api/arabiner.rst +15 -0
  38. SinaTools-0.1.1/docs/source/api/morphology/morph_analyzer.rst +9 -0
  39. SinaTools-0.1.1/docs/source/api/morphology.rst +15 -0
  40. SinaTools-0.1.1/docs/source/api/salma/views.rst +12 -0
  41. SinaTools-0.1.1/docs/source/api/salma.rst +14 -0
  42. SinaTools-0.1.1/docs/source/api/utils/corpus_tokenizer.rst +5 -0
  43. SinaTools-0.1.1/docs/source/api/utils/implication.rst +5 -0
  44. SinaTools-0.1.1/docs/source/api/utils/jaccard.rst +5 -0
  45. SinaTools-0.1.1/docs/source/api/utils/parser.rst +11 -0
  46. SinaTools-0.1.1/docs/source/api/utils/sentence_tokenizer.rst +5 -0
  47. SinaTools-0.1.1/docs/source/api/utils/text_transliteration.rst +5 -0
  48. SinaTools-0.1.1/docs/source/api/utils.rst +20 -0
  49. SinaTools-0.1.1/docs/source/api.rst +16 -0
  50. SinaTools-0.1.1/docs/source/authors.rst +1 -0
  51. SinaTools-0.1.1/docs/source/cli_tools/DataDownload/download_files.rst +5 -0
  52. SinaTools-0.1.1/docs/source/cli_tools/DataDownload/get_appdatadir.rst +5 -0
  53. SinaTools-0.1.1/docs/source/cli_tools/DataDownload.rst +15 -0
  54. SinaTools-0.1.1/docs/source/cli_tools/arabiner/infer.rst +5 -0
  55. SinaTools-0.1.1/docs/source/cli_tools/arabiner.rst +21 -0
  56. SinaTools-0.1.1/docs/source/cli_tools/morphology/ALMA_multi_word.rst +5 -0
  57. SinaTools-0.1.1/docs/source/cli_tools/morphology/morph_analyzer.rst +5 -0
  58. SinaTools-0.1.1/docs/source/cli_tools/morphology.rst +22 -0
  59. SinaTools-0.1.1/docs/source/cli_tools/salma/salma_tools.rst +20 -0
  60. SinaTools-0.1.1/docs/source/cli_tools/salma.rst +22 -0
  61. SinaTools-0.1.1/docs/source/cli_tools/utils/arStrip.rst +5 -0
  62. SinaTools-0.1.1/docs/source/cli_tools/utils/corpus_tokenizer.rst +5 -0
  63. SinaTools-0.1.1/docs/source/cli_tools/utils/implication.rst +5 -0
  64. SinaTools-0.1.1/docs/source/cli_tools/utils/jaccard.rst +5 -0
  65. SinaTools-0.1.1/docs/source/cli_tools/utils/latin_remove.rst +5 -0
  66. SinaTools-0.1.1/docs/source/cli_tools/utils/remove_punc.rst +5 -0
  67. SinaTools-0.1.1/docs/source/cli_tools/utils/sentence_tokenizer.rst +5 -0
  68. SinaTools-0.1.1/docs/source/cli_tools/utils/text_transliteration.rst +5 -0
  69. SinaTools-0.1.1/docs/source/cli_tools/utils.rst +22 -0
  70. SinaTools-0.1.1/docs/source/cli_tools.rst +17 -0
  71. SinaTools-0.1.1/docs/source/conf.py +208 -0
  72. SinaTools-0.1.1/docs/source/index.rst +19 -0
  73. SinaTools-0.1.1/docs/source/installation.rst +78 -0
  74. SinaTools-0.1.1/docs/source/readme.rst +1 -0
  75. SinaTools-0.1.1/nlptools/CLI/DataDownload/download_files.py +71 -0
  76. SinaTools-0.1.1/nlptools/CLI/arabiner/bin/infer.py +117 -0
  77. SinaTools-0.1.1/nlptools/CLI/arabiner/bin/infer2.py +81 -0
  78. SinaTools-0.1.1/nlptools/CLI/morphology/ALMA_multi_word.py +75 -0
  79. SinaTools-0.1.1/nlptools/CLI/morphology/morph_analyzer.py +91 -0
  80. SinaTools-0.1.1/nlptools/CLI/salma/salma_tools.py +68 -0
  81. SinaTools-0.1.1/nlptools/CLI/utils/__init__.py +0 -0
  82. SinaTools-0.1.1/nlptools/CLI/utils/arStrip.py +99 -0
  83. SinaTools-0.1.1/nlptools/CLI/utils/corpus_tokenizer.py +74 -0
  84. SinaTools-0.1.1/nlptools/CLI/utils/implication.py +92 -0
  85. SinaTools-0.1.1/nlptools/CLI/utils/jaccard.py +96 -0
  86. SinaTools-0.1.1/nlptools/CLI/utils/latin_remove.py +51 -0
  87. SinaTools-0.1.1/nlptools/CLI/utils/remove_Punc.py +53 -0
  88. SinaTools-0.1.1/nlptools/CLI/utils/sentence_tokenizer.py +90 -0
  89. SinaTools-0.1.1/nlptools/CLI/utils/text_transliteration.py +77 -0
  90. SinaTools-0.1.1/nlptools/DataDownload/__init__.py +0 -0
  91. SinaTools-0.1.1/nlptools/DataDownload/downloader.py +185 -0
  92. SinaTools-0.1.1/nlptools/VERSION +1 -0
  93. SinaTools-0.1.1/nlptools/__init__.py +5 -0
  94. SinaTools-0.1.1/nlptools/arabert/__init__.py +1 -0
  95. SinaTools-0.1.1/nlptools/arabert/arabert/__init__.py +14 -0
  96. SinaTools-0.1.1/nlptools/arabert/arabert/create_classification_data.py +260 -0
  97. SinaTools-0.1.1/nlptools/arabert/arabert/create_pretraining_data.py +534 -0
  98. SinaTools-0.1.1/nlptools/arabert/arabert/extract_features.py +444 -0
  99. SinaTools-0.1.1/nlptools/arabert/arabert/lamb_optimizer.py +158 -0
  100. SinaTools-0.1.1/nlptools/arabert/arabert/modeling.py +1027 -0
  101. SinaTools-0.1.1/nlptools/arabert/arabert/optimization.py +202 -0
  102. SinaTools-0.1.1/nlptools/arabert/arabert/run_classifier.py +1078 -0
  103. SinaTools-0.1.1/nlptools/arabert/arabert/run_pretraining.py +593 -0
  104. SinaTools-0.1.1/nlptools/arabert/arabert/run_squad.py +1440 -0
  105. SinaTools-0.1.1/nlptools/arabert/arabert/tokenization.py +414 -0
  106. SinaTools-0.1.1/nlptools/arabert/araelectra/__init__.py +1 -0
  107. SinaTools-0.1.1/nlptools/arabert/araelectra/build_openwebtext_pretraining_dataset.py +103 -0
  108. SinaTools-0.1.1/nlptools/arabert/araelectra/build_pretraining_dataset.py +230 -0
  109. SinaTools-0.1.1/nlptools/arabert/araelectra/build_pretraining_dataset_single_file.py +90 -0
  110. SinaTools-0.1.1/nlptools/arabert/araelectra/configure_finetuning.py +172 -0
  111. SinaTools-0.1.1/nlptools/arabert/araelectra/configure_pretraining.py +143 -0
  112. SinaTools-0.1.1/nlptools/arabert/araelectra/finetune/__init__.py +14 -0
  113. SinaTools-0.1.1/nlptools/arabert/araelectra/finetune/feature_spec.py +56 -0
  114. SinaTools-0.1.1/nlptools/arabert/araelectra/finetune/preprocessing.py +173 -0
  115. SinaTools-0.1.1/nlptools/arabert/araelectra/finetune/scorer.py +54 -0
  116. SinaTools-0.1.1/nlptools/arabert/araelectra/finetune/task.py +74 -0
  117. SinaTools-0.1.1/nlptools/arabert/araelectra/finetune/task_builder.py +70 -0
  118. SinaTools-0.1.1/nlptools/arabert/araelectra/flops_computation.py +215 -0
  119. SinaTools-0.1.1/nlptools/arabert/araelectra/model/__init__.py +14 -0
  120. SinaTools-0.1.1/nlptools/arabert/araelectra/model/modeling.py +1029 -0
  121. SinaTools-0.1.1/nlptools/arabert/araelectra/model/optimization.py +193 -0
  122. SinaTools-0.1.1/nlptools/arabert/araelectra/model/tokenization.py +355 -0
  123. SinaTools-0.1.1/nlptools/arabert/araelectra/pretrain/__init__.py +14 -0
  124. SinaTools-0.1.1/nlptools/arabert/araelectra/pretrain/pretrain_data.py +160 -0
  125. SinaTools-0.1.1/nlptools/arabert/araelectra/pretrain/pretrain_helpers.py +229 -0
  126. SinaTools-0.1.1/nlptools/arabert/araelectra/run_finetuning.py +323 -0
  127. SinaTools-0.1.1/nlptools/arabert/araelectra/run_pretraining.py +469 -0
  128. SinaTools-0.1.1/nlptools/arabert/araelectra/util/__init__.py +14 -0
  129. SinaTools-0.1.1/nlptools/arabert/araelectra/util/training_utils.py +112 -0
  130. SinaTools-0.1.1/nlptools/arabert/araelectra/util/utils.py +109 -0
  131. SinaTools-0.1.1/nlptools/arabert/aragpt2/__init__.py +2 -0
  132. SinaTools-0.1.1/nlptools/arabert/aragpt2/create_pretraining_data.py +95 -0
  133. SinaTools-0.1.1/nlptools/arabert/aragpt2/gpt2/__init__.py +2 -0
  134. SinaTools-0.1.1/nlptools/arabert/aragpt2/gpt2/lamb_optimizer.py +158 -0
  135. SinaTools-0.1.1/nlptools/arabert/aragpt2/gpt2/optimization.py +225 -0
  136. SinaTools-0.1.1/nlptools/arabert/aragpt2/gpt2/run_pretraining.py +397 -0
  137. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/__init__.py +0 -0
  138. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/dataloader.py +161 -0
  139. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/modeling.py +803 -0
  140. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/modeling_gpt2.py +1196 -0
  141. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/optimization_adafactor.py +234 -0
  142. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/train_tpu.py +187 -0
  143. SinaTools-0.1.1/nlptools/arabert/aragpt2/grover/utils.py +234 -0
  144. SinaTools-0.1.1/nlptools/arabert/aragpt2/train_bpe_tokenizer.py +59 -0
  145. SinaTools-0.1.1/nlptools/arabert/preprocess.py +818 -0
  146. SinaTools-0.1.1/nlptools/arabiner/__init__.py +0 -0
  147. SinaTools-0.1.1/nlptools/arabiner/bin/__init__.py +14 -0
  148. SinaTools-0.1.1/nlptools/arabiner/bin/eval.py +87 -0
  149. SinaTools-0.1.1/nlptools/arabiner/bin/infer.py +91 -0
  150. SinaTools-0.1.1/nlptools/arabiner/bin/process.py +140 -0
  151. SinaTools-0.1.1/nlptools/arabiner/bin/train.py +221 -0
  152. SinaTools-0.1.1/nlptools/arabiner/data/__init__.py +1 -0
  153. SinaTools-0.1.1/nlptools/arabiner/data/datasets.py +146 -0
  154. SinaTools-0.1.1/nlptools/arabiner/data/transforms.py +118 -0
  155. SinaTools-0.1.1/nlptools/arabiner/nn/BaseModel.py +22 -0
  156. SinaTools-0.1.1/nlptools/arabiner/nn/BertNestedTagger.py +34 -0
  157. SinaTools-0.1.1/nlptools/arabiner/nn/BertSeqTagger.py +17 -0
  158. SinaTools-0.1.1/nlptools/arabiner/nn/__init__.py +3 -0
  159. SinaTools-0.1.1/nlptools/arabiner/trainers/BaseTrainer.py +117 -0
  160. SinaTools-0.1.1/nlptools/arabiner/trainers/BertNestedTrainer.py +203 -0
  161. SinaTools-0.1.1/nlptools/arabiner/trainers/BertTrainer.py +163 -0
  162. SinaTools-0.1.1/nlptools/arabiner/trainers/__init__.py +3 -0
  163. SinaTools-0.1.1/nlptools/arabiner/utils/__init__.py +0 -0
  164. SinaTools-0.1.1/nlptools/arabiner/utils/data.py +124 -0
  165. SinaTools-0.1.1/nlptools/arabiner/utils/helpers.py +151 -0
  166. SinaTools-0.1.1/nlptools/arabiner/utils/metrics.py +69 -0
  167. SinaTools-0.1.1/nlptools/environment.yml +227 -0
  168. SinaTools-0.1.1/nlptools/install_env.py +13 -0
  169. SinaTools-0.1.1/nlptools/morphology/ALMA_multi_word.py +34 -0
  170. SinaTools-0.1.1/nlptools/morphology/__init__.py +52 -0
  171. SinaTools-0.1.1/nlptools/morphology/charsets.py +60 -0
  172. SinaTools-0.1.1/nlptools/morphology/morph_analyzer.py +170 -0
  173. SinaTools-0.1.1/nlptools/morphology/settings.py +8 -0
  174. SinaTools-0.1.1/nlptools/morphology/tokenizers_words.py +19 -0
  175. SinaTools-0.1.1/nlptools/nlptools.py +1 -0
  176. SinaTools-0.1.1/nlptools/salma/__init__.py +12 -0
  177. SinaTools-0.1.1/nlptools/salma/settings.py +31 -0
  178. SinaTools-0.1.1/nlptools/salma/views.py +459 -0
  179. SinaTools-0.1.1/nlptools/salma/wsd.py +126 -0
  180. SinaTools-0.1.1/nlptools/utils/__init__.py +0 -0
  181. SinaTools-0.1.1/nlptools/utils/corpus_tokenizer.py +73 -0
  182. SinaTools-0.1.1/nlptools/utils/implication.py +662 -0
  183. SinaTools-0.1.1/nlptools/utils/jaccard.py +247 -0
  184. SinaTools-0.1.1/nlptools/utils/parser.py +147 -0
  185. SinaTools-0.1.1/nlptools/utils/readfile.py +3 -0
  186. SinaTools-0.1.1/nlptools/utils/sentence_tokenizer.py +53 -0
  187. SinaTools-0.1.1/nlptools/utils/text_transliteration.py +232 -0
  188. SinaTools-0.1.1/nlptools/utils/utils.py +2 -0
  189. SinaTools-0.1.1/setup.cfg +34 -0
  190. SinaTools-0.1.1/setup.py +95 -0
  191. SinaTools-0.1.1/tests/__init__.py +1 -0
  192. SinaTools-0.1.1/tests/test_nlptools.py +21 -0
@@ -0,0 +1,13 @@
1
+ =======
2
+ Credits
3
+ =======
4
+
5
+ Development Lead
6
+ ----------------
7
+
8
+ * SinaLab <sina.institute.bzu@gmail.com>
9
+
10
+ Contributors
11
+ ------------
12
+
13
+ None yet. Why not be the first?
@@ -0,0 +1,128 @@
1
+ .. highlight:: shell
2
+
3
+ ============
4
+ Contributing
5
+ ============
6
+
7
+ Contributions are welcome, and they are greatly appreciated! Every little bit
8
+ helps, and credit will always be given.
9
+
10
+ You can contribute in many ways:
11
+
12
+ Types of Contributions
13
+ ----------------------
14
+
15
+ Report Bugs
16
+ ~~~~~~~~~~~
17
+
18
+ Report bugs at https://github.com/SinaLab/nlptools/issues.
19
+
20
+ If you are reporting a bug, please include:
21
+
22
+ * Your operating system name and version.
23
+ * Any details about your local setup that might be helpful in troubleshooting.
24
+ * Detailed steps to reproduce the bug.
25
+
26
+ Fix Bugs
27
+ ~~~~~~~~
28
+
29
+ Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
30
+ wanted" is open to whoever wants to implement it.
31
+
32
+ Implement Features
33
+ ~~~~~~~~~~~~~~~~~~
34
+
35
+ Look through the GitHub issues for features. Anything tagged with "enhancement"
36
+ and "help wanted" is open to whoever wants to implement it.
37
+
38
+ Write Documentation
39
+ ~~~~~~~~~~~~~~~~~~~
40
+
41
+ nlptools could always use more documentation, whether as part of the
42
+ official nlptools docs, in docstrings, or even on the web in blog posts,
43
+ articles, and such.
44
+
45
+ Submit Feedback
46
+ ~~~~~~~~~~~~~~~
47
+
48
+ The best way to send feedback is to file an issue at https://github.com/SinaLab/nlptools/issues.
49
+
50
+ If you are proposing a feature:
51
+
52
+ * Explain in detail how it would work.
53
+ * Keep the scope as narrow as possible, to make it easier to implement.
54
+ * Remember that this is a volunteer-driven project, and that contributions
55
+ are welcome :)
56
+
57
+ Get Started!
58
+ ------------
59
+
60
+ Ready to contribute? Here's how to set up `nlptools` for local development.
61
+
62
+ 1. Fork the `nlptools` repo on GitHub.
63
+ 2. Clone your fork locally::
64
+
65
+ $ git clone git@github.com:your_name_here/nlptools.git
66
+
67
+ 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
68
+
69
+ $ mkvirtualenv nlptools
70
+ $ cd nlptools/
71
+ $ python setup.py develop
72
+
73
+ 4. Create a branch for local development::
74
+
75
+ $ git checkout -b name-of-your-bugfix-or-feature
76
+
77
+ Now you can make your changes locally.
78
+
79
+ 5. When you're done making changes, check that your changes pass flake8 and the
80
+ tests, including testing other Python versions with tox::
81
+
82
+ $ flake8 nlptools tests
83
+ $ python setup.py test  # or: pytest
84
+ $ tox
85
+
86
+ To get flake8 and tox, just pip install them into your virtualenv.
87
+
88
+ 6. Commit your changes and push your branch to GitHub::
89
+
90
+ $ git add .
91
+ $ git commit -m "Your detailed description of your changes."
92
+ $ git push origin name-of-your-bugfix-or-feature
93
+
94
+ 7. Submit a pull request through the GitHub website.
95
+
96
+ Pull Request Guidelines
97
+ -----------------------
98
+
99
+ Before you submit a pull request, check that it meets these guidelines:
100
+
101
+ 1. The pull request should include tests.
102
+ 2. If the pull request adds functionality, the docs should be updated. Put
103
+ your new functionality into a function with a docstring, and add the
104
+ feature to the list in README.rst.
105
+ 3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check
106
+ https://travis-ci.com/sina_institute/nlptools/pull_requests
107
+ and make sure that the tests pass for all supported Python versions.
108
+
109
+ Tips
110
+ ----
111
+
112
+ To run a subset of tests::
113
+
114
+
115
+ $ python -m unittest tests.test_nlptools
116
+
117
+ Deploying
118
+ ---------
119
+
120
+ A reminder for the maintainers on how to deploy.
121
+ Make sure all your changes are committed (including an entry in HISTORY.rst).
122
+ Then run::
123
+
124
+ $ bump2version patch # possible: major / minor / patch
125
+ $ git push
126
+ $ git push --tags
127
+
128
+ Travis will then deploy to PyPI if tests pass.
@@ -0,0 +1,8 @@
1
+ =======
2
+ History
3
+ =======
4
+
5
+ 0.1.0 (2023-04-15)
6
+ ------------------
7
+
8
+ * First release on PyPI.
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023, SinaLab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,28 @@
1
+ include AUTHORS.rst
2
+ include CONTRIBUTING.rst
3
+ include HISTORY.rst
4
+ include LICENSE
5
+ include README.rst
6
+
7
+ recursive-include tests *
8
+ recursive-exclude * __pycache__
9
+ recursive-exclude * *.py[co]
10
+
11
+ recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
12
+
13
+
14
+ include setup.py
15
+ include nlptools/VERSION
16
+ include nlptools/utils/*.py
17
+ include nlptools/arabiner/*.py
18
+ include nlptools/arabert/*.py
19
+ include nlptools/DataDownload/*.py
20
+ include nlptools/morphology/*.py
21
+ include nlptools/salma/*.py
22
+ include nlptools/CLI/arabiner/bin/*.py
23
+ include nlptools/CLI/morphology/*.py
24
+ include nlptools/CLI/salma/*.py
25
+ include nlptools/CLI/utils/*.py
26
+ include nlptools/CLI/DataDownload/*.py
27
+ include tests/*.py
28
+ global-exclude *~
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 1.0
2
+ Name: SinaTools
3
+ Version: 0.1.1
4
+ Summary: UNKNOWN
5
+ Home-page: https://github.com/SinaLab/nlptools
6
+ Author: UNKNOWN
7
+ Author-email: UNKNOWN
8
+ License: MIT license
9
+ Description: ========
10
+ nlptools
11
+ ========
12
+
13
+
14
+ .. image:: https://img.shields.io/pypi/v/nlptools.svg
15
+ :target: https://pypi.python.org/pypi/SinaTools
16
+
17
+ .. image:: https://img.shields.io/travis/sina_institute/nlptools.svg
18
+ :target: https://travis-ci.com/sina_institute/SinaTools
19
+
20
+ .. image:: https://readthedocs.org/projects/nlptools/badge/?version=latest
21
+ :target: https://SinaTools.readthedocs.io/en/latest/?version=latest
22
+ :alt: Documentation Status
23
+
24
+
25
+
26
+
27
+ Python Boilerplate contains all the boilerplate you need to create a Python package.
28
+
29
+
30
+ * Free software: MIT license
31
+ * Documentation: https://SinaTools.readthedocs.io.
32
+
33
+
34
+ Features
35
+ --------
36
+
37
+ * TODO
38
+
39
+ Credits
40
+ -------
41
+
42
+ This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.
43
+
44
+ .. _Cookiecutter: https://github.com/audreyr/cookiecutter
45
+ .. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
46
+
47
+
48
+ =======
49
+ History
50
+ =======
51
+
52
+ 0.1.0 (2023-04-15)
53
+ ------------------
54
+
55
+ * First release on PyPI.
56
+
57
+ Keywords: nlptools
58
+ Platform: UNKNOWN
@@ -0,0 +1,37 @@
1
+ ========
2
+ nlptools
3
+ ========
4
+
5
+
6
+ .. image:: https://img.shields.io/pypi/v/nlptools.svg
7
+ :target: https://pypi.python.org/pypi/SinaTools
8
+
9
+ .. image:: https://img.shields.io/travis/sina_institute/nlptools.svg
10
+ :target: https://travis-ci.com/sina_institute/SinaTools
11
+
12
+ .. image:: https://readthedocs.org/projects/nlptools/badge/?version=latest
13
+ :target: https://SinaTools.readthedocs.io/en/latest/?version=latest
14
+ :alt: Documentation Status
15
+
16
+
17
+
18
+
19
+ Python Boilerplate contains all the boilerplate you need to create a Python package.
20
+
21
+
22
+ * Free software: MIT license
23
+ * Documentation: https://SinaTools.readthedocs.io.
24
+
25
+
26
+ Features
27
+ --------
28
+
29
+ * TODO
30
+
31
+ Credits
32
+ -------
33
+
34
+ This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.
35
+
36
+ .. _Cookiecutter: https://github.com/audreyr/cookiecutter
37
+ .. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 1.0
2
+ Name: SinaTools
3
+ Version: 0.1.1
4
+ Summary: UNKNOWN
5
+ Home-page: https://github.com/SinaLab/nlptools
6
+ Author: UNKNOWN
7
+ Author-email: UNKNOWN
8
+ License: MIT license
9
+ Description: ========
10
+ nlptools
11
+ ========
12
+
13
+
14
+ .. image:: https://img.shields.io/pypi/v/nlptools.svg
15
+ :target: https://pypi.python.org/pypi/SinaTools
16
+
17
+ .. image:: https://img.shields.io/travis/sina_institute/nlptools.svg
18
+ :target: https://travis-ci.com/sina_institute/SinaTools
19
+
20
+ .. image:: https://readthedocs.org/projects/nlptools/badge/?version=latest
21
+ :target: https://SinaTools.readthedocs.io/en/latest/?version=latest
22
+ :alt: Documentation Status
23
+
24
+
25
+
26
+
27
+ Python Boilerplate contains all the boilerplate you need to create a Python package.
28
+
29
+
30
+ * Free software: MIT license
31
+ * Documentation: https://SinaTools.readthedocs.io.
32
+
33
+
34
+ Features
35
+ --------
36
+
37
+ * TODO
38
+
39
+ Credits
40
+ -------
41
+
42
+ This package was created with Cookiecutter_ and the `audreyr/cookiecutter-pypackage`_ project template.
43
+
44
+ .. _Cookiecutter: https://github.com/audreyr/cookiecutter
45
+ .. _`audreyr/cookiecutter-pypackage`: https://github.com/audreyr/cookiecutter-pypackage
46
+
47
+
48
+ =======
49
+ History
50
+ =======
51
+
52
+ 0.1.0 (2023-04-15)
53
+ ------------------
54
+
55
+ * First release on PyPI.
56
+
57
+ Keywords: nlptools
58
+ Platform: UNKNOWN
@@ -0,0 +1,191 @@
1
+ AUTHORS.rst
2
+ CONTRIBUTING.rst
3
+ HISTORY.rst
4
+ LICENSE
5
+ MANIFEST.in
6
+ README.rst
7
+ setup.cfg
8
+ setup.py
9
+ SinaTools.egg-info/PKG-INFO
10
+ SinaTools.egg-info/SOURCES.txt
11
+ SinaTools.egg-info/dependency_links.txt
12
+ SinaTools.egg-info/entry_points.txt
13
+ SinaTools.egg-info/not-zip-safe
14
+ SinaTools.egg-info/requires.txt
15
+ SinaTools.egg-info/top_level.txt
16
+ docs/Makefile
17
+ docs/make.bat
18
+ docs/build/_images/download.png
19
+ docs/build/_static/download.png
20
+ docs/build/_static/file.png
21
+ docs/build/_static/minus.png
22
+ docs/build/_static/plus.png
23
+ docs/build/html/_images/SinaLogo.jpg
24
+ docs/build/html/_images/download.png
25
+ docs/build/html/_static/SinaLogo.jpg
26
+ docs/build/html/_static/download.png
27
+ docs/build/html/_static/file.png
28
+ docs/build/html/_static/minus.png
29
+ docs/build/html/_static/plus.png
30
+ docs/source/License.rst
31
+ docs/source/Overview.rst
32
+ docs/source/about.rst
33
+ docs/source/api.rst
34
+ docs/source/authors.rst
35
+ docs/source/cli_tools.rst
36
+ docs/source/conf.py
37
+ docs/source/index.rst
38
+ docs/source/installation.rst
39
+ docs/source/readme.rst
40
+ docs/source/_static/SinaLogo.jpg
41
+ docs/source/_static/download.png
42
+ docs/source/api/DataDownload.rst
43
+ docs/source/api/arabiner.rst
44
+ docs/source/api/morphology.rst
45
+ docs/source/api/salma.rst
46
+ docs/source/api/utils.rst
47
+ docs/source/api/DataDownload/downloader.rst
48
+ docs/source/api/arabiner/bin/infer.rst
49
+ docs/source/api/morphology/morph_analyzer.rst
50
+ docs/source/api/salma/views.rst
51
+ docs/source/api/utils/corpus_tokenizer.rst
52
+ docs/source/api/utils/implication.rst
53
+ docs/source/api/utils/jaccard.rst
54
+ docs/source/api/utils/parser.rst
55
+ docs/source/api/utils/sentence_tokenizer.rst
56
+ docs/source/api/utils/text_transliteration.rst
57
+ docs/source/cli_tools/DataDownload.rst
58
+ docs/source/cli_tools/arabiner.rst
59
+ docs/source/cli_tools/morphology.rst
60
+ docs/source/cli_tools/salma.rst
61
+ docs/source/cli_tools/utils.rst
62
+ docs/source/cli_tools/DataDownload/download_files.rst
63
+ docs/source/cli_tools/DataDownload/get_appdatadir.rst
64
+ docs/source/cli_tools/arabiner/infer.rst
65
+ docs/source/cli_tools/morphology/ALMA_multi_word.rst
66
+ docs/source/cli_tools/morphology/morph_analyzer.rst
67
+ docs/source/cli_tools/salma/salma_tools.rst
68
+ docs/source/cli_tools/utils/arStrip.rst
69
+ docs/source/cli_tools/utils/corpus_tokenizer.rst
70
+ docs/source/cli_tools/utils/implication.rst
71
+ docs/source/cli_tools/utils/jaccard.rst
72
+ docs/source/cli_tools/utils/latin_remove.rst
73
+ docs/source/cli_tools/utils/remove_punc.rst
74
+ docs/source/cli_tools/utils/sentence_tokenizer.rst
75
+ docs/source/cli_tools/utils/text_transliteration.rst
76
+ nlptools/VERSION
77
+ nlptools/__init__.py
78
+ nlptools/environment.yml
79
+ nlptools/install_env.py
80
+ nlptools/nlptools.py
81
+ nlptools/CLI/DataDownload/download_files.py
82
+ nlptools/CLI/arabiner/bin/infer.py
83
+ nlptools/CLI/arabiner/bin/infer2.py
84
+ nlptools/CLI/morphology/ALMA_multi_word.py
85
+ nlptools/CLI/morphology/morph_analyzer.py
86
+ nlptools/CLI/salma/salma_tools.py
87
+ nlptools/CLI/utils/__init__.py
88
+ nlptools/CLI/utils/arStrip.py
89
+ nlptools/CLI/utils/corpus_tokenizer.py
90
+ nlptools/CLI/utils/implication.py
91
+ nlptools/CLI/utils/jaccard.py
92
+ nlptools/CLI/utils/latin_remove.py
93
+ nlptools/CLI/utils/remove_Punc.py
94
+ nlptools/CLI/utils/sentence_tokenizer.py
95
+ nlptools/CLI/utils/text_transliteration.py
96
+ nlptools/DataDownload/__init__.py
97
+ nlptools/DataDownload/downloader.py
98
+ nlptools/arabert/__init__.py
99
+ nlptools/arabert/preprocess.py
100
+ nlptools/arabert/arabert/__init__.py
101
+ nlptools/arabert/arabert/create_classification_data.py
102
+ nlptools/arabert/arabert/create_pretraining_data.py
103
+ nlptools/arabert/arabert/extract_features.py
104
+ nlptools/arabert/arabert/lamb_optimizer.py
105
+ nlptools/arabert/arabert/modeling.py
106
+ nlptools/arabert/arabert/optimization.py
107
+ nlptools/arabert/arabert/run_classifier.py
108
+ nlptools/arabert/arabert/run_pretraining.py
109
+ nlptools/arabert/arabert/run_squad.py
110
+ nlptools/arabert/arabert/tokenization.py
111
+ nlptools/arabert/araelectra/__init__.py
112
+ nlptools/arabert/araelectra/build_openwebtext_pretraining_dataset.py
113
+ nlptools/arabert/araelectra/build_pretraining_dataset.py
114
+ nlptools/arabert/araelectra/build_pretraining_dataset_single_file.py
115
+ nlptools/arabert/araelectra/configure_finetuning.py
116
+ nlptools/arabert/araelectra/configure_pretraining.py
117
+ nlptools/arabert/araelectra/flops_computation.py
118
+ nlptools/arabert/araelectra/run_finetuning.py
119
+ nlptools/arabert/araelectra/run_pretraining.py
120
+ nlptools/arabert/araelectra/finetune/__init__.py
121
+ nlptools/arabert/araelectra/finetune/feature_spec.py
122
+ nlptools/arabert/araelectra/finetune/preprocessing.py
123
+ nlptools/arabert/araelectra/finetune/scorer.py
124
+ nlptools/arabert/araelectra/finetune/task.py
125
+ nlptools/arabert/araelectra/finetune/task_builder.py
126
+ nlptools/arabert/araelectra/model/__init__.py
127
+ nlptools/arabert/araelectra/model/modeling.py
128
+ nlptools/arabert/araelectra/model/optimization.py
129
+ nlptools/arabert/araelectra/model/tokenization.py
130
+ nlptools/arabert/araelectra/pretrain/__init__.py
131
+ nlptools/arabert/araelectra/pretrain/pretrain_data.py
132
+ nlptools/arabert/araelectra/pretrain/pretrain_helpers.py
133
+ nlptools/arabert/araelectra/util/__init__.py
134
+ nlptools/arabert/araelectra/util/training_utils.py
135
+ nlptools/arabert/araelectra/util/utils.py
136
+ nlptools/arabert/aragpt2/__init__.py
137
+ nlptools/arabert/aragpt2/create_pretraining_data.py
138
+ nlptools/arabert/aragpt2/train_bpe_tokenizer.py
139
+ nlptools/arabert/aragpt2/gpt2/__init__.py
140
+ nlptools/arabert/aragpt2/gpt2/lamb_optimizer.py
141
+ nlptools/arabert/aragpt2/gpt2/optimization.py
142
+ nlptools/arabert/aragpt2/gpt2/run_pretraining.py
143
+ nlptools/arabert/aragpt2/grover/__init__.py
144
+ nlptools/arabert/aragpt2/grover/dataloader.py
145
+ nlptools/arabert/aragpt2/grover/modeling.py
146
+ nlptools/arabert/aragpt2/grover/modeling_gpt2.py
147
+ nlptools/arabert/aragpt2/grover/optimization_adafactor.py
148
+ nlptools/arabert/aragpt2/grover/train_tpu.py
149
+ nlptools/arabert/aragpt2/grover/utils.py
150
+ nlptools/arabiner/__init__.py
151
+ nlptools/arabiner/bin/__init__.py
152
+ nlptools/arabiner/bin/eval.py
153
+ nlptools/arabiner/bin/infer.py
154
+ nlptools/arabiner/bin/process.py
155
+ nlptools/arabiner/bin/train.py
156
+ nlptools/arabiner/data/__init__.py
157
+ nlptools/arabiner/data/datasets.py
158
+ nlptools/arabiner/data/transforms.py
159
+ nlptools/arabiner/nn/BaseModel.py
160
+ nlptools/arabiner/nn/BertNestedTagger.py
161
+ nlptools/arabiner/nn/BertSeqTagger.py
162
+ nlptools/arabiner/nn/__init__.py
163
+ nlptools/arabiner/trainers/BaseTrainer.py
164
+ nlptools/arabiner/trainers/BertNestedTrainer.py
165
+ nlptools/arabiner/trainers/BertTrainer.py
166
+ nlptools/arabiner/trainers/__init__.py
167
+ nlptools/arabiner/utils/__init__.py
168
+ nlptools/arabiner/utils/data.py
169
+ nlptools/arabiner/utils/helpers.py
170
+ nlptools/arabiner/utils/metrics.py
171
+ nlptools/morphology/ALMA_multi_word.py
172
+ nlptools/morphology/__init__.py
173
+ nlptools/morphology/charsets.py
174
+ nlptools/morphology/morph_analyzer.py
175
+ nlptools/morphology/settings.py
176
+ nlptools/morphology/tokenizers_words.py
177
+ nlptools/salma/__init__.py
178
+ nlptools/salma/settings.py
179
+ nlptools/salma/views.py
180
+ nlptools/salma/wsd.py
181
+ nlptools/utils/__init__.py
182
+ nlptools/utils/corpus_tokenizer.py
183
+ nlptools/utils/implication.py
184
+ nlptools/utils/jaccard.py
185
+ nlptools/utils/parser.py
186
+ nlptools/utils/readfile.py
187
+ nlptools/utils/sentence_tokenizer.py
188
+ nlptools/utils/text_transliteration.py
189
+ nlptools/utils/utils.py
190
+ tests/__init__.py
191
+ tests/test_nlptools.py
@@ -0,0 +1,18 @@
1
+ [console_scripts]
2
+ arabi_ner = nlptools.CLI.arabiner.bin.infer:main
3
+ arabi_ner2 = nlptools.CLI.arabiner.bin.infer2:main
4
+ install_env = nlptools.install_env:main
5
+ sina_alma_multi_word = nlptools.CLI.morphology.ALMA_multi_word:main
6
+ sina_appdatadir = nlptools.CLI.DataDownload.get_appdatadir:main
7
+ sina_arStrip = nlptools.CLI.utils.arStrip:main
8
+ sina_corpus_tokenizer = nlptools.CLI.utils.corpus_tokenizer:main
9
+ sina_download_files = nlptools.CLI.DataDownload.download_files:main
10
+ sina_implication = nlptools.CLI.utils.implication:main
11
+ sina_jaccard_similarity = nlptools.CLI.utils.jaccard:main
12
+ sina_morph_analyze = nlptools.CLI.morphology.morph_analyzer:main
13
+ sina_remove_latin = nlptools.CLI.utils.latin_remove:main
14
+ sina_remove_punctuation = nlptools.CLI.utils.remove_Punc:main
15
+ sina_salma = nlptools.CLI.salma.salma_tools:main
16
+ sina_sentence_tokenize = nlptools.CLI.utils.sentence_tokenizer:main
17
+ sina_transliterate = nlptools.CLI.utils.text_transliteration:main
18
+
@@ -0,0 +1,12 @@
1
+ six
2
+ farasapy
3
+ tqdm
4
+ requests
5
+ regex
6
+ pathlib
7
+ torch==1.13.0
8
+ transformers==4.24.0
9
+ torchtext==0.14.0
10
+ torchvision==0.14.0
11
+ seqeval==1.2.2
12
+ natsort==7.1.1
@@ -0,0 +1 @@
1
+ nlptools
@@ -0,0 +1,20 @@
1
+ # Minimal makefile for Sphinx documentation: every target is forwarded to
2
+ # "sphinx-build -M <target>" (Sphinx make mode).
3
+ # Keep SPHINXBUILD free of options: -M is only recognised as the first argument.
4
+ # You can set these variables from the command line (BUILDDIR matches make.bat).
5
+ SOURCEDIR = source
6
+ SPHINXOPTS =
7
+ SPHINXBUILD = sphinx-build
8
+ SPHINXPROJ = nlptools
9
+ BUILDDIR = build
10
+
11
+ # Put it first so that "make" without argument is like "make help".
12
+ help:
13
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14
+
15
+ .PHONY: help Makefile
16
+
17
+ # Catch-all target: route all unknown targets to Sphinx using the new
18
+ # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19
+ %: Makefile
20
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
@@ -0,0 +1,35 @@
1
+ @ECHO OFF
2
+
3
+ pushd %~dp0
4
+
5
+ REM Command file for Sphinx documentation: passes the first argument to "sphinx-build -M" as the target; with no argument it runs the Sphinx "help" target.
6
+
7
+ if "%SPHINXBUILD%" == "" (
8
+ set SPHINXBUILD=sphinx-build
9
+ )
10
+ set SOURCEDIR=source
11
+ set BUILDDIR=build
12
+
13
+ if "%1" == "" goto help
14
+
15
+ %SPHINXBUILD% >NUL 2>NUL
16
+ if errorlevel 9009 (
17
+ echo.
18
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19
+ echo.installed, then set the SPHINXBUILD environment variable to point
20
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
21
+ echo.may add the Sphinx directory to PATH.
22
+ echo.
23
+ echo.If you don't have Sphinx installed, grab it from
24
+ echo.http://sphinx-doc.org/
25
+ exit /b 1
26
+ )
27
+
28
+ %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29
+ goto end
30
+
31
+ :help
32
+ %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33
+
34
+ :end
35
+ popd