SinaTools 0.1.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. SinaTools-0.1.1.data/data/nlptools/environment.yml +227 -0
  2. SinaTools-0.1.1.dist-info/AUTHORS.rst +13 -0
  3. SinaTools-0.1.1.dist-info/LICENSE +22 -0
  4. SinaTools-0.1.1.dist-info/METADATA +72 -0
  5. SinaTools-0.1.1.dist-info/RECORD +122 -0
  6. SinaTools-0.1.1.dist-info/WHEEL +6 -0
  7. SinaTools-0.1.1.dist-info/entry_points.txt +18 -0
  8. SinaTools-0.1.1.dist-info/top_level.txt +1 -0
  9. nlptools/CLI/DataDownload/download_files.py +71 -0
  10. nlptools/CLI/arabiner/bin/infer.py +117 -0
  11. nlptools/CLI/arabiner/bin/infer2.py +81 -0
  12. nlptools/CLI/morphology/ALMA_multi_word.py +75 -0
  13. nlptools/CLI/morphology/morph_analyzer.py +91 -0
  14. nlptools/CLI/salma/salma_tools.py +68 -0
  15. nlptools/CLI/utils/__init__.py +0 -0
  16. nlptools/CLI/utils/arStrip.py +99 -0
  17. nlptools/CLI/utils/corpus_tokenizer.py +74 -0
  18. nlptools/CLI/utils/implication.py +92 -0
  19. nlptools/CLI/utils/jaccard.py +96 -0
  20. nlptools/CLI/utils/latin_remove.py +51 -0
  21. nlptools/CLI/utils/remove_Punc.py +53 -0
  22. nlptools/CLI/utils/sentence_tokenizer.py +90 -0
  23. nlptools/CLI/utils/text_transliteration.py +77 -0
  24. nlptools/DataDownload/__init__.py +0 -0
  25. nlptools/DataDownload/downloader.py +185 -0
  26. nlptools/VERSION +1 -0
  27. nlptools/__init__.py +5 -0
  28. nlptools/arabert/__init__.py +1 -0
  29. nlptools/arabert/arabert/__init__.py +14 -0
  30. nlptools/arabert/arabert/create_classification_data.py +260 -0
  31. nlptools/arabert/arabert/create_pretraining_data.py +534 -0
  32. nlptools/arabert/arabert/extract_features.py +444 -0
  33. nlptools/arabert/arabert/lamb_optimizer.py +158 -0
  34. nlptools/arabert/arabert/modeling.py +1027 -0
  35. nlptools/arabert/arabert/optimization.py +202 -0
  36. nlptools/arabert/arabert/run_classifier.py +1078 -0
  37. nlptools/arabert/arabert/run_pretraining.py +593 -0
  38. nlptools/arabert/arabert/run_squad.py +1440 -0
  39. nlptools/arabert/arabert/tokenization.py +414 -0
  40. nlptools/arabert/araelectra/__init__.py +1 -0
  41. nlptools/arabert/araelectra/build_openwebtext_pretraining_dataset.py +103 -0
  42. nlptools/arabert/araelectra/build_pretraining_dataset.py +230 -0
  43. nlptools/arabert/araelectra/build_pretraining_dataset_single_file.py +90 -0
  44. nlptools/arabert/araelectra/configure_finetuning.py +172 -0
  45. nlptools/arabert/araelectra/configure_pretraining.py +143 -0
  46. nlptools/arabert/araelectra/finetune/__init__.py +14 -0
  47. nlptools/arabert/araelectra/finetune/feature_spec.py +56 -0
  48. nlptools/arabert/araelectra/finetune/preprocessing.py +173 -0
  49. nlptools/arabert/araelectra/finetune/scorer.py +54 -0
  50. nlptools/arabert/araelectra/finetune/task.py +74 -0
  51. nlptools/arabert/araelectra/finetune/task_builder.py +70 -0
  52. nlptools/arabert/araelectra/flops_computation.py +215 -0
  53. nlptools/arabert/araelectra/model/__init__.py +14 -0
  54. nlptools/arabert/araelectra/model/modeling.py +1029 -0
  55. nlptools/arabert/araelectra/model/optimization.py +193 -0
  56. nlptools/arabert/araelectra/model/tokenization.py +355 -0
  57. nlptools/arabert/araelectra/pretrain/__init__.py +14 -0
  58. nlptools/arabert/araelectra/pretrain/pretrain_data.py +160 -0
  59. nlptools/arabert/araelectra/pretrain/pretrain_helpers.py +229 -0
  60. nlptools/arabert/araelectra/run_finetuning.py +323 -0
  61. nlptools/arabert/araelectra/run_pretraining.py +469 -0
  62. nlptools/arabert/araelectra/util/__init__.py +14 -0
  63. nlptools/arabert/araelectra/util/training_utils.py +112 -0
  64. nlptools/arabert/araelectra/util/utils.py +109 -0
  65. nlptools/arabert/aragpt2/__init__.py +2 -0
  66. nlptools/arabert/aragpt2/create_pretraining_data.py +95 -0
  67. nlptools/arabert/aragpt2/gpt2/__init__.py +2 -0
  68. nlptools/arabert/aragpt2/gpt2/lamb_optimizer.py +158 -0
  69. nlptools/arabert/aragpt2/gpt2/optimization.py +225 -0
  70. nlptools/arabert/aragpt2/gpt2/run_pretraining.py +397 -0
  71. nlptools/arabert/aragpt2/grover/__init__.py +0 -0
  72. nlptools/arabert/aragpt2/grover/dataloader.py +161 -0
  73. nlptools/arabert/aragpt2/grover/modeling.py +803 -0
  74. nlptools/arabert/aragpt2/grover/modeling_gpt2.py +1196 -0
  75. nlptools/arabert/aragpt2/grover/optimization_adafactor.py +234 -0
  76. nlptools/arabert/aragpt2/grover/train_tpu.py +187 -0
  77. nlptools/arabert/aragpt2/grover/utils.py +234 -0
  78. nlptools/arabert/aragpt2/train_bpe_tokenizer.py +59 -0
  79. nlptools/arabert/preprocess.py +818 -0
  80. nlptools/arabiner/__init__.py +0 -0
  81. nlptools/arabiner/bin/__init__.py +14 -0
  82. nlptools/arabiner/bin/eval.py +87 -0
  83. nlptools/arabiner/bin/infer.py +91 -0
  84. nlptools/arabiner/bin/process.py +140 -0
  85. nlptools/arabiner/bin/train.py +221 -0
  86. nlptools/arabiner/data/__init__.py +1 -0
  87. nlptools/arabiner/data/datasets.py +146 -0
  88. nlptools/arabiner/data/transforms.py +118 -0
  89. nlptools/arabiner/nn/BaseModel.py +22 -0
  90. nlptools/arabiner/nn/BertNestedTagger.py +34 -0
  91. nlptools/arabiner/nn/BertSeqTagger.py +17 -0
  92. nlptools/arabiner/nn/__init__.py +3 -0
  93. nlptools/arabiner/trainers/BaseTrainer.py +117 -0
  94. nlptools/arabiner/trainers/BertNestedTrainer.py +203 -0
  95. nlptools/arabiner/trainers/BertTrainer.py +163 -0
  96. nlptools/arabiner/trainers/__init__.py +3 -0
  97. nlptools/arabiner/utils/__init__.py +0 -0
  98. nlptools/arabiner/utils/data.py +124 -0
  99. nlptools/arabiner/utils/helpers.py +151 -0
  100. nlptools/arabiner/utils/metrics.py +69 -0
  101. nlptools/environment.yml +227 -0
  102. nlptools/install_env.py +13 -0
  103. nlptools/morphology/ALMA_multi_word.py +34 -0
  104. nlptools/morphology/__init__.py +52 -0
  105. nlptools/morphology/charsets.py +60 -0
  106. nlptools/morphology/morph_analyzer.py +170 -0
  107. nlptools/morphology/settings.py +8 -0
  108. nlptools/morphology/tokenizers_words.py +19 -0
  109. nlptools/nlptools.py +1 -0
  110. nlptools/salma/__init__.py +12 -0
  111. nlptools/salma/settings.py +31 -0
  112. nlptools/salma/views.py +459 -0
  113. nlptools/salma/wsd.py +126 -0
  114. nlptools/utils/__init__.py +0 -0
  115. nlptools/utils/corpus_tokenizer.py +73 -0
  116. nlptools/utils/implication.py +662 -0
  117. nlptools/utils/jaccard.py +247 -0
  118. nlptools/utils/parser.py +147 -0
  119. nlptools/utils/readfile.py +3 -0
  120. nlptools/utils/sentence_tokenizer.py +53 -0
  121. nlptools/utils/text_transliteration.py +232 -0
  122. nlptools/utils/utils.py +2 -0
nlptools/CLI/utils/latin_remove.py ADDED
@@ -0,0 +1,51 @@
+ """
+ About:
+ ------
+ The sina_remove_latin tool removes Latin characters from the input text.
+
+ Usage:
+ ------
+ Below is the usage information that can be generated by running sina_remove_latin --help.
+
+ .. code-block:: none
+
+     Usage:
+         sina_remove_latin --text=TEXT
+
+ Examples:
+ ---------
+
+ .. code-block:: none
+
+     sina_remove_latin --text "123test"
+
+ Note:
+ -----
+
+ .. code-block:: none
+
+     - This tool is specific to Arabic text, as it focuses on Arabic linguistic elements.
+     - Ensure that the input text is encoded in UTF-8 or a compatible format.
+     - This tool removes Latin characters only; if the input consists solely of Arabic
+       characters or digits, the output is identical to the input.
+
+ """
+
+ import argparse
+ from nlptools.utils.parser import remove_latin
+
+
+ def main():
+     parser = argparse.ArgumentParser(description='Remove Latin characters from the input text')
+     parser.add_argument('--text', type=str, required=True, help='The input text')
+     args = parser.parse_args()
+
+     result = remove_latin(args.text)
+     print(result)
+
+
+ if __name__ == '__main__':
+     main()
+
+ # Example: sina_remove_latin --text "123test"
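For readers who want to call this from Python rather than the CLI, a minimal sketch follows. It assumes `remove_latin` accepts a string and returns it with Latin characters removed, as the docstring above describes; the sample string and expected output are illustrative only.

    from nlptools.utils.parser import remove_latin

    # Mixed digit/Latin input: the Latin letters should be stripped,
    # while digits (and Arabic characters) pass through unchanged.
    print(remove_latin("123test"))   # expected: "123"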
nlptools/CLI/utils/remove_Punc.py ADDED
@@ -0,0 +1,53 @@
+ """
+ About:
+ ------
+ The sina_remove_punctuation tool removes punctuation marks from the input text.
+
+ Usage:
+ ------
+ Below is the usage information that can be generated by running sina_remove_punctuation --help.
+
+ .. code-block:: none
+
+     Usage:
+         sina_remove_punctuation --text=TEXT
+
+ Examples:
+ ---------
+
+ .. code-block:: none
+
+     sina_remove_punctuation --text "te%s@t...!!?"
+
+ Note:
+ -----
+
+ .. code-block:: none
+
+     - This tool is specific to Arabic text, as it focuses on Arabic linguistic elements.
+     - Ensure that the input text is encoded in UTF-8 or a compatible format.
+ """
+
+ import argparse
+ from nlptools.utils.parser import remove_punctuation
+ #from nlptools.utils.parser import read_file
+ #from nlptools.utils.parser import write_file
+
+
+ def main():
+     parser = argparse.ArgumentParser(description='Remove punctuation marks from the input text')
+     parser.add_argument('--text', required=True, help='The input text')
+     # parser.add_argument('myFile', type=argparse.FileType('r'), help='Input CSV file')
+     args = parser.parse_args()
+
+     result = remove_punctuation(args.text)
+     print(result)
+
+
+ if __name__ == '__main__':
+     main()
+
+ # Example: sina_remove_punctuation --text "your text"
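`remove_punctuation` can likewise be used programmatically; a minimal sketch, assuming it takes a string and returns it with punctuation marks deleted (per the docstring above):

    from nlptools.utils.parser import remove_punctuation

    # Punctuation marks are deleted; letters and digits are kept.
    print(remove_punctuation("te%s@t...!!?"))   # expected: "test"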
nlptools/CLI/utils/sentence_tokenizer.py ADDED
@@ -0,0 +1,90 @@
+ """
+ About:
+ ------
+
+ The sina_sentence_tokenize tool allows you to tokenize text into sentences using the SinaTools utility. It provides
+ flexibility in tokenizing at different punctuation marks, including dots, question marks, and exclamation marks. It
+ also allows tokenization at new lines.
+
+ Usage:
+ ------
+ Below is the usage information that can be generated by running sina_sentence_tokenize --help.
+
+ .. code-block:: none
+
+     Usage:
+         sina_sentence_tokenize --text=TEXT [options]
+         sina_sentence_tokenize --file=FILE [options]
+
+ .. code-block:: none
+
+     Options:
+       --text TEXT
+             Text to be tokenized into sentences.
+       --file FILE
+             File containing the text to be tokenized into sentences.
+       --dot
+             Tokenize at dots.
+       --new_line
+             Tokenize at new lines.
+       --question_mark
+             Tokenize at question marks.
+       --exclamation_mark
+             Tokenize at exclamation marks.
+
+ Examples:
+ ---------
+
+ .. code-block:: none
+
+     sina_sentence_tokenize --text "Your text here. Does it work? Yes! Try with new lines." --dot --question_mark --exclamation_mark
+
+     sina_sentence_tokenize --file "path/to/your/file.txt" --dot --question_mark --exclamation_mark
+
+ Note:
+ -----
+
+ .. code-block:: none
+
+     The tokenization options allow for a customized experience. You can choose any combination of the options, or even
+     none of them, to achieve the desired sentence tokenization behavior. If no tokenization options are provided, the
+     tool will use the default settings implemented in the underlying `sent_tokenize` function of SinaTools.
+
+ """
+ import argparse
+ from nlptools.utils.sentence_tokenizer import sent_tokenize
+ from nlptools.utils.readfile import read_file
+
+ def main():
+     parser = argparse.ArgumentParser(description='Sentence tokenization using SinaTools')
+
+     # Arguments for the text, file, and tokenization options
+     parser.add_argument('--text', type=str, help='Text to be tokenized into sentences')
+     parser.add_argument('--file', type=str, help='File containing the text to be tokenized into sentences')
+     parser.add_argument('--dot', action='store_true', help='Tokenize at dots')
+     parser.add_argument('--new_line', action='store_true', help='Tokenize at new lines')
+     parser.add_argument('--question_mark', action='store_true', help='Tokenize at question marks')
+     parser.add_argument('--exclamation_mark', action='store_true', help='Tokenize at exclamation marks')
+
+     args = parser.parse_args()
+
+     # Check that either text or file is provided
+     if args.text is None and args.file is None:
+         print("Either --text or --file argument must be provided.")
+         return
+
+     # Join only in the file case: read_file returns a list of lines, while --text is already a string
+     text_content = args.text if args.text else " ".join(read_file(args.file))
+
+     # Perform sentence tokenization
+     sentences = sent_tokenize(text_content, dot=args.dot, new_line=args.new_line,
+                               question_mark=args.question_mark, exclamation_mark=args.exclamation_mark)
+
+     # Print each sentence on a new line
+     for sentence in sentences:
+         print(sentence)
+
+ if __name__ == '__main__':
+     main()
+ # sina_sentence_tokenize --text "Your text here. Does it work? Yes! Try with new lines." --dot --question_mark --exclamation_mark
+ # sina_sentence_tokenize --file "path/to/your/file.txt" --dot --question_mark --exclamation_mark
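The `sent_tokenize` call at the heart of this tool can also be used directly; a minimal sketch, assuming the keyword flags behave as documented above (each flag enables splitting at the corresponding punctuation mark):

    from nlptools.utils.sentence_tokenizer import sent_tokenize

    text = "Your text here. Does it work? Yes! Try with new lines."
    # Split at dots, question marks, and exclamation marks, but not at new lines.
    for sentence in sent_tokenize(text, dot=True, new_line=False,
                                  question_mark=True, exclamation_mark=True):
        print(sentence)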
nlptools/CLI/utils/text_transliteration.py ADDED
@@ -0,0 +1,77 @@
+ """
+ About:
+ ------
+
+ The sina_transliterate tool allows you to transliterate text using the SinaTools utility. This command-line utility
+ takes in a text and a desired schema, and outputs the transliterated text.
+
+ Usage:
+ ------
+ Below is the usage information that can be generated by running sina_transliterate --help.
+
+ .. code-block:: none
+
+     sina_transliterate --text=TEXT --schema=SCHEMA
+
+     sina_transliterate --file=FILE --schema=SCHEMA
+
+ Options:
+ --------
+
+ .. code-block:: none
+
+     --text TEXT
+           Text to be transliterated.
+     --schema SCHEMA
+           Transliteration schema to be used: either bw2ar or ar2bw.
+
+ Examples:
+ ---------
+
+ .. code-block:: none
+
+     sina_transliterate --text "klmp" --schema "bw2ar"
+     sina_transliterate --file "path/to/your/file.txt" --schema "ar2bw"
+
+ Note:
+ -----
+
+ .. code-block:: none
+
+     For the available transliteration schemas and more details, please refer to the SinaTools documentation or the
+     source code of the function `perform_transliteration`.
+
+ """
+ import argparse
+ from nlptools.utils.text_transliteration import perform_transliteration
+ from nlptools.utils.readfile import read_file
+
+ def main():
+     parser = argparse.ArgumentParser(description='Perform text transliteration using SinaTools')
+
+     # Arguments for the text, file, and schema
+     parser.add_argument('--text', type=str, help='Text to be transliterated')
+     parser.add_argument('--file', type=str, help='File containing the text to be transliterated')
+     parser.add_argument('--schema', type=str, required=True, help='Transliteration schema to be used')
+
+     args = parser.parse_args()
+
+     # Check that either text or file is provided
+     if args.text is None and args.file is None:
+         print("Either --text or --file argument must be provided.")
+         return
+
+     text_content = args.text if args.text else " ".join(read_file(args.file))
+
+     # Perform transliteration
+     result = perform_transliteration(text_content, args.schema)
+
+     print(result)
+
+ if __name__ == '__main__':
+     main()
+
+ # sina_transliterate --text "example text" --schema "bw2ar"
+ # sina_transliterate --file "path/to/your/file.txt" --schema "bw2ar"
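As with the other utilities, `perform_transliteration` can be called directly; a minimal sketch, assuming it takes the text and a schema name (`bw2ar` or `ar2bw`) and returns the transliterated string. The round-trip expectation is an assumption, not documented behavior.

    from nlptools.utils.text_transliteration import perform_transliteration

    # Buckwalter to Arabic script, then back again.
    arabic = perform_transliteration("klmp", "bw2ar")
    print(arabic)
    print(perform_transliteration(arabic, "ar2bw"))   # expected (assumed): "klmp"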
nlptools/DataDownload/__init__.py ADDED
File without changes
nlptools/DataDownload/downloader.py ADDED
@@ -0,0 +1,185 @@
+ import os
+ import sys
+ from pathlib import Path
+ import requests
+ import zipfile
+ from tqdm import tqdm
+ import tarfile
+
+ urls = {
+     'morph': 'https://portal.sina.birzeit.edu/ALMA27012000.pickle',
+     'ner': 'https://portal.sina.birzeit.edu/Wj27012000.tar.gz',
+     'salma_model': 'https://portal.sina.birzeit.edu/bert-base-arabertv02_22_May_2021_00h_allglosses_unused01.zip',
+     'salma_tokenizer': 'https://portal.sina.birzeit.edu/bert-base-arabertv02.zip',
+     'glosses_dic': 'https://portal.sina.birzeit.edu/glosses_dic.pickle',
+     'lemma_dic': 'https://portal.sina.birzeit.edu/lemmas_dic.pickle',
+     'five_grams': 'https://portal.sina.birzeit.edu/five_grams.pickle',
+     'four_grams': 'https://portal.sina.birzeit.edu/four_grams.pickle',
+     'three_grams': 'https://portal.sina.birzeit.edu/three_grams.pickle',
+     'two_grams': 'https://portal.sina.birzeit.edu/two_grams.pickle'
+ }
+
+ def get_appdatadir():
+     """
+     Returns the path to the directory where the application data is stored, creating the directory if it does not exist.
+
+     Returns:
+     --------
+     Path: A pathlib.Path object representing the path to the application data directory.
+
+     Raises:
+     -------
+     None.
+
+     **Example:**
+
+     .. highlight:: python
+     .. code-block:: python
+
+         from nlptools.DataDownload import downloader
+
+         path = downloader.get_appdatadir()
+
+         Windows: 'C:/Users/<Username>/AppData/Roaming/nlptools'
+         macOS: '/Users/<Username>/Library/Application Support/nlptools'
+         Linux: '/home/<Username>/.nlptools'
+         Google Colab: '/content/nlptools'
+
+     """
+     home = str(Path.home())
+     if 'google.colab' in sys.modules:
+         path = Path('/content/nlptools')
+     elif sys.platform == 'win32':
+         path = Path(home, 'AppData/Roaming/nlptools')
+     elif sys.platform == 'darwin':
+         path = Path(home, 'Library/Application Support/nlptools')
+     else:
+         path = Path(home, '.nlptools')
+
+     if not os.path.exists(path):
+         os.makedirs(path)
+
+     return path
+
+ def download_file(url='https://portal.sina.birzeit.edu/Wj27012000.tar.gz', dest_path=get_appdatadir()):
+     """
+     Downloads a file from the specified URL and saves it to the specified destination path.
+
+     Args:
+         url (:obj:`str`): The URL of the file to be downloaded.
+         dest_path (:obj:`str`): The destination path to save the downloaded file to. Defaults
+             to the user's application data directory.
+
+     Returns:
+         :obj:`str`: The absolute path of the downloaded file.
+
+     Raises:
+         requests.exceptions.HTTPError: If there was an HTTP error during the request.
+
+     Note:
+         This method uses the `requests` and `tqdm` libraries. It also checks the type of the
+         downloaded file and extracts it if it is a compressed archive.
+
+     **Example:**
+
+     .. highlight:: python
+     .. code-block:: python
+
+         download_file(url='https://example.com/data.zip', dest_path='data/')
+
+     """
+     filename = os.path.basename(url)
+     file_path = os.path.join(dest_path, filename)
+
+     print(filename)
+     headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
+
+     try:
+         with requests.get(url, headers=headers, stream=True) as r:
+             r.raise_for_status()
+             with open(file_path, 'wb') as f:
+                 total_size = int(r.headers.get('content-length', 0))
+                 block_size = 8192
+                 progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True)
+                 for chunk in r.iter_content(chunk_size=block_size):
+                     if chunk:
+                         f.write(chunk)
+                         progress_bar.update(len(chunk))
+                 progress_bar.close()
+
+         # Check the file type and extract accordingly
+         file_extension = os.path.splitext(file_path)[1]
+         extracted_folder_name = os.path.splitext(file_path)[0]
+
+         if file_extension == '.zip':
+             extract_zip(file_path, extracted_folder_name)
+         elif file_extension == '.gz':
+             extract_tar(file_path, extracted_folder_name)
+         elif file_extension == '.pickle':
+             print(f'Done: {file_extension}')
+         else:
+             print(f'Unsupported file type for extraction: {file_extension}')
+
+         return file_path
+
+     except requests.exceptions.HTTPError as e:
+         if e.response.status_code == 403:
+             print(f'Error 403: Forbidden. The requested file URL {url} could not be downloaded due to insufficient permissions. Please check the URL and try again.')
+         else:
+             print('An error occurred while downloading the file:', e)
+
+ def extract_zip(file_path, extracted_folder_name):
+     """
+     Extracts the contents of a ZIP file to the specified folder.
+
+     Args:
+         file_path (str): The path to the ZIP file.
+         extracted_folder_name (str): The name of the folder where the contents will be extracted.
+
+     Returns:
+         None
+     """
+     with zipfile.ZipFile(file_path, 'r') as zip_file:
+         zip_file.extractall(extracted_folder_name)
+
+
+ def extract_tar(file_path, dest_path):
+     """
+     Extracts the contents of a tar.gz file to the specified destination path.
+
+     Args:
+         file_path (str): The path to the tar.gz file.
+         dest_path (str): The destination path where the contents will be extracted.
+
+     Returns:
+         str: The path to the extracted folder if successful, or None if extraction failed.
+     """
+     try:
+         with tarfile.open(file_path, 'r:gz') as tar:
+             # Remove the extension from the file name
+             extracted_folder_name = os.path.splitext(os.path.basename(file_path))[0]
+             extracted_folder_path = os.path.join(dest_path, extracted_folder_name)
+
+             # Extract the contents to the destination path
+             tar.extractall(dest_path)
+
+         # Remove the compressed file once the archive is closed
+         os.remove(file_path)
+
+         return extracted_folder_path
+
+     except tarfile.ReadError:
+         print(f'Failed to extract the file: {file_path}')
+         return None
+
+
+ def download_files():
+     """
+     Downloads each file in the `urls` dictionary using the download_file() function.
+
+     Returns:
+         None
+     """
+     for url in urls.values():
+         download_file(url)
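A typical first-run sequence with this module downloads one named resource into the application data directory; a minimal sketch, using only the functions and `urls` keys defined above:

    from nlptools.DataDownload import downloader

    # Resolve (and create, if needed) the per-user data directory.
    data_dir = downloader.get_appdatadir()

    # Fetch a single resource by its key in the `urls` dictionary;
    # .zip and .tar.gz archives are extracted automatically.
    downloader.download_file(downloader.urls['morph'], dest_path=data_dir)

    # Or fetch everything at once:
    # downloader.download_files()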
nlptools/VERSION ADDED
@@ -0,0 +1 @@
+ 0.1.1
nlptools/__init__.py ADDED
@@ -0,0 +1,5 @@
+ """Top-level package for nlptools."""
+
+ __author__ = """SinaLab"""
+ __email__ = 'sina.institute.bzu@gmail.com'
+ __version__ = '0.8.5'
nlptools/arabert/__init__.py ADDED
@@ -0,0 +1 @@
+ # coding=utf-8
nlptools/arabert/arabert/__init__.py ADDED
@@ -0,0 +1,14 @@
+ # coding=utf-8
+ # Copyright 2018 The Google AI Language Team Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.