SinaTools 0.1.4__py2.py3-none-any.whl → 0.1.8__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {SinaTools-0.1.4.dist-info → SinaTools-0.1.8.dist-info}/METADATA +10 -10
- SinaTools-0.1.8.dist-info/RECORD +101 -0
- SinaTools-0.1.8.dist-info/entry_points.txt +18 -0
- SinaTools-0.1.8.dist-info/top_level.txt +1 -0
- {nlptools → sinatools}/CLI/DataDownload/download_files.py +9 -9
- {nlptools → sinatools}/CLI/morphology/ALMA_multi_word.py +10 -20
- sinatools/CLI/morphology/morph_analyzer.py +80 -0
- nlptools/CLI/arabiner/bin/infer2.py → sinatools/CLI/ner/corpus_entity_extractor.py +5 -9
- nlptools/CLI/arabiner/bin/infer.py → sinatools/CLI/ner/entity_extractor.py +4 -8
- {nlptools → sinatools}/CLI/salma/salma_tools.py +8 -8
- {nlptools → sinatools}/CLI/utils/arStrip.py +10 -21
- sinatools/CLI/utils/corpus_tokenizer.py +50 -0
- {nlptools → sinatools}/CLI/utils/implication.py +9 -9
- {nlptools → sinatools}/CLI/utils/jaccard.py +10 -10
- sinatools/CLI/utils/remove_latin.py +34 -0
- sinatools/CLI/utils/remove_punctuation.py +42 -0
- {nlptools → sinatools}/CLI/utils/sentence_tokenizer.py +9 -22
- {nlptools → sinatools}/CLI/utils/text_transliteration.py +10 -17
- {nlptools → sinatools}/DataDownload/downloader.py +9 -9
- sinatools/VERSION +1 -0
- {nlptools → sinatools}/__init__.py +1 -1
- {nlptools → sinatools}/morphology/ALMA_multi_word.py +4 -5
- {nlptools → sinatools}/morphology/__init__.py +4 -14
- sinatools/morphology/morph_analyzer.py +172 -0
- sinatools/ner/__init__.py +12 -0
- nlptools/arabiner/bin/infer.py → sinatools/ner/entity_extractor.py +9 -8
- {nlptools → sinatools}/salma/__init__.py +2 -2
- {nlptools → sinatools}/salma/settings.py +1 -1
- {nlptools → sinatools}/salma/views.py +9 -9
- {nlptools → sinatools}/salma/wsd.py +2 -2
- {nlptools/morphology → sinatools/utils}/charsets.py +1 -3
- {nlptools → sinatools}/utils/implication.py +10 -10
- {nlptools → sinatools}/utils/jaccard.py +2 -2
- {nlptools → sinatools}/utils/parser.py +18 -21
- {nlptools → sinatools}/utils/text_transliteration.py +1 -1
- nlptools/utils/corpus_tokenizer.py → sinatools/utils/tokenizer.py +58 -5
- {nlptools/morphology → sinatools/utils}/tokenizers_words.py +3 -6
- SinaTools-0.1.4.dist-info/RECORD +0 -122
- SinaTools-0.1.4.dist-info/entry_points.txt +0 -18
- SinaTools-0.1.4.dist-info/top_level.txt +0 -1
- nlptools/CLI/morphology/morph_analyzer.py +0 -91
- nlptools/CLI/utils/corpus_tokenizer.py +0 -74
- nlptools/CLI/utils/latin_remove.py +0 -51
- nlptools/CLI/utils/remove_Punc.py +0 -53
- nlptools/VERSION +0 -1
- nlptools/arabiner/bin/__init__.py +0 -14
- nlptools/arabiner/bin/eval.py +0 -87
- nlptools/arabiner/bin/process.py +0 -140
- nlptools/arabiner/bin/train.py +0 -221
- nlptools/arabiner/data/__init__.py +0 -1
- nlptools/arabiner/data/datasets.py +0 -146
- nlptools/arabiner/data/transforms.py +0 -118
- nlptools/arabiner/nn/BaseModel.py +0 -22
- nlptools/arabiner/nn/BertNestedTagger.py +0 -34
- nlptools/arabiner/nn/BertSeqTagger.py +0 -17
- nlptools/arabiner/nn/__init__.py +0 -3
- nlptools/arabiner/trainers/BaseTrainer.py +0 -117
- nlptools/arabiner/trainers/BertNestedTrainer.py +0 -203
- nlptools/arabiner/trainers/BertTrainer.py +0 -163
- nlptools/arabiner/trainers/__init__.py +0 -3
- nlptools/arabiner/utils/__init__.py +0 -0
- nlptools/arabiner/utils/data.py +0 -124
- nlptools/arabiner/utils/helpers.py +0 -151
- nlptools/arabiner/utils/metrics.py +0 -69
- nlptools/morphology/morph_analyzer.py +0 -171
- nlptools/morphology/settings.py +0 -8
- nlptools/utils/__init__.py +0 -0
- nlptools/utils/sentence_tokenizer.py +0 -53
- {SinaTools-0.1.4.data/data/nlptools → SinaTools-0.1.8.data/data/sinatools}/environment.yml +0 -0
- {SinaTools-0.1.4.dist-info → SinaTools-0.1.8.dist-info}/AUTHORS.rst +0 -0
- {SinaTools-0.1.4.dist-info → SinaTools-0.1.8.dist-info}/LICENSE +0 -0
- {SinaTools-0.1.4.dist-info → SinaTools-0.1.8.dist-info}/WHEEL +0 -0
- {nlptools → sinatools}/CLI/utils/__init__.py +0 -0
- {nlptools → sinatools}/DataDownload/__init__.py +0 -0
- {nlptools → sinatools}/arabert/__init__.py +0 -0
- {nlptools → sinatools}/arabert/arabert/__init__.py +0 -0
- {nlptools → sinatools}/arabert/arabert/create_classification_data.py +0 -0
- {nlptools → sinatools}/arabert/arabert/create_pretraining_data.py +0 -0
- {nlptools → sinatools}/arabert/arabert/extract_features.py +0 -0
- {nlptools → sinatools}/arabert/arabert/lamb_optimizer.py +0 -0
- {nlptools → sinatools}/arabert/arabert/modeling.py +0 -0
- {nlptools → sinatools}/arabert/arabert/optimization.py +0 -0
- {nlptools → sinatools}/arabert/arabert/run_classifier.py +0 -0
- {nlptools → sinatools}/arabert/arabert/run_pretraining.py +0 -0
- {nlptools → sinatools}/arabert/arabert/run_squad.py +0 -0
- {nlptools → sinatools}/arabert/arabert/tokenization.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/__init__.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/build_pretraining_dataset.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/configure_finetuning.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/configure_pretraining.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/finetune/__init__.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/finetune/feature_spec.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/finetune/preprocessing.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/finetune/scorer.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/finetune/task.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/finetune/task_builder.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/flops_computation.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/model/__init__.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/model/modeling.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/model/optimization.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/model/tokenization.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/pretrain/__init__.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/pretrain/pretrain_data.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/pretrain/pretrain_helpers.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/run_finetuning.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/run_pretraining.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/util/__init__.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/util/training_utils.py +0 -0
- {nlptools → sinatools}/arabert/araelectra/util/utils.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/__init__.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/create_pretraining_data.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/gpt2/__init__.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/gpt2/optimization.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/gpt2/run_pretraining.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/__init__.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/dataloader.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/modeling.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/modeling_gpt2.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/optimization_adafactor.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/train_tpu.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/grover/utils.py +0 -0
- {nlptools → sinatools}/arabert/aragpt2/train_bpe_tokenizer.py +0 -0
- {nlptools → sinatools}/arabert/preprocess.py +0 -0
- {nlptools → sinatools}/environment.yml +0 -0
- {nlptools → sinatools}/install_env.py +0 -0
- /nlptools/nlptools.py → /sinatools/sinatools.py +0 -0
- {nlptools/arabiner → sinatools/utils}/__init__.py +0 -0
- {nlptools → sinatools}/utils/readfile.py +0 -0
- {nlptools → sinatools}/utils/utils.py +0 -0
{nlptools → sinatools}/CLI/utils/jaccard.py RENAMED
@@ -1,18 +1,18 @@
 """
 About:
 ------
-The
+The jaccard tool computes the Jaccard similarity between two sets of strings. The Jaccard similarity is the size of the intersection divided by the size of the union of the sample sets. It provides a measure of similarity between two sets.
 
 Usage:
 ------
-Below is the usage information that can be generated by running
+Below is the usage information that can be generated by running jaccard --help.
 
 .. code-block:: none
 
     Usage:
-
+        jaccard --list1="WORD1, WORD2" --list2="WORD1,WORD2" --delimiter="DELIMITER" --selection="SELECTION" [OPTIONS]
 
-
+        jaccard --file1=File1 --file2=File2 --delimiter="DELIMITER" --selection="SELECTION" [OPTIONS]
 
 .. code-block:: none
 
@@ -39,9 +39,9 @@ Examples:
 
 .. code-block:: none
 
-
+    jaccard --list1 "word1,word2" --list2 "word1, word2" --delimiter "," --selection "jaccardAll" --ignoreAllDiacriticsButNotShadda --ignoreShaddaDiacritic
 
-
+    jaccard --file1 "path/to/your/file1.txt" --file2 "path/to/your/file2.txt" --delimiter "," --selection "jaccardAll" --ignoreAllDiacriticsButNotShadda --ignoreShaddaDiacritic
 
 Note:
 -----
@@ -55,8 +55,8 @@ Note:
 """
 
 import argparse
-from
-from
+from sinatools.utils.jaccard import jaccard
+from sinatools.utils.readfile import read_file
 
 
 def main():
@@ -92,5 +92,5 @@ def main():
 if __name__ == '__main__':
     main()
 
-#
-#
+# jaccard_similarity --list1 "word1,word2" --list2 "word1, word2" --delimiter "," --selection "jaccardAll" --ignoreAllDiacriticsButNotShadda --ignoreShaddaDiacritic
+# jaccard_similarity --file1 "path/to/your/file1.txt" --file2 "path/to/your/file2.txt" --delimiter "," --selection "jaccardAll" --ignoreAllDiacriticsButNotShadda --ignoreShaddaDiacritic
sinatools/CLI/utils/remove_latin.py ADDED
@@ -0,0 +1,34 @@
+"""
+About:
+------
+The remove_latin command removes Latin characters from the input text.
+
+Usage:
+------
+Below is the usage information that can be generated by running remove_latin --help.
+
+.. code-block:: none
+    remove_latin --text=TEXT
+    remove_latin --file "path/to/your/file.txt"
+
+Examples:
+---------
+.. code-block:: none
+    latin_remove --text "123test"
+    latin_remove --file "path/to/your/file.txt"
+"""
+
+import argparse
+from sinatools.utils.parser import remove_latin
+
+
+def main():
+    parser = argparse.ArgumentParser(description='remove latin characters from the text')
+
+    parser.add_argument('--text', type=str, required=True, help='The input text')
+    args = parser.parse_args()
+    result = remove_latin(args.text)
+
+    print(result)
+if __name__ == '__main__':
+    main()
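For readers tracking the rename, the new CLI is a thin wrapper around a single parser helper. A minimal sketch of calling it directly from Python, assuming remove_latin(text) returns the cleaned string, as the CLI body above implies (the sample input "123test" is taken from the docstring; how digits are treated is not shown in this diff):

    from sinatools.utils.parser import remove_latin

    # Assumption: returns the input with Latin characters deleted,
    # matching what the CLI above prints.
    print(remove_latin("123test"))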
sinatools/CLI/utils/remove_punctuation.py ADDED
@@ -0,0 +1,42 @@
+"""
+About:
+------
+The remove_punctuation command removes punctuation marks from the input text.
+
+Usage:
+------
+Below is the usage information that can be generated by running remove_punctuation --help.
+
+.. code-block:: none
+
+    Usage:
+        remove_punctuation --text=TEXT
+        remove_punctuation --file "path/to/your/file.txt"
+
+Examples:
+---------
+.. code-block:: none
+
+    remove_punctuation --text "te%s@t...!!?"
+    remove_punctuation --file "path/to/your/file.txt"
+"""
+
+import argparse
+from sinatools.utils.parser import remove_punctuation
+#from sinatools.utils.parser import read_file
+#from sinatools.utils.parser import write_file
+
+
+def main():
+    parser = argparse.ArgumentParser(description='remove punctuation marks from the text')
+
+    parser.add_argument('--text', required=True, help="input text")
+    # parser.add_argument('myFile', type=argparse.FileType('r'), help='Input file csv')
+    args = parser.parse_args()
+    result = remove_punctuation(args.text)
+
+    print(result)
+if __name__ == '__main__':
+    main()
+
+
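The same pattern applies to the punctuation helper. A minimal sketch, assuming remove_punctuation(text) returns the stripped string (the sample string comes from the docstring above; whether % and @ count as punctuation is not shown in this diff):

    from sinatools.utils.parser import remove_punctuation

    # Assumption: punctuation marks are deleted and the remaining
    # characters are returned unchanged.
    print(remove_punctuation("te%s@t...!!?"))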
{nlptools → sinatools}/CLI/utils/sentence_tokenizer.py RENAMED
@@ -2,20 +2,19 @@
 About:
 ------
 
-The
+The sentence_tokenizer command allows you to tokenize text into sentences using the SinaTools utility. It provides
 flexibility in tokenizing at different punctuation marks, including dots, question marks, and exclamation marks. It also
 allows tokenization at new lines.
 
 Usage:
 ------
-Below is the usage information that can be generated by running
+Below is the usage information that can be generated by running sentence_tokenizer --help.
 
 .. code-block:: none
 
     Usage:
-
-
-        sina_sentence_tokenize --file=FILE [options]
+        sentence_tokenizer --text=TEXT [options]
+        sentence_tokenizer --file=FILE [options]
 
 .. code-block:: none
 
@@ -38,23 +37,13 @@ Examples:
 
 .. code-block:: none
 
-
-
-    sina_sentence_tokenize --file "path/to/your/file.txt" --dot --question_mark --exclamation_mark
-
-Note:
------
-
-.. code-block:: none
-
-    - The tokenization options allow for a customized experience. You can choose any combination of the options, or even none
-    - of them, to achieve the desired sentence tokenization behavior. If no tokenization options are provided, the tool will
-    - use default settings as implemented in the underlying `sent_tokenize` function of SinaTools.
+    sentence_tokenizer --text "Your text here. Does it work? Yes! Try with new lines." --dot --question_mark --exclamation_mark
+    sentence_tokenizer --file "path/to/your/file.txt" --dot --question_mark --exclamation_mark
 
 """
 import argparse
-from
-from
+from sinatools.utils.tokenizer import sentence_tokenizer
+from sinatools.utils.readfile import read_file
 
 def main():
     parser = argparse.ArgumentParser(description='Sentence Tokenization using SinaTools')
@@ -77,7 +66,7 @@ def main():
     text_content = args.text if args.text else read_file(args.file)
 
     # Perform sentence tokenization
-    sentences =
+    sentences = sentence_tokenizer(" ".join(text_content), dot=args.dot, new_line=args.new_line,
                                    question_mark=args.question_mark, exclamation_mark=args.exclamation_mark)
 
     # Print each sentence in a new line
@@ -86,5 +75,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-#sina_sentence_tokenize --text "Your text here. Does it work? Yes! Try with new lines." --dot --question_mark --exclamation_mark
-#sina_sentence_tokenize --file "path/to/your/file.txt" --dot --question_mark --exclamation_mark
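The CLI body above fills in the library call that was blank in 0.1.4: sentence_tokenizer now lives in sinatools.utils.tokenizer and takes four boolean split options. A minimal sketch of the direct call, assuming it returns a list of sentence strings (as the CLI's print-per-line loop suggests):

    from sinatools.utils.tokenizer import sentence_tokenizer

    # Split at dots, question marks and exclamation marks; keep new lines intact.
    sentences = sentence_tokenizer("Your text here. Does it work? Yes!",
                                   dot=True, new_line=False,
                                   question_mark=True, exclamation_mark=True)
    for sentence in sentences:
        print(sentence)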
{nlptools → sinatools}/CLI/utils/text_transliteration.py RENAMED
@@ -2,21 +2,21 @@
 About:
 ------
 
-The
+The transliterate tool allows you to transliterate text using the SinaTools' utility. This command-line utility
 takes in a text and a desired schema, and outputs the transliterated text.
 
 Usage:
 ------
-Below is the usage information that can be generated by running
+Below is the usage information that can be generated by running transliterate --help.
 
 Usage:
 ------
 
 .. code-block:: none
 
-
+    transliterate --text=TEXT --schema=SCHEMA
 
-
+    transliterate --file=FILE --schema=SCHEMA
 
 Options:
 --------
@@ -33,21 +33,14 @@ Examples:
 
 .. code-block:: none
 
-
-
+    transliterate --text "klmp" --schema "bw2ar"
+    transliterate --file "path/to/your/file.txt" --schema "ar2bw"
 
-Note:
------
-
-.. code-block:: none
-
-    For available transliteration schemas and more details, please refer to the SinaTools' documentation or the source code
-    of the function `perform_transliteration`.
 
 """
 import argparse
-from
-from
+from sinatools.utils.text_transliteration import perform_transliteration
+from sinatools.utils.readfile import read_file
 
 def main():
     parser = argparse.ArgumentParser(description='Perform text transliteration using SinaTools')
@@ -73,5 +66,5 @@ def main():
 if __name__ == '__main__':
     main()
 
-#
-#
+#transliterate --text "example text" --schema "bw2ar"
+#transliterate --file "path/to/your/file.txt" --schema "bw2ar"
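A minimal sketch of the renamed import. The CLI passes a text and a schema, so the call below assumes positional perform_transliteration(text, schema) arguments, with the two schema names shown in the examples ("bw2ar" for Buckwalter to Arabic script, "ar2bw" for the reverse):

    from sinatools.utils.text_transliteration import perform_transliteration

    # Assumption: (text, schema) mirrors the --text/--schema CLI options above.
    print(perform_transliteration("klmp", "bw2ar"))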
{nlptools → sinatools}/DataDownload/downloader.py RENAMED
@@ -35,25 +35,25 @@ def get_appdatadir():
     .. highlight:: python
     .. code-block:: python
 
-        from
+        from sinatools.DataDownload import downloader
 
         path = downloader.get_appdatadir()
 
-        Windows: 'C:/Users/<Username>/AppData/Roaming/
-        MacOS: '/Users/<Username>/Library/Application Support/
-        Linux: '/home/<Username>/.
-        Google Colab: '/content/
+        Windows: 'C:/Users/<Username>/AppData/Roaming/sinatools'
+        MacOS: '/Users/<Username>/Library/Application Support/sinatools'
+        Linux: '/home/<Username>/.sinatools'
+        Google Colab: '/content/sinatools'
 
     """
     home = str(Path.home())
     if 'google.colab' in sys.modules:
-        path = Path('/content/
+        path = Path('/content/sinatools')
     elif sys.platform == 'win32':
-        path = Path(home, 'AppData/Roaming/
+        path = Path(home, 'AppData/Roaming/sinatools')
     elif sys.platform == 'darwin':
-        path = Path(home, 'Library/Application Support/
+        path = Path(home, 'Library/Application Support/sinatools')
     else:
-        path = Path(home, '.
+        path = Path(home, '.sinatools')
 
     if not os.path.exists(path):
         os.makedirs(path)
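The docstring above is self-describing; the resolved directory is where SinaTools stores downloaded resources such as lemmas_dic.pickle (loaded by sinatools/morphology/__init__.py further down in this diff):

    from sinatools.DataDownload import downloader

    # Creates the directory on first call if it does not exist.
    path = downloader.get_appdatadir()
    print(path)  # e.g. /home/<Username>/.sinatools on Linux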
sinatools/VERSION ADDED
@@ -0,0 +1 @@
+0.1.8
{nlptools → sinatools}/morphology/ALMA_multi_word.py RENAMED
@@ -1,13 +1,12 @@
-from
-from nlptools.utils.parser import arStrip
+from sinatools.utils.parser import arStrip
 import json
-
+from . import dictionary
 
 def ALMA_multi_word(multi_word):
     undiac_multi_word = arStrip(multi_word, True, True, True, False, True, False) # diacs , smallDiacs , shaddah , digit , alif , specialChars
     result_word = []
-    if undiac_multi_word in
-    result_word =
+    if undiac_multi_word in dictionary.keys():
+        result_word = dictionary[undiac_multi_word]
 
     my_json = {}
     glosses_list = []
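After the rename, the function is imported from sinatools.morphology and reads the module-level dictionary instead of the removed settings module. A minimal sketch with a hypothetical multi-word expression; the returned structure (built from my_json and glosses_list) is only partially visible in this diff:

    from sinatools.morphology.ALMA_multi_word import ALMA_multi_word

    # Hypothetical input; the lookup strips diacritics before consulting
    # the dictionary loaded from lemmas_dic.pickle.
    result = ALMA_multi_word("بيت لحم")
    print(result)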
{nlptools → sinatools}/morphology/__init__.py RENAMED
@@ -1,23 +1,13 @@
-from nlptools.morphology import settings
 import pickle
-from
+from sinatools.DataDownload import downloader
 import os
 
-
-#path =downloader.get_appdatadir()
-#file_path = os.path.join(path, filename)
-#with open(file_path, 'rb') as f:
-#    #Load the serialized data from the file
-#    settings.div_dic = pickle.load(f)
-
-
+dictionary = {}
 filename = 'lemmas_dic.pickle'
-path =downloader.get_appdatadir()
+path = downloader.get_appdatadir()
 file_path = os.path.join(path, filename)
 with open(file_path, 'rb') as f:
-
-    settings.div_dic = pickle.load(f)
-
+    dictionary = pickle.load(f)
 
 #filename_five = 'five_grams.pickle'
 #path =downloader.get_appdatadir()
sinatools/morphology/morph_analyzer.py ADDED
@@ -0,0 +1,172 @@
+import re
+from sinatools.utils.tokenizers_words import simple_word_tokenize
+from sinatools.utils.parser import arStrip
+from sinatools.utils.charsets import AR_CHARSET, AR_DIAC_CHARSET
+from sinatools.DataDownload.downloader import get_appdatadir
+from . import dictionary
+
+_IS_AR_RE = re.compile(u'^[' + re.escape(u''.join(AR_CHARSET)) + u']+$')
+
+def find_solution(token, language, flag):
+    if token in dictionary.keys():
+        resulted_solutions = []
+        solutions = dictionary[token]
+        if flag == '1':
+            solutions = [solutions[0]]
+        for solution in solutions:
+            # token, freq, lemma, lemma_id, root, pos
+            resulted_solutions.append([token, solution[0], solution[1], solution[2], solution[3], solution[4]])
+        return resulted_solutions
+    else:
+        return []
+
+
+
+def analyze(text, language='MSA', task='full', flag="1"):
+    """
+    This method processes an input text and returns the morphological analysis for each token within the text, based on the specified language, task, and flag. As follows:
+    If:
+        The task is lemmatization, the morphological solution includes only the lemma_id, lemma, token, and token frequency.
+        The task is pos, the morphological solution includes only the part-of-speech, token, and token frequency.
+        The task is root, the morphological solution includes only the root, token, and token frequency.
+        The task is full, the morphological solution includes the lemma_id, lemma, part-of-speech, root, token, and token frequency.
+
+    Args:
+        text (:obj:`str`): The Arabic text to be morphologically analyzed.
+        language (:obj:`str`): The type of the input text. Currently, only Modern Standard Arabic (MSA) is supported.
+        task (:obj:`str`): The task to filter the results by. Options are [lemmatization, pos, root, full]. The default task if not specified is `full`.
+        flag (:obj:`str`): The flag to filter the returned results. If the flag is `1`, the solution with the highest frequency will be returned. If the flag is `*`, all solutions will be returned, ordered descendingly, with the highest frequency solution first. The default flag if not specified is `1`.
+
+    Returns:
+        list (:obj:`list`): A list of JSON objects, where each JSON object contains:
+            token: The token from the original text.
+            lemma: The lemma of the token.
+            lemma_id: The id of the lemma.
+            pos: The part-of-speech of the token.
+            root: The root of the token.
+            frequency: The frequency of the token.
+
+    **Example:**
+
+    .. highlight:: python
+    .. code-block:: python
+
+        from sinatools.morphology.morph_analyzer import analyze
+
+        # Return the morphological solution for each token in this text
+        # Example: task = full
+        analyze('ذهب الولد الى المدرسة')
+
+        [
+          {
+            "token": "ذهب",
+            "lemma": "ذَهَبَ",
+            "lemma_id": "202001617",
+            "root": "ذ ه ب",
+            "pos": "فعل ماضي",
+            "frequency": "82202"
+          },{
+            "token": "الولد",
+            "lemma": "وَلَدٌ",
+            "lemma_id": "202003092",
+            "root": "و ل د",
+            "pos": "اسم",
+            "frequency": "19066"
+          },{
+            "token": "إلى",
+            "lemma": "إِلَى",
+            "lemma_id": "202000856",
+            "root": "إ ل ى",
+            "pos": "حرف جر",
+            "frequency": "7367507"
+          },{
+            "token": "المدرسة",
+            "lemma": "مَدْرَسَةٌ",
+            "lemma_id": "202002620",
+            "root": "د ر س",
+            "pos": "اسم",
+            "frequency": "145285"
+          }
+        ]
+    """
+
+    output_list = []
+
+    tokens = simple_word_tokenize(text)
+
+    for token in tokens:
+        result_token = []
+        token = arStrip(token, False, True, False, False, False, False)
+        token = re.sub('[ٱ]', 'ﺍ', token)
+        # token, freq, lemma, lemma_id, root, pos
+        solution = [token, 0, token + "_0", 0, token, ""]
+
+        if token.isdigit():
+            solution[5] = "digit"  # pos
+
+        elif not _is_ar(token):
+            solution[5] = "Foreign"  # pos
+
+        else:
+            result_token = find_solution(token, language, flag)
+
+            if result_token == []:
+                token_without_al = re.sub(r'^[ﻝ]', '', re.sub(r'^[ﺍ]', '', token))
+                if len(token_without_al) > 5:
+                    result_token = find_solution(token_without_al, language, flag)
+
+            if result_token == []:
+                # try with replacing ﻩ with ﺓ
+                result_token = find_solution(re.sub(r'[ﻩ]$', 'ﺓ', token), language, flag)
+
+
+            if result_token == []:
+                # try with unified Alef
+                word_with_unify_alef = arStrip(token, False, False, False, False, True, False)  # unify Alef
+                result_token = find_solution(word_with_unify_alef, language, flag)
+
+            if result_token == []:
+                # try with diacritics removed
+                word_undiac = arStrip(token, True, False, True, True, False, False)  # remove diacs, shaddah, digit
+                result_token = find_solution(word_undiac, language, flag)
+
+            if result_token == []:
+                # try with diacritics removed and Alef unified
+                word_undiac = arStrip(token, True, True, True, False, True, False)  # diacs, smallDiacs, shaddah, alif
+                result_token = find_solution(word_undiac, language, flag)
+
+        if result_token != []:
+            output_list += result_token
+        else:
+            output_list += [solution]
+
+    return filter_results(output_list, task)
+
+
+def filter_results(data, task):
+    filtered_data = []
+    # token, freq, lemma, lemma_id, root, pos
+    if task == 'lemmatization':
+        filtered_data = [{'token': item[0], 'lemma': item[2], 'lemma_id': item[3], 'frequency': item[1]} for item in data]
+    elif task == 'pos':
+        filtered_data = [{'token': item[0], 'pos': item[5], 'frequency': item[1]} for item in data]
+    elif task == 'root':
+        filtered_data = [{'token': item[0], 'root': item[4], 'frequency': item[1]} for item in data]
+    else:
+        filtered_data = [{'token': item[0], 'lemma': item[2], 'lemma_id': item[3], 'root': item[4], 'pos': item[5], 'frequency': item[1]} for item in data]
+
+    return filtered_data
+
+
+def _is_ar(word):
+    return _IS_AR_RE.match(word) is not None
+
+
+
+
+
+
+
+
+
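The docstring above demonstrates the default full task; combining it with filter_results, a short sketch of one of the filtered modes:

    from sinatools.morphology.morph_analyzer import analyze

    # task='lemmatization' keeps token, lemma, lemma_id and frequency;
    # flag='*' would return every solution, highest frequency first.
    for item in analyze('ذهب الولد الى المدرسة', task='lemmatization', flag='1'):
        print(item['token'], item['lemma'], item['frequency'])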
sinatools/ner/__init__.py ADDED
@@ -0,0 +1,12 @@
+from sinatools.DataDownload import downloader
+import os
+from sinatools.ner.utils.helpers import load_checkpoint
+
+tagger = None
+tag_vocab = None
+train_config = None
+
+filename = 'Wj27012000.tar'
+path = downloader.get_appdatadir()
+model_path = os.path.join(path, filename)
+tagger, tag_vocab, train_config = load_checkpoint(model_path)
nlptools/arabiner/bin/infer.py → sinatools/ner/entity_extractor.py RENAMED
@@ -1,9 +1,10 @@
 import os
 from collections import namedtuple
-from
-from
-from
-import
+from sinatools.ner.utils.helpers import load_checkpoint
+from sinatools.ner.utils.data import get_dataloaders, text2segments
+from sinatools.DataDownload import downloader
+from . import tag_vocab, train_config, tagger
+
 def ner(text, batch_size=32):
     """
     This method takes a text as input, and a batch size, then performs named entity recognition (NER) on the input text and returns a list of tagged mentions.
@@ -20,7 +21,7 @@ def ner(text, batch_size=32):
     .. highlight:: python
     .. code-block:: python
 
-        from
+        from sinatools.arabiner.bin import infer
         infer.ner('ذهب محمد الى جامعة بيرزيت')
 
         #the output
@@ -42,19 +43,19 @@ def ner(text, batch_size=32):
     dataset, token_vocab = text2segments(text)
 
     vocabs = namedtuple("Vocab", ["tags", "tokens"])
-    vocab = vocabs(tokens=token_vocab, tags=
+    vocab = vocabs(tokens=token_vocab, tags=tag_vocab)
 
     # From the datasets generate the dataloaders
     dataloader = get_dataloaders(
        (dataset,),
        vocab,
-
+       train_config.data_config,
        batch_size=batch_size,
        shuffle=(False,),
    )[0]
 
     # Perform inference on the text and get back the tagged segments
-    segments =
+    segments = tagger.infer(dataloader)
     segments_lists = []
     # Print results
     for segment in segments:
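Note that the docstring example still imports from the removed sinatools.arabiner.bin path. Under the layout introduced by this release, the call would presumably be as follows (an assumption based on the file rename above, not on documented usage):

    from sinatools.ner.entity_extractor import ner

    # Importing sinatools.ner loads the Wj27012000.tar checkpoint once;
    # ner() then tags the text in batches.
    mentions = ner('ذهب محمد الى جامعة بيرزيت', batch_size=32)
    print(mentions)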
{nlptools → sinatools}/salma/views.py RENAMED
@@ -1,12 +1,12 @@
 import json
-from
-from
-from
-from
-from
-from
-from
-#from
+from sinatools.salma import settings
+from sinatools.salma.wsd import normalizearabert
+from sinatools.salma.wsd import GlossPredictor
+from sinatools.utils.parser import arStrip
+from sinatools.utils.tokenizers_words import simple_word_tokenize
+from sinatools.morphology.ALMA_multi_word import ALMA_multi_word
+from sinatools.morphology.morph_analyzer import analyze
+#from sinatools.ner.entity_extractor import ner
 
 def delete_form_list(position, word_lemma):
     #"""
@@ -424,7 +424,7 @@ def SALMA(sentence):
     .. highlight:: python
     .. code-block:: python
 
-        from
+        from sinatools.salma.views import SALMA
         JSON = SALMA("مختبر سينا لحوسبة اللغة والذكاء الإصطناعي. في جامعة بيرزيت.")
         print(JSON["resp"])
 
{nlptools → sinatools}/salma/wsd.py RENAMED
@@ -1,11 +1,11 @@
-from
+from sinatools.salma import settings
 import re
 import warnings
 warnings.filterwarnings("ignore")
 import torch
 import numpy as np
 import pandas as pd
-from
+from sinatools.arabert.preprocess import ArabertPreprocessor
 
 def normalizearabert(s):
     model_name = 'aubmindlab/bert-base-arabertv02'
{nlptools/morphology → sinatools/utils}/charsets.py RENAMED
@@ -1,6 +1,4 @@
-#
-# We acknoledge that this file charsets.py is imported from Camel tools citation. url
-#
+# We acknowledge that this file, charsets.py, is imported from Camel Tools. [https://camel-tools.readthedocs.io/en/latest/api/utils/charsets.html].
 
 import unicodedata
 