SinaTools 0.1.35__py2.py3-none-any.whl → 0.1.37__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/METADATA +63 -64
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/RECORD +15 -19
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/WHEEL +6 -6
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/entry_points.txt +0 -1
- sinatools/CLI/DataDownload/download_files.py +9 -8
- sinatools/VERSION +1 -1
- sinatools/ner/trainers/BertNestedTrainer.py +203 -203
- sinatools/ner/trainers/BertTrainer.py +163 -163
- sinatools/ner/trainers/__init__.py +2 -2
- sinatools/utils/similarity.py +62 -27
- sinatools/wsd/disambiguator.py +14 -90
- sinatools/ner/data.py +0 -124
- sinatools/ner/relation_extractor.py +0 -201
- sinatools/utils/implication.py +0 -662
- sinatools/utils/jaccard.py +0 -247
- {SinaTools-0.1.35.data → SinaTools-0.1.37.data}/data/sinatools/environment.yml +0 -0
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/AUTHORS.rst +0 -0
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/LICENSE +0 -0
- {SinaTools-0.1.35.dist-info → SinaTools-0.1.37.dist-info}/top_level.txt +0 -0
@@ -1,163 +1,163 @@
(The 163 removed lines and the 163 added lines of sinatools/ner/trainers/BertTrainer.py are textually identical; the file content is listed once below.)

import os
import logging
import torch
import numpy as np
from sinatools.ner.trainers import BaseTrainer
from sinatools.ner.metrics import compute_single_label_metrics

logger = logging.getLogger(__name__)


class BertTrainer(BaseTrainer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def train(self):
        best_val_loss, test_loss = np.inf, np.inf
        num_train_batch = len(self.train_dataloader)
        patience = self.patience

        for epoch_index in range(self.max_epochs):
            self.current_epoch = epoch_index
            train_loss = 0

            for batch_index, (_, gold_tags, _, _, logits) in enumerate(self.tag(
                self.train_dataloader, is_train=True
            ), 1):
                self.current_timestep += 1
                batch_loss = self.loss(logits.view(-1, logits.shape[-1]), gold_tags.view(-1))
                batch_loss.backward()

                # Avoid exploding gradient by doing gradient clipping
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)

                self.optimizer.step()
                self.scheduler.step()
                train_loss += batch_loss.item()

                if self.current_timestep % self.log_interval == 0:
                    logger.info(
                        "Epoch %d | Batch %d/%d | Timestep %d | LR %.10f | Loss %f",
                        epoch_index,
                        batch_index,
                        num_train_batch,
                        self.current_timestep,
                        self.optimizer.param_groups[0]['lr'],
                        batch_loss.item()
                    )

            train_loss /= num_train_batch

            logger.info("** Evaluating on validation dataset **")
            val_preds, segments, valid_len, val_loss = self.eval(self.val_dataloader)
            val_metrics = compute_single_label_metrics(segments)

            epoch_summary_loss = {
                "train_loss": train_loss,
                "val_loss": val_loss
            }
            epoch_summary_metrics = {
                "val_micro_f1": val_metrics.micro_f1,
                "val_precision": val_metrics.precision,
                "val_recall": val_metrics.recall
            }

            logger.info(
                "Epoch %d | Timestep %d | Train Loss %f | Val Loss %f | F1 %f",
                epoch_index,
                self.current_timestep,
                train_loss,
                val_loss,
                val_metrics.micro_f1
            )

            if val_loss < best_val_loss:
                patience = self.patience
                best_val_loss = val_loss
                logger.info("** Validation improved, evaluating test data **")
                test_preds, segments, valid_len, test_loss = self.eval(self.test_dataloader)
                self.segments_to_file(segments, os.path.join(self.output_path, "predictions.txt"))
                test_metrics = compute_single_label_metrics(segments)

                epoch_summary_loss["test_loss"] = test_loss
                epoch_summary_metrics["test_micro_f1"] = test_metrics.micro_f1
                epoch_summary_metrics["test_precision"] = test_metrics.precision
                epoch_summary_metrics["test_recall"] = test_metrics.recall

                logger.info(
                    f"Epoch %d | Timestep %d | Test Loss %f | F1 %f",
                    epoch_index,
                    self.current_timestep,
                    test_loss,
                    test_metrics.micro_f1
                )

                self.save()
            else:
                patience -= 1

                # No improvements, terminating early
                if patience == 0:
                    logger.info("Early termination triggered")
                    break

            self.summary_writer.add_scalars("Loss", epoch_summary_loss, global_step=self.current_timestep)
            self.summary_writer.add_scalars("Metrics", epoch_summary_metrics, global_step=self.current_timestep)

    def eval(self, dataloader):
        golds, preds, segments, valid_lens = list(), list(), list(), list()
        loss = 0

        for _, gold_tags, tokens, valid_len, logits in self.tag(
            dataloader, is_train=False
        ):
            loss += self.loss(logits.view(-1, logits.shape[-1]), gold_tags.view(-1))
            preds += torch.argmax(logits, dim=2).detach().cpu().numpy().tolist()
            segments += tokens
            valid_lens += list(valid_len)

        loss /= len(dataloader)

        # Update segments, attach predicted tags to each token
        segments = self.to_segments(segments, preds, valid_lens, dataloader.dataset.vocab)

        return preds, segments, valid_lens, loss.item()

    def infer(self, dataloader):
        golds, preds, segments, valid_lens = list(), list(), list(), list()

        for _, gold_tags, tokens, valid_len, logits in self.tag(
            dataloader, is_train=False
        ):
            preds += torch.argmax(logits, dim=2).detach().cpu().numpy().tolist()
            segments += tokens
            valid_lens += list(valid_len)

        segments = self.to_segments(segments, preds, valid_lens, dataloader.dataset.vocab)
        return segments

    def to_segments(self, segments, preds, valid_lens, vocab):
        if vocab is None:
            vocab = self.vocab

        tagged_segments = list()
        tokens_stoi = vocab.tokens.get_stoi()
        tags_itos = vocab.tags[0].get_itos()
        unk_id = tokens_stoi["UNK"]

        for segment, pred, valid_len in zip(segments, preds, valid_lens):
            # First, the token at 0th index [CLS] and token at nth index [SEP]
            # Combine the tokens with their corresponding predictions
            segment_pred = zip(segment[1:valid_len-1], pred[1:valid_len-1])

            # Ignore the sub-tokens/subwords, which are identified with text being UNK
            segment_pred = list(filter(lambda t: tokens_stoi[t[0].text] != unk_id, segment_pred))

            # Attach the predicted tags to each token
            list(map(lambda t: setattr(t[0], 'pred_tag', [{"tag": tags_itos[t[1]]}]), segment_pred))

            # We are only interested in the tagged tokens, we do no longer need raw model predictions
            tagged_segment = [t for t, _ in segment_pred]
            tagged_segments.append(tagged_segment)

        return tagged_segments
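The train() loop above combines gradient clipping with a patience counter for early stopping. As a standalone illustration of that patience pattern only, here is a minimal sketch; the loss values and max_patience setting are made up and nothing here is SinaTools API:

    # Sketch of the patience-based early stopping used in BertTrainer.train().
    # `val_losses` stands in for per-epoch validation losses (hypothetical values).
    val_losses = [0.9, 0.7, 0.72, 0.71, 0.73]
    max_patience = 2

    best_val_loss = float("inf")
    patience = max_patience
    for epoch, val_loss in enumerate(val_losses):
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience = max_patience   # reset on improvement; the real trainer also saves a checkpoint here
        else:
            patience -= 1             # no improvement this epoch
            if patience == 0:
                print(f"early stop at epoch {epoch}")
                break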
sinatools/ner/trainers/__init__.py
CHANGED
@@ -1,3 +1,3 @@
-from sinatools.ner.trainers.BaseTrainer import BaseTrainer
-from sinatools.ner.trainers.BertTrainer import BertTrainer
+from sinatools.ner.trainers.BaseTrainer import BaseTrainer
+from sinatools.ner.trainers.BertTrainer import BertTrainer
 from sinatools.ner.trainers.BertNestedTrainer import BertNestedTrainer
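Since both versions of __init__.py re-export the same three classes, imports of the form below should be unaffected by this release (a sketch; only the names visible in the hunk above are assumed):

    # Trainer classes re-exported by sinatools.ner.trainers in 0.1.35 and 0.1.37.
    from sinatools.ner.trainers import BaseTrainer, BertTrainer, BertNestedTrainer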
sinatools/utils/similarity.py
CHANGED
@@ -101,56 +101,91 @@ def get_intersection(list1, list2, ignore_all_diacritics_but_not_shadda=False, i



-def get_union(list1, list2, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic):
-
-
+# def get_union(list1, list2, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic):
+# """
+# Computes the union of two sets of Arabic words, considering the differences in their diacritization. The method provides two options for handling diacritics: (i) ignore all diacritics except for shadda, and (ii) ignore the shadda diacritic as well. You can try the demo online.

-
-
-
-
-
+# Args:
+# list1 (:obj:`list`): The first list.
+# list2 (:obj:`bool`): The second list.
+# ignore_all_diacratics_but_not_shadda (:obj:`bool`, optional) – A flag to ignore all diacratics except for the shadda. Defaults to False.
+# ignore_shadda_diacritic (:obj:`bool`, optional) – A flag to ignore the shadda diacritic. Defaults to False.

-
-
+# Returns:
+# :obj:`list`: The union of the two lists, ignoring diacritics if flags are true.

-
+# **Example:**

-
-
+# .. highlight:: python
+# .. code-block:: python

-
-
-
-
-
-
-
+# from sinatools.utils.similarity import get_union
+# list1 = ["كتب","فَعل","فَعَلَ"]
+# list2 = ["كتب","فَعّل"]
+# print(get_union(list1, list2, False, True))
+# #output: ["كتب" ,"فَعل" ,"فَعَلَ"]
+# """
+# list1 = [str(i) for i in list1 if i not in (None, ' ', '')]

+# list2 = [str(i) for i in list2 if i not in (None, ' ', '')]
+
+# union_list = []
+
+# for list1_word in list1:
+# word1 = normalize_word(list1_word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)
+# union_list.append(word1)
+
+# for list2_word in list2:
+# word2 = normalize_word(list2_word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)
+# union_list.append(word2)
+
+# i = 0
+# while i < len(union_list):
+# j = i + 1
+# while j < len(union_list):
+# non_preferred_word = get_non_preferred_word(union_list[i], union_list[j])
+# if (non_preferred_word != "#"):
+# union_list.remove(non_preferred_word)
+# j = j + 1
+# i = i + 1
+
+# return union_list
+def get_union(list1, list2, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic):
+
+
+    list1 = [str(i) for i in list1 if i not in (None, ' ', '')]
     list2 = [str(i) for i in list2 if i not in (None, ' ', '')]

+
     union_list = []

+    # Normalize and add words from list1
     for list1_word in list1:
         word1 = normalize_word(list1_word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)
-        union_list
+        if word1 not in union_list:
+            union_list.append(word1)

+    # Normalize and add words from list2
     for list2_word in list2:
         word2 = normalize_word(list2_word, ignore_all_diacritics_but_not_shadda, ignore_shadda_diacritic)
-        union_list
+        if word2 not in union_list:
+            union_list.append(word2)

+
     i = 0
     while i < len(union_list):
         j = i + 1
         while j < len(union_list):
             non_preferred_word = get_non_preferred_word(union_list[i], union_list[j])
-            if
+            if non_preferred_word != "#":
                 union_list.remove(non_preferred_word)
-
-
+                j -= 1
+            j += 1
+        i += 1

     return union_list
-
+
+



 def get_jaccard_similarity(list1: list, list2: list, ignore_all_diacritics_but_not_shadda: bool, ignore_shadda_diacritic: bool) -> float:

@@ -184,7 +219,7 @@ def get_jaccard_similarity(list1: list, list2: list, ignore_all_diacritics_but_n

     return float(len(intersection_list)) / float(len(union_list))

-def get_jaccard(delimiter, str1, str2,
+def get_jaccard(delimiter, selection, str1, str2, ignoreAllDiacriticsButNotShadda=True, ignoreShaddaDiacritic=True):
     """
     Calculates and returns the Jaccard similarity values (union, intersection, or Jaccard similarity) between two lists of Arabic words, considering the differences in their diacritization. The method provides two options for handling diacritics: (i) ignore all diacritics except for shadda, and (ii) ignore the shadda diacritic as well. You can try the demo online.

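The import path and sample lists below come from the docstring preserved in the commented-out block above; treat the calls as a sketch of the revised behaviour (duplicates are now skipped via the new `not in` guard) rather than verified output:

    from sinatools.utils.similarity import get_union, get_jaccard_similarity

    # Sample lists taken from the get_union docstring shown in the diff.
    list1 = ["كتب", "فَعل", "فَعَلَ"]
    list2 = ["كتب", "فَعّل"]

    # Flags as in the docstring example:
    # ignore_all_diacritics_but_not_shadda=False, ignore_shadda_diacritic=True.
    union = get_union(list1, list2, False, True)
    similarity = get_jaccard_similarity(list1, list2, False, True)
    print(union)
    print(similarity)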
sinatools/wsd/disambiguator.py
CHANGED
@@ -8,10 +8,6 @@ from sinatools.morphology.ALMA_multi_word import ALMA_multi_word
 from sinatools.morphology.morph_analyzer import analyze
 from sinatools.ner.entity_extractor import extract
 from . import glosses_dic
-import time
-#import concurrent
-#import threading
-import multiprocessing


 def distill_entities(entities):

@@ -260,7 +256,7 @@ def find_named_entities(string):
     return found_entities


-def find_glosses_using_ALMA(word
+def find_glosses_using_ALMA(word):

     data = analyze(word, language ='MSA', task ='full', flag="1")
     Diac_lemma = ""

@@ -306,7 +302,7 @@ def disambiguate_glosses_using_SALMA(glosses, Diac_lemma, Undiac_lemma, word, se
     return my_json


-def find_glosses(input_sentence, two_word_lemma, three_word_lemma,four_word_lemma, five_word_lemma, ner
+def find_glosses(input_sentence, two_word_lemma, three_word_lemma,four_word_lemma, five_word_lemma, ner):
     output_list = []
     position = 0
     while position < len(input_sentence):

@@ -393,7 +389,7 @@ def find_glosses(input_sentence, two_word_lemma, three_word_lemma,four_word_lemm

         if flag == "False": # Not found in ner or in multi_word_dictionary, ASK ALMA
             word = input_sentence[position]
-            word, Undiac_lemma, Diac_lemma, pos , concept_count, glosses = find_glosses_using_ALMA(word
+            word, Undiac_lemma, Diac_lemma, pos , concept_count, glosses = find_glosses_using_ALMA(word)
             my_json = {}
             my_json['word'] = word
             my_json['concept_count'] = concept_count

@@ -436,95 +432,26 @@ def disambiguate_glosses_main(word, sentence):
     glosses = word['glosses']
     Diac_lemma = word['Diac_lemma']
     Undiac_lemma = word['Undiac_lemma']
-
-    x = disambiguate_glosses_using_SALMA(glosses, Diac_lemma, Undiac_lemma, input_word, sentence)
-    end = time.time()
-    print(f"disambiguate time: {end - start}")
-    return x
-
-
-def init_resources():
-    global glosses_dic
-
-
-# Wrapper function for multiprocessing
-def disambiguate_glosses_in_parallel(word_and_sentence):
-    word, sentence = word_and_sentence
-    return disambiguate_glosses_main(word, sentence)
+    return disambiguate_glosses_using_SALMA(glosses, Diac_lemma, Undiac_lemma, input_word, sentence)

 def WSD(sentence):
-
+
     input_sentence = simple_word_tokenize(sentence)
-
-    print(f"tokenizer time: {end - start}")
-
-    start = time.time()
+
     five_word_lemma = find_five_word_lemma(input_sentence)
-    end = time.time()
-    print(f"5grams time: {end - start}")

-    start = time.time()
     four_word_lemma = find_four_word_lemma(input_sentence)
-
-    print(f"4grams time: {end - start}")
-
-    start = time.time()
+
     three_word_lemma = find_three_word_lemma(input_sentence)
-
-    print(f"3grams time: {end - start}")
-
-    start = time.time()
+
     two_word_lemma = find_two_word_lemma(input_sentence)
-
-    print(f"2grams time: {end - start}")
-
-    start = time.time()
+
     ner = find_named_entities(" ".join(input_sentence))
-
-
-
-
-
-    output_list = find_glosses(input_sentence, two_word_lemma, three_word_lemma, four_word_lemma, five_word_lemma, ner, glosses_dic_shared)
-    end = time.time()
-    print(f"lookup time: {end - start}")
-
-    # for word in output_list:
-    # start = time.time()
-    # results.append(disambiguate_glosses_main(word, sentence))
-    # end = time.time()
-    # print(f"disambiguate time: {end - start}")
-    # return results
-
-    # with concurrent.futures.ProcessPoolExecutor() as executor:
-    # results = list(executor.map(lambda word: disambiguate_glosses_main(word, sentence), output_list))
-    # return results
-
-    # Create and start threads
-    # for word in output_list:
-    # thread = threading.Thread(target=worker, args=(word, sentence))
-    # threads.append(thread)
-    # thread.start()
-    #
-    # for thread in threads:
-    # thread.join()
-    #
-    # return threading_results
-
-    # Number of CPUs
-    num_cpus = multiprocessing.cpu_count()
-    print("num_cpus : ", num_cpus)
-
-    # Create a manager to hold shared data
-    # with multiprocessing.Manager() as manager:
-    # glosses_dic_shared = manager.dict(glosses_dic)
-    # with multiprocessing.Pool(num_cpus) as pool:
-    # arguments = [(word, sentence) for word in output_list]
-    # results = pool.starmap(disambiguate_glosses_main, arguments)
-
-    with multiprocessing.Pool(initializer=init_resources) as pool:
-        # Map the list of words to the disambiguation function in parallel
-        results = pool.map(disambiguate_glosses_in_parallel, [(word, sentence) for word in output_list])
+
+    output_list = find_glosses(input_sentence, two_word_lemma, three_word_lemma, four_word_lemma, five_word_lemma, ner)
+    results = []
+    for word in output_list:
+        results.append(disambiguate_glosses_main(word, sentence))
     return results


@@ -570,8 +497,5 @@ def disambiguate(sentence):
         content = ["Input is too long"]
         return content
     else:
-        start = time.time()
         results = WSD(sentence)
-        end = time.time()
-        print(f"WSD total time: {end - start}")
         return results
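In 0.1.37 the WSD path drops the multiprocessing, threading and timing code and runs the n-gram lookups plus a plain sequential disambiguation loop, so a call reduces to the sketch below. The sample sentence is a stand-in, and only the result fields visible in the hunks above ('word', 'concept_count') are assumed:

    from sinatools.wsd.disambiguator import disambiguate

    # Any Arabic input sentence; this one is only illustrative.
    sentence = "ذهب الطالب إلى الجامعة"
    results = disambiguate(sentence)

    for item in results:
        # Fields such as 'word' and 'concept_count' appear in the diff above;
        # any other keys are not shown there.
        print(item)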