ddi-fw 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
+ '''
+ copied from https://github.com/YifanDengWHU/DDIMDL/blob/master/NLPProcess.py and reorganized
+ '''
+
+ # import stanfordnlp
+ # stanfordnlp.download("en")
+ import pandas as pd
+ import stanza
+ # stanza.download("en")
+
+ import numpy as np
+
+
+ class EventExtractor:
+     def __init__(self, druglist, use_cache=True):
+         self.druglist = druglist
+         # Join multi-word drug names with underscores so the parser treats
+         # each drug as a single token.
+         self.druglist2 = ['_'.join(d.replace('.', ' ').replace(
+             ',', ' ').replace('-', ' ').split(' ')) for d in druglist]
+         # self.events = events
+         self.pipeline = stanza.Pipeline(use_gpu=True)
+         self.use_cache = use_cache
+         self.cache = dict()
+
+     def prepare_event_text(self, event):
+         # Replace every known drug name with its underscore-joined form.
+         for ex, new in zip(self.druglist, self.druglist2):
+             event = event.replace(ex, new)
+         return event
+
+     def extract_all(self, events):
+         mechanisms = []
+         actions = []
+         drugA_list = []
+         drugB_list = []
+         for event in events:
+             mechanism, action, drugA, drugB = self.extract(event)
+             mechanisms.append(mechanism)
+             actions.append(action)
+             drugA_list.append(drugA)
+             drugB_list.append(drugB)
+         return mechanisms, actions, drugA_list, drugB_list
+
+     def extract(self, event):
+         if self.use_cache and event in self.cache:
+             return self.cache[event]
+         # Cache under the raw text; `event` is rewritten below, so using the
+         # rewritten string as the key would make lookups always miss.
+         key = event
+         event = self.prepare_event_text(event)
+         drugA = None
+         drugB = None
+
+         def addMechanism(node):
+             # Depth-first walk collecting every descendant of `node`
+             # except the two drug mentions.
+             if int(sonsNum[int(node - 1)]) == 0:
+                 return queue
+             for k in sons[node - 1]:
+                 if int(k) == 0:
+                     break
+                 if dependency[int(k - 1)].text == drugA or dependency[int(k - 1)].text == drugB:
+                     continue
+                 queue.append(int(k))
+                 addMechanism(int(k))
+             return queue
+
+         # Only the first sentence of the event text is parsed.
+         doc = self.pipeline(event)
+         dependency = list(doc.sentences[0].words)
+         # sons[i] lists the child word ids of word i + 1; sonsNum[i] counts them.
+         sons = np.zeros((len(dependency), len(dependency)))
+         sonsNum = np.zeros(len(dependency))
+         flag = False
+         count = 0
+         for j in dependency:
+             # if j.dependency_relation == 'root':
+             if j.deprel == 'root':
+                 # root = int(j.index)
+                 root = int(j.id)
+                 action = j.lemma
+             if j.text in self.druglist2:
+                 # The first drug mention becomes drugA, the second drugB.
+                 if count < 2:
+                     if flag:
+                         drugB = j.text
+                         count += 1
+                     else:
+                         drugA = j.text
+                         flag = True
+                         count += 1
+             sonsNum[j.head - 1] += 1
+             sons[j.head - 1, int(sonsNum[j.head - 1] - 1)] = int(j.id)
+         queue = []
+         # Seed the mechanism phrase with the root's object or passive subject
+         # (assumes the root verb has such a child).
+         for j in range(int(sonsNum[root - 1])):
+             if dependency[int(sons[root - 1, j] - 1)].deprel == 'obj' or dependency[int(sons[root - 1, j] - 1)].deprel == 'nsubj:pass':
+                 queue.append(int(sons[root - 1, j]))
+                 break
+         queue = addMechanism(queue[0])
+         queue.sort()
+
+         mechanism = " ".join(dependency[j - 1].text for j in queue)
+         # Manual corrections for two phrases the parser mishandles.
+         if mechanism == "the fluid retaining activities":
+             mechanism = "the fluid"
+         if mechanism == "atrioventricular blocking ( AV block )":
+             mechanism = 'the atrioventricular blocking ( AV block ) activities increase'
+
+         result = (mechanism, action,
+                   drugA.replace('_', ' ') if drugA is not None else '',
+                   drugB.replace('_', ' ') if drugB is not None else '')
+         self.cache[key] = result
+
+         # Flag events where one of the two drug mentions was not found.
+         if drugA is None or drugB is None:
+             print(event)
+
+         return result
+
+
+ # drugs_pickle_path = 'drugbank/output/drugs.pkl'
+ # drugs_df = pd.read_pickle(drugs_pickle_path)
+
+ # drug_names = drugs_df['name'].to_list()
+
+
+ # drug_names = ['Lepirudin','Ursodeoxycholic acid']
+ # event_extractor = EventExtractor(
+ #     drug_names)
+
+ # mechanisms, actions, drugA_list, drugB_list = event_extractor.extract_all(
+ #     ['The risk or severity of bleeding and bruising can be increased when Lepirudin is combined with Ursodeoxycholic acid'])
+ # # mechanism, action, drugA, drugB = event_extractor.extract(
+ # #     'Bivalirudin may increase the anticoagulant activities of Bromfenac')
+
+
+ # print(mechanisms)
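
A minimal usage sketch for EventExtractor, run alongside the class above; it assumes the stanza English model has been downloaded once (as hinted in the commented imports) and that every drug mentioned in the sentence appears in druglist:

import stanza

stanza.download("en")  # one-time model download

drug_names = ['Lepirudin', 'Ursodeoxycholic acid']
extractor = EventExtractor(drug_names)

# extract() returns (mechanism, action, drugA, drugB); the underscores added
# by prepare_event_text() are stripped from the returned drug names.
mechanism, action, drugA, drugB = extractor.extract(
    'The risk or severity of bleeding and bruising can be increased '
    'when Lepirudin is combined with Ursodeoxycholic acid')
print(mechanism, action, drugA, drugB)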
@@ -0,0 +1,2 @@
+ from .tensorflow_helper import TFMultiModal, TFSingleModal
+ from .evaluation_helper import evaluate, Metrics
@@ -0,0 +1,66 @@
+ import torch
+
+
+ class ExtendedTorchModule(torch.nn.Module):
+     def __init__(self, model):
+         super().__init__()
+         self.model = model
+
+     # NOTE: this deliberately shadows torch.nn.Module.train(mode=True);
+     # call it with a dataloader, and do not rely on model.train()/eval()
+     # mode switching through this name.
+     def train(self, dataloader_train, criterion, optimizer, epoch_count=10):
+         for epoch in range(epoch_count):  # loop over the dataset multiple times
+             running_loss = 0.0
+             for i, data in enumerate(dataloader_train, 0):
+                 # get the inputs; data is a list of [inputs, labels]
+                 inputs, labels = data
+
+                 # zero the parameter gradients
+                 optimizer.zero_grad()
+
+                 # forward + backward + optimize
+                 outputs = self(inputs)
+                 loss = criterion(outputs, labels)
+                 loss.backward()
+                 optimizer.step()
+
+                 # print statistics
+                 running_loss += loss.item()
+                 if i % 5000 == 4999:  # print every 5000 mini-batches
+                     print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 5000:.3f}')
+                     running_loss = 0.0
+         print('Finished Training')
+
+     def forward(self, x):
+         x = x.to(torch.float32)
+         # for f in self.module_list:
+         #     x = f(x)
+         # return x
+         return self.model(x)
+
+     def compute_outputs(self, dataloader_test):
+         output_arr = []
+         with torch.no_grad():
+             for data in dataloader_test:
+                 inputs, _ = data
+                 # calculate outputs by running inputs through the network
+                 outputs = self(inputs)
+                 output_arr.append(outputs)
+         # Concatenate the per-batch output tensors; building a tensor from a
+         # list of numpy arrays is extremely slow and raises a UserWarning.
+         return torch.cat(output_arr)
+
+     # def compute_accuracy(self, dataloader_test):
+     #     correct = 0
+     #     total = 0
+     #     # since we're not training, we don't need to calculate the gradients for our outputs
+     #     with torch.no_grad():
+     #         for data in dataloader_test:
+     #             inputs, labels = data
+     #             # calculate outputs by running inputs through the network
+     #             outputs = self(inputs)
+     #             # the class with the highest energy is what we choose as prediction
+     #             _, predicted = torch.max(outputs.data, 1)
+     #             total += labels.size(0)
+     #             correct += (predicted == labels).sum().item()
+
+     #     print(f'Accuracy of the network: {100 * correct // total} %')
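
A minimal training sketch for ExtendedTorchModule; the Sequential classifier, layer sizes, optimizer, and synthetic data below are illustrative assumptions, not package code:

import torch
from torch.utils.data import DataLoader, TensorDataset

X = torch.randn(256, 16)                 # toy features
y = torch.randint(0, 3, (256,))          # toy labels for 3 classes
loader = DataLoader(TensorDataset(X, y), batch_size=32)

net = ExtendedTorchModule(torch.nn.Sequential(
    torch.nn.Linear(16, 32), torch.nn.ReLU(), torch.nn.Linear(32, 3)))
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

# Calls the overriding train() defined above, not nn.Module.train(mode).
net.train(loader, criterion, optimizer, epoch_count=2)
outputs = net.compute_outputs(loader)    # concatenated logits, shape (256, 3)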
@@ -0,0 +1,232 @@
+ import numpy as np
+ from sklearn import metrics
+ from sklearn.metrics import accuracy_score, precision_recall_curve
+ from sklearn.metrics import f1_score
+ from sklearn.metrics import precision_score
+ from sklearn.metrics import recall_score
+ from sklearn.metrics import roc_auc_score
+ from sklearn.metrics import auc
+ from sklearn.metrics import classification_report
+ from sklearn.preprocessing import OneHotEncoder
+
+
+ def _format(d, floating_number_precision=4):
+     # Round a scalar, or every value of a dict, to the given precision.
+     if type(d) is dict:
+         d = {k: _round(v, floating_number_precision) for k, v in d.items()}
+     else:
+         d = round(d, floating_number_precision)
+     return d
+
+
+ def _round(v, floating_number_precision=4):
+     if type(v) is list or type(v) is set:
+         return [round(item, floating_number_precision) for item in v]
+     else:
+         return round(v, floating_number_precision)
+
+
+ class Metrics():
+     # Each setter below stores its value on the instance, shadowing the
+     # method of the same name; the setters are meant to be called once each.
+     def __init__(self, label):
+         self.label = label
+
+     def classification_report(self, classification_report):
+         self.classification_report = classification_report
+
+     def accuracy(self, accuracy):
+         self.accuracy = accuracy
+
+     def precision(self, precision):
+         self.precision = precision
+
+     def recall(self, recall):
+         self.recall = recall
+
+     def f1_score(self, f1_score):
+         self.f1_score = f1_score
+
+     def roc_auc(self, roc_auc):
+         self.roc_auc = roc_auc
+
+     def roc_aupr(self, roc_aupr):
+         self.roc_aupr = roc_aupr
+
+     def format_float(self, floating_number_precision=4):
+         self.accuracy = round(self.accuracy, floating_number_precision)
+         self.precision = _format(self.precision, floating_number_precision)
+         self.recall = _format(self.recall, floating_number_precision)
+         self.f1_score = _format(self.f1_score, floating_number_precision)
+         self.roc_auc = _format(self.roc_auc, floating_number_precision)
+         self.roc_aupr = _format(self.roc_aupr, floating_number_precision)
+
+
+ # taken from https://github.com/YifanDengWHU/DDIMDL/blob/master/DDIMDL.py#L214
+ def roc_aupr_score(y_true, y_score, average="macro"):
+     def _binary_roc_aupr_score(y_true, y_score):
+         precision, recall, pr_thresholds = precision_recall_curve(
+             y_true, y_score)
+         # precision, recall, pr_thresholds = metrics.roc_curve(y, pred, pos_label=2)
+         # auc() needs a monotonic x-axis, so recall comes first.
+         return auc(recall, precision)
+
+     def _average_binary_score(binary_metric, y_true, y_score, average):  # y_true = y_one_hot
+         if average == "binary":
+             return binary_metric(y_true, y_score)
+         if average == "micro":
+             y_true = y_true.ravel()
+             y_score = y_score.ravel()
+         if y_true.ndim == 1:
+             y_true = y_true.reshape((-1, 1))
+         if y_score.ndim == 1:
+             y_score = y_score.reshape((-1, 1))
+         n_classes = y_score.shape[1]
+         score = np.zeros((n_classes,))
+         for c in range(n_classes):
+             y_true_c = y_true.take([c], axis=1).ravel()
+             y_score_c = y_score.take([c], axis=1).ravel()
+             score[c] = binary_metric(y_true_c, y_score_c)
+         return np.average(score)
+
+     return _average_binary_score(_binary_roc_aupr_score, y_true, y_score, average)
+
+
+ def evaluate(actual, pred, info='', verbose=False):
+     # `actual` and `pred` are one-hot / per-class score matrices of shape
+     # (n_samples, n_classes).
+     y_pred = np.argmax(pred, axis=1)
+     y_true = np.argmax(actual, axis=1)
+     c_report = classification_report(y_true, y_pred, output_dict=True)
+
+     m = Metrics(info)
+
+     precision = dict()
+     recall = dict()
+     f_score = dict()
+     roc_aupr = dict()
+     roc_auc = dict()
+
+     # Compute Precision-Recall and ROC-AUPR for each class
+     for i in range(actual.shape[1]):
+         precision[i], recall[i], _ = precision_recall_curve(
+             actual[:, i].ravel(), pred[:, i].ravel())
+         roc_aupr[i] = auc(recall[i], precision[i])
+         precision[i] = precision[i].tolist()
+         recall[i] = recall[i].tolist()
+         # classes = (y_true == i).astype(int)
+         # roc_auc[i] = roc_auc_score(classes, pred[:, i])
+
+     roc_auc["weighted"] = roc_auc_score(
+         actual, pred, multi_class='ovr', average='weighted')
+     roc_auc["macro"] = roc_auc_score(
+         actual, pred, multi_class='ovr', average='macro')
+     roc_auc["micro"] = roc_auc_score(
+         actual, pred, multi_class='ovr', average='micro')
+
+     # Compute micro-average Precision-Recall curve and ROC-AUPR
+     precision["micro_event"], recall["micro_event"], _ = precision_recall_curve(
+         actual.ravel(), pred.ravel())
+     roc_aupr["micro"] = auc(recall["micro_event"], precision["micro_event"])
+     precision["micro_event"] = precision["micro_event"].tolist()
+     recall["micro_event"] = recall["micro_event"].tolist()
+     # macro_aupr_score = roc_aupr_score(actual, pred, average='macro')
+     # micro_aupr_score = roc_aupr_score(actual, pred, average='micro')
+
+     acc = accuracy_score(y_true, y_pred)
+
+     precision['weighted'] = precision_score(y_true, y_pred, average='weighted')
+     precision['macro'] = precision_score(y_true, y_pred, average='macro')
+     precision['micro'] = precision_score(y_true, y_pred, average='micro')
+
+     recall['weighted'] = recall_score(y_true, y_pred, average='weighted')
+     recall['macro'] = recall_score(y_true, y_pred, average='macro')
+     recall['micro'] = recall_score(y_true, y_pred, average='micro')
+
+     f_score['weighted'] = f1_score(y_true, y_pred, average='weighted')
+     f_score['macro'] = f1_score(y_true, y_pred, average='macro')
+     f_score['micro'] = f1_score(y_true, y_pred, average='micro')
+
+     if verbose:
+         print(f'''Accuracy: {acc}
+ , Precision: {precision["weighted"]}
+ , Recall: {recall["weighted"]}
+ , F1-score: {f_score["weighted"]}
+ ''')
+
+     logs = {'accuracy': acc,
+             'weighted_precision': precision['weighted'],
+             'macro_precision': precision['macro'],
+             'micro_precision': precision['micro'],
+             'weighted_recall_score': recall['weighted'],
+             'macro_recall_score': recall['macro'],
+             'micro_recall_score': recall['micro'],
+             'weighted_f1_score': f_score['weighted'],
+             'macro_f1_score': f_score['macro'],
+             'micro_f1_score': f_score['micro'],
+             # "micro_precision_from_precision_recall_curve": precision["micro"],
+             # "micro_recall_from_precision_recall_curve": recall["micro"],
+             "micro_roc_aupr": roc_aupr['micro'],
+             "weighted_roc_auc": roc_auc['weighted'],
+             "macro_roc_auc": roc_auc['macro'],
+             "micro_roc_auc": roc_auc['micro']
+             }
+     m.accuracy(acc)
+     m.precision(precision)
+     m.recall(recall)
+     m.f1_score(f_score)
+     m.roc_auc(roc_auc)
+     m.roc_aupr(roc_aupr)
+     m.classification_report(c_report)
+     return logs, m
+
+
+ # # Sample integer array
+ # integer_array = np.array([0, 1, 2, 1, 0])
+
+ # # Reshape the integer array to a column vector
+ # integer_array = integer_array.reshape(-1, 1)
+
+ # # Create OneHotEncoder object
+ # encoder = OneHotEncoder(sparse_output=False)
+
+ # # Fit and transform the integer array to one-hot encoded array
+ # y_true = encoder.fit_transform(integer_array)
+ # # y_true = np.array([[1, 0, 0],
+ # #                    [0, 1, 0],
+ # #                    [0, 0, 1],
+ # #                    [1, 0, 0],
+ # #                    [0, 0, 1]])
+ # y_score = np.array([[0.6, 0.2, 0.2],
+ #                     [0.2, 0.5, 0.3],
+ #                     [0.1, 0.2, 0.7],
+ #                     [0.1, 0.8, 0.1],
+ #                     [0.1, 0.6, 0.3]])
+
+ # y = np.array([-1, -1, 1, 1])
+ # pred = np.array([0.1, 0.4, 0.35, 0.8])
+ # evaluate(y_true, y_score)
+ # fpr, tpr, thresholds = metrics.roc_curve(y, pred)
+ # print(metrics.auc(fpr, tpr))
+ # print(roc_aupr_score(y, pred))
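
A minimal end-to-end sketch of evaluate(), reusing the one-hot y_true and per-class y_score shapes from the commented sample above:

import numpy as np

y_true = np.array([[1, 0, 0],
                   [0, 1, 0],
                   [0, 0, 1],
                   [1, 0, 0],
                   [0, 0, 1]])
y_score = np.array([[0.6, 0.2, 0.2],
                    [0.2, 0.5, 0.3],
                    [0.1, 0.2, 0.7],
                    [0.1, 0.8, 0.1],
                    [0.1, 0.6, 0.3]])

logs, m = evaluate(y_true, y_score, info='demo', verbose=True)
m.format_float()   # round the stored metrics to 4 decimal places
print(logs['accuracy'], logs['macro_roc_auc'])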