ddi-fw 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
+ '''
+ copied from https://github.com/YifanDengWHU/DDIMDL/blob/master/NLPProcess.py and reorganized
+ '''
+
+ # import stanfordnlp
+ # stanfordnlp.download("en")
+ import pandas as pd
+ import stanza
+ # stanza.download("en")
+
+ import numpy as np
+
+
+ class EventExtractor:
+     def __init__(self, druglist, use_cache=True):
+         self.druglist = druglist
+         # Join multi-word drug names with underscores so the parser treats
+         # each drug as a single token.
+         self.druglist2 = ['_'.join(d.replace('.', ' ').replace(
+             ',', ' ').replace('-', ' ').split(' ')) for d in druglist]
+         # self.events = events
+         self.pipeline = stanza.Pipeline(use_gpu=True)
+         self.use_cache = use_cache
+         self.cache = dict()
+
+     def prepare_event_text(self, event):
+         # Replace every known drug name with its underscore-joined form.
+         for ex, new in zip(self.druglist, self.druglist2):
+             event = event.replace(ex, new)
+         return event
+
+     def extract_all(self, events):
+         mechanisms = []
+         actions = []
+         drugA_list = []
+         drugB_list = []
+         for event in events:
+             mechanism, action, drugA, drugB = self.extract(event)
+             mechanisms.append(mechanism)
+             actions.append(action)
+             drugA_list.append(drugA)
+             drugB_list.append(drugB)
+         return mechanisms, actions, drugA_list, drugB_list
+
+     def extract(self, event):
+         if self.use_cache and event in self.cache:
+             return self.cache[event]
+         # Cache under the raw text; `event` is rewritten below, so using the
+         # rewritten string as the key would make lookups always miss.
+         key = event
+         event = self.prepare_event_text(event)
+         drugA = None
+         drugB = None
+
+         def addMechanism(node):
+             # Depth-first walk collecting every descendant of `node`
+             # except the two drug mentions.
+             if int(sonsNum[int(node - 1)]) == 0:
+                 return queue
+             for k in sons[node - 1]:
+                 if int(k) == 0:
+                     break
+                 if dependency[int(k - 1)].text == drugA or dependency[int(k - 1)].text == drugB:
+                     continue
+                 queue.append(int(k))
+                 addMechanism(int(k))
+             return queue
+
+         # Only the first sentence of the event text is parsed.
+         doc = self.pipeline(event)
+         dependency = list(doc.sentences[0].words)
+         # sons[i] lists the child word ids of word i + 1; sonsNum[i] counts them.
+         sons = np.zeros((len(dependency), len(dependency)))
+         sonsNum = np.zeros(len(dependency))
+         flag = False
+         count = 0
+         for j in dependency:
+             # if j.dependency_relation == 'root':
+             if j.deprel == 'root':
+                 # root = int(j.index)
+                 root = int(j.id)
+                 action = j.lemma
+             if j.text in self.druglist2:
+                 # The first drug mention becomes drugA, the second drugB.
+                 if count < 2:
+                     if flag:
+                         drugB = j.text
+                         count += 1
+                     else:
+                         drugA = j.text
+                         flag = True
+                         count += 1
+             sonsNum[j.head - 1] += 1
+             sons[j.head - 1, int(sonsNum[j.head - 1] - 1)] = int(j.id)
+         queue = []
+         # Seed the mechanism phrase with the root's object or passive subject
+         # (assumes the root verb has such a child).
+         for j in range(int(sonsNum[root - 1])):
+             if dependency[int(sons[root - 1, j] - 1)].deprel == 'obj' or dependency[int(sons[root - 1, j] - 1)].deprel == 'nsubj:pass':
+                 queue.append(int(sons[root - 1, j]))
+                 break
+         queue = addMechanism(queue[0])
+         queue.sort()
+
+         mechanism = " ".join(dependency[j - 1].text for j in queue)
+         # Manual corrections for two phrases the parser mishandles.
+         if mechanism == "the fluid retaining activities":
+             mechanism = "the fluid"
+         if mechanism == "atrioventricular blocking ( AV block )":
+             mechanism = 'the atrioventricular blocking ( AV block ) activities increase'
+
+         result = (mechanism, action,
+                   drugA.replace('_', ' ') if drugA is not None else '',
+                   drugB.replace('_', ' ') if drugB is not None else '')
+         self.cache[key] = result
+
+         # Flag events where one of the two drug mentions was not found.
+         if drugA is None or drugB is None:
+             print(event)
+
+         return result
+
+
+ # drugs_pickle_path = 'drugbank/output/drugs.pkl'
+ # drugs_df = pd.read_pickle(drugs_pickle_path)
+
+ # drug_names = drugs_df['name'].to_list()
+
+
+ # drug_names = ['Lepirudin','Ursodeoxycholic acid']
+ # event_extractor = EventExtractor(
+ #     drug_names)
+
+ # mechanisms, actions, drugA_list, drugB_list = event_extractor.extract_all(
+ #     ['The risk or severity of bleeding and bruising can be increased when Lepirudin is combined with Ursodeoxycholic acid'])
+ # # mechanism, action, drugA, drugB = event_extractor.extract(
+ # #     'Bivalirudin may increase the anticoagulant activities of Bromfenac')
+
+
+ # print(mechanisms)
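
A minimal usage sketch for EventExtractor, run alongside the class above; it assumes the stanza English model has been downloaded once (as hinted in the commented imports) and that every drug mentioned in the sentence appears in druglist:

import stanza

stanza.download("en")  # one-time model download

drug_names = ['Lepirudin', 'Ursodeoxycholic acid']
extractor = EventExtractor(drug_names)

# extract() returns (mechanism, action, drugA, drugB); the underscores added
# by prepare_event_text() are stripped from the returned drug names.
mechanism, action, drugA, drugB = extractor.extract(
    'The risk or severity of bleeding and bruising can be increased '
    'when Lepirudin is combined with Ursodeoxycholic acid')
print(mechanism, action, drugA, drugB)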
@@ -0,0 +1,2 @@
+ from .tensorflow_helper import TFMultiModal, TFSingleModal
+ from .evaluation_helper import evaluate, Metrics
@@ -0,0 +1,66 @@
+ import torch
+
+
+ class ExtendedTorchModule(torch.nn.Module):
+     def __init__(self, model):
+         super().__init__()
+         self.model = model
+
+     # NOTE: this deliberately shadows torch.nn.Module.train(mode=True);
+     # call it with a dataloader, and do not rely on model.train()/eval()
+     # mode switching through this name.
+     def train(self, dataloader_train, criterion, optimizer, epoch_count=10):
+         for epoch in range(epoch_count):  # loop over the dataset multiple times
+             running_loss = 0.0
+             for i, data in enumerate(dataloader_train, 0):
+                 # get the inputs; data is a list of [inputs, labels]
+                 inputs, labels = data
+
+                 # zero the parameter gradients
+                 optimizer.zero_grad()
+
+                 # forward + backward + optimize
+                 outputs = self(inputs)
+                 loss = criterion(outputs, labels)
+                 loss.backward()
+                 optimizer.step()
+
+                 # print statistics
+                 running_loss += loss.item()
+                 if i % 5000 == 4999:  # print every 5000 mini-batches
+                     print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 5000:.3f}')
+                     running_loss = 0.0
+         print('Finished Training')
+
+     def forward(self, x):
+         x = x.to(torch.float32)
+         # for f in self.module_list:
+         #     x = f(x)
+         # return x
+         return self.model(x)
+
+     def compute_outputs(self, dataloader_test):
+         output_arr = []
+         with torch.no_grad():
+             for data in dataloader_test:
+                 inputs, _ = data
+                 # calculate outputs by running inputs through the network
+                 outputs = self(inputs)
+                 output_arr.append(outputs)
+         # Concatenate the per-batch output tensors; building a tensor from a
+         # list of numpy arrays is extremely slow and raises a UserWarning.
+         return torch.cat(output_arr)
+
+     # def compute_accuracy(self, dataloader_test):
+     #     correct = 0
+     #     total = 0
+     #     # since we're not training, we don't need to calculate the gradients for our outputs
+     #     with torch.no_grad():
+     #         for data in dataloader_test:
+     #             inputs, labels = data
+     #             # calculate outputs by running inputs through the network
+     #             outputs = self(inputs)
+     #             # the class with the highest energy is what we choose as prediction
+     #             _, predicted = torch.max(outputs.data, 1)
+     #             total += labels.size(0)
+     #             correct += (predicted == labels).sum().item()
+
+     #     print(f'Accuracy of the network: {100 * correct // total} %')
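
A minimal training sketch for ExtendedTorchModule; the Sequential classifier, layer sizes, optimizer, and synthetic data below are illustrative assumptions, not package code:

import torch
from torch.utils.data import DataLoader, TensorDataset

X = torch.randn(256, 16)                 # toy features
y = torch.randint(0, 3, (256,))          # toy labels for 3 classes
loader = DataLoader(TensorDataset(X, y), batch_size=32)

net = ExtendedTorchModule(torch.nn.Sequential(
    torch.nn.Linear(16, 32), torch.nn.ReLU(), torch.nn.Linear(32, 3)))
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

# Calls the overriding train() defined above, not nn.Module.train(mode).
net.train(loader, criterion, optimizer, epoch_count=2)
outputs = net.compute_outputs(loader)    # concatenated logits, shape (256, 3)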
@@ -0,0 +1,232 @@
+ import numpy as np
+ from sklearn import metrics
+ from sklearn.metrics import accuracy_score, precision_recall_curve
+ from sklearn.metrics import f1_score
+ from sklearn.metrics import precision_score
+ from sklearn.metrics import recall_score
+ from sklearn.metrics import roc_auc_score
+ from sklearn.metrics import auc
+ from sklearn.metrics import classification_report
+ from sklearn.preprocessing import OneHotEncoder
+
+
+ def _format(d, floating_number_precision=4):
+     # Round a scalar, or every value of a dict, to the given precision.
+     if type(d) is dict:
+         d = {k: _round(v, floating_number_precision) for k, v in d.items()}
+     else:
+         d = round(d, floating_number_precision)
+     return d
+
+
+ def _round(v, floating_number_precision=4):
+     if type(v) is list or type(v) is set:
+         return [round(item, floating_number_precision) for item in v]
+     else:
+         return round(v, floating_number_precision)
+
+
+ class Metrics():
+     # Each setter below stores its value on the instance, shadowing the
+     # method of the same name; the setters are meant to be called once each.
+     def __init__(self, label):
+         self.label = label
+
+     def classification_report(self, classification_report):
+         self.classification_report = classification_report
+
+     def accuracy(self, accuracy):
+         self.accuracy = accuracy
+
+     def precision(self, precision):
+         self.precision = precision
+
+     def recall(self, recall):
+         self.recall = recall
+
+     def f1_score(self, f1_score):
+         self.f1_score = f1_score
+
+     def roc_auc(self, roc_auc):
+         self.roc_auc = roc_auc
+
+     def roc_aupr(self, roc_aupr):
+         self.roc_aupr = roc_aupr
+
+     def format_float(self, floating_number_precision=4):
+         self.accuracy = round(self.accuracy, floating_number_precision)
+         self.precision = _format(self.precision, floating_number_precision)
+         self.recall = _format(self.recall, floating_number_precision)
+         self.f1_score = _format(self.f1_score, floating_number_precision)
+         self.roc_auc = _format(self.roc_auc, floating_number_precision)
+         self.roc_aupr = _format(self.roc_aupr, floating_number_precision)
+
+
+ # taken from https://github.com/YifanDengWHU/DDIMDL/blob/master/DDIMDL.py#L214
+ def roc_aupr_score(y_true, y_score, average="macro"):
+     def _binary_roc_aupr_score(y_true, y_score):
+         precision, recall, pr_thresholds = precision_recall_curve(
+             y_true, y_score)
+         # precision, recall, pr_thresholds = metrics.roc_curve(y, pred, pos_label=2)
+         # auc() needs a monotonic x-axis, so recall comes first.
+         return auc(recall, precision)
+
+     def _average_binary_score(binary_metric, y_true, y_score, average):  # y_true = y_one_hot
+         if average == "binary":
+             return binary_metric(y_true, y_score)
+         if average == "micro":
+             y_true = y_true.ravel()
+             y_score = y_score.ravel()
+         if y_true.ndim == 1:
+             y_true = y_true.reshape((-1, 1))
+         if y_score.ndim == 1:
+             y_score = y_score.reshape((-1, 1))
+         n_classes = y_score.shape[1]
+         score = np.zeros((n_classes,))
+         for c in range(n_classes):
+             y_true_c = y_true.take([c], axis=1).ravel()
+             y_score_c = y_score.take([c], axis=1).ravel()
+             score[c] = binary_metric(y_true_c, y_score_c)
+         return np.average(score)
+
+     return _average_binary_score(_binary_roc_aupr_score, y_true, y_score, average)
+
+
+ def evaluate(actual, pred, info='', verbose=False):
+     # `actual` and `pred` are one-hot / per-class score matrices of shape
+     # (n_samples, n_classes).
+     y_pred = np.argmax(pred, axis=1)
+     y_true = np.argmax(actual, axis=1)
+     c_report = classification_report(y_true, y_pred, output_dict=True)
+
+     m = Metrics(info)
+
+     precision = dict()
+     recall = dict()
+     f_score = dict()
+     roc_aupr = dict()
+     roc_auc = dict()
+
+     # Compute Precision-Recall and ROC-AUPR for each class
+     for i in range(actual.shape[1]):
+         precision[i], recall[i], _ = precision_recall_curve(
+             actual[:, i].ravel(), pred[:, i].ravel())
+         roc_aupr[i] = auc(recall[i], precision[i])
+         precision[i] = precision[i].tolist()
+         recall[i] = recall[i].tolist()
+         # classes = (y_true == i).astype(int)
+         # roc_auc[i] = roc_auc_score(classes, pred[:, i])
+
+     roc_auc["weighted"] = roc_auc_score(
+         actual, pred, multi_class='ovr', average='weighted')
+     roc_auc["macro"] = roc_auc_score(
+         actual, pred, multi_class='ovr', average='macro')
+     roc_auc["micro"] = roc_auc_score(
+         actual, pred, multi_class='ovr', average='micro')
+
+     # Compute micro-average Precision-Recall curve and ROC-AUPR
+     precision["micro_event"], recall["micro_event"], _ = precision_recall_curve(
+         actual.ravel(), pred.ravel())
+     roc_aupr["micro"] = auc(recall["micro_event"], precision["micro_event"])
+     precision["micro_event"] = precision["micro_event"].tolist()
+     recall["micro_event"] = recall["micro_event"].tolist()
+     # macro_aupr_score = roc_aupr_score(actual, pred, average='macro')
+     # micro_aupr_score = roc_aupr_score(actual, pred, average='micro')
+
+     acc = accuracy_score(y_true, y_pred)
+
+     precision['weighted'] = precision_score(y_true, y_pred, average='weighted')
+     precision['macro'] = precision_score(y_true, y_pred, average='macro')
+     precision['micro'] = precision_score(y_true, y_pred, average='micro')
+
+     recall['weighted'] = recall_score(y_true, y_pred, average='weighted')
+     recall['macro'] = recall_score(y_true, y_pred, average='macro')
+     recall['micro'] = recall_score(y_true, y_pred, average='micro')
+
+     f_score['weighted'] = f1_score(y_true, y_pred, average='weighted')
+     f_score['macro'] = f1_score(y_true, y_pred, average='macro')
+     f_score['micro'] = f1_score(y_true, y_pred, average='micro')
+
+     if verbose:
+         print(f'''Accuracy: {acc}
+ , Precision: {precision["weighted"]}
+ , Recall: {recall["weighted"]}
+ , F1-score: {f_score["weighted"]}
+ ''')
+
+     logs = {'accuracy': acc,
+             'weighted_precision': precision['weighted'],
+             'macro_precision': precision['macro'],
+             'micro_precision': precision['micro'],
+             'weighted_recall_score': recall['weighted'],
+             'macro_recall_score': recall['macro'],
+             'micro_recall_score': recall['micro'],
+             'weighted_f1_score': f_score['weighted'],
+             'macro_f1_score': f_score['macro'],
+             'micro_f1_score': f_score['micro'],
+             # "micro_precision_from_precision_recall_curve": precision["micro"],
+             # "micro_recall_from_precision_recall_curve": recall["micro"],
+             "micro_roc_aupr": roc_aupr['micro'],
+             "weighted_roc_auc": roc_auc['weighted'],
+             "macro_roc_auc": roc_auc['macro'],
+             "micro_roc_auc": roc_auc['micro']
+             }
+     m.accuracy(acc)
+     m.precision(precision)
+     m.recall(recall)
+     m.f1_score(f_score)
+     m.roc_auc(roc_auc)
+     m.roc_aupr(roc_aupr)
+     m.classification_report(c_report)
+     return logs, m
+
+
+ # # Sample integer array
+ # integer_array = np.array([0, 1, 2, 1, 0])
+
+ # # Reshape the integer array to a column vector
+ # integer_array = integer_array.reshape(-1, 1)
+
+ # # Create OneHotEncoder object
+ # encoder = OneHotEncoder(sparse_output=False)
+
+ # # Fit and transform the integer array to one-hot encoded array
+ # y_true = encoder.fit_transform(integer_array)
+ # # y_true = np.array([[1, 0, 0],
+ # #                    [0, 1, 0],
+ # #                    [0, 0, 1],
+ # #                    [1, 0, 0],
+ # #                    [0, 0, 1]])
+ # y_score = np.array([[0.6, 0.2, 0.2],
+ #                     [0.2, 0.5, 0.3],
+ #                     [0.1, 0.2, 0.7],
+ #                     [0.1, 0.8, 0.1],
+ #                     [0.1, 0.6, 0.3]])
+
+ # y = np.array([-1, -1, 1, 1])
+ # pred = np.array([0.1, 0.4, 0.35, 0.8])
+ # evaluate(y_true, y_score)
+ # fpr, tpr, thresholds = metrics.roc_curve(y, pred)
+ # print(metrics.auc(fpr, tpr))
+ # print(roc_aupr_score(y, pred))
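
A minimal end-to-end sketch of evaluate(), reusing the one-hot y_true and per-class y_score shapes from the commented sample above:

import numpy as np

y_true = np.array([[1, 0, 0],
                   [0, 1, 0],
                   [0, 0, 1],
                   [1, 0, 0],
                   [0, 0, 1]])
y_score = np.array([[0.6, 0.2, 0.2],
                    [0.2, 0.5, 0.3],
                    [0.1, 0.2, 0.7],
                    [0.1, 0.8, 0.1],
                    [0.1, 0.6, 0.3]])

logs, m = evaluate(y_true, y_score, info='demo', verbose=True)
m.format_float()   # round the stored metrics to 4 decimal places
print(logs['accuracy'], logs['macro_roc_auc'])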