ddi-fw 0.0.149__py3-none-any.whl → 0.0.151__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- ddi_fw/datasets/__init__.py +1 -1
- ddi_fw/datasets/core.py +147 -341
- ddi_fw/datasets/dataset_splitter.py +39 -0
- ddi_fw/datasets/ddi_mdl/base.py +194 -130
- ddi_fw/datasets/ddi_mdl/debug.log +1 -0
- ddi_fw/datasets/embedding_generator.py +2 -1
- ddi_fw/langchain/embeddings.py +1 -0
- ddi_fw/ml/evaluation_helper.py +47 -178
- ddi_fw/ml/ml_helper.py +125 -81
- ddi_fw/ml/model_wrapper.py +2 -2
- ddi_fw/ml/pytorch_wrapper.py +175 -72
- ddi_fw/ml/tensorflow_wrapper.py +131 -39
- ddi_fw/ner/ner.py +93 -39
- ddi_fw/pipeline/multi_modal_combination_strategy.py +4 -2
- ddi_fw/pipeline/multi_pipeline.py +2 -15
- ddi_fw/pipeline/ner_pipeline.py +15 -6
- ddi_fw/pipeline/pipeline.py +157 -93
- ddi_fw/{test/compress_json_test.py → utils/json_helper.py} +1 -15
- {ddi_fw-0.0.149.dist-info → ddi_fw-0.0.151.dist-info}/METADATA +6 -3
- {ddi_fw-0.0.149.dist-info → ddi_fw-0.0.151.dist-info}/RECORD +22 -31
- {ddi_fw-0.0.149.dist-info → ddi_fw-0.0.151.dist-info}/WHEEL +1 -1
- ddi_fw/test/__init__.py +0 -0
- ddi_fw/test/basic_test.py +0 -15
- ddi_fw/test/combination_test.py +0 -12
- ddi_fw/test/date_test.py +0 -15
- ddi_fw/test/idf_score.py +0 -54
- ddi_fw/test/jaccard_similarity.py +0 -85
- ddi_fw/test/mlfow_test.py +0 -165
- ddi_fw/test/sklearn-tfidf.py +0 -16
- ddi_fw/test/test.py +0 -93
- ddi_fw/test/torch_cuda_test.py +0 -9
- ddi_fw/test/type_guarding_test.py +0 -18
- {ddi_fw-0.0.149.dist-info → ddi_fw-0.0.151.dist-info}/top_level.txt +0 -0
ddi_fw/ml/evaluation_helper.py
CHANGED
@@ -1,4 +1,6 @@
+from typing import Any, Dict, List, Literal, Union
 import numpy as np
+from pydantic import BaseModel, Field
 from sklearn import metrics
 from sklearn.metrics import accuracy_score, precision_recall_curve
 from sklearn.metrics import f1_score
@@ -9,43 +11,50 @@ from sklearn.metrics import auc
 from sklearn.metrics import classification_report
 from sklearn.preprocessing import OneHotEncoder
 
-def __format__(d,floating_number_precision
-    if
-        d = {k: __round__(v,floating_number_precision) for k, v in d.items()}
+def __format__(d: Union[Dict[str, Union[List[float], float]], float], floating_number_precision=4) -> Union[Dict[str, Union[List[float], float]], float]:
+    if isinstance(d, dict):
+        d = {k: __round__(v, floating_number_precision) for k, v in d.items()}
     else:
-        d = round(d,floating_number_precision)
+        d = round(d, floating_number_precision)
     return d
 
-def __round__(v,floating_number_precision = 4):
+def __round__(v,floating_number_precision = 4) -> Union[List[float], float]:
     if type(v) is list or type(v) is set:
         return [round(item,floating_number_precision) for item in v]
     else:
         return round(v,floating_number_precision)
 
 
-class Metrics():
-
-
+class Metrics(BaseModel):
+    label: str
+    accuracy: float = 0.0
+    precision: Any= None
+    recall: Any= None
+    f1_score: Any= None
+    roc_auc: Any= None
+    roc_aupr: Any= None
+    classification_report: Any = None
 
-
+
+    def set_classification_report(self,classification_report):
         self.classification_report = classification_report
 
-    def
+    def set_accuracy(self, accuracy):
         self.accuracy = accuracy
 
-    def
+    def set_precision(self, precision):
         self.precision = precision
 
-    def
+    def set_recall(self, recall):
         self.recall = recall
 
-    def
+    def set_f1_score(self, f1_score):
         self.f1_score = f1_score
 
-    def
+    def set_roc_auc(self, roc_auc):
         self.roc_auc = roc_auc
 
-    def
+    def set_roc_aupr(self, roc_aupr):
         self.roc_aupr = roc_aupr
 
     def format_float(self, floating_number_precision = 4):
@@ -56,6 +65,7 @@ class Metrics():
         self.roc_auc = __format__( self.roc_auc ,floating_number_precision)
         self.roc_aupr = __format__( self.roc_aupr ,floating_number_precision)
 
+
 
 # taken from https://github.com/YifanDengWHU/DDIMDL/blob/master/DDIMDL.py#L214
 def roc_aupr_score(y_true, y_score, average="macro"):
@@ -86,7 +96,7 @@ def roc_aupr_score(y_true, y_score, average="macro"):
     return _average_binary_score(_binary_roc_aupr_score, y_true, y_score, average)
 
 
-def evaluate(actual, pred, info='',
+def evaluate(actual, pred, info='', print_detail=False):
     # Precompute y_true and y_pred
     y_true = np.argmax(actual, axis=1)
     y_pred = np.argmax(pred, axis=1)
@@ -95,7 +105,7 @@ def evaluate(actual, pred, info='', print=False):
     c_report = classification_report(y_true, y_pred, output_dict=True)
 
     # Metrics initialization
-    metrics = Metrics(info)
+    metrics = Metrics(label= info)
 
     n_classes = actual.shape[1]
 
@@ -104,14 +114,15 @@ def evaluate(actual, pred, info='', print=False):
     f_score = {}
     roc_aupr = {}
     roc_auc = {
-        "weighted": 0,
-        "macro": 0,
-        "micro": 0
+        "weighted": 0.0,
+        "macro": 0.0,
+        "micro": 0.0
     }
 
     # Preallocate lists
-    precision_vals = [[] for _ in range(n_classes)]
-    recall_vals = [[] for _ in range(n_classes)]
+    precision_vals: List[np.ndarray] = [np.array([]) for _ in range(n_classes)]
+    recall_vals: List[np.ndarray] = [np.array([]) for _ in range(n_classes)]
+
 
     # Compute metrics for each class
     for i in range(n_classes):
@@ -120,9 +131,9 @@ def evaluate(actual, pred, info='', print=False):
         roc_aupr[i] = auc(recall_vals[i], precision_vals[i])
 
     # Calculate ROC AUC scores
-    roc_auc["weighted"] = roc_auc_score(actual, pred, multi_class='ovr', average='weighted')
-    roc_auc["macro"] = roc_auc_score(actual, pred, multi_class='ovr', average='macro')
-    roc_auc["micro"] = roc_auc_score(actual, pred, multi_class='ovr', average='micro')
+    roc_auc["weighted"] = float(roc_auc_score(actual, pred, multi_class='ovr', average='weighted'))
+    roc_auc["macro"] = float(roc_auc_score(actual, pred, multi_class='ovr', average='macro'))
+    roc_auc["micro"] = float(roc_auc_score(actual, pred, multi_class='ovr', average='micro'))
 
     # Micro-average Precision-Recall curve and ROC-AUPR
     precision["micro_event"], recall["micro_event"], _ = precision_recall_curve(actual.ravel(), pred.ravel())
@@ -136,126 +147,13 @@ def evaluate(actual, pred, info='', print=False):
     acc = accuracy_score(y_true, y_pred)
 
     # Aggregate precision, recall, and f_score
-    for avg_type in ['weighted', 'macro', 'micro']:
+    # for avg_type in ['weighted', 'macro', 'micro']:
+    for avg_type in Literal['weighted', 'macro', 'micro'].__args__:
         precision[avg_type] = precision_score(y_true, y_pred, average=avg_type)
         recall[avg_type] = recall_score(y_true, y_pred, average=avg_type)
         f_score[avg_type] = f1_score(y_true, y_pred, average=avg_type)
 
-    if
-        print(
-            f'''Accuracy: {acc}
-            , Precision:{precision['weighted']}
-            , Recall: {recall['weighted']}
-            , F1-score: {f_score['weighted']}
-            ''')
-
-    logs = {'accuracy': acc,
-            'weighted_precision': precision['weighted'],
-            'macro_precision': precision['macro'],
-            'micro_precision': precision['micro'],
-            'weighted_recall_score': recall['weighted'],
-            'macro_recall_score': recall['macro'],
-            'micro_recall_score': recall['micro'],
-            'weighted_f1_score': f_score['weighted'],
-            'macro_f1_score': f_score['macro'],
-            'micro_f1_score': f_score['micro'],
-            # 'weighted_roc_auc_score': weighted_roc_auc_score,
-            # 'macro_roc_auc_score': macro_roc_auc_score,
-            # 'micro_roc_auc_score': micro_roc_auc_score,
-            # 'macro_aupr_score': macro_aupr_score,
-            # 'micro_aupr_score': micro_aupr_score
-            "micro_roc_aupr": roc_aupr['micro'],
-            # "micro_precision_from_precision_recall_curve":precision["micro"],
-            # "micro_recall_from_precision_recall_curve":recall["micro"],
-            "weighted_roc_auc": roc_auc['weighted'],
-            "macro_roc_auc": roc_auc['macro'],
-            "micro_roc_auc": roc_auc['micro']
-            }
-    metrics.accuracy(acc)
-    metrics.precision(precision)
-    metrics.recall(recall)
-    metrics.f1_score(f_score)
-    metrics.roc_auc(roc_auc)
-    metrics.roc_aupr(roc_aupr)
-    metrics.classification_report(c_report)
-    return logs, metrics
-
-
-# actual and pred are one-hot encoded
-def evaluate_ex(actual, pred, info = '' ,print=False):
-
-    y_pred = np.argmax(pred, axis=1)
-    y_true = np.argmax(actual, axis=1)
-    c_report = classification_report(y_true, y_pred, output_dict = True)
-
-
-    metrics = Metrics(info)
-
-    precision = dict()
-    recall = dict()
-    f_score = dict()
-    roc_aupr = dict()
-    roc_auc = dict()
-
-    # Compute Precision-Recall and ROC-AUPR for each class
-    for i in range(actual.shape[1]):
-        precision[i], recall[i], _ = precision_recall_curve(
-            actual[:, i].ravel(), pred[:, i].ravel())
-        roc_aupr[i] = auc(recall[i], precision[i])
-        precision[i] = precision[i].tolist()
-        recall[i] = recall[i].tolist()
-        classes = [1 if i == np.argmax(y) else 0 for y in y_true]
-        # roc_auc[i] = roc_auc_score(classes, pred[:,i])
-
-    roc_auc["weighted"] = roc_auc_score(
-        actual, pred, multi_class='ovr', average='weighted')
-    roc_auc["macro"] = roc_auc_score(
-        actual, pred, multi_class='ovr', average='macro')
-    roc_auc["micro"] = roc_auc_score(
-        actual, pred, multi_class='ovr', average='micro')
-
-    # Compute micro-average Precision-Recall curve and ROC-AUPR
-    precision["micro_event"], recall["micro_event"], _ = precision_recall_curve(
-        actual.ravel(), pred.ravel())
-    roc_aupr["micro"] = auc(recall["micro_event"], precision["micro_event"])
-    precision["micro_event"] = precision["micro_event"].tolist()
-    recall["micro_event"] = recall["micro_event"].tolist()
-    # weighted_roc_auc_score = roc_auc_score(actual, pred, multi_class='ovr', average='weighted')
-    # macro_roc_auc_score = roc_auc_score(actual, pred, multi_class='ovr', average='macro')
-    # micro_roc_auc_score = roc_auc_score(actual, pred, multi_class='ovr', average='micro')
-
-    # macro_aupr_score = roc_aupr_score(actual, pred, average='macro')
-    # micro_aupr_score = roc_aupr_score(actual, pred, average='micro')
-
-    acc = accuracy_score(y_true, y_pred)
-
-    precision['weighted'] = precision_score(y_true, y_pred, average='weighted')
-    precision['macro'] = precision_score(y_true, y_pred, average='macro')
-    precision['micro'] = precision_score(y_true, y_pred, average='micro')
-
-    recall['weighted'] = recall_score(y_true, y_pred, average='weighted')
-    recall['macro'] = recall_score(y_true, y_pred, average='macro')
-    recall['micro'] = recall_score(y_true, y_pred, average='micro')
-
-    f_score['weighted'] = f1_score(y_true, y_pred, average='weighted')
-    f_score['macro'] = f1_score(y_true, y_pred, average='macro')
-    f_score['micro'] = f1_score(y_true, y_pred, average='micro')
-
-    # acc = accuracy_score(y_true, y_pred)
-
-    # weighted_precision = precision_score(y_true, y_pred, average='weighted')
-    # macro_precision = precision_score(y_true, y_pred, average='macro')
-    # micro_precision = precision_score(y_true, y_pred, average='micro')
-
-    # weighted_recall_score = recall_score(y_true, y_pred, average='weighted')
-    # macro_recall_score = recall_score(y_true, y_pred, average='macro')
-    # micro_recall_score = recall_score(y_true, y_pred, average='micro')
-
-    # weighted_f1_score = f1_score(y_true, y_pred, average='weighted')
-    # macro_f1_score = f1_score(y_true, y_pred, average='macro')
-    # micro_f1_score = f1_score(y_true, y_pred, average='micro')
-
-    if print:
+    if print_detail:
         print(
             f'''Accuracy: {acc}
             , Precision:{precision['weighted']}
@@ -285,42 +183,13 @@ def evaluate_ex(actual, pred, info = '' ,print=False):
            "macro_roc_auc": roc_auc['macro'],
            "micro_roc_auc": roc_auc['micro']
            }
-    metrics.
-    metrics.
-    metrics.
-    metrics.
-    metrics.
-    metrics.
-    metrics.
+    metrics.set_accuracy(acc)
+    metrics.set_precision(precision)
+    metrics.set_recall(recall)
+    metrics.set_f1_score(f_score)
+    metrics.set_roc_auc(roc_auc)
+    metrics.set_roc_aupr(roc_aupr)
+    metrics.set_classification_report(c_report)
     return logs, metrics
 
-
-# # Sample integer array
-# integer_array = np.array([0, 1, 2, 1, 0])
-
-# # Reshape the integer array to a column vector
-# integer_array = integer_array.reshape(-1, 1)
-
-# # Create OneHotEncoder object
-# encoder = OneHotEncoder(sparse_output=False)
-
-# # Fit and transform the integer array to one-hot encoded array
-# y_true = encoder.fit_transform(integer_array)
-# # y_true = np.array([[1, 0, 0],
-# # [0, 1, 0],
-# # [0, 0, 1],
-# # [1, 0, 0],
-# # [0, 0, 1]],
-# # )
-# y_score = np.array([[0.6, 0.2, 0.2],
-# [0.2, 0.5, 0.3],
-# [0.1, 0.2, 0.7],
-# [0.1, 0.8, 0.1],
-# [0.1, 0.6, 0.3]])
-
-# y = np.array([-1, -1, 1, 1])
-# pred = np.array([0.1, 0.4, 0.35, 0.8])
-# evaluate(y_true,y_score)
-# fpr, tpr, thresholds = metrics.roc_curve(y, pred)
-# print(metrics.auc(fpr, tpr))
-# print(roc_aupr_score(y,pred))
+
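In practical terms, this file's changes mean that Metrics is now a pydantic BaseModel with explicit fields and set_* mutators, the evaluate flag that shadowed the built-in print is renamed to print_detail, and the old evaluate_ex variant plus the trailing scratch comments are removed. The following is a minimal usage sketch, not code from the package; the toy arrays are invented for illustration, and only the names shown in the diff above (evaluate, Metrics, format_float, the micro_roc_auc log key) are assumed to exist.

import numpy as np
from ddi_fw.ml.evaluation_helper import evaluate, Metrics

# Toy one-hot ground truth and predicted probabilities for a 3-class problem.
actual = np.eye(3)[[0, 1, 2, 0, 1, 2]]
pred = np.array([[0.7, 0.2, 0.1],
                 [0.1, 0.8, 0.1],
                 [0.2, 0.2, 0.6],
                 [0.5, 0.3, 0.2],
                 [0.3, 0.4, 0.3],
                 [0.1, 0.3, 0.6]])

# `print` was renamed to `print_detail`, and Metrics now takes its label as a keyword.
logs, metrics = evaluate(actual, pred, info='toy-run', print_detail=True)
metrics.format_float()                      # round nested metric values to 4 decimals
print(metrics.label, metrics.accuracy, logs['micro_roc_auc'])

# Constructing the model directly goes through pydantic validation as well.
m = Metrics(label='manual')
m.set_accuracy(0.91)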
ddi_fw/ml/ml_helper.py
CHANGED
@@ -1,14 +1,14 @@
-from typing import Dict, List, Tuple
+from typing import Callable, Dict, List, Tuple
 from matplotlib import pyplot as plt
 from ddi_fw.ml.model_wrapper import Result
 from ddi_fw.ml.pytorch_wrapper import PTModelWrapper
 from ddi_fw.ml.tensorflow_wrapper import TFModelWrapper
 from ddi_fw.utils.package_helper import get_import
 import tensorflow as tf
-from tensorflow import keras
-from keras
-from keras.layers import Dense, Dropout, Input, Activation
-from keras.callbacks import EarlyStopping
+from tensorflow.python import keras
+from tensorflow.python.keras import Model, Sequential
+from tensorflow.python.keras.layers import Dense, Dropout, Input, Activation
+from tensorflow.python.keras.callbacks import EarlyStopping
 from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
 import numpy as np
 
@@ -29,26 +29,72 @@ import ddi_fw.utils as utils
 # np.random.seed(2)
 # np.set_printoptions(precision=4)
 
+
 class MultiModalRunner:
     # todo model related parameters to config
-    def __init__(self, library, multi_modal):
+    def __init__(self, library, multi_modal, use_mlflow=True):
         self.library = library
         self.multi_modal = multi_modal
+        self.use_mlflow = use_mlflow
         self.result = Result()
 
+    def _mlflow_(self, func: Callable):
+        if self.use_mlflow:
+            func()
+
     def set_data(self, items, train_idx_arr, val_idx_arr, y_test_label):
         self.items = items
         self.train_idx_arr = train_idx_arr
         self.val_idx_arr = val_idx_arr
         self.y_test_label = y_test_label
 
-    def __create_model(self,library):
+    def __create_model(self, library):
         if library == 'tensorflow':
             return TFModelWrapper
         elif library == 'pytorch':
             return PTModelWrapper
         else:
-            raise ValueError(
+            raise ValueError(
+                "Unsupported library type. Choose 'tensorflow' or 'pytorch'.")
+
+    def __predict(self,single_results):
+        item_dict = {t[0]: t for t in self.items}
+        print("multi_modal")
+        print(self.multi_modal)
+        print(item_dict.keys())
+
+        for m in self.multi_modal:
+            name = m.get('name')
+            input_type = m.get('input_type')
+            input = m.get('input')
+            inputs = m.get('inputs')
+            model_type = get_import(m.get("model_type"))
+            kwargs = m.get('params')
+            T = self.__create_model(self.library)
+            single_modal = T(self.date, name, model_type, **kwargs)
+            if input_type == '1D':
+                item = item_dict[input]
+                single_modal.set_data(
+                    self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
+            elif input_type == '2D':
+                # check keys
+                filtered_dict = {k: item_dict[k]
+                                 for k in inputs if k in item_dict}
+                print(filtered_dict.keys())
+                first_input = next(iter(filtered_dict.values()))
+                train_data_list = [f[1] for f in filtered_dict.values()]
+                test_data_list = [f[3] for f in filtered_dict.values()]
+                train_data = np.stack(train_data_list, axis=1)
+                test_data = np.stack(test_data_list, axis=1)
+                train_label = first_input[2]
+                test_label = first_input[4]
+                single_modal.set_data(
+                    self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
+            else:
+                raise Exception("check configurations")
+            logs, metrics, prediction = single_modal.fit_and_evaluate()
+            self.result.add_metric(name, metrics)
+            single_results[name] = prediction
 
     def predict(self, combinations: list = [], generate_combinations=False):
         self.prefix = utils.utc_time_as_string()
@@ -62,82 +108,80 @@ class MultiModalRunner:
             combinations = []
             for i in range(2, len(l) + 1):
                 combinations.extend(list(itertools.combinations(l, i)))  # all
- [45 removed lines whose content was not captured in the source diff view]
-        # single_modal=T(self.date, item[0], self.model_func, self.batch_size, self.epochs)
-        # single_modal.set_data(
-        #     self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
-        # logs, metrics, prediction = single_modal.predict()
-        # # self.result.add_log(item[0], logs)
-        # #Check
-        # self.result.add_metric(item[0], metrics)
-        # single_results[item[0]] = prediction
-        # # sum = sum + prediction
-
-        if combinations:
-            self.evaluate_combinations(single_results, combinations)
+        if self.use_mlflow:
+            with mlflow.start_run(run_name=self.prefix, description="***") as run:
+                self.__predict(single_results)
+                # self.level_0_run_id = run.info.run_id
+                # item_dict = {t[0]: t for t in self.items}
+                # print("multi_modal")
+                # print(self.multi_modal)
+                # print(item_dict.keys())
+
+                # for m in self.multi_modal:
+                # name = m.get('name')
+                # input_type = m.get('input_type')
+                # input = m.get('input')
+                # inputs = m.get('inputs')
+                # model_type = get_import(m.get("model_type"))
+                # kwargs = m.get('params')
+                # T = self.__create_model(self.library)
+                # single_modal = T(self.date, name, model_type, **kwargs)
+                # if input_type == '1D':
+                # item = item_dict[input]
+                # single_modal.set_data(
+                # self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
+                # elif input_type == '2D':
+                # # check keys
+                # filtered_dict = {k: item_dict[k]
+                # for k in inputs if k in item_dict}
+                # print(filtered_dict.keys())
+                # first_input = next(iter(filtered_dict.values()))
+                # train_data_list = [f[1] for f in filtered_dict.values()]
+                # test_data_list = [f[3] for f in filtered_dict.values()]
+                # train_data = np.stack(train_data_list, axis=1)
+                # test_data = np.stack(test_data_list, axis=1)
+                # train_label = first_input[2]
+                # test_label = first_input[4]
+                # single_modal.set_data(
+                # self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
+                # else:
+                # raise Exception("check configurations")
+                # logs, metrics, prediction = single_modal.fit_and_evaluate()
+                # self.result.add_metric(name, metrics)
+                # single_results[name] = prediction
+        else:
+            self.__predict(single_results)
+        if combinations:
+            self.evaluate_combinations(single_results, combinations)
         # TODO: sum'a gerek yok
         return self.result
 
     def evaluate_combinations(self, single_results, combinations):
        for combination in combinations:
            combination_descriptor = '-'.join(combination)
- [16 removed lines whose content was not captured in the source diff view]
+            if self.use_mlflow:
+                with mlflow.start_run(run_name=combination_descriptor, description="***", nested=True) as combination_run:
+                    self.__evaluate_combinations(
+                        single_results, combination, combination_descriptor, combination_run.info.artifact_uri)
+            else:
+                self.__evaluate_combinations(
+                    single_results, combination, combination_descriptor, None)
+
+    def __evaluate_combinations(self, single_results, combination, combination_descriptor, artifact_uri):
+        prediction = np.zeros(
+            (self.y_test_label.shape[0], self.y_test_label.shape[1]))
+        for item in combination:
+            prediction = prediction + single_results[item]
+        logs, metrics = evaluate(
+            actual=self.y_test_label, pred=prediction, info=combination_descriptor)
+        if self.use_mlflow:
+            mlflow.log_metrics(logs)
+        metrics.format_float()
+        # TODO path bulunamadı hatası aldık
+        if artifact_uri:
+            print(
+                f'combination_artifact_uri:{artifact_uri}')
+            utils.compress_and_save_data(
+                metrics.__dict__, artifact_uri, f'{self.date}_metrics.gzip')
+        # self.result.add_log(combination_descriptor,logs)
+        # self.result.add_metric(combination_descriptor,metrics)
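The net effect of these changes is that MLflow becomes optional: MultiModalRunner gains a use_mlflow flag (default True), the per-modality training loop moves into the private __predict helper, and combination scoring moves into __evaluate_combinations, which only logs metrics and writes the gzipped metrics artifact when MLflow is active. The following is a hedged configuration sketch, not code from the package: the modality entry and the model import path are hypothetical placeholders, and only the dictionary keys mirror what __predict reads (name, input_type, input/inputs, model_type, params).

from ddi_fw.ml.ml_helper import MultiModalRunner

# Hypothetical modality config; __predict resolves model_type via get_import
# and passes params as keyword arguments to the model wrapper.
multi_modal = [
    {
        "name": "smiles",
        "input_type": "1D",
        "input": "smiles",                                   # key into the items passed to set_data
        "model_type": "my_project.models.DenseClassifier",   # placeholder import path
        "params": {"epochs": 20, "batch_size": 128},
    },
]

# use_mlflow=False skips every mlflow.start_run / log_metrics call added in this version.
runner = MultiModalRunner(library="tensorflow", multi_modal=multi_modal, use_mlflow=False)
# runner.set_data(items, train_idx_arr, val_idx_arr, y_test_label)
# result = runner.predict(generate_combinations=True)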
ddi_fw/ml/model_wrapper.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Dict, List, Tuple
+from typing import Any, Dict, List, Tuple
 
 from ddi_fw.ml.evaluation_helper import Metrics
 
@@ -31,5 +31,5 @@ class ModelWrapper:
         self.test_label = test_label
     # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
 
-    def predict(self)
+    def predict(self)-> Any:
         pass