ddi-fw 0.0.73__py3-none-any.whl → 0.0.75__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
2
2
  from .evaluation_helper import evaluate, Metrics
3
3
  from .pipeline import Experiment
4
- from .pipeline_ner import NerParameterSearch
4
+ from .pipeline_ner import NerParameterSearch
5
+ from .ml_helper import SingleModal,MultiModalRunner
@@ -0,0 +1,137 @@
1
+ from typing import Dict, List, Tuple
2
+ from matplotlib import pyplot as plt
3
+ from ddi_fw.experiments.ml_pt import PTSingleModal
4
+ from ddi_fw.experiments.ml_tf import TFSingleModal
5
+ import tensorflow as tf
6
+ from tensorflow import keras
7
+ from keras.models import Model, Sequential
8
+ from keras.layers import Dense, Dropout, Input, Activation, BatchNormalization
9
+ from keras.callbacks import EarlyStopping
10
+ from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
11
+ import numpy as np
12
+
13
+ import mlflow
14
+ from mlflow.utils.autologging_utils import batch_metrics_logger
15
+ import time
16
+
17
+ from mlflow.models import infer_signature
18
+ from ddi_fw.experiments.evaluation_helper import Metrics, evaluate
19
+
20
+ # import tf2onnx
21
+ # import onnx
22
+
23
+ import itertools
24
+ import ddi_fw.utils as utils
25
+
26
+ tf.random.set_seed(1)
27
+ np.random.seed(2)
28
+ np.set_printoptions(precision=4)
29
+
30
+
31
class Result:
    """Container for per-key training logs and evaluation metrics."""

    def __init__(self) -> None:
        # Two independent mappings; keys are whatever the caller supplies.
        self.log_dict = dict()
        self.metric_dict = dict()

    def add_log(self, key, logs):
        """Store ``logs`` under ``key``, replacing any earlier entry."""
        self.log_dict.update({key: logs})

    def add_metric(self, key, metrics):
        """Store ``metrics`` under ``key``, replacing any earlier entry."""
        self.metric_dict.update({key: metrics})
41
+
42
+
43
class SingleModal:
    """Base class holding the configuration and data of one single-modal run.

    Subclasses implement :meth:`predict`; this class only stores state.
    """

    def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100):
        """Store the run configuration.

        Args:
            date: Value kept on ``self.date`` for subclasses to use.
            descriptor: Name of this modality / run.
            model_func: Callable used by subclasses to build a model.
            batch_size: Training batch size.
            epochs: Maximum number of training epochs.
        """
        self.date = date
        self.descriptor = descriptor
        self.model_func = model_func
        self.batch_size = batch_size
        self.epochs = epochs

    def set_data(self, train_idx_arr, val_idx_arr, train_data, train_label, test_data, test_label):
        """Attach the cross-validation index arrays and the train/test data."""
        self.train_idx_arr = train_idx_arr
        self.val_idx_arr = val_idx_arr
        self.train_data = train_data
        self.train_label = train_label
        self.test_data = test_data
        self.test_label = test_label
        # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py

    def predict(self) -> "Tuple[Dict[str, float], Metrics, List[float]]":
        """Train and predict; must be overridden by subclasses.

        Returns:
            A ``(logs, metrics, prediction)`` tuple (per the annotation).

        Raises:
            NotImplementedError: Always, in this base class. Returning
                ``None`` silently would only fail later when the caller
                unpacks the three expected values.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement predict()")
62
+
63
+
64
class MultiModalRunner:
    """Run one single-modal model per item and evaluate summed combinations.

    Each entry of ``items`` is indexed as ``item[0]`` (name) and
    ``item[1]``..``item[4]``, which are forwarded to the single-modal
    ``set_data`` as train data, train label, test data and test label.
    """

    # TODO: move model-related parameters to config
    def __init__(self, library, model_func, batch_size=128, epochs=100):
        """Store the backend name (``'tensorflow'`` or ``'pytorch'``) and settings."""
        self.library = library
        self.model_func = model_func
        self.batch_size = batch_size
        self.epochs = epochs
        self.result = Result()

    def set_data(self, items, train_idx_arr, val_idx_arr, y_test_label):
        """Attach the per-modality items, CV index arrays and test labels."""
        self.items = items
        self.train_idx_arr = train_idx_arr
        self.val_idx_arr = val_idx_arr
        self.y_test_label = y_test_label

    def __create_multi_modal(self, library):
        """Return the single-modal class matching ``library``.

        Raises:
            ValueError: If ``library`` is neither 'tensorflow' nor 'pytorch'.
        """
        if library == 'tensorflow':
            return TFSingleModal
        elif library == 'pytorch':
            return PTSingleModal
        else:
            raise ValueError("Unsupported library type. Choose 'tensorflow' or 'pytorch'.")

    def predict(self, combinations: "list | None" = None, generate_combinations=False):
        """Train every single-modal model, then evaluate the combinations.

        Args:
            combinations: Iterable of tuples of item names whose predictions
                are summed and evaluated together. ``None`` (the default) or
                an empty list skips combination evaluation.
            generate_combinations: When true, ``combinations`` is replaced by
                every combination of two or more item names.

        Returns:
            ``self.result``. Note it is not populated by this method.

        Raises:
            ValueError: If ``self.library`` is not supported.
        """
        self.prefix = utils.utc_time_as_string()
        self.date = utils.utc_time_as_string_simple_format()
        single_results = {}

        if generate_combinations:
            names = [item[0] for item in self.items]
            combinations = [
                combo
                for size in range(2, len(names) + 1)
                for combo in itertools.combinations(names, size)
            ]

        # Resolve the backend once, before opening an MLflow run, so a bad
        # library name fails fast instead of leaving a half-started run.
        single_modal_cls = self.__create_multi_modal(self.library)

        with mlflow.start_run(run_name=self.prefix, description="***") as run:
            self.level_0_run_id = run.info.run_id
            for item in self.items:
                print(item[0])
                single_modal = single_modal_cls(
                    self.date, item[0], self.model_func, self.batch_size, self.epochs)
                single_modal.set_data(
                    self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
                logs, metrics, prediction = single_modal.predict()
                # self.result.add_log(item[0], logs)
                # self.result.add_metric(item[0], metrics)
                single_results[item[0]] = prediction

            if combinations:
                self.evaluate_combinations(single_results, combinations)
        return self.result

    def evaluate_combinations(self, single_results, combinations):
        """Sum the predictions of each combination and log its evaluation.

        Each combination gets its own nested MLflow run; the logs are sent to
        MLflow and the metrics object is compressed into the run's artifact
        location.

        Args:
            single_results: Mapping of item name to its prediction array.
            combinations: Iterable of tuples of item names.
        """
        for combination in combinations:
            combination_descriptor = '-'.join(combination)
            with mlflow.start_run(run_name=combination_descriptor, description="***", nested=True) as combination_run:
                prediction = np.zeros(
                    (self.y_test_label.shape[0], self.y_test_label.shape[1]))
                for item in combination:
                    prediction = prediction + single_results[item]
                logs, metrics = evaluate(
                    actual=self.y_test_label, pred=prediction, info=combination_descriptor)
                mlflow.log_metrics(logs)
                metrics.format_float()
                # TODO: a "path not found" error was observed here
                print(
                    f'combination_artifact_uri:{combination_run.info.artifact_uri}')
                utils.compress_and_save_data(
                    metrics.__dict__, combination_run.info.artifact_uri, f'{self.date}_metrics.gzip')
                # self.result.add_log(combination_descriptor,logs)
                # self.result.add_metric(combination_descriptor,metrics)
@@ -0,0 +1,83 @@
1
+ import mlflow
2
+ import torch
3
+ from ddi_fw.experiments.ml_helper import SingleModal
4
+ from ddi_fw.experiments.evaluation_helper import evaluate
5
+
6
+
7
class PTSingleModal(SingleModal):
    """PyTorch single-modal runner.

    Trains one model per cross-validation fold and predicts on the test data
    with the weights that reached the lowest validation loss.
    """

    def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100, **kwargs):
        """Store the run configuration.

        Keyword Args:
            optimizer: Required. Either an optimizer instance, or a factory
                (an optimizer class or any callable without a ``step``
                attribute) that is called with ``model.parameters()`` for
                each fold.
            criterion: Required. Loss callable ``criterion(output, target)``.
            callbacks: Optional iterable of objects exposing
                ``on_epoch_end(epoch, logs=...)``.

        Raises:
            KeyError: If ``optimizer`` or ``criterion`` is missing.
        """
        super().__init__(date, descriptor, model_func, batch_size, epochs)
        self.optimizer = kwargs['optimizer']
        self.criterion = kwargs['criterion']
        # predict() iterates over this, so it must always be defined.
        self.callbacks = list(kwargs.get('callbacks') or [])

    def _create_dataloader(self, data, labels):
        """Wrap tensors in a shuffling DataLoader using ``self.batch_size``."""
        dataset = torch.utils.data.TensorDataset(data, labels)
        return torch.utils.data.DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

    def _optimizer_for(self, model):
        """Return the optimizer to use for ``model``.

        A factory is invoked with the model's parameters so that every fold's
        freshly built model is actually optimised; an instance is returned
        unchanged.
        """
        optimizer = self.optimizer
        if isinstance(optimizer, type) or not hasattr(optimizer, 'step'):
            return optimizer(model.parameters())
        return optimizer

    def predict(self):
        """Run cross-validated training, then evaluate on the test data.

        Returns:
            ``(logs, metrics, prediction)`` where ``logs`` and ``metrics``
            come from ``evaluate`` and ``prediction`` is a NumPy array.

        Raises:
            RuntimeError: If no epoch produced a usable validation loss
                (e.g. no folds or ``epochs == 0``).
        """
        import copy  # local import: only needed to snapshot the best weights

        print(self.train_data.shape)

        with mlflow.start_run(run_name=self.descriptor, description="***", nested=True):
            criterion = self.criterion
            # Tracked across all folds so the final model is the overall best,
            # not merely the best of the last fold.
            best_val_loss = float('inf')
            best_model = None
            best_state = None

            for i, (train_idx, val_idx) in enumerate(zip(self.train_idx_arr, self.val_idx_arr)):
                print(f"Validation {i}")

                with mlflow.start_run(run_name=f'Validation {i}', description='CV models', nested=True):
                    model = self.model_func(self.train_data.shape[1])

                    # NOTE(review): float16 inputs require a model whose
                    # parameters are float16 as well - confirm against
                    # model_func.
                    X_train_cv = torch.tensor(self.train_data[train_idx], dtype=torch.float16)
                    y_train_cv = torch.tensor(self.train_label[train_idx], dtype=torch.float16)
                    X_valid_cv = torch.tensor(self.train_data[val_idx], dtype=torch.float16)
                    y_valid_cv = torch.tensor(self.train_label[val_idx], dtype=torch.float16)

                    train_loader = self._create_dataloader(X_train_cv, y_train_cv)
                    valid_loader = self._create_dataloader(X_valid_cv, y_valid_cv)

                    optimizer = self._optimizer_for(model)

                    for epoch in range(self.epochs):
                        model.train()
                        train_loss = None
                        for batch_X, batch_y in train_loader:
                            optimizer.zero_grad()
                            output = model(batch_X)
                            loss = criterion(output, batch_y)
                            loss.backward()
                            optimizer.step()
                            train_loss = loss.item()

                        model.eval()
                        with torch.no_grad():
                            val_loss = self._validate(model, valid_loader)

                        # _validate already returns a plain float, so it is
                        # passed as-is (no .item()).
                        epoch_logs = {'loss': train_loss, 'val_loss': val_loss}
                        for callback in self.callbacks:
                            callback.on_epoch_end(epoch, logs=epoch_logs)

                        if val_loss < best_val_loss:
                            best_val_loss = val_loss
                            best_model = model
                            # Copy the weights: the model keeps training, so a
                            # bare reference would not preserve this state.
                            best_state = copy.deepcopy(model.state_dict())

            if best_model is None:
                raise RuntimeError(
                    "No model was trained; check the CV folds and the number of epochs.")

            # Evaluate on test data with the best recorded weights.
            best_model.load_state_dict(best_state)
            best_model.eval()
            with torch.no_grad():
                pred = best_model(torch.tensor(self.test_data, dtype=torch.float16))
            prediction = pred.numpy()
            logs, metrics = evaluate(
                actual=self.test_label, pred=prediction, info=self.descriptor)
            mlflow.log_metrics(logs)

        return logs, metrics, prediction

    def _validate(self, model, valid_loader):
        """Return the mean per-batch loss of ``model`` over ``valid_loader``.

        The caller is expected to wrap this in ``torch.no_grad()``. An empty
        loader raises ``ZeroDivisionError``.
        """
        total_loss = 0
        criterion = self.criterion

        for batch_X, batch_y in valid_loader:
            output = model(batch_X)
            loss = criterion(output, batch_y)
            total_loss += loss.item()

        return total_loss / len(valid_loader)
@@ -0,0 +1,148 @@
1
+ from typing import Dict, List, Tuple
2
+ from matplotlib import pyplot as plt
3
+ from ddi_fw.experiments.ml_helper import SingleModal
4
+ import tensorflow as tf
5
+ from tensorflow import keras
6
+ from keras.models import Model, Sequential
7
+ from keras.layers import Dense, Dropout, Input, Activation, BatchNormalization
8
+ from keras.callbacks import EarlyStopping
9
+ from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
10
+ import numpy as np
11
+
12
+ import mlflow
13
+ from mlflow.utils.autologging_utils import batch_metrics_logger
14
+ import time
15
+
16
+ from mlflow.models import infer_signature
17
+ from ddi_fw.experiments.evaluation_helper import Metrics, evaluate
18
+
19
+ # import tf2onnx
20
+ # import onnx
21
+
22
+ import itertools
23
+ import ddi_fw.utils as utils
24
+
25
+
26
class TFSingleModal(SingleModal):
    """Keras single-modal runner.

    Fits one model per cross-validation fold, picks the fold with the highest
    final validation accuracy, and evaluates/predicts on the test data with it.
    """

    # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
    def predict(self):
        """Run cross-validated training and evaluate the best fold's model.

        Returns:
            ``(logs, metrics, pred)`` where ``logs`` and ``metrics`` come from
            ``evaluate`` and ``pred`` is the best model's test prediction.

        Raises:
            ValueError: If there are no cross-validation folds.
        """
        print(self.train_data.shape)

        folds = list(zip(self.train_idx_arr, self.val_idx_arr))
        if not folds:
            # Fail before any MLflow run is opened, with a clearer message
            # than the bare max() error this would otherwise end in.
            raise ValueError("At least one cross-validation fold is required.")

        # Failed to convert a NumPy array to a Tensor
        with mlflow.start_run(run_name=self.descriptor, description="***", nested=True) as run:
            models = {}
            models_val_acc = {}
            for i, (train_idx, val_idx) in enumerate(folds):
                print(f"Validation {i}")

                with mlflow.start_run(run_name=f'Validation {i}', description='CV models', nested=True):
                    model = self.model_func(self.train_data.shape[1])
                    models[f'validation_{i}'] = model
                    X_train_cv = self.train_data[train_idx]
                    y_train_cv = self.train_label[train_idx]
                    X_valid_cv = self.train_data[val_idx]
                    y_valid_cv = self.train_label[val_idx]

                    early_stopping = EarlyStopping(
                        monitor='val_loss', patience=10, verbose=0, mode='auto')
                    history = model.fit(X_train_cv, y_train_cv,
                                        batch_size=self.batch_size,
                                        epochs=self.epochs,
                                        validation_data=(
                                            X_valid_cv, y_valid_cv),
                                        callbacks=[early_stopping, CustomCallback()])
                    # Uses the last epoch's value; requires the model to be
                    # compiled with an 'accuracy' metric.
                    models_val_acc[f'validation_{i}'] = history.history['val_accuracy'][-1]

            best_model_key = max(models_val_acc, key=models_val_acc.get)
            best_model = models[best_model_key]
            # A fresh callback instead of the one left over from the last fold.
            best_model.evaluate(self.test_data, self.test_label,
                                callbacks=[CustomCallback()])
            pred = best_model.predict(self.test_data)

            logs, metrics = evaluate(
                actual=self.test_label, pred=pred, info=self.descriptor)
            metrics.format_float()
            mlflow.log_metrics(logs)
            mlflow.log_param('best_cv', best_model_key)
            signature = infer_signature(
                self.train_data,
                # generate_signature_output(model,X_valid_cv)
                # params=params,
            )

            mlflow.keras.save_model(
                best_model,
                path=run.info.artifact_uri + '/model',
                signature=signature,
            )
            print(run.info.artifact_uri)
            # TODO: tf2onnx is not compatible with keras > 2.15
            # onnx_model, _ = tf2onnx.convert.from_keras(
            #     best_model, input_signature=None, opset=13)
            # onnx.save(onnx_model, run.info.artifact_uri +
            #           '/model/model.onnx')
            utils.compress_and_save_data(
                metrics.__dict__, run.info.artifact_uri, f'{self.date}_metrics.gzip')

        return logs, metrics, pred
93
+
94
+
95
class CustomCallback(keras.callbacks.Callback):
    """Keras callback that forwards training information to MLflow.

    Only the hooks that log something are overridden; every other hook keeps
    the implementation inherited from ``keras.callbacks.Callback``. All hooks
    tolerate ``logs=None``, which is their declared default.
    """

    def on_train_begin(self, logs=None):
        """Log the received keys, the optimizer config and the model summary."""
        logs = logs or {}
        mlflow.log_param("train_begin_keys", list(logs.keys()))
        config = self.model.optimizer.get_config()
        for attribute in config:
            mlflow.log_param("opt_" + attribute, config[attribute])

        # model.summary() emits line by line; collect the lines and store
        # them as one text artifact.
        summary_lines = []
        self.model.summary(print_fn=summary_lines.append)
        summary = "\n".join(summary_lines)
        mlflow.log_text(summary, artifact_file="model_summary.txt")

    def on_train_end(self, logs=None):
        """Print and log the final training metrics."""
        logs = logs or {}
        print(logs)
        mlflow.log_metrics(logs)

    def on_test_end(self, logs=None):
        """Log and print the evaluation metrics."""
        logs = logs or {}
        mlflow.log_metrics(logs)
        print(logs)

    def on_predict_end(self, logs=None):
        """Log whatever metrics are reported at the end of prediction."""
        logs = logs or {}
        mlflow.log_metrics(logs)
@@ -78,9 +78,10 @@ class TFMultiModal:
78
78
  single_modal.set_data(
79
79
  self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
80
80
  logs, metrics, prediction = single_modal.predict()
81
- # self.result.add_log(item[0], logs)
81
+ self.result.add_log(item[0], logs)
82
82
  # self.result.add_metric(item[0], metrics)
83
- single_results[item[0]] = prediction
83
+ # single_results[item[0]] = prediction
84
+ single_results[item[0]] = tf.nn.softmax(prediction).numpy()
84
85
  # sum = sum + prediction
85
86
 
86
87
  if combinations:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.73
3
+ Version: 0.0.75
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -36,10 +36,17 @@ Requires-Dist: scikit-learn==1.5.2
36
36
  Requires-Dist: scipy==1.13.1
37
37
  Requires-Dist: accelerate==0.33.0
38
38
  Requires-Dist: sentence-transformers==3.0.1
39
+ Requires-Dist: transformers==4.42.4
39
40
  Requires-Dist: stanza==1.9.2
40
41
  Requires-Dist: tokenizers==0.19.1
41
42
  Requires-Dist: tqdm==4.66.5
42
43
  Requires-Dist: xmlschema==3.4.2
43
44
  Requires-Dist: zipp==3.20.2
44
45
  Requires-Dist: py7zr==0.22.0
46
+ Requires-Dist: openai==1.52.2
47
+ Requires-Dist: langchain==0.3.4
48
+ Requires-Dist: chromadb==0.5.15
49
+ Requires-Dist: langchain-community==0.3.3
50
+ Requires-Dist: datasets==3.0.2
51
+ Requires-Dist: unstructured==0.16.3
45
52
 
@@ -55,13 +55,16 @@ ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVs
55
55
  ddi_fw/drugbank/drugbank_processor.py,sha256=vmkt68n9nFLevufgGyXhOSDtTo4G1XzwT9PVncGTXtk,18127
56
56
  ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
57
57
  ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
58
- ddi_fw/experiments/__init__.py,sha256=5L2xSolpFycNnflqOMdvJSiqRB16ExA5bbVGORKFX04,195
58
+ ddi_fw/experiments/__init__.py,sha256=FwfHXSKhWrkAYq5-FEFZqCl7i3udr4mfxZEYNadlvAI,248
59
59
  ddi_fw/experiments/custom_torch_model.py,sha256=iQ_R_EApzD2JCcASN8cie6D21oh7VCxaOQ45_dkiGwc,2576
60
60
  ddi_fw/experiments/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
61
+ ddi_fw/experiments/ml_helper.py,sha256=OAFYCrwsvCdtAGo407Cwu4xvl7GMq0vwa2gndFImSUA,5468
62
+ ddi_fw/experiments/ml_pt.py,sha256=9Tl_kn5u6CRqMcJBpfqhCXamyMTrU8v97zJMUPm6K3A,3727
63
+ ddi_fw/experiments/ml_tf.py,sha256=jWqq5sQjoazNxtDiUlFWtygOiga3FSZLsmrZm0vifcE,5788
61
64
  ddi_fw/experiments/pipeline.py,sha256=N07EBv2IGa9oD0A1XxvUktDjGHi0SFmt3QqupF2rs3k,5681
62
65
  ddi_fw/experiments/pipeline_builder_pattern.py,sha256=w6x7ietk4vONCAvUfssPycaRUQIYUJsbCNNj3BTASBI,5454
63
66
  ddi_fw/experiments/pipeline_ner.py,sha256=unxEJCYrG6wEZjLmqvGdLRTMOBwELbGKkdygSpAR3b8,5043
64
- ddi_fw/experiments/tensorflow_helper.py,sha256=xUnbntWyc2Wm4TvmVFAnpwLHg-o13oM26GUHom6d5m0,11776
67
+ ddi_fw/experiments/tensorflow_helper.py,sha256=m3Mppl-tbccTMAKLpZg2YC0xpcukkyQihPw_uwAlRRY,11857
65
68
  ddi_fw/experiments/test.py,sha256=z1TfBpK75zGKpp2ZU8f6APjZlgBFthaCBN61YB9ma4o,2049
66
69
  ddi_fw/langchain/__init__.py,sha256=8dBPZivc01WWaCH8sZ_UV8-XPyo74e9Qy6-fYgAiNLE,248
67
70
  ddi_fw/langchain/embeddings.py,sha256=8J_SfO9pyET2W-Ltzq0_r9EchFzBsYdUabiOMma42Us,7515
@@ -86,7 +89,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
86
89
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
87
90
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
88
91
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
89
- ddi_fw-0.0.73.dist-info/METADATA,sha256=_TGLs-BxfZCpTdHvOUpiuniNKSF9iJDRkipbMn6ovR4,1720
90
- ddi_fw-0.0.73.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
91
- ddi_fw-0.0.73.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
92
- ddi_fw-0.0.73.dist-info/RECORD,,
92
+ ddi_fw-0.0.75.dist-info/METADATA,sha256=XXvBSuoVcdQ-npCJpaZiFACOsnk2_1EjwC2YMtOJxEk,1966
93
+ ddi_fw-0.0.75.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
94
+ ddi_fw-0.0.75.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
95
+ ddi_fw-0.0.75.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.2.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5