ddi-fw 0.0.78__py3-none-any.whl → 0.0.79__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/experiments/__init__.py +1 -2
- ddi_fw/ml/__init__.py +4 -0
- ddi_fw/{experiments → ml}/ml_helper.py +3 -32
- ddi_fw/ml/model_wrapper.py +35 -0
- ddi_fw/{experiments/ml_pt.py → ml/pytorch_wrapper.py} +1 -1
- ddi_fw/{experiments/ml_tf.py → ml/tensorflow_wrapper.py} +1 -2
- ddi_fw/pipeline/__init__.py +3 -0
- ddi_fw/pipeline/multi_modal_combination_strategy.py +39 -0
- ddi_fw/pipeline/multi_pipeline.py +111 -0
- ddi_fw/pipeline/pipeline.py +126 -0
- {ddi_fw-0.0.78.dist-info → ddi_fw-0.0.79.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.78.dist-info → ddi_fw-0.0.79.dist-info}/RECORD +14 -9
- ddi_fw/experiments/pipeline_builder_pattern.py +0 -152
- {ddi_fw-0.0.78.dist-info → ddi_fw-0.0.79.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.78.dist-info → ddi_fw-0.0.79.dist-info}/top_level.txt +0 -0
ddi_fw/experiments/__init__.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
|
2
2
|
from .evaluation_helper import evaluate, Metrics
|
3
3
|
from .pipeline import Experiment
|
4
|
-
from .pipeline_ner import NerParameterSearch
|
5
|
-
from .ml_helper import ModelWrapper,MultiModalRunner
|
4
|
+
from .pipeline_ner import NerParameterSearch
|
ddi_fw/ml/__init__.py
ADDED
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import Dict, List, Tuple
|
2
2
|
from matplotlib import pyplot as plt
|
3
|
-
from ddi_fw.
|
3
|
+
from ddi_fw.ml.model_wrapper import Result
|
4
|
+
from ddi_fw.ml.pytorch_wrapper import PTModelWrapper
|
4
5
|
from ddi_fw.experiments.ml_tf import TFModelWrapper
|
5
6
|
import tensorflow as tf
|
6
7
|
from tensorflow import keras
|
@@ -28,37 +29,7 @@ np.random.seed(2)
|
|
28
29
|
np.set_printoptions(precision=4)
|
29
30
|
|
30
31
|
|
31
|
-
class Result:
|
32
|
-
def __init__(self) -> None:
|
33
|
-
self.log_dict = {}
|
34
|
-
self.metric_dict = {}
|
35
32
|
|
36
|
-
def add_log(self, key, logs):
|
37
|
-
self.log_dict[key] = logs
|
38
|
-
|
39
|
-
def add_metric(self, key, metrics):
|
40
|
-
self.metric_dict[key] = metrics
|
41
|
-
|
42
|
-
|
43
|
-
class ModelWrapper:
|
44
|
-
def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100):
|
45
|
-
self.date = date
|
46
|
-
self.descriptor = descriptor
|
47
|
-
self.model_func = model_func
|
48
|
-
self.batch_size = batch_size
|
49
|
-
self.epochs = epochs
|
50
|
-
|
51
|
-
def set_data(self, train_idx_arr, val_idx_arr, train_data, train_label, test_data, test_label):
|
52
|
-
self.train_idx_arr = train_idx_arr
|
53
|
-
self.val_idx_arr = val_idx_arr
|
54
|
-
self.train_data = train_data
|
55
|
-
self.train_label = train_label
|
56
|
-
self.test_data = test_data
|
57
|
-
self.test_label = test_label
|
58
|
-
# https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
|
59
|
-
|
60
|
-
def predict(self) -> Tuple[Dict[str, float], Metrics, List[float]]:
|
61
|
-
pass
|
62
33
|
|
63
34
|
|
64
35
|
class MultiModalRunner:
|
@@ -101,7 +72,7 @@ class MultiModalRunner:
|
|
101
72
|
self.level_0_run_id = run.info.run_id
|
102
73
|
for item in self.items:
|
103
74
|
print(item[0])
|
104
|
-
T =self.__create_multi_modal(self.library)
|
75
|
+
T = self.__create_multi_modal(self.library)
|
105
76
|
single_modal=T(self.date, item[0], self.model_func, self.batch_size, self.epochs)
|
106
77
|
single_modal.set_data(
|
107
78
|
self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
|
@@ -0,0 +1,35 @@
|
|
1
|
+
from typing import Dict, List, Tuple
|
2
|
+
|
3
|
+
from ddi_fw.experiments.evaluation_helper import Metrics
|
4
|
+
|
5
|
+
class Result:
|
6
|
+
def __init__(self) -> None:
|
7
|
+
self.log_dict = {}
|
8
|
+
self.metric_dict = {}
|
9
|
+
|
10
|
+
def add_log(self, key, logs):
|
11
|
+
self.log_dict[key] = logs
|
12
|
+
|
13
|
+
def add_metric(self, key, metrics):
|
14
|
+
self.metric_dict[key] = metrics
|
15
|
+
|
16
|
+
|
17
|
+
class ModelWrapper:
|
18
|
+
def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100):
|
19
|
+
self.date = date
|
20
|
+
self.descriptor = descriptor
|
21
|
+
self.model_func = model_func
|
22
|
+
self.batch_size = batch_size
|
23
|
+
self.epochs = epochs
|
24
|
+
|
25
|
+
def set_data(self, train_idx_arr, val_idx_arr, train_data, train_label, test_data, test_label):
|
26
|
+
self.train_idx_arr = train_idx_arr
|
27
|
+
self.val_idx_arr = val_idx_arr
|
28
|
+
self.train_data = train_data
|
29
|
+
self.train_label = train_label
|
30
|
+
self.test_data = test_data
|
31
|
+
self.test_label = test_label
|
32
|
+
# https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
|
33
|
+
|
34
|
+
def predict(self) -> Tuple[Dict[str, float], Metrics, List[float]]:
|
35
|
+
pass
|
@@ -1,6 +1,5 @@
|
|
1
|
-
from typing import Dict, List, Tuple
|
2
1
|
from matplotlib import pyplot as plt
|
3
|
-
from ddi_fw.
|
2
|
+
from ddi_fw.ml.model_wrapper import ModelWrapper
|
4
3
|
import tensorflow as tf
|
5
4
|
from tensorflow import keras
|
6
5
|
from keras.models import Model, Sequential
|
@@ -0,0 +1,39 @@
|
|
1
|
+
import itertools
|
2
|
+
|
3
|
+
|
4
|
+
class CombinationStrategy():
|
5
|
+
def generate(self):
|
6
|
+
pass
|
7
|
+
|
8
|
+
|
9
|
+
class CustomCombinationStrategy(CombinationStrategy):
|
10
|
+
def __init__(self, **kwargs_combination_params):
|
11
|
+
# kwargs fonksiyona da alınabilir
|
12
|
+
self.group1 = kwargs_combination_params.get("group_1", None)
|
13
|
+
self.group2 = kwargs_combination_params.get("group_2", None)
|
14
|
+
|
15
|
+
def generate(self):
|
16
|
+
# Handle edge cases
|
17
|
+
if not self.group_1 or not self.group_2:
|
18
|
+
raise ValueError(
|
19
|
+
f"Parameters of combination strategy could not be empty.")
|
20
|
+
# return [] # Return an empty list if either group is empty
|
21
|
+
# combinations = []
|
22
|
+
# for j in self.group2:
|
23
|
+
# extended_item_group_1 = self.group_1.copy()
|
24
|
+
# extended_item_group_1.append(j)
|
25
|
+
# for i in range(2, len(extended_item_group_1) + 1):
|
26
|
+
# combinations.extend(list(itertools.combinations(extended_item_group_1, i))) #all
|
27
|
+
# combinations = list(set(combinations))
|
28
|
+
|
29
|
+
combinations = set() # Use a set to avoid duplicates directly
|
30
|
+
for j in self.group_2:
|
31
|
+
extended_item_group_1 = self.group_1.copy()
|
32
|
+
extended_item_group_1.append(j)
|
33
|
+
# Generate combinations of all lengths from 2 to len(group_1 + 1)
|
34
|
+
for i in range(2, len(extended_item_group_1) + 1):
|
35
|
+
combinations.update(itertools.combinations(
|
36
|
+
extended_item_group_1, i)) # Add combinations
|
37
|
+
|
38
|
+
# Convert set back to list (if needed) and return
|
39
|
+
return list(combinations)
|
@@ -0,0 +1,111 @@
|
|
1
|
+
import json
|
2
|
+
from pipeline import Pipeline
|
3
|
+
import importlib
|
4
|
+
|
5
|
+
|
6
|
+
def load_config(file_path):
|
7
|
+
with open(file_path, 'r') as file:
|
8
|
+
config = json.load(file)
|
9
|
+
return config
|
10
|
+
|
11
|
+
|
12
|
+
def get_import(full_path_of_import):
|
13
|
+
"""Dynamically imports an object from a module given its full path.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
full_path_of_import (str): The full path of the import (e.g., 'module.submodule.ClassName').
|
17
|
+
|
18
|
+
Returns:
|
19
|
+
object: The imported object.
|
20
|
+
|
21
|
+
Raises:
|
22
|
+
ImportError: If the module cannot be imported.
|
23
|
+
AttributeError: If the attribute does not exist in the module.
|
24
|
+
"""
|
25
|
+
if not full_path_of_import:
|
26
|
+
raise ValueError("The import path cannot be empty.")
|
27
|
+
|
28
|
+
parts = full_path_of_import.split('.')
|
29
|
+
import_name = parts[-1]
|
30
|
+
module_name = ".".join(parts[:-1]) if len(parts) > 1 else ""
|
31
|
+
|
32
|
+
try:
|
33
|
+
module = importlib.import_module(module_name)
|
34
|
+
return getattr(module, import_name)
|
35
|
+
except ModuleNotFoundError as e:
|
36
|
+
raise ImportError(f"Module '{module_name}' could not be found.") from e
|
37
|
+
except AttributeError as e:
|
38
|
+
raise AttributeError(
|
39
|
+
f"'{module_name}' has no attribute '{import_name}'") from e
|
40
|
+
|
41
|
+
|
42
|
+
class MultiPipeline():
|
43
|
+
def __init__(self, experiments_config_file):
|
44
|
+
self.experiments_config = load_config(experiments_config_file)
|
45
|
+
self.items = []
|
46
|
+
|
47
|
+
def __create_pipeline(self, config):
|
48
|
+
library = config["library"]
|
49
|
+
batch_size = config["batch_size"]
|
50
|
+
epochs = config["epochs"]
|
51
|
+
|
52
|
+
# dataset_module = config["dataset_module"]
|
53
|
+
# dataset_name = config["dataset_name"]
|
54
|
+
|
55
|
+
experiment_name = config["experiment_name"]
|
56
|
+
experiment_description = config["experiment_description"]
|
57
|
+
experiment_tags = config["experiment_tags"]
|
58
|
+
tracking_uri = config["tracking_uri"]
|
59
|
+
artifact_location = config["artifact_location"]
|
60
|
+
columns = config["columns"]
|
61
|
+
ner_data_file = config["ner_data_file"]
|
62
|
+
ner_threshold = config["ner_threshold"]
|
63
|
+
vector_db_persist_directory = config["vector_db_persist_directory"]
|
64
|
+
vector_db_collection_name = config["vector_db_collection_name"]
|
65
|
+
embedding_pooling_strategy = get_import(
|
66
|
+
config["embedding_pooling_strategy_type"])
|
67
|
+
# Dynamically import the model and dataset classes
|
68
|
+
model_type = get_import(config["model_type"])
|
69
|
+
dataset_type = get_import(config["dataset_type"])
|
70
|
+
combination_type = get_import(config["combination_strategy"]["type"])
|
71
|
+
kwargs_combination_params = config["combination_strategy"]["params"]
|
72
|
+
|
73
|
+
# # Instantiate the classes
|
74
|
+
# model_instance = model_class()
|
75
|
+
# dataset_instance = dataset_class()
|
76
|
+
return {
|
77
|
+
"name": experiment_name,
|
78
|
+
"library": library,
|
79
|
+
"batch_size": batch_size,
|
80
|
+
"epochs": epochs,
|
81
|
+
"model_type": model_type,
|
82
|
+
"pipeline": Pipeline(
|
83
|
+
library=library,
|
84
|
+
experiment_name=experiment_name,
|
85
|
+
experiment_description=experiment_description,
|
86
|
+
experiment_tags=experiment_tags,
|
87
|
+
artifact_location=artifact_location,
|
88
|
+
tracking_uri=tracking_uri,
|
89
|
+
dataset_type=dataset_type,
|
90
|
+
columns=columns,
|
91
|
+
vector_db_persist_directory=vector_db_persist_directory,
|
92
|
+
vector_db_collection_name=vector_db_collection_name,
|
93
|
+
embedding_pooling_strategy_type=embedding_pooling_strategy,
|
94
|
+
ner_data_file=ner_data_file,
|
95
|
+
ner_threshold=ner_threshold,
|
96
|
+
combinations=combination_type(**kwargs_combination_params).generate())}
|
97
|
+
|
98
|
+
def build(self):
|
99
|
+
for config in self.experiments_config['experiments']:
|
100
|
+
item = self.__create_pipeline(config)
|
101
|
+
self.items.append(item)
|
102
|
+
|
103
|
+
def run(self):
|
104
|
+
for item in self.items:
|
105
|
+
print(f"{item['name']} is running")
|
106
|
+
pipeline = item['pipeline']
|
107
|
+
model_type = item['model_type']
|
108
|
+
batch_size = item['batch_size']
|
109
|
+
epochs = item['epochs']
|
110
|
+
pipeline.build()
|
111
|
+
pipeline.run(model_type, epochs=epochs, batch_size=batch_size)
|
@@ -0,0 +1,126 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
import chromadb
|
4
|
+
from collections import defaultdict
|
5
|
+
from ddi_fw.ner.ner import CTakesNER
|
6
|
+
from ddi_fw.langchain.embeddings import PoolingStrategy
|
7
|
+
from ddi_fw.datasets import BaseDataset, DDIMDLDataset
|
8
|
+
from ddi_fw.langchain.embeddings import SumPoolingStrategy
|
9
|
+
import mlflow
|
10
|
+
from ml import MultiModalRunner
|
11
|
+
|
12
|
+
|
13
|
+
class Pipeline:
|
14
|
+
def __init__(self,
|
15
|
+
library='TF',
|
16
|
+
experiment_name=None,
|
17
|
+
experiment_description=None,
|
18
|
+
experiment_tags=None,
|
19
|
+
artifact_location=None,
|
20
|
+
tracking_uri=None,
|
21
|
+
dataset_type: BaseDataset = None,
|
22
|
+
columns=None,
|
23
|
+
embedding_dict=None,
|
24
|
+
vector_db_persist_directory=None,
|
25
|
+
vector_db_collection_name=None,
|
26
|
+
embedding_pooling_strategy_type: PoolingStrategy = None,
|
27
|
+
ner_data_file=None,
|
28
|
+
ner_threshold=None,
|
29
|
+
combinations=None,
|
30
|
+
model=None):
|
31
|
+
self.library = library
|
32
|
+
self.experiment_name = experiment_name
|
33
|
+
self.experiment_description = experiment_description
|
34
|
+
self.experiment_tags = experiment_tags
|
35
|
+
self.artifact_location = artifact_location
|
36
|
+
self.tracking_uri = tracking_uri
|
37
|
+
self.dataset_type = dataset_type
|
38
|
+
self.columns = columns
|
39
|
+
self.embedding_dict = embedding_dict
|
40
|
+
self.vector_db_persist_directory = vector_db_persist_directory
|
41
|
+
self.vector_db_collection_name = vector_db_collection_name
|
42
|
+
self.embedding_pooling_strategy_type = embedding_pooling_strategy_type
|
43
|
+
self.ner_data_file = ner_data_file
|
44
|
+
self.ner_threshold = ner_threshold
|
45
|
+
self.combinations = combinations
|
46
|
+
self.model = model
|
47
|
+
|
48
|
+
def build(self):
|
49
|
+
# 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
|
50
|
+
kwargs = {"columns": self.columns}
|
51
|
+
for k, v in self.ner_threshold.items():
|
52
|
+
kwargs[k] = v
|
53
|
+
if self.embedding_dict == None:
|
54
|
+
if self.vector_db_persist_directory:
|
55
|
+
self.vector_db = chromadb.PersistentClient(
|
56
|
+
path=self.vector_db_persist_directory)
|
57
|
+
self.collection = self.vector_db.get_collection(
|
58
|
+
self.vector_db_collection_name)
|
59
|
+
dictionary = self.collection.get(
|
60
|
+
include=['embeddings', 'metadatas'])
|
61
|
+
|
62
|
+
embedding_dict = defaultdict(lambda: defaultdict(list))
|
63
|
+
|
64
|
+
for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
|
65
|
+
embedding_dict[metadata["type"]
|
66
|
+
][metadata["id"]].append(embedding)
|
67
|
+
|
68
|
+
embedding_size = dictionary['embeddings'].shape[1]
|
69
|
+
else:
|
70
|
+
embedding_dict = self.embedding_dict
|
71
|
+
embedding_size = list(embedding_dict['all_text'].values())[
|
72
|
+
0][0].shape
|
73
|
+
|
74
|
+
pooling_strategy = self.embedding_pooling_strategy_type()
|
75
|
+
|
76
|
+
self.ner_df = CTakesNER().load(
|
77
|
+
filename=self.ner_data_file) if self.ner_data_file else None
|
78
|
+
|
79
|
+
self.dataset = self.dataset_type(
|
80
|
+
embedding_dict=embedding_dict,
|
81
|
+
embedding_size=embedding_size,
|
82
|
+
embeddings_pooling_strategy=pooling_strategy,
|
83
|
+
ner_df=self.ner_df, **kwargs)
|
84
|
+
|
85
|
+
X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset.load()
|
86
|
+
|
87
|
+
self.dataframe = self.dataset.dataframe
|
88
|
+
# dataframe.dropna()
|
89
|
+
self.X_train = self.dataset.X_train
|
90
|
+
self.X_test = self.dataset.X_test
|
91
|
+
self.y_train = self.dataset.y_train
|
92
|
+
self.y_test = self.dataset.y_test
|
93
|
+
self.train_idx_arr = self.dataset.train_idx_arr
|
94
|
+
self.val_idx_arr = self.dataset.val_idx_arr
|
95
|
+
# Logic to set up the experiment
|
96
|
+
self.items = self.dataset.produce_inputs()
|
97
|
+
|
98
|
+
unique_classes = pd.unique(self.dataframe['event_category'])
|
99
|
+
event_num = len(unique_classes)
|
100
|
+
# droprate = 0.3
|
101
|
+
vector_size = self.dataset.drugs_df.shape[0]
|
102
|
+
|
103
|
+
print("Building the experiment with the following settings:")
|
104
|
+
print(
|
105
|
+
f"Name: {self.experiment_name}, Dataset: {self.dataset}, Model: {self.model}")
|
106
|
+
# Implement additional build logic as needed
|
107
|
+
return self
|
108
|
+
|
109
|
+
def run(self, model_func, batch_size=128, epochs=100):
|
110
|
+
mlflow.set_tracking_uri(self.tracking_uri)
|
111
|
+
|
112
|
+
if mlflow.get_experiment_by_name(self.experiment_name) == None:
|
113
|
+
mlflow.create_experiment(
|
114
|
+
self.experiment_name, self.artifact_location)
|
115
|
+
mlflow.set_experiment_tags(self.experiment_tags)
|
116
|
+
mlflow.set_experiment(self.experiment_name)
|
117
|
+
|
118
|
+
y_test_label = self.items[0][4]
|
119
|
+
multi_modal_runner = MultiModalRunner(
|
120
|
+
library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
|
121
|
+
# multi_modal = TFMultiModal(
|
122
|
+
# model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
|
123
|
+
multi_modal_runner.set_data(
|
124
|
+
self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
|
125
|
+
result = multi_modal_runner.predict(self.combinations)
|
126
|
+
return result
|
@@ -55,14 +55,10 @@ ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVs
|
|
55
55
|
ddi_fw/drugbank/drugbank_processor.py,sha256=vmkt68n9nFLevufgGyXhOSDtTo4G1XzwT9PVncGTXtk,18127
|
56
56
|
ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
|
57
57
|
ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
|
58
|
-
ddi_fw/experiments/__init__.py,sha256=
|
58
|
+
ddi_fw/experiments/__init__.py,sha256=5L2xSolpFycNnflqOMdvJSiqRB16ExA5bbVGORKFX04,195
|
59
59
|
ddi_fw/experiments/custom_torch_model.py,sha256=iQ_R_EApzD2JCcASN8cie6D21oh7VCxaOQ45_dkiGwc,2576
|
60
60
|
ddi_fw/experiments/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
|
61
|
-
ddi_fw/experiments/ml_helper.py,sha256=1mH6IFhOG4eZ-GueTa_-8V9OATapWp_VclnCCpVwnnE,5473
|
62
|
-
ddi_fw/experiments/ml_pt.py,sha256=VHKegdX5-RyUpNN_l6XxMc2ZrSQg4h8uuQALInQXkRg,3730
|
63
|
-
ddi_fw/experiments/ml_tf.py,sha256=flOsVxCrok5zIlT4OHbk3NzEhtgyybMAQOENI6Itn9I,5791
|
64
61
|
ddi_fw/experiments/pipeline.py,sha256=4ltPCcfLZ1fFpiOd8ahPognI6NLmRLzJvUqyFpn3z18,5693
|
65
|
-
ddi_fw/experiments/pipeline_builder_pattern.py,sha256=w6x7ietk4vONCAvUfssPycaRUQIYUJsbCNNj3BTASBI,5454
|
66
62
|
ddi_fw/experiments/pipeline_ner.py,sha256=unxEJCYrG6wEZjLmqvGdLRTMOBwELbGKkdygSpAR3b8,5043
|
67
63
|
ddi_fw/experiments/tensorflow_helper.py,sha256=m3Mppl-tbccTMAKLpZg2YC0xpcukkyQihPw_uwAlRRY,11857
|
68
64
|
ddi_fw/experiments/test.py,sha256=z1TfBpK75zGKpp2ZU8f6APjZlgBFthaCBN61YB9ma4o,2049
|
@@ -70,9 +66,18 @@ ddi_fw/langchain/__init__.py,sha256=8dBPZivc01WWaCH8sZ_UV8-XPyo74e9Qy6-fYgAiNLE,
|
|
70
66
|
ddi_fw/langchain/embeddings.py,sha256=8J_SfO9pyET2W-Ltzq0_r9EchFzBsYdUabiOMma42Us,7515
|
71
67
|
ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
|
72
68
|
ddi_fw/langchain/storage.py,sha256=uy5clVB07So2eFbRGdAKzHIPdfEk4se33cPktis7Aa4,2716
|
69
|
+
ddi_fw/ml/__init__.py,sha256=0YubqmEpJKp3OfqlLKkD5N9L6WDWew3QEtnbdY3mqKg,180
|
70
|
+
ddi_fw/ml/ml_helper.py,sha256=juDcTi8IEQk2D4mkY4qVX75rRM0FmksULRQzyNHKw2A,4475
|
71
|
+
ddi_fw/ml/model_wrapper.py,sha256=ZExnsLMjHKL3BaI4aKkbyWTp8vbswLeF2_T3cZ73YpQ,1144
|
72
|
+
ddi_fw/ml/pytorch_wrapper.py,sha256=YdwzR5qAHFNajYB_elFqDhVKRLeajaRpopNzyQ6gIIA,3725
|
73
|
+
ddi_fw/ml/tensorflow_wrapper.py,sha256=pSeiJDuaLf9MhZVlLuLJBA-LH-H-Dl2TyYbB39iGsto,5748
|
73
74
|
ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
|
74
75
|
ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
|
75
76
|
ddi_fw/ner/ner.py,sha256=BEs9AFljAxOQrC2BEP1raSzRoypcfELS5UTdl4bjTqw,15863
|
77
|
+
ddi_fw/pipeline/__init__.py,sha256=qryVi8bTsbpbMsseOuSEi1Siign0LkbFLPWiIR7OGHE,165
|
78
|
+
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=YkPixHVo9-4SPkY8VaWvBe1aaI5IiV4oZT4kBrm2WHQ,1635
|
79
|
+
ddi_fw/pipeline/multi_pipeline.py,sha256=UgTEcT2UfkRKR3Ri_Nrtz9GrQNQHGOSUrw9h5AwFUMI,4356
|
80
|
+
ddi_fw/pipeline/pipeline.py,sha256=WJnz5zEIa-9n4qEs8-1ubqTMPsLYjFGdFYJPbn92i98,5512
|
76
81
|
ddi_fw/test/basic_test.py,sha256=fEOGcZm1ObnsDvMiXNmdmz6YCeUrGc8V0DwlSwGhsq8,376
|
77
82
|
ddi_fw/test/combination_test.py,sha256=TWNE8sf-DSh1Q9-yRaRBc774Sn1kSMGXLwQhd2_Qynk,324
|
78
83
|
ddi_fw/test/compress_json_test.py,sha256=BGny56YqiG-pzhMoDzLKQBQI1E7o3jU0S7VYWtclAx4,1045
|
@@ -89,7 +94,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
|
89
94
|
ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
|
90
95
|
ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
|
91
96
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
92
|
-
ddi_fw-0.0.
|
93
|
-
ddi_fw-0.0.
|
94
|
-
ddi_fw-0.0.
|
95
|
-
ddi_fw-0.0.
|
97
|
+
ddi_fw-0.0.79.dist-info/METADATA,sha256=Acf-Yb4NTk6aKueaQ3tbti_Ykxm6mkzJP-mreWb8UWI,1966
|
98
|
+
ddi_fw-0.0.79.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
99
|
+
ddi_fw-0.0.79.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
100
|
+
ddi_fw-0.0.79.dist-info/RECORD,,
|
@@ -1,152 +0,0 @@
|
|
1
|
-
import sqlite3
|
2
|
-
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
|
3
|
-
from keras.models import Model, Sequential
|
4
|
-
from keras.callbacks import EarlyStopping
|
5
|
-
from keras.layers import Dense, Dropout, Input, Activation, BatchNormalization
|
6
|
-
from tensorflow import keras
|
7
|
-
from ddi_fw.experiments import TFSingleModal, TFMultiModal
|
8
|
-
from ddi_fw.experiments import evaluate
|
9
|
-
from sklearn.preprocessing import LabelBinarizer
|
10
|
-
import numpy as np
|
11
|
-
import pandas as pd
|
12
|
-
from ddi_fw.utils import ZipHelper, Py7ZipHelper
|
13
|
-
import os
|
14
|
-
import chromadb
|
15
|
-
from collections import defaultdict
|
16
|
-
from langchain_community.vectorstores import Chroma
|
17
|
-
from ddi_fw.ner.ner import CTakesNER
|
18
|
-
from ddi_fw.langchain.embeddings import PoolingStrategy
|
19
|
-
|
20
|
-
from ddi_fw.datasets import BaseDataset, DDIMDLDataset
|
21
|
-
|
22
|
-
from ddi_fw.datasets import SumPoolingStrategy
|
23
|
-
from keras import metrics
|
24
|
-
from ddi_fw.experiments.evaluation_helper import evaluate
|
25
|
-
|
26
|
-
import mlflow
|
27
|
-
|
28
|
-
|
29
|
-
class Experiment:
|
30
|
-
def __init__(self):
|
31
|
-
pass
|
32
|
-
|
33
|
-
@staticmethod
|
34
|
-
def create():
|
35
|
-
return Experiment()
|
36
|
-
|
37
|
-
def name(self, name):
|
38
|
-
self.experiment_name = name
|
39
|
-
return self
|
40
|
-
|
41
|
-
def description(self, description):
|
42
|
-
self.experiment_description = description
|
43
|
-
return self
|
44
|
-
|
45
|
-
def tags(self, tags):
|
46
|
-
self.experiment_tags = tags
|
47
|
-
return self
|
48
|
-
|
49
|
-
def tracking_uri(self, uri):
|
50
|
-
self.tracking_uri = uri
|
51
|
-
return self
|
52
|
-
|
53
|
-
def dataset(self, dataset_type: BaseDataset):
|
54
|
-
self.dataset_type = dataset_type
|
55
|
-
return self
|
56
|
-
|
57
|
-
def columns(self, cols):
|
58
|
-
self.columns = cols
|
59
|
-
return self
|
60
|
-
|
61
|
-
def vectordb_collection(self, persist_directory, collection_name):
|
62
|
-
self.vector_db_persist_directory = persist_directory
|
63
|
-
self.vector_db_collection_name = collection_name
|
64
|
-
return self
|
65
|
-
|
66
|
-
def embedding_pooling_strategy(self, strategy_type: PoolingStrategy):
|
67
|
-
self.embedding_pooling_strategy_type = strategy_type
|
68
|
-
return self
|
69
|
-
|
70
|
-
def ner_data_file(self, ner_data_file):
|
71
|
-
self.ner_data_file = ner_data_file
|
72
|
-
self.ner_df = CTakesNER().load(filename=ner_data_file)
|
73
|
-
return self
|
74
|
-
|
75
|
-
def ner_threshold(self, threshold):
|
76
|
-
self.ner_threshold = threshold
|
77
|
-
return self
|
78
|
-
|
79
|
-
def combinations(self, combs):
|
80
|
-
self.combinations = combs
|
81
|
-
return self
|
82
|
-
|
83
|
-
def model(self, model):
|
84
|
-
self.model = model
|
85
|
-
return self
|
86
|
-
|
87
|
-
def build(self):
|
88
|
-
# 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
|
89
|
-
kwargs = {"columns": self.columns}
|
90
|
-
for k, v in self.ner_threshold.items():
|
91
|
-
kwargs[k] = v
|
92
|
-
|
93
|
-
self.vector_db = chromadb.PersistentClient(
|
94
|
-
path=self.vector_db_persist_directory)
|
95
|
-
self.collection = self.vector_db.get_collection(
|
96
|
-
self.vector_db_collection_name)
|
97
|
-
dictionary = self.collection.get(include=['embeddings', 'metadatas'])
|
98
|
-
|
99
|
-
embedding_dict = defaultdict(lambda: defaultdict(list))
|
100
|
-
|
101
|
-
for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
|
102
|
-
embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
|
103
|
-
|
104
|
-
embedding_size = dictionary['embeddings'].shape[1]
|
105
|
-
|
106
|
-
pooling_strategy = self.embedding_pooling_strategy_type()
|
107
|
-
|
108
|
-
self.dataset = self.dataset_type(
|
109
|
-
embedding_dict=embedding_dict,
|
110
|
-
embedding_size=embedding_size,
|
111
|
-
embeddings_pooling_strategy=pooling_strategy,
|
112
|
-
ner_df=self.ner_df, kwargs=kwargs)
|
113
|
-
|
114
|
-
X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset.load()
|
115
|
-
|
116
|
-
self.dataframe = self.dataset.dataframe
|
117
|
-
# dataframe.dropna()
|
118
|
-
self.X_train = self.dataset.X_train
|
119
|
-
self.X_test = self.dataset.X_test
|
120
|
-
self.y_train = self.dataset.y_train
|
121
|
-
self.y_test = self.dataset.y_test
|
122
|
-
self.train_idx_arr = self.dataset.train_idx_arr
|
123
|
-
self.val_idx_arr = self.dataset.val_idx_arr
|
124
|
-
# Logic to set up the experiment
|
125
|
-
self.items = self.dataset.produce_inputs()
|
126
|
-
|
127
|
-
unique_classes = pd.unique(self.dataframe['event_category'])
|
128
|
-
event_num = len(unique_classes)
|
129
|
-
# droprate = 0.3
|
130
|
-
vector_size = self.dataset.drugs_df.shape[0]
|
131
|
-
|
132
|
-
print("Building the experiment with the following settings:")
|
133
|
-
print(
|
134
|
-
f"Name: {self.experiment_name}, Dataset: {self.dataset}, Model: {self.model}")
|
135
|
-
# Implement additional build logic as needed
|
136
|
-
return self
|
137
|
-
|
138
|
-
def run(self, model_func, batch_size=128, epochs=100):
|
139
|
-
mlflow.set_tracking_uri(self.tracking_uri)
|
140
|
-
|
141
|
-
if mlflow.get_experiment_by_name(self.experiment_name) == None:
|
142
|
-
mlflow.create_experiment(self.experiment_name)
|
143
|
-
mlflow.set_experiment_tags(self.experiment_tags)
|
144
|
-
mlflow.set_experiment(self.experiment_name)
|
145
|
-
|
146
|
-
y_test_label = self.items[0][4]
|
147
|
-
multi_modal = TFMultiModal(
|
148
|
-
model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
|
149
|
-
multi_modal.set_data(
|
150
|
-
self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
|
151
|
-
pred, self.single_results = multi_modal.predict(self.combinations)
|
152
|
-
return self
|
File without changes
|
File without changes
|