ddi-fw 0.0.78__py3-none-any.whl → 0.0.79__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
1
1
  from .tensorflow_helper import TFMultiModal, TFSingleModal,Result
2
2
  from .evaluation_helper import evaluate, Metrics
3
3
  from .pipeline import Experiment
4
- from .pipeline_ner import NerParameterSearch
5
- from .ml_helper import ModelWrapper,MultiModalRunner
4
+ from .pipeline_ner import NerParameterSearch
ddi_fw/ml/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .ml_helper import MultiModalRunner
2
+ from .model_wrapper import ModelWrapper,Result
3
+ from .tensorflow_wrapper import TFModelWrapper
4
+ from .pytorch_wrapper import PTModelWrapper
@@ -1,6 +1,7 @@
1
1
  from typing import Dict, List, Tuple
2
2
  from matplotlib import pyplot as plt
3
- from ddi_fw.experiments.ml_pt import PTModelWrapper
3
+ from ddi_fw.ml.model_wrapper import Result
4
+ from ddi_fw.ml.pytorch_wrapper import PTModelWrapper
4
5
  from ddi_fw.experiments.ml_tf import TFModelWrapper
5
6
  import tensorflow as tf
6
7
  from tensorflow import keras
@@ -28,37 +29,7 @@ np.random.seed(2)
28
29
  np.set_printoptions(precision=4)
29
30
 
30
31
 
31
- class Result:
32
- def __init__(self) -> None:
33
- self.log_dict = {}
34
- self.metric_dict = {}
35
32
 
36
- def add_log(self, key, logs):
37
- self.log_dict[key] = logs
38
-
39
- def add_metric(self, key, metrics):
40
- self.metric_dict[key] = metrics
41
-
42
-
43
- class ModelWrapper:
44
- def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100):
45
- self.date = date
46
- self.descriptor = descriptor
47
- self.model_func = model_func
48
- self.batch_size = batch_size
49
- self.epochs = epochs
50
-
51
- def set_data(self, train_idx_arr, val_idx_arr, train_data, train_label, test_data, test_label):
52
- self.train_idx_arr = train_idx_arr
53
- self.val_idx_arr = val_idx_arr
54
- self.train_data = train_data
55
- self.train_label = train_label
56
- self.test_data = test_data
57
- self.test_label = test_label
58
- # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
59
-
60
- def predict(self) -> Tuple[Dict[str, float], Metrics, List[float]]:
61
- pass
62
33
 
63
34
 
64
35
  class MultiModalRunner:
@@ -101,7 +72,7 @@ class MultiModalRunner:
101
72
  self.level_0_run_id = run.info.run_id
102
73
  for item in self.items:
103
74
  print(item[0])
104
- T =self.__create_multi_modal(self.library)
75
+ T = self.__create_multi_modal(self.library)
105
76
  single_modal=T(self.date, item[0], self.model_func, self.batch_size, self.epochs)
106
77
  single_modal.set_data(
107
78
  self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
@@ -0,0 +1,35 @@
1
+ from typing import Dict, List, Tuple
2
+
3
+ from ddi_fw.experiments.evaluation_helper import Metrics
4
+
5
class Result:
    """Accumulates training logs and evaluation metrics, each stored by key."""

    def __init__(self) -> None:
        # Two independent mappings: key -> logs and key -> metrics.
        self.log_dict, self.metric_dict = {}, {}

    def add_log(self, key, logs):
        """Store ``logs`` under ``key``, replacing any previous entry."""
        self.log_dict.update({key: logs})

    def add_metric(self, key, metrics):
        """Store ``metrics`` under ``key``, replacing any previous entry."""
        self.metric_dict.update({key: metrics})
15
+
16
+
17
class ModelWrapper:
    """Base holder for a model factory, its training settings and its data.

    ``predict`` is a placeholder here; the framework-specific wrappers in this
    package subclass this type.
    """

    def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100):
        """Remember the run identification and the training hyper-parameters."""
        self.date, self.descriptor = date, descriptor
        self.model_func = model_func
        self.batch_size, self.epochs = batch_size, epochs

    def set_data(self, train_idx_arr, val_idx_arr, train_data, train_label, test_data, test_label):
        """Attach the train/validation index arrays and the train/test data."""
        self.train_idx_arr, self.val_idx_arr = train_idx_arr, val_idx_arr
        self.train_data, self.train_label = train_data, train_label
        self.test_data, self.test_label = test_data, test_label
        # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py

    def predict(self) -> Tuple[Dict[str, float], Metrics, List[float]]:
        """Placeholder: the base implementation does nothing and returns None."""
        return None
@@ -1,7 +1,7 @@
1
1
  import mlflow
2
2
  import torch
3
- from ddi_fw.experiments.ml_helper import ModelWrapper
4
3
  from ddi_fw.experiments.evaluation_helper import evaluate
4
+ from ddi_fw.ml.model_wrapper import ModelWrapper
5
5
 
6
6
 
7
7
  class PTModelWrapper(ModelWrapper):
@@ -1,6 +1,5 @@
1
- from typing import Dict, List, Tuple
2
1
  from matplotlib import pyplot as plt
3
- from ddi_fw.experiments.ml_helper import ModelWrapper
2
+ from ddi_fw.ml.model_wrapper import ModelWrapper
4
3
  import tensorflow as tf
5
4
  from tensorflow import keras
6
5
  from keras.models import Model, Sequential
@@ -0,0 +1,3 @@
1
# Public API of the ``ddi_fw.pipeline`` package.
# Every sibling module is imported relatively: a bare
# ``from multi_modal_combination_strategy import ...`` is an absolute import
# and only resolves when this directory itself happens to be on ``sys.path``.
from .pipeline import Pipeline
from .multi_pipeline import MultiPipeline
from .multi_modal_combination_strategy import CombinationStrategy, CustomCombinationStrategy
@@ -0,0 +1,39 @@
1
+ import itertools
2
+
3
+
4
class CombinationStrategy():
    """Base type for strategies that produce modality combinations."""

    def generate(self):
        """Return the combinations; the base implementation returns None."""
        pass


class CustomCombinationStrategy(CombinationStrategy):
    """Combine every item of ``group_1`` with one item of ``group_2`` at a time.

    Keyword Args:
        group_1: Items that take part in every candidate set.
        group_2: Items that are added to ``group_1`` one at a time.
    """

    def __init__(self, **kwargs_combination_params):
        # NOTE: the kwargs could alternatively be accepted by ``generate``.
        self.group_1 = kwargs_combination_params.get("group_1", None)
        self.group_2 = kwargs_combination_params.get("group_2", None)
        # Earlier attribute spelling, kept as aliases for existing readers.
        self.group1 = self.group_1
        self.group2 = self.group_2

    def generate(self):
        """Return the unique combinations as a list of tuples.

        For each item ``j`` of ``group_2`` the candidate set is
        ``group_1 + [j]``; all of its combinations of length 2 up to the full
        length are collected. Duplicates are dropped and the first-seen order
        is kept, so the result is reproducible between runs.

        Raises:
            ValueError: If either group is missing or empty.
        """
        if not self.group_1 or not self.group_2:
            raise ValueError(
                "Parameters of combination strategy could not be empty.")

        # A dict is used as an insertion-ordered set.
        unique_combinations = {}
        for extra_item in self.group_2:
            candidates = [*self.group_1, extra_item]
            for size in range(2, len(candidates) + 1):
                unique_combinations.update(
                    dict.fromkeys(itertools.combinations(candidates, size)))

        return list(unique_combinations)
@@ -0,0 +1,111 @@
1
import importlib
import json

# Relative import: ``pipeline`` is a sibling module inside this package, and a
# bare ``from pipeline import ...`` does not resolve once the package is
# installed.
from .pipeline import Pipeline
4
+
5
+
6
def load_config(file_path):
    """Read a JSON configuration file and return the parsed object.

    The file is opened explicitly as UTF-8 so that non-ASCII values load the
    same way regardless of the platform's default encoding.

    Args:
        file_path: Path of the JSON file.

    Returns:
        The decoded JSON content.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
10
+
11
+
12
def get_import(full_path_of_import):
    """Dynamically import an object from a module given its full dotted path.

    Args:
        full_path_of_import (str): The full path of the import
            (e.g., 'module.submodule.ClassName').

    Returns:
        object: The imported object.

    Raises:
        ValueError: If the path is empty or has no module part (no dot).
        ImportError: If the module cannot be found.
        AttributeError: If the attribute does not exist in the module.
    """
    if not full_path_of_import:
        raise ValueError("The import path cannot be empty.")

    module_name, _, import_name = full_path_of_import.rpartition('.')
    if not module_name:
        # Without a module part there is nothing to import the name from.
        raise ValueError(
            f"'{full_path_of_import}' must be a dotted path such as "
            "'module.submodule.ClassName'.")

    try:
        module = importlib.import_module(module_name)
    except ModuleNotFoundError as e:
        raise ImportError(f"Module '{module_name}' could not be found.") from e

    # Kept outside the import ``try`` so that only a missing attribute on the
    # loaded module is reported here.
    try:
        return getattr(module, import_name)
    except AttributeError as e:
        raise AttributeError(
            f"'{module_name}' has no attribute '{import_name}'") from e
40
+
41
+
42
class MultiPipeline():
    """Create and run one ``Pipeline`` per entry of a JSON experiments file.

    The file must hold an object with an ``"experiments"`` list; each entry
    supplies the keys read in ``__create_pipeline``.
    """

    def __init__(self, experiments_config_file):
        """Load the experiments configuration from ``experiments_config_file``."""
        self.experiments_config = load_config(experiments_config_file)
        self.items = []

    def __create_pipeline(self, config):
        """Turn one experiment entry into a dict describing a ready ``Pipeline``.

        Dotted type names in the entry are resolved with ``get_import``.

        Raises:
            KeyError: If a required key is missing from ``config``.
        """
        library = config["library"]
        batch_size = config["batch_size"]
        epochs = config["epochs"]

        experiment_name = config["experiment_name"]
        experiment_description = config["experiment_description"]
        experiment_tags = config["experiment_tags"]
        tracking_uri = config["tracking_uri"]
        artifact_location = config["artifact_location"]
        columns = config["columns"]
        ner_data_file = config["ner_data_file"]
        ner_threshold = config["ner_threshold"]
        vector_db_persist_directory = config["vector_db_persist_directory"]
        vector_db_collection_name = config["vector_db_collection_name"]

        # Dynamically import the strategy, model and dataset types.
        embedding_pooling_strategy = get_import(
            config["embedding_pooling_strategy_type"])
        model_type = get_import(config["model_type"])
        dataset_type = get_import(config["dataset_type"])
        combination_type = get_import(config["combination_strategy"]["type"])
        kwargs_combination_params = config["combination_strategy"]["params"]

        pipeline = Pipeline(
            library=library,
            experiment_name=experiment_name,
            experiment_description=experiment_description,
            experiment_tags=experiment_tags,
            artifact_location=artifact_location,
            tracking_uri=tracking_uri,
            dataset_type=dataset_type,
            columns=columns,
            vector_db_persist_directory=vector_db_persist_directory,
            vector_db_collection_name=vector_db_collection_name,
            embedding_pooling_strategy_type=embedding_pooling_strategy,
            ner_data_file=ner_data_file,
            ner_threshold=ner_threshold,
            combinations=combination_type(**kwargs_combination_params).generate())

        return {
            "name": experiment_name,
            "library": library,
            "batch_size": batch_size,
            "epochs": epochs,
            "model_type": model_type,
            "pipeline": pipeline,
        }

    def build(self):
        """Create the pipeline items for every configured experiment.

        The item list is rebuilt from scratch on each call, so calling
        ``build`` more than once does not duplicate experiments.

        Returns:
            MultiPipeline: ``self``, to allow chaining.
        """
        self.items = [self.__create_pipeline(config)
                      for config in self.experiments_config['experiments']]
        return self

    def run(self):
        """Build and run every prepared pipeline in order.

        Returns:
            dict: The value returned by each pipeline's ``run``, keyed by
            experiment name (a repeated name keeps the last result).
        """
        results = {}
        for item in self.items:
            print(f"{item['name']} is running")
            pipeline = item['pipeline']
            pipeline.build()
            results[item['name']] = pipeline.run(
                item['model_type'],
                epochs=item['epochs'],
                batch_size=item['batch_size'])
        return results
@@ -0,0 +1,126 @@
1
from collections import defaultdict

import chromadb
import mlflow
import numpy as np
import pandas as pd

from ddi_fw.datasets import BaseDataset, DDIMDLDataset
from ddi_fw.langchain.embeddings import PoolingStrategy
from ddi_fw.langchain.embeddings import SumPoolingStrategy
# Fully qualified: a bare ``from ml import ...`` only resolves when the
# ``ddi_fw`` directory itself is on ``sys.path``.
from ddi_fw.ml import MultiModalRunner
from ddi_fw.ner.ner import CTakesNER
11
+
12
+
13
class Pipeline:
    """One experiment: prepare embeddings and a dataset, then train and evaluate.

    ``build`` loads the embeddings (from ``embedding_dict`` or from a Chroma
    collection), instantiates ``dataset_type`` and caches its splits on the
    instance. ``run`` configures MLflow and delegates to ``MultiModalRunner``.
    """

    def __init__(self,
                 library='TF',
                 experiment_name=None,
                 experiment_description=None,
                 experiment_tags=None,
                 artifact_location=None,
                 tracking_uri=None,
                 dataset_type: BaseDataset = None,
                 columns=None,
                 embedding_dict=None,
                 vector_db_persist_directory=None,
                 vector_db_collection_name=None,
                 embedding_pooling_strategy_type: PoolingStrategy = None,
                 ner_data_file=None,
                 ner_threshold=None,
                 combinations=None,
                 model=None):
        """Store the experiment settings; nothing is loaded until ``build``.

        ``dataset_type`` and ``embedding_pooling_strategy_type`` are called in
        ``build`` to create the dataset and the pooling strategy.
        """
        self.library = library
        self.experiment_name = experiment_name
        self.experiment_description = experiment_description
        self.experiment_tags = experiment_tags
        self.artifact_location = artifact_location
        self.tracking_uri = tracking_uri
        self.dataset_type = dataset_type
        self.columns = columns
        self.embedding_dict = embedding_dict
        self.vector_db_persist_directory = vector_db_persist_directory
        self.vector_db_collection_name = vector_db_collection_name
        self.embedding_pooling_strategy_type = embedding_pooling_strategy_type
        self.ner_data_file = ner_data_file
        self.ner_threshold = ner_threshold
        self.combinations = combinations
        self.model = model

    def _load_embeddings(self):
        """Return ``(embedding_dict, embedding_size)`` for the dataset.

        Raises:
            ValueError: If neither ``embedding_dict`` nor
                ``vector_db_persist_directory`` was provided.
        """
        if self.embedding_dict is not None:
            embedding_dict = self.embedding_dict
            # NOTE(review): this is the whole ``.shape`` tuple, while the
            # vector-store path below yields an int (``shape[1]``) -- confirm
            # which form the dataset expects.
            embedding_size = list(embedding_dict['all_text'].values())[
                0][0].shape
            return embedding_dict, embedding_size

        if not self.vector_db_persist_directory:
            raise ValueError(
                "Either 'embedding_dict' or 'vector_db_persist_directory' "
                "must be provided.")

        self.vector_db = chromadb.PersistentClient(
            path=self.vector_db_persist_directory)
        self.collection = self.vector_db.get_collection(
            self.vector_db_collection_name)
        dictionary = self.collection.get(include=['embeddings', 'metadatas'])

        # Group the stored embeddings as type -> id -> [embedding, ...].
        embedding_dict = defaultdict(lambda: defaultdict(list))
        for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
            embedding_dict[metadata["type"]][metadata["id"]].append(embedding)

        embedding_size = dictionary['embeddings'].shape[1]
        return embedding_dict, embedding_size

    def build(self):
        """Load embeddings and NER data, create the dataset and cache its splits.

        Returns:
            Pipeline: ``self``, to allow chaining.

        Raises:
            ValueError: If no embedding source is configured.
        """
        # 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
        kwargs = {"columns": self.columns}
        # ``ner_threshold`` defaults to None; treat that as "no thresholds".
        kwargs.update(self.ner_threshold or {})

        embedding_dict, embedding_size = self._load_embeddings()

        pooling_strategy = self.embedding_pooling_strategy_type()

        self.ner_df = CTakesNER().load(
            filename=self.ner_data_file) if self.ner_data_file else None

        self.dataset = self.dataset_type(
            embedding_dict=embedding_dict,
            embedding_size=embedding_size,
            embeddings_pooling_strategy=pooling_strategy,
            ner_df=self.ner_df, **kwargs)

        # NOTE(review): the fifth and sixth returned values are assigned onto
        # ``X_train.index`` / ``X_test.index``; the statement is kept as it was
        # because that side effect may be relied upon -- confirm with load().
        X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset.load()

        self.dataframe = self.dataset.dataframe
        self.X_train = self.dataset.X_train
        self.X_test = self.dataset.X_test
        self.y_train = self.dataset.y_train
        self.y_test = self.dataset.y_test
        self.train_idx_arr = self.dataset.train_idx_arr
        self.val_idx_arr = self.dataset.val_idx_arr
        self.items = self.dataset.produce_inputs()

        print("Building the experiment with the following settings:")
        print(
            f"Name: {self.experiment_name}, Dataset: {self.dataset}, Model: {self.model}")
        return self

    def run(self, model_func, batch_size=128, epochs=100):
        """Select the MLflow experiment and run ``MultiModalRunner`` on the items.

        ``build`` must have been called first.

        Args:
            model_func: Passed to ``MultiModalRunner`` as ``model_func``.
            batch_size: Passed to ``MultiModalRunner``.
            epochs: Passed to ``MultiModalRunner``.

        Returns:
            Whatever ``MultiModalRunner.predict`` returns for
            ``self.combinations``.
        """
        mlflow.set_tracking_uri(self.tracking_uri)

        if mlflow.get_experiment_by_name(self.experiment_name) is None:
            # Tags are passed at creation time: ``set_experiment_tags`` acts on
            # the currently active experiment, which is not yet this one.
            mlflow.create_experiment(
                self.experiment_name, self.artifact_location,
                tags=self.experiment_tags)
        mlflow.set_experiment(self.experiment_name)

        y_test_label = self.items[0][4]
        multi_modal_runner = MultiModalRunner(
            library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
        multi_modal_runner.set_data(
            self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
        result = multi_modal_runner.predict(self.combinations)
        return result
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.78
3
+ Version: 0.0.79
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -55,14 +55,10 @@ ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVs
55
55
  ddi_fw/drugbank/drugbank_processor.py,sha256=vmkt68n9nFLevufgGyXhOSDtTo4G1XzwT9PVncGTXtk,18127
56
56
  ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
57
57
  ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
58
- ddi_fw/experiments/__init__.py,sha256=QVWBpJ9x45GJmW5vM5YwpkASr5Dam88-P5DwzcdSsLs,249
58
+ ddi_fw/experiments/__init__.py,sha256=5L2xSolpFycNnflqOMdvJSiqRB16ExA5bbVGORKFX04,195
59
59
  ddi_fw/experiments/custom_torch_model.py,sha256=iQ_R_EApzD2JCcASN8cie6D21oh7VCxaOQ45_dkiGwc,2576
60
60
  ddi_fw/experiments/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
61
- ddi_fw/experiments/ml_helper.py,sha256=1mH6IFhOG4eZ-GueTa_-8V9OATapWp_VclnCCpVwnnE,5473
62
- ddi_fw/experiments/ml_pt.py,sha256=VHKegdX5-RyUpNN_l6XxMc2ZrSQg4h8uuQALInQXkRg,3730
63
- ddi_fw/experiments/ml_tf.py,sha256=flOsVxCrok5zIlT4OHbk3NzEhtgyybMAQOENI6Itn9I,5791
64
61
  ddi_fw/experiments/pipeline.py,sha256=4ltPCcfLZ1fFpiOd8ahPognI6NLmRLzJvUqyFpn3z18,5693
65
- ddi_fw/experiments/pipeline_builder_pattern.py,sha256=w6x7ietk4vONCAvUfssPycaRUQIYUJsbCNNj3BTASBI,5454
66
62
  ddi_fw/experiments/pipeline_ner.py,sha256=unxEJCYrG6wEZjLmqvGdLRTMOBwELbGKkdygSpAR3b8,5043
67
63
  ddi_fw/experiments/tensorflow_helper.py,sha256=m3Mppl-tbccTMAKLpZg2YC0xpcukkyQihPw_uwAlRRY,11857
68
64
  ddi_fw/experiments/test.py,sha256=z1TfBpK75zGKpp2ZU8f6APjZlgBFthaCBN61YB9ma4o,2049
@@ -70,9 +66,18 @@ ddi_fw/langchain/__init__.py,sha256=8dBPZivc01WWaCH8sZ_UV8-XPyo74e9Qy6-fYgAiNLE,
70
66
  ddi_fw/langchain/embeddings.py,sha256=8J_SfO9pyET2W-Ltzq0_r9EchFzBsYdUabiOMma42Us,7515
71
67
  ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
72
68
  ddi_fw/langchain/storage.py,sha256=uy5clVB07So2eFbRGdAKzHIPdfEk4se33cPktis7Aa4,2716
69
+ ddi_fw/ml/__init__.py,sha256=0YubqmEpJKp3OfqlLKkD5N9L6WDWew3QEtnbdY3mqKg,180
70
+ ddi_fw/ml/ml_helper.py,sha256=juDcTi8IEQk2D4mkY4qVX75rRM0FmksULRQzyNHKw2A,4475
71
+ ddi_fw/ml/model_wrapper.py,sha256=ZExnsLMjHKL3BaI4aKkbyWTp8vbswLeF2_T3cZ73YpQ,1144
72
+ ddi_fw/ml/pytorch_wrapper.py,sha256=YdwzR5qAHFNajYB_elFqDhVKRLeajaRpopNzyQ6gIIA,3725
73
+ ddi_fw/ml/tensorflow_wrapper.py,sha256=pSeiJDuaLf9MhZVlLuLJBA-LH-H-Dl2TyYbB39iGsto,5748
73
74
  ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
74
75
  ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
75
76
  ddi_fw/ner/ner.py,sha256=BEs9AFljAxOQrC2BEP1raSzRoypcfELS5UTdl4bjTqw,15863
77
+ ddi_fw/pipeline/__init__.py,sha256=qryVi8bTsbpbMsseOuSEi1Siign0LkbFLPWiIR7OGHE,165
78
+ ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=YkPixHVo9-4SPkY8VaWvBe1aaI5IiV4oZT4kBrm2WHQ,1635
79
+ ddi_fw/pipeline/multi_pipeline.py,sha256=UgTEcT2UfkRKR3Ri_Nrtz9GrQNQHGOSUrw9h5AwFUMI,4356
80
+ ddi_fw/pipeline/pipeline.py,sha256=WJnz5zEIa-9n4qEs8-1ubqTMPsLYjFGdFYJPbn92i98,5512
76
81
  ddi_fw/test/basic_test.py,sha256=fEOGcZm1ObnsDvMiXNmdmz6YCeUrGc8V0DwlSwGhsq8,376
77
82
  ddi_fw/test/combination_test.py,sha256=TWNE8sf-DSh1Q9-yRaRBc774Sn1kSMGXLwQhd2_Qynk,324
78
83
  ddi_fw/test/compress_json_test.py,sha256=BGny56YqiG-pzhMoDzLKQBQI1E7o3jU0S7VYWtclAx4,1045
@@ -89,7 +94,7 @@ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
89
94
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
90
95
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
91
96
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
92
- ddi_fw-0.0.78.dist-info/METADATA,sha256=2nUPcQaInXGSs6zh6fqhDHkvyyeql-B0pdXGsfAoE6Y,1966
93
- ddi_fw-0.0.78.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
94
- ddi_fw-0.0.78.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
95
- ddi_fw-0.0.78.dist-info/RECORD,,
97
+ ddi_fw-0.0.79.dist-info/METADATA,sha256=Acf-Yb4NTk6aKueaQ3tbti_Ykxm6mkzJP-mreWb8UWI,1966
98
+ ddi_fw-0.0.79.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
99
+ ddi_fw-0.0.79.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
100
+ ddi_fw-0.0.79.dist-info/RECORD,,
@@ -1,152 +0,0 @@
1
- import sqlite3
2
- from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
3
- from keras.models import Model, Sequential
4
- from keras.callbacks import EarlyStopping
5
- from keras.layers import Dense, Dropout, Input, Activation, BatchNormalization
6
- from tensorflow import keras
7
- from ddi_fw.experiments import TFSingleModal, TFMultiModal
8
- from ddi_fw.experiments import evaluate
9
- from sklearn.preprocessing import LabelBinarizer
10
- import numpy as np
11
- import pandas as pd
12
- from ddi_fw.utils import ZipHelper, Py7ZipHelper
13
- import os
14
- import chromadb
15
- from collections import defaultdict
16
- from langchain_community.vectorstores import Chroma
17
- from ddi_fw.ner.ner import CTakesNER
18
- from ddi_fw.langchain.embeddings import PoolingStrategy
19
-
20
- from ddi_fw.datasets import BaseDataset, DDIMDLDataset
21
-
22
- from ddi_fw.datasets import SumPoolingStrategy
23
- from keras import metrics
24
- from ddi_fw.experiments.evaluation_helper import evaluate
25
-
26
- import mlflow
27
-
28
-
29
- class Experiment:
30
- def __init__(self):
31
- pass
32
-
33
- @staticmethod
34
- def create():
35
- return Experiment()
36
-
37
- def name(self, name):
38
- self.experiment_name = name
39
- return self
40
-
41
- def description(self, description):
42
- self.experiment_description = description
43
- return self
44
-
45
- def tags(self, tags):
46
- self.experiment_tags = tags
47
- return self
48
-
49
- def tracking_uri(self, uri):
50
- self.tracking_uri = uri
51
- return self
52
-
53
- def dataset(self, dataset_type: BaseDataset):
54
- self.dataset_type = dataset_type
55
- return self
56
-
57
- def columns(self, cols):
58
- self.columns = cols
59
- return self
60
-
61
- def vectordb_collection(self, persist_directory, collection_name):
62
- self.vector_db_persist_directory = persist_directory
63
- self.vector_db_collection_name = collection_name
64
- return self
65
-
66
- def embedding_pooling_strategy(self, strategy_type: PoolingStrategy):
67
- self.embedding_pooling_strategy_type = strategy_type
68
- return self
69
-
70
- def ner_data_file(self, ner_data_file):
71
- self.ner_data_file = ner_data_file
72
- self.ner_df = CTakesNER().load(filename=ner_data_file)
73
- return self
74
-
75
- def ner_threshold(self, threshold):
76
- self.ner_threshold = threshold
77
- return self
78
-
79
- def combinations(self, combs):
80
- self.combinations = combs
81
- return self
82
-
83
- def model(self, model):
84
- self.model = model
85
- return self
86
-
87
- def build(self):
88
- # 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
89
- kwargs = {"columns": self.columns}
90
- for k, v in self.ner_threshold.items():
91
- kwargs[k] = v
92
-
93
- self.vector_db = chromadb.PersistentClient(
94
- path=self.vector_db_persist_directory)
95
- self.collection = self.vector_db.get_collection(
96
- self.vector_db_collection_name)
97
- dictionary = self.collection.get(include=['embeddings', 'metadatas'])
98
-
99
- embedding_dict = defaultdict(lambda: defaultdict(list))
100
-
101
- for metadata, embedding in zip(dictionary['metadatas'], dictionary['embeddings']):
102
- embedding_dict[metadata["type"]][metadata["id"]].append(embedding)
103
-
104
- embedding_size = dictionary['embeddings'].shape[1]
105
-
106
- pooling_strategy = self.embedding_pooling_strategy_type()
107
-
108
- self.dataset = self.dataset_type(
109
- embedding_dict=embedding_dict,
110
- embedding_size=embedding_size,
111
- embeddings_pooling_strategy=pooling_strategy,
112
- ner_df=self.ner_df, kwargs=kwargs)
113
-
114
- X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset.load()
115
-
116
- self.dataframe = self.dataset.dataframe
117
- # dataframe.dropna()
118
- self.X_train = self.dataset.X_train
119
- self.X_test = self.dataset.X_test
120
- self.y_train = self.dataset.y_train
121
- self.y_test = self.dataset.y_test
122
- self.train_idx_arr = self.dataset.train_idx_arr
123
- self.val_idx_arr = self.dataset.val_idx_arr
124
- # Logic to set up the experiment
125
- self.items = self.dataset.produce_inputs()
126
-
127
- unique_classes = pd.unique(self.dataframe['event_category'])
128
- event_num = len(unique_classes)
129
- # droprate = 0.3
130
- vector_size = self.dataset.drugs_df.shape[0]
131
-
132
- print("Building the experiment with the following settings:")
133
- print(
134
- f"Name: {self.experiment_name}, Dataset: {self.dataset}, Model: {self.model}")
135
- # Implement additional build logic as needed
136
- return self
137
-
138
- def run(self, model_func, batch_size=128, epochs=100):
139
- mlflow.set_tracking_uri(self.tracking_uri)
140
-
141
- if mlflow.get_experiment_by_name(self.experiment_name) == None:
142
- mlflow.create_experiment(self.experiment_name)
143
- mlflow.set_experiment_tags(self.experiment_tags)
144
- mlflow.set_experiment(self.experiment_name)
145
-
146
- y_test_label = self.items[0][4]
147
- multi_modal = TFMultiModal(
148
- model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
149
- multi_modal.set_data(
150
- self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
151
- pred, self.single_results = multi_modal.predict(self.combinations)
152
- return self