ddi-fw 0.0.185__tar.gz → 0.0.187__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/PKG-INFO +1 -1
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/pyproject.toml +1 -1
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/core.py +27 -29
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/ml_helper.py +1 -39
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/tensorflow_wrapper.py +3 -2
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/ner_pipeline.py +3 -3
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/pipeline.py +13 -59
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/PKG-INFO +1 -1
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/README.md +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/setup.cfg +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/db_utils.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/debug.log +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/base.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/setup_._py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/event_extractor.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/embeddings.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/storage.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/evaluation_helper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/model_wrapper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ner/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ner/mmlrestclient.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ner/ner.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/enums.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/json_helper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/kaggle.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/package_helper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/py7zr_helper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/utils.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/zip_helper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/vectorization/__init__.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/vectorization/idf_helper.py +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/requires.txt +0 -0
- {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -60,10 +60,10 @@ class BaseDataset(BaseModel):
|
|
60
60
|
dataset_splitter_type: Type[DatasetSplitter]
|
61
61
|
class_column: str = 'class'
|
62
62
|
dataframe: Optional[pd.DataFrame] = None
|
63
|
-
X_train: Optional[
|
64
|
-
X_test: Optional[
|
65
|
-
y_train: Optional[
|
66
|
-
y_test: Optional[
|
63
|
+
X_train: Optional[np.ndarray] = None
|
64
|
+
X_test: Optional[np.ndarray] = None
|
65
|
+
y_train: Optional[np.ndarray] = None
|
66
|
+
y_test: Optional[np.ndarray] = None
|
67
67
|
train_indexes: Optional[pd.Index] = None
|
68
68
|
test_indexes: Optional[pd.Index] = None
|
69
69
|
train_idx_arr: Optional[List[np.ndarray]] = None
|
@@ -81,7 +81,7 @@ class BaseDataset(BaseModel):
|
|
81
81
|
self.y_train), np.array(self.y_test)
|
82
82
|
|
83
83
|
if self.columns is None or len(self.columns) == 0:
|
84
|
-
items.append([f'
|
84
|
+
items.append([f'default', np.nan_to_num(self.X_train),
|
85
85
|
y_train_label, np.nan_to_num(self.X_test), y_test_label])
|
86
86
|
else:
|
87
87
|
for index, column in enumerate(self.columns):
|
@@ -127,11 +127,12 @@ class BaseDataset(BaseModel):
|
|
127
127
|
Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
|
128
128
|
skip deriving them. Otherwise, derive them from the dataframe and indices.
|
129
129
|
"""
|
130
|
-
if self.X_train
|
130
|
+
if self.X_train and self.y_train and self.X_test and self.y_test :
|
131
131
|
# Data is already provided, no need to calculate
|
132
132
|
logging.info(
|
133
133
|
"X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
|
134
|
-
return
|
134
|
+
return
|
135
|
+
# return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
|
135
136
|
|
136
137
|
self.prep()
|
137
138
|
|
@@ -150,24 +151,26 @@ class BaseDataset(BaseModel):
|
|
150
151
|
|
151
152
|
train = self.dataframe[self.dataframe.index.isin(train_idx_all)]
|
152
153
|
test = self.dataframe[self.dataframe.index.isin(test_idx_all)]
|
154
|
+
X_train = train.drop(self.class_column, axis=1)
|
155
|
+
X_train = train.drop(self.class_column, axis=1)
|
156
|
+
y_train = train[self.class_column]
|
157
|
+
X_test = test.drop(self.class_column, axis=1)
|
158
|
+
y_test = test[self.class_column]
|
159
|
+
|
160
|
+
self.X_train = np.array(X_train)
|
161
|
+
self.y_train = np.array(y_train)
|
162
|
+
self.X_test = np.array(X_test)
|
163
|
+
self.y_test = np.array(y_test)
|
153
164
|
|
154
|
-
self.
|
155
|
-
self.
|
156
|
-
self.X_test = test.drop(self.class_column, axis=1)
|
157
|
-
self.y_test = test[self.class_column]
|
158
|
-
|
159
|
-
self.train_indexes = self.X_train.index
|
160
|
-
self.test_indexes = self.X_test.index
|
165
|
+
self.train_indexes = X_train.index
|
166
|
+
self.test_indexes = X_test.index
|
161
167
|
self.train_idx_arr = train_idx_arr
|
162
168
|
self.val_idx_arr = val_idx_arr
|
163
169
|
|
164
170
|
# Dataframe to numpy array conversion
|
165
|
-
|
166
|
-
self.y_train = np.array(self.y_train)
|
167
|
-
self.X_test = np.array(self.X_test)
|
168
|
-
self.y_test = np.array(self.y_test)
|
171
|
+
|
169
172
|
|
170
|
-
return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
|
173
|
+
# return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
|
171
174
|
|
172
175
|
def __get_indexes__(self, path):
|
173
176
|
train_index_path = path+'/train_indexes.txt'
|
@@ -219,21 +222,16 @@ class BaseDataset(BaseModel):
|
|
219
222
|
|
220
223
|
X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset_splitter.split(
|
221
224
|
X=X, y=y)
|
222
|
-
self.X_train = X_train
|
223
|
-
self.X_test = X_test
|
224
|
-
self.y_train =
|
225
|
-
self.y_test = y_test
|
225
|
+
self.X_train = np.array(self.X_train)
|
226
|
+
self.X_test = np.array(self.X_test)
|
227
|
+
self.y_train = np.array(y_train.tolist())
|
228
|
+
self.y_test = np.array(y_test.tolist())
|
226
229
|
self.train_indexes = X_train.index
|
227
230
|
self.test_indexes = X_test.index
|
228
231
|
self.train_idx_arr = train_idx_arr
|
229
232
|
self.val_idx_arr = val_idx_arr
|
230
233
|
|
231
|
-
|
232
|
-
self.X_train = np.array(self.X_train)
|
233
|
-
self.y_train = np.array(self.y_train.tolist())
|
234
|
-
self.X_test = np.array(self.X_test)
|
235
|
-
self.y_test = np.array(self.y_test.tolist())
|
236
|
-
|
234
|
+
|
237
235
|
if save_indexes:
|
238
236
|
# train_pairs = [row['id1'].join(',').row['id2'] for index, row in X_train.iterrows()]
|
239
237
|
self.__save_indexes__(
|
@@ -32,7 +32,7 @@ import ddi_fw.utils as utils
|
|
32
32
|
|
33
33
|
class MultiModalRunner:
|
34
34
|
# todo model related parameters to config
|
35
|
-
def __init__(self, library, multi_modal, use_mlflow=
|
35
|
+
def __init__(self, library, multi_modal, use_mlflow=False):
|
36
36
|
self.library = library
|
37
37
|
self.multi_modal = multi_modal
|
38
38
|
self.use_mlflow = use_mlflow
|
@@ -111,44 +111,6 @@ class MultiModalRunner:
|
|
111
111
|
if self.use_mlflow:
|
112
112
|
with mlflow.start_run(run_name=self.prefix, description="***") as run:
|
113
113
|
self.__predict(single_results)
|
114
|
-
# self.level_0_run_id = run.info.run_id
|
115
|
-
# item_dict = {t[0]: t for t in self.items}
|
116
|
-
# print("multi_modal")
|
117
|
-
# print(self.multi_modal)
|
118
|
-
# print(item_dict.keys())
|
119
|
-
|
120
|
-
# for m in self.multi_modal:
|
121
|
-
# name = m.get('name')
|
122
|
-
# input_type = m.get('input_type')
|
123
|
-
# input = m.get('input')
|
124
|
-
# inputs = m.get('inputs')
|
125
|
-
# model_type = get_import(m.get("model_type"))
|
126
|
-
# kwargs = m.get('params')
|
127
|
-
# T = self.__create_model(self.library)
|
128
|
-
# single_modal = T(self.date, name, model_type, **kwargs)
|
129
|
-
# if input_type == '1D':
|
130
|
-
# item = item_dict[input]
|
131
|
-
# single_modal.set_data(
|
132
|
-
# self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
|
133
|
-
# elif input_type == '2D':
|
134
|
-
# # check keys
|
135
|
-
# filtered_dict = {k: item_dict[k]
|
136
|
-
# for k in inputs if k in item_dict}
|
137
|
-
# print(filtered_dict.keys())
|
138
|
-
# first_input = next(iter(filtered_dict.values()))
|
139
|
-
# train_data_list = [f[1] for f in filtered_dict.values()]
|
140
|
-
# test_data_list = [f[3] for f in filtered_dict.values()]
|
141
|
-
# train_data = np.stack(train_data_list, axis=1)
|
142
|
-
# test_data = np.stack(test_data_list, axis=1)
|
143
|
-
# train_label = first_input[2]
|
144
|
-
# test_label = first_input[4]
|
145
|
-
# single_modal.set_data(
|
146
|
-
# self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
|
147
|
-
# else:
|
148
|
-
# raise Exception("check configurations")
|
149
|
-
# logs, metrics, prediction = single_modal.fit_and_evaluate()
|
150
|
-
# self.result.add_metric(name, metrics)
|
151
|
-
# single_results[name] = prediction
|
152
114
|
else:
|
153
115
|
self.__predict(single_results)
|
154
116
|
if combinations:
|
@@ -120,7 +120,7 @@ class TFModelWrapper(ModelWrapper):
|
|
120
120
|
print(self.train_data.shape)
|
121
121
|
models = {}
|
122
122
|
models_val_acc = {}
|
123
|
-
if self.train_idx_arr
|
123
|
+
if self.train_idx_arr and self.val_idx_arr:
|
124
124
|
for i, (train_idx, val_idx) in enumerate(zip(self.train_idx_arr, self.val_idx_arr)):
|
125
125
|
print(f"Validation {i}")
|
126
126
|
|
@@ -155,7 +155,8 @@ class TFModelWrapper(ModelWrapper):
|
|
155
155
|
self.train_data, self.train_label, None, None)
|
156
156
|
models[self.descriptor] = model
|
157
157
|
models_val_acc[self.descriptor] = checkpoint.best
|
158
|
-
|
158
|
+
if models_val_acc == {}:
|
159
|
+
return model, None
|
159
160
|
best_model_key = max(models_val_acc, key=lambda k: models_val_acc[k])
|
160
161
|
# best_model_key = max(models_val_acc, key=models_val_acc.get)
|
161
162
|
best_model = models[best_model_key]
|
@@ -106,7 +106,7 @@ class NerParameterSearch:
|
|
106
106
|
**kwargs)
|
107
107
|
|
108
108
|
# train_idx_arr, val_idx_arr bir kez hesaplanması yeterli aslında
|
109
|
-
|
109
|
+
dataset.load()
|
110
110
|
group_items = dataset.produce_inputs()
|
111
111
|
for item in group_items:
|
112
112
|
# item[0] = f'threshold_{threshold}_{item[0]}'
|
@@ -115,8 +115,8 @@ class NerParameterSearch:
|
|
115
115
|
|
116
116
|
self.items.extend(group_items)
|
117
117
|
self.y_test_label = self.items[0][4]
|
118
|
-
self.train_idx_arr = train_idx_arr
|
119
|
-
self.val_idx_arr = val_idx_arr
|
118
|
+
self.train_idx_arr = dataset.train_idx_arr
|
119
|
+
self.val_idx_arr = dataset.val_idx_arr
|
120
120
|
|
121
121
|
def run(self, model_func, batch_size=128, epochs=100):
|
122
122
|
mlflow.set_tracking_uri(self.tracking_uri)
|
@@ -36,7 +36,7 @@ class Pipeline(BaseModel):
|
|
36
36
|
combinations: Optional[List[str]] = None
|
37
37
|
model: Optional[Any] = None
|
38
38
|
multi_modal: Optional[Any] = None
|
39
|
-
use_mlflow: bool =
|
39
|
+
use_mlflow: bool = False
|
40
40
|
_items:List=[]
|
41
41
|
_train_idx_arr:List|None=[]
|
42
42
|
_val_idx_arr:List|None=[]
|
@@ -53,45 +53,7 @@ class Pipeline(BaseModel):
|
|
53
53
|
|
54
54
|
class Config:
|
55
55
|
arbitrary_types_allowed = True
|
56
|
-
|
57
|
-
# class Pipeline:
|
58
|
-
# def __init__(self,
|
59
|
-
# library='tensorflow',
|
60
|
-
# experiment_name=None,
|
61
|
-
# experiment_description=None,
|
62
|
-
# experiment_tags=None,
|
63
|
-
# artifact_location=None,
|
64
|
-
# tracking_uri=None,
|
65
|
-
# dataset_type: BaseDataset = None,
|
66
|
-
# columns=None,
|
67
|
-
# embedding_dict=None,
|
68
|
-
# column_embedding_configs=None,
|
69
|
-
# vector_db_persist_directory=None,
|
70
|
-
# vector_db_collection_name=None,
|
71
|
-
# embedding_pooling_strategy_type: PoolingStrategy = None,
|
72
|
-
# ner_data_file=None,
|
73
|
-
# ner_threshold=None,
|
74
|
-
# combinations=None,
|
75
|
-
# model=None,
|
76
|
-
# multi_modal = None ):
|
77
|
-
# self.library = library
|
78
|
-
# self.experiment_name = experiment_name
|
79
|
-
# self.experiment_description = experiment_description
|
80
|
-
# self.experiment_tags = experiment_tags
|
81
|
-
# self.artifact_location = artifact_location
|
82
|
-
# self.tracking_uri = tracking_uri
|
83
|
-
# self.dataset_type = dataset_type
|
84
|
-
# self.columns = columns
|
85
|
-
# self.embedding_dict = embedding_dict
|
86
|
-
# self.column_embedding_configs = column_embedding_configs
|
87
|
-
# self.vector_db_persist_directory = vector_db_persist_directory
|
88
|
-
# self.vector_db_collection_name = vector_db_collection_name
|
89
|
-
# self.embedding_pooling_strategy_type = embedding_pooling_strategy_type
|
90
|
-
# self.ner_data_file = ner_data_file
|
91
|
-
# self.ner_threshold = ner_threshold
|
92
|
-
# self.combinations = combinations
|
93
|
-
# self.model = model
|
94
|
-
# self.multi_modal = multi_modal
|
56
|
+
|
95
57
|
|
96
58
|
def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
|
97
59
|
"""
|
@@ -194,35 +156,27 @@ class Pipeline(BaseModel):
|
|
194
156
|
embedding_dict=embedding_dict,
|
195
157
|
embedding_size=embedding_size,
|
196
158
|
embeddings_pooling_strategy=pooling_strategy,
|
197
|
-
|
159
|
+
dataset_splitter_type = self.dataset_splitter_type,
|
160
|
+
**kwargs)
|
161
|
+
elif self.dataset_type == BaseDataset:
|
162
|
+
dataset = self.dataset_type(
|
163
|
+
dataset_splitter_type = self.dataset_splitter_type,
|
198
164
|
**kwargs)
|
199
165
|
else:
|
200
166
|
dataset = self.dataset_type(**kwargs)
|
201
167
|
|
202
168
|
# X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
|
203
169
|
dataset.load()
|
204
|
-
|
205
170
|
dataframe = dataset.dataframe
|
171
|
+
b = not( dataset.X_train and dataset.y_train and dataset.X_test and dataset.y_test)
|
172
|
+
c = dataframe is None or dataframe.empty
|
206
173
|
|
207
|
-
if
|
174
|
+
if b or c:
|
208
175
|
raise ValueError("The dataset is not loaded")
|
209
|
-
|
210
|
-
# dataframe.dropna()
|
211
|
-
# X_train = dataset.X_train
|
212
|
-
# X_test = dataset.X_test
|
213
|
-
# y_train = dataset.y_train
|
214
|
-
# y_test = dataset.y_test
|
215
|
-
# self._train_idx_arr = dataset.train_idx_arr
|
216
|
-
# self._val_idx_arr = dataset.val_idx_arr
|
217
|
-
# Logic to set up the experiment
|
176
|
+
|
218
177
|
# column name, train data, train label, test data, test label
|
219
178
|
self._items = dataset.produce_inputs()
|
220
|
-
|
221
|
-
# unique_classes = pd.unique(dataframe[dataset.class_column])
|
222
|
-
# event_num = len(unique_classes)
|
223
|
-
# droprate = 0.3
|
224
|
-
# vector_size = self.dataset.drugs_df.shape[0]
|
225
|
-
|
179
|
+
|
226
180
|
print("Building the experiment with the following settings:")
|
227
181
|
print(
|
228
182
|
f"Name: {self.experiment_name}, Dataset: {dataset}, Model: {self.model}")
|
@@ -244,7 +198,7 @@ class Pipeline(BaseModel):
|
|
244
198
|
|
245
199
|
y_test_label = self.items[0][4]
|
246
200
|
multi_modal_runner = MultiModalRunner(
|
247
|
-
library=self.library, multi_modal=self.multi_modal)
|
201
|
+
library=self.library, multi_modal=self.multi_modal, use_mlflow=self.use_mlflow)
|
248
202
|
# multi_modal_runner = MultiModalRunner(
|
249
203
|
# library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
|
250
204
|
# multi_modal = TFMultiModal(
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt
RENAMED
File without changes
|
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|