ddi-fw 0.0.198__py3-none-any.whl → 0.0.199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +4 -3
- ddi_fw/datasets/ddi_mdl/base.py +1 -0
- ddi_fw/ml/ml_helper.py +14 -5
- ddi_fw/pipeline/multi_pipeline.py +2 -0
- ddi_fw/pipeline/pipeline.py +2 -1
- {ddi_fw-0.0.198.dist-info → ddi_fw-0.0.199.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.198.dist-info → ddi_fw-0.0.199.dist-info}/RECORD +9 -9
- {ddi_fw-0.0.198.dist-info → ddi_fw-0.0.199.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.198.dist-info → ddi_fw-0.0.199.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/core.py
CHANGED
@@ -144,8 +144,8 @@ class BaseDataset(BaseModel, abc.ABC):
|
|
144
144
|
Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
|
145
145
|
skip deriving them. Otherwise, derive them from the dataframe and indices.
|
146
146
|
"""
|
147
|
-
self.prep() # Prepare the dataset
|
148
147
|
self.handle_mixins() # Centralized mixin handling
|
148
|
+
self.prep() # Prepare the dataset
|
149
149
|
|
150
150
|
if self.X_train is not None or self.y_train is not None or self.X_test is not None or self.y_test is not None:
|
151
151
|
# Data is already provided, no need to calculate
|
@@ -169,8 +169,9 @@ class BaseDataset(BaseModel, abc.ABC):
|
|
169
169
|
|
170
170
|
# train = self.dataframe[self.dataframe.index.isin(train_idx_all)]
|
171
171
|
# test = self.dataframe[self.dataframe.index.isin(test_idx_all)]
|
172
|
-
|
173
|
-
|
172
|
+
columns = self.columns + [self.class_column]
|
173
|
+
train = self.dataframe.loc[self.dataframe.index.isin(train_idx_all), columns]
|
174
|
+
test = self.dataframe.loc[self.dataframe.index.isin(test_idx_all), columns]
|
174
175
|
X_train = train.drop(self.class_column, axis=1)
|
175
176
|
X_train = train.drop(self.class_column, axis=1)
|
176
177
|
y_train = train[self.class_column]
|
ddi_fw/datasets/ddi_mdl/base.py
CHANGED
@@ -81,6 +81,7 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
|
|
81
81
|
self.chemical_property_columns = chemical_property_columns
|
82
82
|
self.embedding_columns = embedding_columns
|
83
83
|
self.ner_columns = ner_columns
|
84
|
+
self.columns = [] # this variable is modified in the prep method
|
84
85
|
|
85
86
|
self.class_column = 'event_category'
|
86
87
|
_db_path = HERE.joinpath('data/event.db')
|
ddi_fw/ml/ml_helper.py
CHANGED
@@ -32,9 +32,10 @@ import ddi_fw.utils as utils
|
|
32
32
|
|
33
33
|
class MultiModalRunner:
|
34
34
|
# todo model related parameters to config
|
35
|
-
def __init__(self, library, multi_modal, use_mlflow=False):
|
35
|
+
def __init__(self, library, multi_modal, default_model, use_mlflow=False):
|
36
36
|
self.library = library
|
37
37
|
self.multi_modal = multi_modal
|
38
|
+
self.default_model = default_model
|
38
39
|
self.use_mlflow = use_mlflow
|
39
40
|
self.result = Result()
|
40
41
|
|
@@ -60,14 +61,13 @@ class MultiModalRunner:
|
|
60
61
|
# TODO check single_results, 1d,2d ...
|
61
62
|
def __predict(self, single_results):
|
62
63
|
item_dict = {t[0]: t for t in self.items}
|
63
|
-
|
64
|
-
|
65
|
-
print(item_dict.keys())
|
64
|
+
if self.default_model is None and not self.multi_modal:
|
65
|
+
raise Exception("Default model and multi modal cannot be None at the same time")
|
66
66
|
|
67
67
|
if self.multi_modal:
|
68
68
|
for m in self.multi_modal:
|
69
69
|
name = m.get('name')
|
70
|
-
input_type = m.get('input_type')
|
70
|
+
# input_type = m.get('input_type')
|
71
71
|
input = m.get('input')
|
72
72
|
inputs = m.get('inputs')
|
73
73
|
model_type = get_import(m.get("model_type"))
|
@@ -100,6 +100,15 @@ class MultiModalRunner:
|
|
100
100
|
else:
|
101
101
|
raise Exception("check configurations")
|
102
102
|
else: # TODO default model maybe?
|
103
|
+
print("Default model will be used")
|
104
|
+
name = self.default_model.get('name')
|
105
|
+
# input_type = m.get('input_type')
|
106
|
+
input = self.default_model.get('input')
|
107
|
+
inputs = self.default_model.get('inputs')
|
108
|
+
model_type = get_import(self.default_model.get("model_type"))
|
109
|
+
kwargs = self.default_model.get('params')
|
110
|
+
single_modal = T(self.date, name, model_type,
|
111
|
+
use_mlflow=self.use_mlflow, **kwargs)
|
103
112
|
item = self.items[0]
|
104
113
|
single_modal.set_data(
|
105
114
|
self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
|
@@ -57,6 +57,7 @@ class MultiPipeline():
|
|
57
57
|
tracking_uri = config.get("tracking_uri")
|
58
58
|
artifact_location = config.get("artifact_location")
|
59
59
|
#new
|
60
|
+
default_model = config.get("default_model"),
|
60
61
|
multi_modal = config.get("multi_modal")
|
61
62
|
columns = config.get("columns")
|
62
63
|
ner_data_file = config.get("ner_data_file")
|
@@ -101,6 +102,7 @@ class MultiPipeline():
|
|
101
102
|
ner_data_file=ner_data_file,
|
102
103
|
ner_threshold=ner_threshold,
|
103
104
|
combinations=combinations,
|
105
|
+
default_model=default_model,
|
104
106
|
multi_modal= multi_modal)
|
105
107
|
elif type== "ner_search":
|
106
108
|
pipeline = NerParameterSearch(
|
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -35,6 +35,7 @@ class Pipeline(BaseModel):
|
|
35
35
|
ner_threshold: Optional[dict] = None
|
36
36
|
combinations: Optional[List[str]] = None
|
37
37
|
model: Optional[Any] = None
|
38
|
+
default_model: Optional[Any] = None
|
38
39
|
multi_modal: Optional[Any] = None
|
39
40
|
use_mlflow: bool = False
|
40
41
|
_dataset: BaseDataset = []
|
@@ -193,7 +194,7 @@ class Pipeline(BaseModel):
|
|
193
194
|
|
194
195
|
y_test_label = self.items[0][4]
|
195
196
|
multi_modal_runner = MultiModalRunner(
|
196
|
-
library=self.library, multi_modal=self.multi_modal, use_mlflow=self.use_mlflow)
|
197
|
+
library=self.library, multi_modal=self.multi_modal, default_model= self.default_model , use_mlflow=self.use_mlflow)
|
197
198
|
# multi_modal_runner = MultiModalRunner(
|
198
199
|
# library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
|
199
200
|
# multi_modal = TFMultiModal(
|
@@ -1,9 +1,9 @@
|
|
1
1
|
ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
|
2
|
-
ddi_fw/datasets/core.py,sha256=
|
2
|
+
ddi_fw/datasets/core.py,sha256=vxatq85GDrvp_cdHQsCHHE-PBneGYRSMb9VyRHlBce8,16424
|
3
3
|
ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
6
|
-
ddi_fw/datasets/ddi_mdl/base.py,sha256=
|
6
|
+
ddi_fw/datasets/ddi_mdl/base.py,sha256=lmHKlpaxyH3HZDcC_kOuvDjuzb_rtOvdD7CkwA00w7k,10399
|
7
7
|
ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
|
8
8
|
ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
|
9
9
|
ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
|
@@ -74,7 +74,7 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
|
|
74
74
|
ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
|
75
75
|
ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
|
76
76
|
ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
|
77
|
-
ddi_fw/ml/ml_helper.py,sha256
|
77
|
+
ddi_fw/ml/ml_helper.py,sha256=xAeAe_eD7_4uAgIQooAU-Peb63j9QhxiaC5xaJPAu6I,7616
|
78
78
|
ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
|
79
79
|
ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
|
80
80
|
ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
|
@@ -83,9 +83,9 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
|
|
83
83
|
ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
|
84
84
|
ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
|
85
85
|
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
|
86
|
-
ddi_fw/pipeline/multi_pipeline.py,sha256=
|
86
|
+
ddi_fw/pipeline/multi_pipeline.py,sha256=L0Apy2Z909GnR8KMX_q7-kluZmIG3CT0yfMuL22poaQ,5737
|
87
87
|
ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
|
88
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
88
|
+
ddi_fw/pipeline/pipeline.py,sha256=s6fYtgAyWcap4oC6bAKZATju8BnasRtLopnM2Blt2Fo,9125
|
89
89
|
ddi_fw/utils/__init__.py,sha256=HC32XkYQTYH_9vt0eX6tqQngEFG-R70hGrYkT-BcHCk,519
|
90
90
|
ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
|
91
91
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
99
99
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
100
100
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
|
101
101
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
102
|
-
ddi_fw-0.0.
|
103
|
-
ddi_fw-0.0.
|
104
|
-
ddi_fw-0.0.
|
105
|
-
ddi_fw-0.0.
|
102
|
+
ddi_fw-0.0.199.dist-info/METADATA,sha256=Qunuy5iDVljrNJKv67-lvvk3PC-1GxZOqfbpiYY-JVM,2542
|
103
|
+
ddi_fw-0.0.199.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
104
|
+
ddi_fw-0.0.199.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
105
|
+
ddi_fw-0.0.199.dist-info/RECORD,,
|
File without changes
|
File without changes
|