ddi-fw 0.0.198__py3-none-any.whl → 0.0.200__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -144,8 +144,8 @@ class BaseDataset(BaseModel, abc.ABC):
144
144
  Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
145
145
  skip deriving them. Otherwise, derive them from the dataframe and indices.
146
146
  """
147
- self.prep() # Prepare the dataset
148
147
  self.handle_mixins() # Centralized mixin handling
148
+ self.prep() # Prepare the dataset
149
149
 
150
150
  if self.X_train is not None or self.y_train is not None or self.X_test is not None or self.y_test is not None:
151
151
  # Data is already provided, no need to calculate
@@ -169,8 +169,9 @@ class BaseDataset(BaseModel, abc.ABC):
169
169
 
170
170
  # train = self.dataframe[self.dataframe.index.isin(train_idx_all)]
171
171
  # test = self.dataframe[self.dataframe.index.isin(test_idx_all)]
172
- train = self.dataframe.loc[self.dataframe.index.isin(train_idx_all), self.columns]
173
- test = self.dataframe.loc[self.dataframe.index.isin(test_idx_all), self.columns]
172
+ columns = self.columns + [self.class_column]
173
+ train = self.dataframe.loc[self.dataframe.index.isin(train_idx_all), columns]
174
+ test = self.dataframe.loc[self.dataframe.index.isin(test_idx_all), columns]
174
175
  X_train = train.drop(self.class_column, axis=1)
175
176
  X_train = train.drop(self.class_column, axis=1)
176
177
  y_train = train[self.class_column]
@@ -81,6 +81,7 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
81
81
  self.chemical_property_columns = chemical_property_columns
82
82
  self.embedding_columns = embedding_columns
83
83
  self.ner_columns = ner_columns
84
+ self.columns = [] # these variable is modified in prep method
84
85
 
85
86
  self.class_column = 'event_category'
86
87
  _db_path = HERE.joinpath('data/event.db')
ddi_fw/ml/ml_helper.py CHANGED
@@ -32,9 +32,10 @@ import ddi_fw.utils as utils
32
32
 
33
33
  class MultiModalRunner:
34
34
  # todo model related parameters to config
35
- def __init__(self, library, multi_modal, use_mlflow=False):
35
+ def __init__(self, library, multi_modal, default_model, use_mlflow=False):
36
36
  self.library = library
37
37
  self.multi_modal = multi_modal
38
+ self.default_model = default_model
38
39
  self.use_mlflow = use_mlflow
39
40
  self.result = Result()
40
41
 
@@ -60,14 +61,13 @@ class MultiModalRunner:
60
61
  # TODO check single_results, 1d,2d ...
61
62
  def __predict(self, single_results):
62
63
  item_dict = {t[0]: t for t in self.items}
63
- print("multi_modal")
64
- print(self.multi_modal)
65
- print(item_dict.keys())
64
+ if self.default_model is None and not self.multi_modal:
65
+ raise Exception("Default model and multi modal cannot be None at the same time")
66
66
 
67
67
  if self.multi_modal:
68
68
  for m in self.multi_modal:
69
69
  name = m.get('name')
70
- input_type = m.get('input_type')
70
+ # input_type = m.get('input_type')
71
71
  input = m.get('input')
72
72
  inputs = m.get('inputs')
73
73
  model_type = get_import(m.get("model_type"))
@@ -99,14 +99,25 @@ class MultiModalRunner:
99
99
  self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
100
100
  else:
101
101
  raise Exception("check configurations")
102
- else: # TODO default model maybe?
103
- item = self.items[0]
102
+ logs, metrics, prediction = single_modal.fit_and_evaluate()
103
+ self.result.add_metric(name, metrics)
104
+ single_results[name] = prediction
105
+ else: # TODO default model maybe?
106
+ print("Default model will be used")
107
+ name = self.default_model.get('name')
108
+ # input_type = m.get('input_type')
109
+ model_type = get_import(self.default_model.get("model_type"))
110
+ kwargs = self.default_model.get('params')
111
+ for item in self.items:
112
+ T = self.__create_model(self.library)
113
+ single_modal = T(self.date, name, model_type,
114
+ use_mlflow=self.use_mlflow, **kwargs)
104
115
  single_modal.set_data(
105
116
  self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
106
117
 
107
- logs, metrics, prediction = single_modal.fit_and_evaluate()
108
- self.result.add_metric(name, metrics)
109
- single_results[name] = prediction
118
+ logs, metrics, prediction = single_modal.fit_and_evaluate()
119
+ self.result.add_metric(name, metrics)
120
+ single_results[name] = prediction
110
121
 
111
122
  def predict(self, combinations: list = [], generate_combinations=False):
112
123
  self.prefix = utils.utc_time_as_string()
@@ -57,6 +57,7 @@ class MultiPipeline():
57
57
  tracking_uri = config.get("tracking_uri")
58
58
  artifact_location = config.get("artifact_location")
59
59
  #new
60
+ default_model = config.get("default_model"),
60
61
  multi_modal = config.get("multi_modal")
61
62
  columns = config.get("columns")
62
63
  ner_data_file = config.get("ner_data_file")
@@ -101,6 +102,7 @@ class MultiPipeline():
101
102
  ner_data_file=ner_data_file,
102
103
  ner_threshold=ner_threshold,
103
104
  combinations=combinations,
105
+ default_model=default_model,
104
106
  multi_modal= multi_modal)
105
107
  elif type== "ner_search":
106
108
  pipeline = NerParameterSearch(
@@ -35,6 +35,7 @@ class Pipeline(BaseModel):
35
35
  ner_threshold: Optional[dict] = None
36
36
  combinations: Optional[List[str]] = None
37
37
  model: Optional[Any] = None
38
+ default_model: Optional[Any] = None
38
39
  multi_modal: Optional[Any] = None
39
40
  use_mlflow: bool = False
40
41
  _dataset: BaseDataset = []
@@ -193,7 +194,7 @@ class Pipeline(BaseModel):
193
194
 
194
195
  y_test_label = self.items[0][4]
195
196
  multi_modal_runner = MultiModalRunner(
196
- library=self.library, multi_modal=self.multi_modal, use_mlflow=self.use_mlflow)
197
+ library=self.library, multi_modal=self.multi_modal, default_model= self.default_model , use_mlflow=self.use_mlflow)
197
198
  # multi_modal_runner = MultiModalRunner(
198
199
  # library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
199
200
  # multi_modal = TFMultiModal(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.198
3
+ Version: 0.0.200
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,9 +1,9 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
2
- ddi_fw/datasets/core.py,sha256=yfnJwyF9oV2RUErFSAKSyxQQeL1tmLiq7SfADhn1Cgk,16379
2
+ ddi_fw/datasets/core.py,sha256=vxatq85GDrvp_cdHQsCHHE-PBneGYRSMb9VyRHlBce8,16424
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
6
- ddi_fw/datasets/ddi_mdl/base.py,sha256=8WFc0iLT5PF6IOUStqKVIKR74D8WBuwXm_uMiV4OFsk,10324
6
+ ddi_fw/datasets/ddi_mdl/base.py,sha256=lmHKlpaxyH3HZDcC_kOuvDjuzb_rtOvdD7CkwA00w7k,10399
7
7
  ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
8
8
  ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
9
9
  ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
@@ -74,7 +74,7 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
74
74
  ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
75
75
  ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
76
76
  ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
77
- ddi_fw/ml/ml_helper.py,sha256=-pgnLhuPBp60IZxAvS11oDYggrgQY3GRHkAwQVWM9XI,6943
77
+ ddi_fw/ml/ml_helper.py,sha256=IFLpMa5oxwnhRXHNNYCITyaXPp7Rrid9bpsjp_Ss5SQ,7725
78
78
  ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
79
79
  ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
80
80
  ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
@@ -83,9 +83,9 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
83
83
  ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
84
84
  ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
85
85
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
86
- ddi_fw/pipeline/multi_pipeline.py,sha256=fYyvwIOscUahjXd3QO5RSFrp1LliGR7RzOZyAXrXXz4,5637
86
+ ddi_fw/pipeline/multi_pipeline.py,sha256=L0Apy2Z909GnR8KMX_q7-kluZmIG3CT0yfMuL22poaQ,5737
87
87
  ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
88
- ddi_fw/pipeline/pipeline.py,sha256=CUHuy1nNgGD-eUcLnWFXcmSoTGssmg4ZFRAY1Cufey0,9047
88
+ ddi_fw/pipeline/pipeline.py,sha256=s6fYtgAyWcap4oC6bAKZATju8BnasRtLopnM2Blt2Fo,9125
89
89
  ddi_fw/utils/__init__.py,sha256=HC32XkYQTYH_9vt0eX6tqQngEFG-R70hGrYkT-BcHCk,519
90
90
  ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
91
91
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
99
99
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
100
100
  ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
101
101
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
102
- ddi_fw-0.0.198.dist-info/METADATA,sha256=z3otymNU3l4737h3tkMaP0UMhZdLBtzS4ELP4wIcVt8,2542
103
- ddi_fw-0.0.198.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
104
- ddi_fw-0.0.198.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
105
- ddi_fw-0.0.198.dist-info/RECORD,,
102
+ ddi_fw-0.0.200.dist-info/METADATA,sha256=IkLebplT9KXZhVHSinJkay9r_ZmHs7n-jVeLpKW_kC4,2542
103
+ ddi_fw-0.0.200.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
104
+ ddi_fw-0.0.200.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
105
+ ddi_fw-0.0.200.dist-info/RECORD,,