ddi-fw 0.0.189__py3-none-any.whl → 0.0.191__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -73,6 +73,7 @@ class BaseDataset(BaseModel):
      class Config:
          arbitrary_types_allowed = True

+     # TODO: if no columns are given, all features are used; how should this be handled in the pipeline?
      def produce_inputs(self):
          items = []
          if self.X_train is None or self.X_test is None:
@@ -127,15 +128,15 @@ class BaseDataset(BaseModel):
          Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
          skip deriving them. Otherwise, derive them from the dataframe and indices.
          """
-         if self.X_train and self.y_train and self.X_test and self.y_test :
+         self.prep()
+
+         if self.X_train is not None or self.y_train is not None or self.X_test is not None or self.y_test is not None:
              # Data is already provided, no need to calculate
              logging.info(
                  "X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
              return
          # return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr

-         self.prep()
-
          if self.index_path is None:
              raise Exception(
                  "There is no index path. Please call split_dataset or provide indices.")
@@ -156,7 +157,7 @@ class BaseDataset(BaseModel):
          y_train = train[self.class_column]
          X_test = test.drop(self.class_column, axis=1)
          y_test = test[self.class_column]
-
+
          self.X_train = np.array(X_train)
          # self.y_train = np.array(y_train)
          self.y_train = np.array(y_train.tolist())
@@ -170,7 +171,6 @@ class BaseDataset(BaseModel):
          self.val_idx_arr = val_idx_arr

          # Dataframe to numpy array conversion
-

          # return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr

@@ -226,14 +226,13 @@ class BaseDataset(BaseModel):
              X=X, y=y)
          self.X_train = np.array(X_train)
          self.X_test = np.array(X_test)
-         self.y_train = np.array(y_train.tolist())
+         self.y_train = np.array(y_train.tolist())
          self.y_test = np.array(y_test.tolist())
          self.train_indexes = X_train.index
          self.test_indexes = X_test.index
          self.train_idx_arr = train_idx_arr
          self.val_idx_arr = val_idx_arr

-
          if save_indexes:
              # train_pairs = [row['id1'].join(',').row['id2'] for index, row in X_train.iterrows()]
              self.__save_indexes__(
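An aside on the `np.array(y_train.tolist())` pattern both hunks keep: when a pandas Series holds list-valued labels (e.g. one-hot vectors), `np.array(series)` yields a 1-D object array, whereas converting to a plain list first produces the expected 2-D numeric array. A small sketch (illustrative data, assuming pandas and numpy):

    import numpy as np
    import pandas as pd

    y_train = pd.Series([[1, 0, 0], [0, 1, 0]])   # e.g. one-hot labels
    print(np.array(y_train).shape)                # (2,) with dtype=object
    print(np.array(y_train.tolist()).shape)       # (2, 3), numeric dtype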
ddi_fw/ml/ml_helper.py CHANGED
@@ -57,42 +57,53 @@ class MultiModalRunner:
          raise ValueError(
              "Unsupported library type. Choose 'tensorflow' or 'pytorch'.")

+     # TODO check single_results, 1d,2d ...
      def __predict(self, single_results):
          item_dict = {t[0]: t for t in self.items}
          print("multi_modal")
          print(self.multi_modal)
          print(item_dict.keys())

-         for m in self.multi_modal:
-             name = m.get('name')
-             input_type = m.get('input_type')
-             input = m.get('input')
-             inputs = m.get('inputs')
-             model_type = get_import(m.get("model_type"))
-             kwargs = m.get('params')
-             T = self.__create_model(self.library)
-             single_modal = T(self.date, name, model_type,
-                              use_mlflow=self.use_mlflow, **kwargs)
-             if input_type == '1D':
-                 item = item_dict[input]
-                 single_modal.set_data(
-                     self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
-             elif input_type == '2D':
-                 # check keys
-                 filtered_dict = {k: item_dict[k]
-                                  for k in inputs if k in item_dict}
-                 print(filtered_dict.keys())
-                 first_input = next(iter(filtered_dict.values()))
-                 train_data_list = [f[1] for f in filtered_dict.values()]
-                 test_data_list = [f[3] for f in filtered_dict.values()]
-                 train_data = np.stack(train_data_list, axis=1)
-                 test_data = np.stack(test_data_list, axis=1)
-                 train_label = first_input[2]
-                 test_label = first_input[4]
-                 single_modal.set_data(
-                     self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
+         if self.multi_modal:
+             for m in self.multi_modal:
+                 name = m.get('name')
+                 input_type = m.get('input_type')
+                 input = m.get('input')
+                 inputs = m.get('inputs')
+                 model_type = get_import(m.get("model_type"))
+                 kwargs = m.get('params')
+                 T = self.__create_model(self.library)
+                 single_modal = T(self.date, name, model_type,
+                                  use_mlflow=self.use_mlflow, **kwargs)
+
+                 if input is not None and inputs is not None:
+                     raise Exception("input and inputs should not be used together")
+
+                 if input_type == '1D':
+                     item = item_dict[input]
+                     single_modal.set_data(
+                         self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
+                 elif input_type == '2D':
+                     # check keys
+                     filtered_dict = {k: item_dict[k]
+                                      for k in inputs if k in item_dict}
+                     print(filtered_dict.keys())
+                     first_input = next(iter(filtered_dict.values()))
+                     train_data_list = [f[1] for f in filtered_dict.values()]
+                     test_data_list = [f[3] for f in filtered_dict.values()]
+                     train_data = np.stack(train_data_list, axis=1)
+                     test_data = np.stack(test_data_list, axis=1)
+                     train_label = first_input[2]
+                     test_label = first_input[4]
+                     single_modal.set_data(
+                         self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
+                 else:
+                     raise Exception("check configurations")
          else:
-             raise Exception("check configurations")
+             item = self.items[0]
+             single_modal.set_data(
+                 self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
+
          logs, metrics, prediction = single_modal.fit_and_evaluate()
          self.result.add_metric(name, metrics)
          single_results[name] = prediction
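For orientation, the entries this loop reads from self.multi_modal look roughly like the sketch below; the key names come from the m.get(...) calls above, while the concrete values and the model path are hypothetical. In the '2D' branch, M feature matrices of shape (N, D) are np.stack-ed along axis=1 into a single (N, M, D) tensor.

    multi_modal = [
        # '1D': one named feature from item_dict feeds one model
        {"name": "smiles_only", "input_type": "1D", "input": "smiles",
         "model_type": "some.module.Model", "params": {"epochs": 10}},
        # '2D': several features are stacked along axis=1 before training
        {"name": "fused", "input_type": "2D", "inputs": ["smiles", "targets"],
         "model_type": "some.module.Model", "params": {"epochs": 10}},
    ]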
ddi_fw/pipeline/multi_pipeline.py CHANGED
@@ -50,6 +50,7 @@ class MultiPipeline():
          type = config.get("type")
          library = config.get("library")

+         use_mlflow = config.get("use_mlflow")
          experiment_name = config.get("experiment_name")
          experiment_description = config.get("experiment_description")
          experiment_tags = config.get("experiment_tags")
@@ -84,6 +85,7 @@ class MultiPipeline():
          if type == "general":
              pipeline = Pipeline(
                  library=library,
+                 use_mlflow=use_mlflow,
                  experiment_name=experiment_name,
                  experiment_description=experiment_description,
                  experiment_tags=experiment_tags,
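A sketch of the config fragment these two hunks read, with field names taken from the config.get(...) calls above and values purely illustrative. One caveat worth noting: config.get("use_mlflow") returns None when the key is absent, and Pipeline declares use_mlflow: bool = False, so a strict pydantic bool field may reject the None.

    config = {
        "type": "general",
        "library": "tensorflow",
        "use_mlflow": True,                # newly forwarded to Pipeline
        "experiment_name": "ddi-experiment",
        "experiment_description": "...",
        "experiment_tags": {"stage": "dev"},
    }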
ddi_fw/pipeline/pipeline.py CHANGED
@@ -37,10 +37,15 @@ class Pipeline(BaseModel):
      model: Optional[Any] = None
      multi_modal: Optional[Any] = None
      use_mlflow: bool = False
+     _dataset: BaseDataset = []
      _items: List = []
      _train_idx_arr: List | None = []
      _val_idx_arr: List | None = []

+     @property
+     def dataset(self) -> BaseDataset:
+         return self._dataset
+
      @property
      def items(self) -> List:
          return self._items
@@ -168,7 +173,10 @@ class Pipeline(BaseModel):
          dataset = self.dataset_type(**kwargs)

          # X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
+
          dataset.load()
+         self._dataset = dataset
+
          dataframe = dataset.dataframe

          # Check if any of the arrays are None or empty
@@ -180,7 +188,7 @@ class Pipeline(BaseModel):
          # Check if the dataframe is None or empty
          is_dataframe_valid = dataframe is not None and not dataframe.empty

-         if not (is_data_valid and is_dataframe_valid):
+         if not (is_data_valid or is_dataframe_valid):
              raise ValueError("The dataset is not loaded")

          # column name, train data, train label, test data, test label
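The and→or flip loosens this guard: the pipeline previously demanded both the split arrays and the dataframe, and now raises only when neither is usable. Enumerating the four cases makes the change concrete (plain Python, illustrative):

    for data_ok, frame_ok in [(True, True), (True, False), (False, True), (False, False)]:
        old_raises = not (data_ok and frame_ok)  # raised unless both were valid
        new_raises = not (data_ok or frame_ok)   # raises only when both are invalid
        print(data_ok, frame_ok, old_raises, new_raises)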
ddi_fw/utils/__init__.py CHANGED
@@ -4,4 +4,5 @@ from .py7zr_helper import Py7ZipHelper
  from .enums import UMLSCodeTypes, DrugBankTextDataTypes
  from .package_helper import get_import
  from .kaggle import create_kaggle_dataset
- from .categorical_data_encoding_checker import is_one_hot_encoded, is_binary_encoded, is_binary_vector,is_label_encoded
+ from .categorical_data_encoding_checker import is_one_hot_encoded, is_binary_encoded, is_binary_vector,is_label_encoded
+ from .numpy_utils import adjust_array_dims
ddi_fw/utils/numpy_utils.py ADDED
@@ -0,0 +1,27 @@
+ import numpy as np
+
+ def adjust_array_dims(arr, final_ndim=2):
+     # Add axes if array has fewer dimensions than final_ndim
+     while arr.ndim < final_ndim:
+         arr = arr[:, np.newaxis]  # Add a new axis
+
+     # Drop axes if array has more dimensions than final_ndim
+     while arr.ndim > final_ndim:
+         arr = np.squeeze(arr, axis=-1)  # Remove the last axis
+
+     return arr
+
+
+
+ # # Example usage
+ # arr_1d = np.array([1, 2, 3, 4, 5])
+
+ # # Convert to a 3D array (iteratively adds axes)
+ # arr_3d = adjust_array_dims(arr_1d, final_ndim=3)
+ # print(arr_3d)
+ # print("Shape of arr_3d:", arr_3d.shape)
+
+ # # Convert to a 2D array (iteratively drops axes)
+ # arr_2d = adjust_array_dims(arr_3d, final_ndim=2)
+ # print(arr_2d)
+ # print("Shape of arr_2d:", arr_2d.shape)
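One behavioral caveat about the new helper: np.squeeze(arr, axis=-1) raises a ValueError whenever the trailing axis has a size other than 1, so adjust_array_dims can only shrink arrays whose extra dimensions are singletons. A quick demonstration of that standard numpy behavior:

    import numpy as np

    ok = np.ones((5, 3, 1))
    print(np.squeeze(ok, axis=-1).shape)   # (5, 3): size-1 axis drops cleanly

    bad = np.ones((5, 3, 2))
    try:
        np.squeeze(bad, axis=-1)           # trailing axis has size 2
    except ValueError as err:
        print("cannot squeeze:", err)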
{ddi_fw-0.0.189.dist-info → ddi_fw-0.0.191.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ddi_fw
- Version: 0.0.189
+ Version: 0.0.191
  Summary: Do not use :)
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
{ddi_fw-0.0.189.dist-info → ddi_fw-0.0.191.dist-info}/RECORD RENAMED
@@ -1,5 +1,5 @@
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
- ddi_fw/datasets/core.py,sha256=WWWd5SGHVUpJn-IJF1p1PScSWpb7VfQdcMTroufkgUk,10734
+ ddi_fw/datasets/core.py,sha256=IaEk4T7f590rAAAG7Nc45mofeutX85mNxhikFs7mzpE,10839
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -74,7 +74,7 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
  ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
  ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
  ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
- ddi_fw/ml/ml_helper.py,sha256=xbIg0fAJeJuB7rlgUMzCFhQ4WLBXS35x5N5gCcs6-so,6367
+ ddi_fw/ml/ml_helper.py,sha256=ENjdpu6stRxGxqhFweZLIglCnLREtMh6ypwanf0qMGc,6940
  ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
  ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
  ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
@@ -83,14 +83,15 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
  ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
  ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
- ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
+ ddi_fw/pipeline/multi_pipeline.py,sha256=fYyvwIOscUahjXd3QO5RSFrp1LliGR7RzOZyAXrXXz4,5637
  ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
- ddi_fw/pipeline/pipeline.py,sha256=dCXZuXOlW74ZO0e_OhS9OX0dqI9abj7CQz_lkKrDIWY,9787
- ddi_fw/utils/__init__.py,sha256=bqIC0YjbD0YSHtO0nWUkRs4w5nu7qBV0yU72sRzwCj8,475
+ ddi_fw/pipeline/pipeline.py,sha256=GMMauyp0GvdaQLyQ5dPBffDDxFK28hdDtPUzdFX9-Yk,9961
+ ddi_fw/utils/__init__.py,sha256=HC32XkYQTYH_9vt0eX6tqQngEFG-R70hGrYkT-BcHCk,519
  ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
  ddi_fw/utils/json_helper.py,sha256=BVU6wmJgdXPxyqLPu3Ck_9Es5RrP1PDanKvE-OSj1D4,571
  ddi_fw/utils/kaggle.py,sha256=wKRJ18KpQ6P-CubpZklEgsDtyFpR9RUL1_HyyF6ttEE,2425
+ ddi_fw/utils/numpy_utils.py,sha256=gd1WNq5NpWD2MBEMTtFuS5I0h8B6FAUNcq6BVOlxdhY,797
  ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5I,1056
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
@@ -98,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
  ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
- ddi_fw-0.0.189.dist-info/METADATA,sha256=y3hik68p2UsqKr7ur8R2ix8TyPfGsveDbKWboSaRkzA,2542
- ddi_fw-0.0.189.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- ddi_fw-0.0.189.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
- ddi_fw-0.0.189.dist-info/RECORD,,
+ ddi_fw-0.0.191.dist-info/METADATA,sha256=xrlNEz8W_iIfKVZidKR8kNa_WtrDr4EnVQwbtex_sbQ,2542
+ ddi_fw-0.0.191.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ ddi_fw-0.0.191.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ ddi_fw-0.0.191.dist-info/RECORD,,