ddi-fw 0.0.190__py3-none-any.whl → 0.0.192__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -73,6 +73,7 @@ class BaseDataset(BaseModel):
     class Config:
         arbitrary_types_allowed = True
 
+    # TODO: if no columns are given, all features are used; how should this be handled in the pipeline?
     def produce_inputs(self):
         items = []
         if self.X_train is None or self.X_test is None:
@@ -80,7 +81,10 @@ class BaseDataset(BaseModel):
         y_train_label, y_test_label = np.array(
             self.y_train), np.array(self.y_test)
 
-        if self.columns is None or len(self.columns) == 0:
+        if self.columns is None or len(self.columns) == 0 or len(self.columns) == 1:
+            # If no columns or only one column is provided, do not change the data
+            # and use the entire dataset as a single input.
+            column = self.columns[0] if self.columns else 'default'
             items.append([f'default', np.nan_to_num(self.X_train),
                           y_train_label, np.nan_to_num(self.X_test), y_test_label])
         else:
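Note on this hunk: the new `column` variable is assigned but, in the lines shown, the appended item still carries the literal 'default' label. A minimal sketch of the resulting behavior, assuming a dataset object whose columns is ['embedding'] (name and shapes illustrative, not from the package):

    # Hypothetical illustration of produce_inputs() after this change:
    # with zero or one entry in `columns`, the entire X_train/X_test is
    # emitted as a single item, still labeled 'default'.
    items = dataset.produce_inputs()            # dataset.columns == ['embedding']
    name, X_tr, y_tr, X_te, y_te = items[0]
    assert name == 'default'                    # not 'embedding': `column` is unused here
    assert X_tr.shape == dataset.X_train.shape  # data passed through, NaNs replaced with 0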
@@ -127,15 +131,15 @@ class BaseDataset(BaseModel):
         Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
         skip deriving them. Otherwise, derive them from the dataframe and indices.
         """
-        if self.X_train and self.y_train and self.X_test and self.y_test :
+        self.prep()
+
+        if self.X_train is not None or self.y_train is not None or self.X_test is not None or self.y_test is not None:
             # Data is already provided, no need to calculate
             logging.info(
                 "X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
             return
             # return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
 
-        self.prep()
-
         if self.index_path is None:
             raise Exception(
                 "There is no index path. Please call split_dataset or provide indices.")
@@ -156,7 +160,7 @@ class BaseDataset(BaseModel):
         y_train = train[self.class_column]
         X_test = test.drop(self.class_column, axis=1)
         y_test = test[self.class_column]
-
+
         self.X_train = np.array(X_train)
         # self.y_train = np.array(y_train)
         self.y_train = np.array(y_train.tolist())
@@ -170,7 +174,6 @@ class BaseDataset(BaseModel):
         self.val_idx_arr = val_idx_arr
 
         # Dataframe to numpy array conversion
-
 
         # return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
 
@@ -226,14 +229,13 @@ class BaseDataset(BaseModel):
             X=X, y=y)
         self.X_train = np.array(X_train)
         self.X_test = np.array(X_test)
-        self.y_train = np.array(y_train.tolist())
+        self.y_train = np.array(y_train.tolist())
         self.y_test = np.array(y_test.tolist())
         self.train_indexes = X_train.index
         self.test_indexes = X_test.index
         self.train_idx_arr = train_idx_arr
         self.val_idx_arr = val_idx_arr
 
-
         if save_indexes:
             # train_pairs = [row['id1'].join(',').row['id2'] for index, row in X_train.iterrows()]
             self.__save_indexes__(
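Taken together, the load() changes do two things: prep() now runs unconditionally before the early-exit check, and the check itself went from requiring all four arrays (and) to any one of them (or). A minimal usage sketch, assuming a concrete subclass MyDataset (hypothetical name) whose prep() needs no extra inputs:

    import numpy as np

    ds = MyDataset(
        X_train=np.load("X_train.npy"), y_train=np.load("y_train.npy"),
        X_test=np.load("X_test.npy"), y_test=np.load("y_test.npy"),
    )
    ds.load()  # prep() runs first, then returns early: arrays already provided

Because of the or-condition, supplying only X_train would now also skip derivation, leaving y_train, X_test, and y_test unset; whether that is intended is not clear from the diff.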
ddi_fw/ml/ml_helper.py CHANGED
@@ -57,42 +57,53 @@ class MultiModalRunner:
         raise ValueError(
             "Unsupported library type. Choose 'tensorflow' or 'pytorch'.")
 
+    # TODO check single_results, 1d,2d ...
     def __predict(self, single_results):
         item_dict = {t[0]: t for t in self.items}
         print("multi_modal")
         print(self.multi_modal)
         print(item_dict.keys())
 
-        for m in self.multi_modal:
-            name = m.get('name')
-            input_type = m.get('input_type')
-            input = m.get('input')
-            inputs = m.get('inputs')
-            model_type = get_import(m.get("model_type"))
-            kwargs = m.get('params')
-            T = self.__create_model(self.library)
-            single_modal = T(self.date, name, model_type,
-                             use_mlflow=self.use_mlflow, **kwargs)
-            if input_type == '1D':
-                item = item_dict[input]
+        if self.multi_modal:
+            for m in self.multi_modal:
+                name = m.get('name')
+                input_type = m.get('input_type')
+                input = m.get('input')
+                inputs = m.get('inputs')
+                model_type = get_import(m.get("model_type"))
+                kwargs = m.get('params')
+                T = self.__create_model(self.library)
+                single_modal = T(self.date, name, model_type,
+                                 use_mlflow=self.use_mlflow, **kwargs)
+
+                if input is not None and inputs is not None:
+                    raise Exception("input and inputs should not be used together")
+
+                if input:
+                    item = item_dict[input]
+                    single_modal.set_data(
+                        self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
+                elif inputs:
+                    # check keys
+                    filtered_dict = {k: item_dict[k]
+                                     for k in inputs if k in item_dict}
+                    print(filtered_dict.keys())
+                    first_input = next(iter(filtered_dict.values()))
+                    train_data_list = [f[1] for f in filtered_dict.values()]
+                    test_data_list = [f[3] for f in filtered_dict.values()]
+                    train_data = np.stack(train_data_list, axis=1)
+                    test_data = np.stack(test_data_list, axis=1)
+                    train_label = first_input[2]
+                    test_label = first_input[4]
+                    single_modal.set_data(
+                        self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
+                else:
+                    raise Exception("check configurations")
+        else:  # TODO default model maybe?
+            item = self.items[0]
             single_modal.set_data(
-                self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
-            elif input_type == '2D':
-                # check keys
-                filtered_dict = {k: item_dict[k]
-                                 for k in inputs if k in item_dict}
-                print(filtered_dict.keys())
-                first_input = next(iter(filtered_dict.values()))
-                train_data_list = [f[1] for f in filtered_dict.values()]
-                test_data_list = [f[3] for f in filtered_dict.values()]
-                train_data = np.stack(train_data_list, axis=1)
-                test_data = np.stack(test_data_list, axis=1)
-                train_label = first_input[2]
-                test_label = first_input[4]
-                single_modal.set_data(
-                    self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
-            else:
-                raise Exception("check configurations")
+                self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
+
         logs, metrics, prediction = single_modal.fit_and_evaluate()
         self.result.add_metric(name, metrics)
         single_results[name] = prediction
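The dispatch no longer keys on input_type ('1D'/'2D') but on whether an entry defines input (a single item) or inputs (several items stacked via np.stack(..., axis=1)); input_type is still read but unused in the lines shown. Also note that when multi_modal is falsy, the new else branch references single_modal and name before they are assigned in the lines shown, which the author's own TODO seems to acknowledge. A hedged sketch of two configuration entries (names, import path, and params are illustrative, not part of the package):

    multi_modal = [
        {   # single-source model: 'input' names one item produced by the dataset
            "name": "smiles_model",
            "input": "smiles",                   # mutually exclusive with 'inputs'
            "model_type": "my_models.DnnModel",  # hypothetical path, resolved via get_import()
            "params": {"epochs": 10},
        },
        {   # multi-source model: 'inputs' items are stacked along axis 1
            "name": "fused_model",
            "inputs": ["smiles", "targets"],
            "model_type": "my_models.DnnModel",
            "params": {"epochs": 10},
        },
    ]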
ddi_fw/pipeline/multi_pipeline.py CHANGED
@@ -50,6 +50,7 @@ class MultiPipeline():
         type = config.get("type")
         library = config.get("library")
 
+        use_mlflow = config.get("use_mlflow")
         experiment_name = config.get("experiment_name")
         experiment_description = config.get("experiment_description")
         experiment_tags = config.get("experiment_tags")
@@ -84,6 +85,7 @@ class MultiPipeline():
         if type == "general":
             pipeline = Pipeline(
                 library=library,
+                use_mlflow=use_mlflow,
                 experiment_name=experiment_name,
                 experiment_description=experiment_description,
                 experiment_tags=experiment_tags,
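A sketch of a matching config entry (keys taken from the diff, values illustrative). One caveat: config.get("use_mlflow") yields None when the key is absent, and that None is passed explicitly to Pipeline's use_mlflow: bool = False field, so configs presumably need to set the key:

    config = {
        "type": "general",
        "library": "tensorflow",
        "use_mlflow": True,  # new in 0.0.192; forwarded to Pipeline
        "experiment_name": "ddi-experiment",
        "experiment_description": "multi-modal DDI runs",
        "experiment_tags": {"release": "0.0.192"},
    }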
ddi_fw/pipeline/pipeline.py CHANGED
@@ -37,10 +37,15 @@ class Pipeline(BaseModel):
     model: Optional[Any] = None
     multi_modal: Optional[Any] = None
     use_mlflow: bool = False
+    _dataset: BaseDataset = []
     _items: List = []
     _train_idx_arr: List | None = []
     _val_idx_arr: List | None = []
 
+    @property
+    def dataset(self) -> BaseDataset:
+        return self._dataset
+
     @property
     def items(self) -> List:
         return self._items
@@ -168,7 +173,10 @@ class Pipeline(BaseModel):
         dataset = self.dataset_type(**kwargs)
 
         # X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
+
         dataset.load()
+        self._dataset = dataset
+
         dataframe = dataset.dataframe
 
         # Check if any of the arrays are None or empty
@@ -180,7 +188,7 @@ class Pipeline(BaseModel):
         # Check if the dataframe is None or empty
         is_dataframe_valid = dataframe is not None and not dataframe.empty
 
-        if not (is_data_valid and is_dataframe_valid):
+        if not (is_data_valid or is_dataframe_valid):
             raise ValueError("The dataset is not loaded")
 
         # column name, train data, train label, test data, test label
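The loaded dataset is now kept on the pipeline and exposed through the new read-only dataset property (note the odd list default in _dataset: BaseDataset = []), and the validity check loosened from and to or, so either valid arrays or a valid dataframe now counts as loaded. A minimal consumption sketch; build() is a hypothetical stand-in for whatever method runs the loading code above:

    pipeline = Pipeline(library="tensorflow", use_mlflow=True)  # other fields elided
    pipeline.build()       # hypothetical; executes the dataset loading shown above
    ds = pipeline.dataset  # new in 0.0.192: the loaded BaseDataset instance
    print(ds.X_train.shape, ds.X_test.shape)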
ddi_fw-0.0.190.dist-info/METADATA → ddi_fw-0.0.192.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ddi_fw
-Version: 0.0.190
+Version: 0.0.192
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
ddi_fw-0.0.190.dist-info/RECORD → ddi_fw-0.0.192.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
 ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
-ddi_fw/datasets/core.py,sha256=WWWd5SGHVUpJn-IJF1p1PScSWpb7VfQdcMTroufkgUk,10734
+ddi_fw/datasets/core.py,sha256=4705a94kKBueyWFXRJ3cnivAGKjrR89uBBKpxtMozOM,11080
 ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
 ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
 ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -74,7 +74,7 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
 ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
 ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
 ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
-ddi_fw/ml/ml_helper.py,sha256=xbIg0fAJeJuB7rlgUMzCFhQ4WLBXS35x5N5gCcs6-so,6367
+ddi_fw/ml/ml_helper.py,sha256=-pgnLhuPBp60IZxAvS11oDYggrgQY3GRHkAwQVWM9XI,6943
 ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
 ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
 ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
@@ -83,9 +83,9 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
 ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
 ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
 ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
-ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
+ddi_fw/pipeline/multi_pipeline.py,sha256=fYyvwIOscUahjXd3QO5RSFrp1LliGR7RzOZyAXrXXz4,5637
 ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
-ddi_fw/pipeline/pipeline.py,sha256=dCXZuXOlW74ZO0e_OhS9OX0dqI9abj7CQz_lkKrDIWY,9787
+ddi_fw/pipeline/pipeline.py,sha256=GMMauyp0GvdaQLyQ5dPBffDDxFK28hdDtPUzdFX9-Yk,9961
 ddi_fw/utils/__init__.py,sha256=HC32XkYQTYH_9vt0eX6tqQngEFG-R70hGrYkT-BcHCk,519
 ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
 ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
 ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
 ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
 ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
-ddi_fw-0.0.190.dist-info/METADATA,sha256=kTsCriMdtxc5BZWLUlThWAMPoYSWjPRkLFZntodwCwM,2542
-ddi_fw-0.0.190.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-ddi_fw-0.0.190.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
-ddi_fw-0.0.190.dist-info/RECORD,,
+ddi_fw-0.0.192.dist-info/METADATA,sha256=MhOs-typXfDNCIRm8_9NheEkyNYmnmQbArQGstSOAas,2542
+ddi_fw-0.0.192.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ddi_fw-0.0.192.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.192.dist-info/RECORD,,