ddi-fw 0.0.187__py3-none-any.whl → 0.0.189__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -158,9 +158,11 @@ class BaseDataset(BaseModel):
158
158
  y_test = test[self.class_column]
159
159
 
160
160
  self.X_train = np.array(X_train)
161
- self.y_train = np.array(y_train)
161
+ # self.y_train = np.array(y_train)
162
+ self.y_train = np.array(y_train.tolist())
162
163
  self.X_test = np.array(X_test)
163
- self.y_test = np.array(y_test)
164
+ # self.y_test = np.array(y_test)
165
+ self.y_test = np.array(y_test.tolist())
164
166
 
165
167
  self.train_indexes = X_train.index
166
168
  self.test_indexes = X_test.index
@@ -222,8 +224,8 @@ class BaseDataset(BaseModel):
222
224
 
223
225
  X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset_splitter.split(
224
226
  X=X, y=y)
225
- self.X_train = np.array(self.X_train)
226
- self.X_test = np.array(self.X_test)
227
+ self.X_train = np.array(X_train)
228
+ self.X_test = np.array(X_test)
227
229
  self.y_train = np.array(y_train.tolist())
228
230
  self.y_test = np.array(y_test.tolist())
229
231
  self.train_indexes = X_train.index
ddi_fw/ml/ml_helper.py CHANGED
@@ -57,7 +57,7 @@ class MultiModalRunner:
57
57
  raise ValueError(
58
58
  "Unsupported library type. Choose 'tensorflow' or 'pytorch'.")
59
59
 
60
- def __predict(self,single_results):
60
+ def __predict(self, single_results):
61
61
  item_dict = {t[0]: t for t in self.items}
62
62
  print("multi_modal")
63
63
  print(self.multi_modal)
@@ -71,7 +71,8 @@ class MultiModalRunner:
71
71
  model_type = get_import(m.get("model_type"))
72
72
  kwargs = m.get('params')
73
73
  T = self.__create_model(self.library)
74
- single_modal = T(self.date, name, model_type, **kwargs)
74
+ single_modal = T(self.date, name, model_type,
75
+ use_mlflow=self.use_mlflow, **kwargs)
75
76
  if input_type == '1D':
76
77
  item = item_dict[input]
77
78
  single_modal.set_data(
@@ -79,7 +80,7 @@ class MultiModalRunner:
79
80
  elif input_type == '2D':
80
81
  # check keys
81
82
  filtered_dict = {k: item_dict[k]
82
- for k in inputs if k in item_dict}
83
+ for k in inputs if k in item_dict}
83
84
  print(filtered_dict.keys())
84
85
  first_input = next(iter(filtered_dict.values()))
85
86
  train_data_list = [f[1] for f in filtered_dict.values()]
ddi_fw/ml/tensorflow_wrapper.py CHANGED
@@ -64,7 +64,7 @@ def convert_to_categorical(arr, num_classes):
64
64
 
65
65
  class TFModelWrapper(ModelWrapper):
66
66
 
67
- def __init__(self, date, descriptor, model_func, use_mlflow=True, **kwargs):
67
+ def __init__(self, date, descriptor, model_func, use_mlflow=False, **kwargs):
68
68
  super().__init__(date, descriptor, model_func, **kwargs)
69
69
  self.batch_size = kwargs.get('batch_size', 128)
70
70
  self.epochs = kwargs.get('epochs', 100)
ddi_fw/pipeline/pipeline.py CHANGED
@@ -37,23 +37,24 @@ class Pipeline(BaseModel):
37
37
  model: Optional[Any] = None
38
38
  multi_modal: Optional[Any] = None
39
39
  use_mlflow: bool = False
40
- _items:List=[]
41
- _train_idx_arr:List|None=[]
42
- _val_idx_arr:List|None=[]
43
-
40
+ _items: List = []
41
+ _train_idx_arr: List | None = []
42
+ _val_idx_arr: List | None = []
43
+
44
44
  @property
45
45
  def items(self) -> List:
46
46
  return self._items
47
+
47
48
  @property
48
- def train_idx_arr(self) -> List|None:
49
+ def train_idx_arr(self) -> List | None:
49
50
  return self._train_idx_arr
51
+
50
52
  @property
51
- def val_idx_arr(self) -> List|None:
53
+ def val_idx_arr(self) -> List | None:
52
54
  return self._val_idx_arr
53
55
 
54
56
  class Config:
55
57
  arbitrary_types_allowed = True
56
-
57
58
 
58
59
  def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
59
60
  """
@@ -146,21 +147,22 @@ class Pipeline(BaseModel):
146
147
  # filename=self.ner_data_file) if self.ner_data_file else None
147
148
 
148
149
  dataset_splitter = self.dataset_splitter_type()
149
-
150
+
150
151
  if issubclass(self.dataset_type, TextDatasetMixin):
151
152
  key, value = next(iter(embedding_dict.items()))
152
153
  embedding_size = value[next(iter(value))][0].shape[0]
153
- pooling_strategy = self.embedding_pooling_strategy_type() if self.embedding_pooling_strategy_type else None
154
+ pooling_strategy = self.embedding_pooling_strategy_type(
155
+ ) if self.embedding_pooling_strategy_type else None
154
156
 
155
157
  dataset = self.dataset_type(
156
158
  embedding_dict=embedding_dict,
157
159
  embedding_size=embedding_size,
158
160
  embeddings_pooling_strategy=pooling_strategy,
159
- dataset_splitter_type = self.dataset_splitter_type,
161
+ dataset_splitter_type=self.dataset_splitter_type,
160
162
  **kwargs)
161
163
  elif self.dataset_type == BaseDataset:
162
- dataset = self.dataset_type(
163
- dataset_splitter_type = self.dataset_splitter_type,
164
+ dataset = self.dataset_type(
165
+ dataset_splitter_type=self.dataset_splitter_type,
164
166
  **kwargs)
165
167
  else:
166
168
  dataset = self.dataset_type(**kwargs)
@@ -168,15 +170,22 @@ class Pipeline(BaseModel):
168
170
  # X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
169
171
  dataset.load()
170
172
  dataframe = dataset.dataframe
171
- b = not( dataset.X_train and dataset.y_train and dataset.X_test and dataset.y_test)
172
- c = dataframe is None or dataframe.empty
173
173
 
174
- if b or c:
174
+ # Check if any of the arrays are None or empty
175
+ is_data_valid = (dataset.X_train is not None and dataset.X_train.size > 0 and
176
+ dataset.y_train is not None and dataset.y_train.size > 0 and
177
+ dataset.X_test is not None and dataset.X_test.size > 0 and
178
+ dataset.y_test is not None and dataset.y_test.size > 0)
179
+
180
+ # Check if the dataframe is None or empty
181
+ is_dataframe_valid = dataframe is not None and not dataframe.empty
182
+
183
+ if not (is_data_valid and is_dataframe_valid):
175
184
  raise ValueError("The dataset is not loaded")
176
-
185
+
177
186
  # column name, train data, train label, test data, test label
178
187
  self._items = dataset.produce_inputs()
179
-
188
+
180
189
  print("Building the experiment with the following settings:")
181
190
  print(
182
191
  f"Name: {self.experiment_name}, Dataset: {dataset}, Model: {self.model}")
ddi_fw-0.0.189.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.187
3
+ Version: 0.0.189
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
ddi_fw-0.0.189.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
2
- ddi_fw/datasets/core.py,sha256=mZcGqP3Ukx5FbYSMi08uq4vYDr7jbHR3xg1qOPJmU0s,10640
2
+ ddi_fw/datasets/core.py,sha256=WWWd5SGHVUpJn-IJF1p1PScSWpb7VfQdcMTroufkgUk,10734
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -74,10 +74,10 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
74
74
  ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
75
75
  ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
76
76
  ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
77
- ddi_fw/ml/ml_helper.py,sha256=l1ZLYL3x5bHxD2bh2ezEgWDlV0ni8zGZGgj07x7KR40,6310
77
+ ddi_fw/ml/ml_helper.py,sha256=xbIg0fAJeJuB7rlgUMzCFhQ4WLBXS35x5N5gCcs6-so,6367
78
78
  ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
79
79
  ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
80
- ddi_fw/ml/tensorflow_wrapper.py,sha256=-zcbd0LBg9QNMF9K1I-JC379cS3rTO7ibgsDIOnMsoc,12951
80
+ ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
81
81
  ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
82
82
  ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
83
83
  ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
@@ -85,7 +85,7 @@ ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,2
85
85
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
86
86
  ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
87
87
  ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
88
- ddi_fw/pipeline/pipeline.py,sha256=VSILkxot_O1DJMWPavzFUH3le4zVKQydcH32SbuHZlQ,9355
88
+ ddi_fw/pipeline/pipeline.py,sha256=dCXZuXOlW74ZO0e_OhS9OX0dqI9abj7CQz_lkKrDIWY,9787
89
89
  ddi_fw/utils/__init__.py,sha256=bqIC0YjbD0YSHtO0nWUkRs4w5nu7qBV0yU72sRzwCj8,475
90
90
  ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
91
91
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
@@ -98,7 +98,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
98
98
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
99
99
  ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
100
100
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
101
- ddi_fw-0.0.187.dist-info/METADATA,sha256=dzH9YAqsPxQcvuS9h0JRNx5qtd8vGNr-1c5f0uE3c7M,2542
102
- ddi_fw-0.0.187.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
103
- ddi_fw-0.0.187.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
104
- ddi_fw-0.0.187.dist-info/RECORD,,
101
+ ddi_fw-0.0.189.dist-info/METADATA,sha256=y3hik68p2UsqKr7ur8R2ix8TyPfGsveDbKWboSaRkzA,2542
102
+ ddi_fw-0.0.189.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
103
+ ddi_fw-0.0.189.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
104
+ ddi_fw-0.0.189.dist-info/RECORD,,