ddi-fw 0.0.187__py3-none-any.whl → 0.0.188__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -222,8 +222,8 @@ class BaseDataset(BaseModel):
222
222
 
223
223
  X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset_splitter.split(
224
224
  X=X, y=y)
225
- self.X_train = np.array(self.X_train)
226
- self.X_test = np.array(self.X_test)
225
+ self.X_train = np.array(X_train)
226
+ self.X_test = np.array(X_test)
227
227
  self.y_train = np.array(y_train.tolist())
228
228
  self.y_test = np.array(y_test.tolist())
229
229
  self.train_indexes = X_train.index
@@ -37,23 +37,24 @@ class Pipeline(BaseModel):
37
37
  model: Optional[Any] = None
38
38
  multi_modal: Optional[Any] = None
39
39
  use_mlflow: bool = False
40
- _items:List=[]
41
- _train_idx_arr:List|None=[]
42
- _val_idx_arr:List|None=[]
43
-
40
+ _items: List = []
41
+ _train_idx_arr: List | None = []
42
+ _val_idx_arr: List | None = []
43
+
44
44
  @property
45
45
  def items(self) -> List:
46
46
  return self._items
47
+
47
48
  @property
48
- def train_idx_arr(self) -> List|None:
49
+ def train_idx_arr(self) -> List | None:
49
50
  return self._train_idx_arr
51
+
50
52
  @property
51
- def val_idx_arr(self) -> List|None:
53
+ def val_idx_arr(self) -> List | None:
52
54
  return self._val_idx_arr
53
55
 
54
56
  class Config:
55
57
  arbitrary_types_allowed = True
56
-
57
58
 
58
59
  def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
59
60
  """
@@ -146,21 +147,22 @@ class Pipeline(BaseModel):
146
147
  # filename=self.ner_data_file) if self.ner_data_file else None
147
148
 
148
149
  dataset_splitter = self.dataset_splitter_type()
149
-
150
+
150
151
  if issubclass(self.dataset_type, TextDatasetMixin):
151
152
  key, value = next(iter(embedding_dict.items()))
152
153
  embedding_size = value[next(iter(value))][0].shape[0]
153
- pooling_strategy = self.embedding_pooling_strategy_type() if self.embedding_pooling_strategy_type else None
154
+ pooling_strategy = self.embedding_pooling_strategy_type(
155
+ ) if self.embedding_pooling_strategy_type else None
154
156
 
155
157
  dataset = self.dataset_type(
156
158
  embedding_dict=embedding_dict,
157
159
  embedding_size=embedding_size,
158
160
  embeddings_pooling_strategy=pooling_strategy,
159
- dataset_splitter_type = self.dataset_splitter_type,
161
+ dataset_splitter_type=self.dataset_splitter_type,
160
162
  **kwargs)
161
163
  elif self.dataset_type == BaseDataset:
162
- dataset = self.dataset_type(
163
- dataset_splitter_type = self.dataset_splitter_type,
164
+ dataset = self.dataset_type(
165
+ dataset_splitter_type=self.dataset_splitter_type,
164
166
  **kwargs)
165
167
  else:
166
168
  dataset = self.dataset_type(**kwargs)
@@ -168,15 +170,22 @@ class Pipeline(BaseModel):
168
170
  # X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
169
171
  dataset.load()
170
172
  dataframe = dataset.dataframe
171
- b = not( dataset.X_train and dataset.y_train and dataset.X_test and dataset.y_test)
172
- c = dataframe is None or dataframe.empty
173
173
 
174
- if b or c:
174
+ # Check if any of the arrays are None or empty
175
+ is_data_valid = (dataset.X_train is not None and dataset.X_train.size > 0 and
176
+ dataset.y_train is not None and dataset.y_train.size > 0 and
177
+ dataset.X_test is not None and dataset.X_test.size > 0 and
178
+ dataset.y_test is not None and dataset.y_test.size > 0)
179
+
180
+ # Check if the dataframe is None or empty
181
+ is_dataframe_valid = dataframe is not None and not dataframe.empty
182
+
183
+ if not (is_data_valid and is_dataframe_valid):
175
184
  raise ValueError("The dataset is not loaded")
176
-
185
+
177
186
  # column name, train data, train label, test data, test label
178
187
  self._items = dataset.produce_inputs()
179
-
188
+
180
189
  print("Building the experiment with the following settings:")
181
190
  print(
182
191
  f"Name: {self.experiment_name}, Dataset: {dataset}, Model: {self.model}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.187
3
+ Version: 0.0.188
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,5 +1,5 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
2
- ddi_fw/datasets/core.py,sha256=mZcGqP3Ukx5FbYSMi08uq4vYDr7jbHR3xg1qOPJmU0s,10640
2
+ ddi_fw/datasets/core.py,sha256=eKPbntiDhqpqaV1SlrPmuSUq_9i_5INlnJuAlwj61Nk,10630
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -85,7 +85,7 @@ ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,2
85
85
  ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
86
86
  ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
87
87
  ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
88
- ddi_fw/pipeline/pipeline.py,sha256=VSILkxot_O1DJMWPavzFUH3le4zVKQydcH32SbuHZlQ,9355
88
+ ddi_fw/pipeline/pipeline.py,sha256=dCXZuXOlW74ZO0e_OhS9OX0dqI9abj7CQz_lkKrDIWY,9787
89
89
  ddi_fw/utils/__init__.py,sha256=bqIC0YjbD0YSHtO0nWUkRs4w5nu7qBV0yU72sRzwCj8,475
90
90
  ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
91
91
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
@@ -98,7 +98,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
98
98
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
99
99
  ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
100
100
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
101
- ddi_fw-0.0.187.dist-info/METADATA,sha256=dzH9YAqsPxQcvuS9h0JRNx5qtd8vGNr-1c5f0uE3c7M,2542
102
- ddi_fw-0.0.187.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
103
- ddi_fw-0.0.187.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
104
- ddi_fw-0.0.187.dist-info/RECORD,,
101
+ ddi_fw-0.0.188.dist-info/METADATA,sha256=SRAoTA4fu0suxghXx5okr-RsfC512VEotrkTCUeXBck,2542
102
+ ddi_fw-0.0.188.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
103
+ ddi_fw-0.0.188.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
104
+ ddi_fw-0.0.188.dist-info/RECORD,,