ddi-fw 0.0.178__py3-none-any.whl → 0.0.180__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -225,9 +225,9 @@ class BaseDataset(BaseModel):
225
225
 
226
226
  # Dataframe to numpy array conversion
227
227
  self.X_train = np.array(self.X_train)
228
- self.y_train = np.array(self.y_train)
228
+ self.y_train = np.array(self.y_train.tolist())
229
229
  self.X_test = np.array(self.X_test)
230
- self.y_test = np.array(self.y_test)
230
+ self.y_test = np.array(self.y_test.tolist())
231
231
 
232
232
  if save_indexes:
233
233
  # train_pairs = [row['id1'].join(',').row['id2'] for index, row in X_train.iterrows()]
ddi_fw/ml/tensorflow_wrapper.py CHANGED
@@ -136,8 +136,11 @@ class TFModelWrapper(ModelWrapper):
136
136
  print(best_model_key)
137
137
  self.best_model: Model = best_model
138
138
  pred = self.predict()
139
- pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
140
- actual = tf.keras.utils.to_categorical(self.test_label, num_classes=self.num_classes)
139
+ actual = self.test_label
140
+ if not utils.is_binary_encoded(pred):
141
+ pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
142
+ if not utils.is_binary_encoded(actual):
143
+ actual = tf.keras.utils.to_categorical(actual, num_classes=self.num_classes)
141
144
 
142
145
  logs, metrics = evaluate(
143
146
  actual=actual, pred=pred, info=self.descriptor, print_detail=print_detail)
@@ -155,8 +158,11 @@ class TFModelWrapper(ModelWrapper):
155
158
  print(best_model_key)
156
159
  self.best_model = best_model
157
160
  pred = self.predict()
158
- pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
159
- actual = tf.keras.utils.to_categorical(self.test_label, num_classes=self.num_classes)
161
+ actual = self.test_label
162
+ if not utils.is_binary_encoded(pred):
163
+ pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
164
+ if not utils.is_binary_encoded(actual):
165
+ actual = tf.keras.utils.to_categorical(actual, num_classes=self.num_classes)
160
166
 
161
167
  logs, metrics = evaluate(
162
168
  actual=actual, pred=pred, info=self.descriptor)
ddi_fw/utils/__init__.py CHANGED
@@ -3,4 +3,5 @@ from .zip_helper import ZipHelper
3
3
  from .py7zr_helper import Py7ZipHelper
4
4
  from .enums import UMLSCodeTypes, DrugBankTextDataTypes
5
5
  from .package_helper import get_import
6
- from .kaggle import create_kaggle_dataset
6
+ from .kaggle import create_kaggle_dataset
7
+ from .categorical_data_encoding_checker import is_one_hot_encoded, is_binary_encoded, is_binary_vector
ddi_fw/utils/categorical_data_encoding_checker.py ADDED
@@ -0,0 +1,32 @@
1
+ import numpy as np
2
+
3
+
4
+ def is_one_hot_encoded(arr):
5
+ # Check if the array is one-hot encoded
6
+ # Ensure the input is a numpy ndarray and is 2D
7
+ if not isinstance(arr, np.ndarray):
8
+ raise ValueError("Input must be a NumPy ndarray.")
9
+ if not np.all(np.isin(arr, [0, 1])):
10
+ return False
11
+ # Check if each row (or column) has exactly one "1"
12
+ return np.all(np.sum(arr, axis=1) == 1) # For row-wise checking
13
+
14
+
15
+ def is_binary_encoded(arr):
16
+ # Ensure the input is a numpy ndarray and is 2D
17
+ if not isinstance(arr, np.ndarray):
18
+ raise ValueError("Input must be a NumPy ndarray.")
19
+ if arr.ndim != 2:
20
+ raise ValueError("Input must be a 2D array.")
21
+
22
+ # Check if all elements are either 0 or 1
23
+ return np.all(np.isin(arr, [0, 1]))
24
+
25
+
26
+ def is_binary_vector(arr):
27
+ # Ensure the input is a numpy ndarray and is 1D
28
+ if not isinstance(arr, np.ndarray):
29
+ raise ValueError("Input must be a NumPy ndarray.")
30
+ if arr.ndim != 1:
31
+ raise ValueError("Input must be a 1D array.")
32
+ return arr.ndim == 1 and np.all(np.isin(arr, [0, 1]))
ddi_fw-0.0.180.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.178
3
+ Version: 0.0.180
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
ddi_fw-0.0.180.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
2
- ddi_fw/datasets/core.py,sha256=6tbLMcunzfTE-BbE9GAEy_-h15N5EgK2oP5TfsHLjsk,10569
2
+ ddi_fw/datasets/core.py,sha256=H2PHwlGkEH-qvVq-zIoyIDitfXc1uHdf8Ge1EA3qGss,10587
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -77,7 +77,7 @@ ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5
77
77
  ddi_fw/ml/ml_helper.py,sha256=E6ef7f1UnQl6JBUdGDbbbI4FIS-904VGypT7tI0a598,8545
78
78
  ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
79
79
  ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
80
- ddi_fw/ml/tensorflow_wrapper.py,sha256=AeEXGbsQW6BgVf-Mgxe9NbvwNqLOqqCTGyTNxfg4G_Y,10564
80
+ ddi_fw/ml/tensorflow_wrapper.py,sha256=ihiruZwWeRLkgz8dsC0P40XaikpJSN-krVbo1LE5GUw,10858
81
81
  ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
82
82
  ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
83
83
  ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
@@ -86,7 +86,8 @@ ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJ
86
86
  ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
87
87
  ddi_fw/pipeline/ner_pipeline.py,sha256=kNGtkg5rNX5MDywzvRxmvyk-DxXAjEbYzZkp8pNlAZo,6023
88
88
  ddi_fw/pipeline/pipeline.py,sha256=70lYsluAnTWDLTlf6rbecffw3Bl34L1_6ALfLUoSvtY,11324
89
- ddi_fw/utils/__init__.py,sha256=77563ikqAtdzjjgRlLp5OAsJBbpLA1Cao8iecGaVUXQ,354
89
+ ddi_fw/utils/__init__.py,sha256=L64M3YCB56eSiB-rE2Zmn6DMNNwqHPWHq6Z_f4fi9VQ,458
90
+ ddi_fw/utils/categorical_data_encoding_checker.py,sha256=DNbxjpyD8XTqILSHHQ0_VUd61PNBSupSAuXiq5nLTK8,1122
90
91
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
91
92
  ddi_fw/utils/json_helper.py,sha256=BVU6wmJgdXPxyqLPu3Ck_9Es5RrP1PDanKvE-OSj1D4,571
92
93
  ddi_fw/utils/kaggle.py,sha256=wKRJ18KpQ6P-CubpZklEgsDtyFpR9RUL1_HyyF6ttEE,2425
@@ -97,7 +98,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
97
98
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
98
99
  ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
99
100
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
100
- ddi_fw-0.0.178.dist-info/METADATA,sha256=nSzlXlVfJLUgh8w5uhzzVoKJu-fLVUV7BrXH6GvGqhk,2542
101
- ddi_fw-0.0.178.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
102
- ddi_fw-0.0.178.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
- ddi_fw-0.0.178.dist-info/RECORD,,
101
+ ddi_fw-0.0.180.dist-info/METADATA,sha256=6FZKb9M4aisFLRZL03LXbKA70RVMOp9YsnjY6U8OzWs,2542
102
+ ddi_fw-0.0.180.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
103
+ ddi_fw-0.0.180.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
104
+ ddi_fw-0.0.180.dist-info/RECORD,,