ddi-fw 0.0.179__tar.gz → 0.0.181__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ml/tensorflow_wrapper.py +12 -5
  4. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/__init__.py +2 -1
  5. ddi_fw-0.0.181/src/ddi_fw/utils/categorical_data_encoding_checker.py +32 -0
  6. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  7. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw.egg-info/SOURCES.txt +1 -0
  8. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/README.md +0 -0
  9. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/setup.cfg +0 -0
  10. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/__init__.py +0 -0
  11. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/core.py +0 -0
  12. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  13. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/db_utils.py +0 -0
  14. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  15. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  16. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/debug.log +0 -0
  17. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  18. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  19. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  20. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  21. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  22. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  23. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  24. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  25. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  26. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  27. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  28. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  29. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  30. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  31. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  32. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  33. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  34. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  35. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  36. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  37. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  38. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  39. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  40. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  41. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  42. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/base.py +0 -0
  43. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
  44. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
  45. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
  46. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
  47. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
  48. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
  49. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
  50. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
  51. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
  52. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
  53. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
  54. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
  55. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
  56. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  57. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  58. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  59. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  60. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  61. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  62. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  63. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  64. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  65. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  66. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  67. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  68. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  69. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  70. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  71. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  72. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  73. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/datasets/setup_._py +0 -0
  74. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/drugbank/__init__.py +0 -0
  75. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  76. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  77. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  78. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  79. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  80. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/langchain/__init__.py +0 -0
  81. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/langchain/embeddings.py +0 -0
  82. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  83. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/langchain/storage.py +0 -0
  84. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ml/__init__.py +0 -0
  85. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  86. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ml/ml_helper.py +0 -0
  87. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ml/model_wrapper.py +0 -0
  88. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  89. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ner/__init__.py +0 -0
  90. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  91. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/ner/ner.py +0 -0
  92. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/pipeline/__init__.py +0 -0
  93. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  94. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  95. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  96. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/pipeline/pipeline.py +0 -0
  97. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/enums.py +0 -0
  98. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/json_helper.py +0 -0
  99. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/kaggle.py +0 -0
  100. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/package_helper.py +0 -0
  101. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  102. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/utils.py +0 -0
  103. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/utils/zip_helper.py +0 -0
  104. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/vectorization/__init__.py +0 -0
  105. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  106. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  107. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  108. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw.egg-info/requires.txt +0 -0
  109. {ddi_fw-0.0.179 → ddi_fw-0.0.181}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.179
3
+ Version: 0.0.181
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "ddi_fw"
9
- version = "0.0.179"
9
+ version = "0.0.181"
10
10
  description = "Do not use :)"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -28,9 +28,10 @@ class TFModelWrapper(ModelWrapper):
28
28
  self.epochs = kwargs.get('epochs', 100)
29
29
  self.use_mlflow = use_mlflow
30
30
 
31
+ # TODO think different settings for num_classes
31
32
  def fit_model(self, X_train, y_train, X_valid, y_valid):
32
33
  self.kwargs['input_shape'] = self.train_data.shape
33
- self.num_classes = len(np.unique(y_train))
34
+ self.num_classes = len(np.unique(y_train, axis=0))
34
35
  model = self.model_func(**self.kwargs)
35
36
  checkpoint = ModelCheckpoint(
36
37
  filepath=f'{self.descriptor}_validation.weights.h5',
@@ -136,8 +137,11 @@ class TFModelWrapper(ModelWrapper):
136
137
  print(best_model_key)
137
138
  self.best_model: Model = best_model
138
139
  pred = self.predict()
139
- pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
140
- actual = tf.keras.utils.to_categorical(self.test_label, num_classes=self.num_classes)
140
+ actual = self.test_label
141
+ if not utils.is_binary_encoded(pred):
142
+ pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
143
+ if not utils.is_binary_encoded(actual):
144
+ actual = tf.keras.utils.to_categorical(actual, num_classes=self.num_classes)
141
145
 
142
146
  logs, metrics = evaluate(
143
147
  actual=actual, pred=pred, info=self.descriptor, print_detail=print_detail)
@@ -155,8 +159,11 @@ class TFModelWrapper(ModelWrapper):
155
159
  print(best_model_key)
156
160
  self.best_model = best_model
157
161
  pred = self.predict()
158
- pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
159
- actual = tf.keras.utils.to_categorical(self.test_label, num_classes=self.num_classes)
162
+ actual = self.test_label
163
+ if not utils.is_binary_encoded(pred):
164
+ pred = tf.keras.utils.to_categorical(np.argmax(pred,axis=1), num_classes=self.num_classes)
165
+ if not utils.is_binary_encoded(actual):
166
+ actual = tf.keras.utils.to_categorical(actual, num_classes=self.num_classes)
160
167
 
161
168
  logs, metrics = evaluate(
162
169
  actual=actual, pred=pred, info=self.descriptor)
@@ -3,4 +3,5 @@ from .zip_helper import ZipHelper
3
3
  from .py7zr_helper import Py7ZipHelper
4
4
  from .enums import UMLSCodeTypes, DrugBankTextDataTypes
5
5
  from .package_helper import get_import
6
- from .kaggle import create_kaggle_dataset
6
+ from .kaggle import create_kaggle_dataset
7
+ from .categorical_data_encoding_checker import is_one_hot_encoded, is_binary_encoded, is_binary_vector
@@ -0,0 +1,32 @@
1
+ import numpy as np
2
+
3
+
4
+ def is_one_hot_encoded(arr):
5
+ # Check if the array is one-hot encoded
6
+ # Ensure the input is a numpy ndarray and is 2D
7
+ if not isinstance(arr, np.ndarray):
8
+ raise ValueError("Input must be a NumPy ndarray.")
9
+ if not np.all(np.isin(arr, [0, 1])):
10
+ return False
11
+ # Check if each row (or column) has exactly one "1"
12
+ return np.all(np.sum(arr, axis=1) == 1) # For row-wise checking
13
+
14
+
15
+ def is_binary_encoded(arr):
16
+ # Ensure the input is a numpy ndarray and is 2D
17
+ if not isinstance(arr, np.ndarray):
18
+ raise ValueError("Input must be a NumPy ndarray.")
19
+ if arr.ndim != 2:
20
+ raise ValueError("Input must be a 2D array.")
21
+
22
+ # Check if all elements are either 0 or 1
23
+ return np.all(np.isin(arr, [0, 1]))
24
+
25
+
26
+ def is_binary_vector(arr):
27
+ # Ensure the input is a numpy ndarray and is 1D
28
+ if not isinstance(arr, np.ndarray):
29
+ raise ValueError("Input must be a NumPy ndarray.")
30
+ if arr.ndim != 1:
31
+ raise ValueError("Input must be a 1D array.")
32
+ return arr.ndim == 1 and np.all(np.isin(arr, [0, 1]))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.179
3
+ Version: 0.0.181
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -94,6 +94,7 @@ src/ddi_fw/pipeline/multi_pipeline.py
94
94
  src/ddi_fw/pipeline/ner_pipeline.py
95
95
  src/ddi_fw/pipeline/pipeline.py
96
96
  src/ddi_fw/utils/__init__.py
97
+ src/ddi_fw/utils/categorical_data_encoding_checker.py
97
98
  src/ddi_fw/utils/enums.py
98
99
  src/ddi_fw/utils/json_helper.py
99
100
  src/ddi_fw/utils/kaggle.py
File without changes
File without changes
File without changes