ddi-fw 0.0.185__tar.gz → 0.0.187__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/core.py +27 -29
  4. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/ml_helper.py +1 -39
  5. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/tensorflow_wrapper.py +3 -2
  6. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/ner_pipeline.py +3 -3
  7. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/pipeline.py +13 -59
  8. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  9. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/README.md +0 -0
  10. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/setup.cfg +0 -0
  11. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/__init__.py +0 -0
  12. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  13. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/db_utils.py +0 -0
  14. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  15. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  16. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/debug.log +0 -0
  17. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  18. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  19. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  20. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  21. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  22. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  23. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  24. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  25. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  26. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  27. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  28. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  29. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  30. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  31. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  32. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  33. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  34. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  35. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  36. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  37. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  38. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  39. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  40. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  41. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  42. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/base.py +0 -0
  43. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
  44. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
  45. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
  46. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
  47. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
  48. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
  49. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
  50. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
  51. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
  52. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
  53. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
  54. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
  55. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
  56. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  57. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  58. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  59. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  60. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  61. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  62. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  63. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  64. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  65. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  66. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  67. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  68. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  69. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  70. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  71. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  72. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  73. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/setup_._py +0 -0
  74. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/__init__.py +0 -0
  75. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  76. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  77. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  78. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  79. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  80. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/__init__.py +0 -0
  81. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/embeddings.py +0 -0
  82. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  83. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/langchain/storage.py +0 -0
  84. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/__init__.py +0 -0
  85. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  86. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/model_wrapper.py +0 -0
  87. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  88. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ner/__init__.py +0 -0
  89. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  90. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ner/ner.py +0 -0
  91. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/__init__.py +0 -0
  92. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  93. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  94. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/__init__.py +0 -0
  95. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
  96. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/enums.py +0 -0
  97. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/json_helper.py +0 -0
  98. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/kaggle.py +0 -0
  99. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/package_helper.py +0 -0
  100. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  101. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/utils.py +0 -0
  102. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/utils/zip_helper.py +0 -0
  103. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/vectorization/__init__.py +0 -0
  104. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  105. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  106. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  107. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  108. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/requires.txt +0 -0
  109. {ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/top_level.txt +0 -0
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ddi_fw
- Version: 0.0.185
+ Version: 0.0.187
  Summary: Do not use :)
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "ddi_fw"
- version = "0.0.185"
+ version = "0.0.187"
  description = "Do not use :)"
  readme = "README.md"
  authors = [
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/datasets/core.py
@@ -60,10 +60,10 @@ class BaseDataset(BaseModel):
  dataset_splitter_type: Type[DatasetSplitter]
  class_column: str = 'class'
  dataframe: Optional[pd.DataFrame] = None
- X_train: Optional[pd.DataFrame | np.ndarray] = None
- X_test: Optional[pd.DataFrame | np.ndarray] = None
- y_train: Optional[pd.Series | np.ndarray] = None
- y_test: Optional[pd.Series | np.ndarray] = None
+ X_train: Optional[np.ndarray] = None
+ X_test: Optional[np.ndarray] = None
+ y_train: Optional[np.ndarray] = None
+ y_test: Optional[np.ndarray] = None
  train_indexes: Optional[pd.Index] = None
  test_indexes: Optional[pd.Index] = None
  train_idx_arr: Optional[List[np.ndarray]] = None
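These fields are now typed solely as NumPy arrays, matching the unconditional np.array(...) conversions made later in core.py. A minimal sketch (illustrative, not package code) of how Pydantic accepts ndarray-typed fields, assuming the arbitrary_types_allowed config that pipeline.py also uses:

    # Illustrative sketch: a Pydantic model with ndarray fields, as in BaseDataset.
    from typing import Optional
    import numpy as np
    from pydantic import BaseModel

    class ArrayFields(BaseModel):           # hypothetical stand-in for BaseDataset
        X_train: Optional[np.ndarray] = None
        y_train: Optional[np.ndarray] = None

        class Config:
            arbitrary_types_allowed = True  # ndarray is not a native Pydantic type

    m = ArrayFields(X_train=np.zeros((3, 2)), y_train=np.array([0, 1, 1]))
    print(m.X_train.shape)                  # (3, 2)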
@@ -81,7 +81,7 @@ class BaseDataset(BaseModel):
  self.y_train), np.array(self.y_test)

  if self.columns is None or len(self.columns) == 0:
- items.append([f'defaukt', np.nan_to_num(self.X_train),
+ items.append([f'default', np.nan_to_num(self.X_train),
  y_train_label, np.nan_to_num(self.X_test), y_test_label])
  else:
  for index, column in enumerate(self.columns):
@@ -127,11 +127,12 @@ class BaseDataset(BaseModel):
  Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
  skip deriving them. Otherwise, derive them from the dataframe and indices.
  """
- if self.X_train is not None and self.y_train is not None and self.X_test is not None and self.y_test is not None:
+ if self.X_train and self.y_train and self.X_test and self.y_test :
  # Data is already provided, no need to calculate
  logging.info(
  "X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
- return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
+ return
+ # return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr

  self.prep()

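Note the behavioral difference in this guard: `is not None` is an identity check, while bare truthiness asks NumPy for the array's truth value, which raises for arrays holding more than one element. A small illustration (not package code):

    import numpy as np

    arr = np.array([1, 2, 3])
    print(arr is not None)        # True: identity check, safe for any array

    try:
        if arr:                   # truth value of a multi-element array
            pass
    except ValueError as err:
        print(err)                # "The truth value of an array with more than one element is ambiguous..."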
@@ -150,24 +151,26 @@ class BaseDataset(BaseModel):

  train = self.dataframe[self.dataframe.index.isin(train_idx_all)]
  test = self.dataframe[self.dataframe.index.isin(test_idx_all)]
+ X_train = train.drop(self.class_column, axis=1)
+ X_train = train.drop(self.class_column, axis=1)
+ y_train = train[self.class_column]
+ X_test = test.drop(self.class_column, axis=1)
+ y_test = test[self.class_column]
+
+ self.X_train = np.array(X_train)
+ self.y_train = np.array(y_train)
+ self.X_test = np.array(X_test)
+ self.y_test = np.array(y_test)

- self.X_train = train.drop(self.class_column, axis=1)
- self.y_train = train[self.class_column]
- self.X_test = test.drop(self.class_column, axis=1)
- self.y_test = test[self.class_column]
-
- self.train_indexes = self.X_train.index
- self.test_indexes = self.X_test.index
+ self.train_indexes = X_train.index
+ self.test_indexes = X_test.index
  self.train_idx_arr = train_idx_arr
  self.val_idx_arr = val_idx_arr

  # Dataframe to numpy array conversion
- self.X_train = np.array(self.X_train)
- self.y_train = np.array(self.y_train)
- self.X_test = np.array(self.X_test)
- self.y_test = np.array(self.y_test)
+

- return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
+ # return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr


  def __get_indexes__(self, path):
  train_index_path = path+'/train_indexes.txt'
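The reworked block keeps X_train and X_test as pandas objects in locals so that .index is still available, and stores only the np.array(...) conversions on self. A toy illustration of why the ordering matters (column names here are assumed):

    import numpy as np
    import pandas as pd

    train = pd.DataFrame({"feat": [1.0, 2.0], "class": [0, 1]}, index=[10, 11])
    X_train = train.drop("class", axis=1)   # DataFrame: .index is preserved
    print(X_train.index.tolist())           # [10, 11]
    arr = np.array(X_train)                 # ndarray: the index is lost here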
@@ -219,21 +222,16 @@ class BaseDataset(BaseModel):

  X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset_splitter.split(
  X=X, y=y)
- self.X_train = X_train
- self.X_test = X_test
- self.y_train = y_train
- self.y_test = y_test
+ self.X_train = np.array(self.X_train)
+ self.X_test = np.array(self.X_test)
+ self.y_train = np.array(y_train.tolist())
+ self.y_test = np.array(y_test.tolist())
  self.train_indexes = X_train.index
  self.test_indexes = X_test.index
  self.train_idx_arr = train_idx_arr
  self.val_idx_arr = val_idx_arr

- # Dataframe to numpy array conversion
- self.X_train = np.array(self.X_train)
- self.y_train = np.array(self.y_train.tolist())
- self.X_test = np.array(self.X_test)
- self.y_test = np.array(self.y_test.tolist())
-
+
  if save_indexes:
  # train_pairs = [row['id1'].join(',').row['id2'] for index, row in X_train.iterrows()]
  self.__save_indexes__(
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/ml_helper.py
@@ -32,7 +32,7 @@ import ddi_fw.utils as utils

  class MultiModalRunner:
  # todo model related parameters to config
- def __init__(self, library, multi_modal, use_mlflow=True):
+ def __init__(self, library, multi_modal, use_mlflow=False):
  self.library = library
  self.multi_modal = multi_modal
  self.use_mlflow = use_mlflow
@@ -111,44 +111,6 @@ class MultiModalRunner:
  if self.use_mlflow:
  with mlflow.start_run(run_name=self.prefix, description="***") as run:
  self.__predict(single_results)
- # self.level_0_run_id = run.info.run_id
- # item_dict = {t[0]: t for t in self.items}
- # print("multi_modal")
- # print(self.multi_modal)
- # print(item_dict.keys())
-
- # for m in self.multi_modal:
- # name = m.get('name')
- # input_type = m.get('input_type')
- # input = m.get('input')
- # inputs = m.get('inputs')
- # model_type = get_import(m.get("model_type"))
- # kwargs = m.get('params')
- # T = self.__create_model(self.library)
- # single_modal = T(self.date, name, model_type, **kwargs)
- # if input_type == '1D':
- # item = item_dict[input]
- # single_modal.set_data(
- # self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
- # elif input_type == '2D':
- # # check keys
- # filtered_dict = {k: item_dict[k]
- # for k in inputs if k in item_dict}
- # print(filtered_dict.keys())
- # first_input = next(iter(filtered_dict.values()))
- # train_data_list = [f[1] for f in filtered_dict.values()]
- # test_data_list = [f[3] for f in filtered_dict.values()]
- # train_data = np.stack(train_data_list, axis=1)
- # test_data = np.stack(test_data_list, axis=1)
- # train_label = first_input[2]
- # test_label = first_input[4]
- # single_modal.set_data(
- # self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
- # else:
- # raise Exception("check configurations")
- # logs, metrics, prediction = single_modal.fit_and_evaluate()
- # self.result.add_metric(name, metrics)
- # single_results[name] = prediction
  else:
  self.__predict(single_results)
  if combinations:
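With use_mlflow now defaulting to False (see the __init__ change above), tracking becomes opt-in, and both branches call __predict identically. A simplified sketch of that opt-in pattern, with hypothetical names:

    import mlflow

    def run_predictions(predict_fn, use_mlflow=False, run_name="experiment"):
        # Hypothetical helper mirroring the branch above: the work is the
        # same either way; MLflow only wraps it in a tracked run when asked.
        if use_mlflow:
            with mlflow.start_run(run_name=run_name):
                return predict_fn()
        return predict_fn()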
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/ml/tensorflow_wrapper.py
@@ -120,7 +120,7 @@ class TFModelWrapper(ModelWrapper):
  print(self.train_data.shape)
  models = {}
  models_val_acc = {}
- if self.train_idx_arr is not None and self.val_idx_arr is not None:
+ if self.train_idx_arr and self.val_idx_arr:
  for i, (train_idx, val_idx) in enumerate(zip(self.train_idx_arr, self.val_idx_arr)):
  print(f"Validation {i}")

@@ -155,7 +155,8 @@ class TFModelWrapper(ModelWrapper):
  self.train_data, self.train_label, None, None)
  models[self.descriptor] = model
  models_val_acc[self.descriptor] = checkpoint.best
-
+ if models_val_acc == {}:
+ return model, None
  best_model_key = max(models_val_acc, key=lambda k: models_val_acc[k])
  # best_model_key = max(models_val_acc, key=models_val_acc.get)
  best_model = models[best_model_key]
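The new guard matters because max() over an empty mapping raises ValueError; when no cross-validation folds were run, models_val_acc stays empty and the freshly trained model is returned directly. For example:

    models_val_acc = {}
    try:
        best = max(models_val_acc, key=lambda k: models_val_acc[k])
    except ValueError:
        print("max() on an empty dict raises, hence the early return above")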
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/ner_pipeline.py
@@ -106,7 +106,7 @@ class NerParameterSearch:
  **kwargs)

  # computing train_idx_arr and val_idx_arr once is actually sufficient
- X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = dataset.load()
+ dataset.load()
  group_items = dataset.produce_inputs()
  for item in group_items:
  # item[0] = f'threshold_{threshold}_{item[0]}'
@@ -115,8 +115,8 @@ class NerParameterSearch:

  self.items.extend(group_items)
  self.y_test_label = self.items[0][4]
- self.train_idx_arr = train_idx_arr
- self.val_idx_arr = val_idx_arr
+ self.train_idx_arr = dataset.train_idx_arr
+ self.val_idx_arr = dataset.val_idx_arr

  def run(self, model_func, batch_size=128, epochs=100):
  mlflow.set_tracking_uri(self.tracking_uri)
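Both ner_pipeline.py changes reflect the revised load() contract: it returns nothing and instead populates attributes on the dataset object. A runnable stand-in (hypothetical TinyDataset, not the package's BaseDataset) showing the consuming pattern:

    import numpy as np

    class TinyDataset:                       # hypothetical stand-in
        def load(self):                      # populates attributes, returns None
            self.X_train = np.zeros((4, 2))
            self.y_train = np.array([0, 1, 0, 1])
            self.train_idx_arr = [np.array([0, 1, 2])]
            self.val_idx_arr = [np.array([3])]

    ds = TinyDataset()
    ds.load()
    print(ds.train_idx_arr, ds.val_idx_arr)  # read from the object afterwards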
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw/pipeline/pipeline.py
@@ -36,7 +36,7 @@ class Pipeline(BaseModel):
  combinations: Optional[List[str]] = None
  model: Optional[Any] = None
  multi_modal: Optional[Any] = None
- use_mlflow: bool = True
+ use_mlflow: bool = False
  _items:List=[]
  _train_idx_arr:List|None=[]
  _val_idx_arr:List|None=[]
@@ -53,45 +53,7 @@ class Pipeline(BaseModel):

  class Config:
  arbitrary_types_allowed = True
-
- # class Pipeline:
- # def __init__(self,
- # library='tensorflow',
- # experiment_name=None,
- # experiment_description=None,
- # experiment_tags=None,
- # artifact_location=None,
- # tracking_uri=None,
- # dataset_type: BaseDataset = None,
- # columns=None,
- # embedding_dict=None,
- # column_embedding_configs=None,
- # vector_db_persist_directory=None,
- # vector_db_collection_name=None,
- # embedding_pooling_strategy_type: PoolingStrategy = None,
- # ner_data_file=None,
- # ner_threshold=None,
- # combinations=None,
- # model=None,
- # multi_modal = None ):
- # self.library = library
- # self.experiment_name = experiment_name
- # self.experiment_description = experiment_description
- # self.experiment_tags = experiment_tags
- # self.artifact_location = artifact_location
- # self.tracking_uri = tracking_uri
- # self.dataset_type = dataset_type
- # self.columns = columns
- # self.embedding_dict = embedding_dict
- # self.column_embedding_configs = column_embedding_configs
- # self.vector_db_persist_directory = vector_db_persist_directory
- # self.vector_db_collection_name = vector_db_collection_name
- # self.embedding_pooling_strategy_type = embedding_pooling_strategy_type
- # self.ner_data_file = ner_data_file
- # self.ner_threshold = ner_threshold
- # self.combinations = combinations
- # self.model = model
- # self.multi_modal = multi_modal
+

  def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
  """
@@ -194,35 +156,27 @@ class Pipeline(BaseModel):
  embedding_dict=embedding_dict,
  embedding_size=embedding_size,
  embeddings_pooling_strategy=pooling_strategy,
- dataset_splitter = dataset_splitter,
+ dataset_splitter_type = self.dataset_splitter_type,
+ **kwargs)
+ elif self.dataset_type == BaseDataset:
+ dataset = self.dataset_type(
+ dataset_splitter_type = self.dataset_splitter_type,
  **kwargs)
  else:
  dataset = self.dataset_type(**kwargs)

  # X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
  dataset.load()
-
  dataframe = dataset.dataframe
+ b = not( dataset.X_train and dataset.y_train and dataset.X_test and dataset.y_test)
+ c = dataframe is None or dataframe.empty

- if dataframe is None: # if the dataframe is None, it means that the dataset is not loaded
+ if b or c:
  raise ValueError("The dataset is not loaded")
-
- # dataframe.dropna()
- # X_train = dataset.X_train
- # X_test = dataset.X_test
- # y_train = dataset.y_train
- # y_test = dataset.y_test
- # self._train_idx_arr = dataset.train_idx_arr
- # self._val_idx_arr = dataset.val_idx_arr
- # Logic to set up the experiment
+
  # column name, train data, train label, test data, test label
  self._items = dataset.produce_inputs()
-
- # unique_classes = pd.unique(dataframe[dataset.class_column])
- # event_num = len(unique_classes)
- # droprate = 0.3
- # vector_size = self.dataset.drugs_df.shape[0]
-
+
  print("Building the experiment with the following settings:")
  print(
  f"Name: {self.experiment_name}, Dataset: {dataset}, Model: {self.model}")
@@ -244,7 +198,7 @@ class Pipeline(BaseModel):

  y_test_label = self.items[0][4]
  multi_modal_runner = MultiModalRunner(
- library=self.library, multi_modal=self.multi_modal)
+ library=self.library, multi_modal=self.multi_modal, use_mlflow=self.use_mlflow)
  # multi_modal_runner = MultiModalRunner(
  # library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
  # multi_modal = TFMultiModal(
{ddi_fw-0.0.185 → ddi_fw-0.0.187}/src/ddi_fw.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ddi_fw
- Version: 0.0.185
+ Version: 0.0.187
  Summary: Do not use :)
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>