ddi-fw 0.0.218__tar.gz → 0.0.219__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/base.py +6 -5
  4. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +119 -4
  5. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  6. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/README.md +0 -0
  7. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/setup.cfg +0 -0
  8. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/__init__.py +0 -0
  9. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/core.py +0 -0
  10. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
  11. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/db_utils.py +0 -0
  12. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  13. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/debug.log +0 -0
  14. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  15. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  16. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  17. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  18. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  19. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  20. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  21. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  22. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  23. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  24. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  25. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  26. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  27. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  28. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  29. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  30. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  31. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  32. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  33. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  34. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  35. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  36. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  37. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  38. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  39. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/base.py +0 -0
  40. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
  41. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
  42. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
  43. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
  44. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
  45. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
  46. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
  47. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
  48. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
  49. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
  50. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
  51. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
  52. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
  53. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  54. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  55. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  56. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  57. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  58. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  59. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  60. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  61. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  62. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  63. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  64. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  65. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  66. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  67. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  68. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  69. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/setup_._py +0 -0
  70. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/__init__.py +0 -0
  71. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  72. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  73. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  74. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  75. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  76. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/__init__.py +0 -0
  77. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/embeddings.py +0 -0
  78. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  79. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/storage.py +0 -0
  80. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/__init__.py +0 -0
  81. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  82. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/ml_helper.py +0 -0
  83. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/model_wrapper.py +0 -0
  84. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  85. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
  86. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/tracking_service.py +0 -0
  87. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ner/__init__.py +0 -0
  88. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  89. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ner/ner.py +0 -0
  90. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/__init__.py +0 -0
  91. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  92. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  93. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
  94. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  95. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/pipeline.py +0 -0
  96. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/__init__.py +0 -0
  97. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
  98. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/enums.py +0 -0
  99. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/json_helper.py +0 -0
  100. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/kaggle.py +0 -0
  101. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/numpy_utils.py +0 -0
  102. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/package_helper.py +0 -0
  103. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  104. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/utils.py +0 -0
  105. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/zip_helper.py +0 -0
  106. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/vectorization/__init__.py +0 -0
  107. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
  108. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/vectorization/idf_helper.py +0 -0
  109. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  110. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  111. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/requires.txt +0 -0
  112. {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.218
3
+ Version: 0.0.219
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "ddi_fw"
9
- version = "0.0.218"
9
+ version = "0.0.219"
10
10
  description = "Do not use :)"
11
11
  readme = "README.md"
12
12
  authors = [
@@ -1,5 +1,5 @@
1
1
  import pathlib
2
- from typing import List, Optional, Tuple
2
+ from typing import Any, List, Optional, Tuple
3
3
  from ddi_fw.datasets.core import BaseDataset, TextDatasetMixin, generate_sim_matrices_new, generate_vectors
4
4
  from ddi_fw.datasets.db_utils import create_connection
5
5
  import numpy as np
@@ -51,6 +51,7 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
51
51
  tui_threshold: float | None = None
52
52
  cui_threshold: float | None = None
53
53
  entities_threshold: float | None = None
54
+ _ner_threshold: dict[str,Any] |None = None
54
55
 
55
56
  # @model_validator
56
57
 
@@ -68,14 +69,14 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
68
69
  # self.additional_config = kwargs.get('dataset_additional_config', {})
69
70
  if self.additional_config:
70
71
  ner = self.additional_config.get('ner', {})
71
- self.ner_data_file = ner.get('data_file', None)
72
- self.ner_threshold = ner.get('thresholds', None)
72
+ ner_data_file = ner.get('data_file', None)
73
+ self._ner_threshold = ner.get('thresholds', None)
73
74
  # if self.ner_threshold:
74
75
  # for k, v in self.ner_threshold.items():
75
76
  # kwargs[k] = v
76
77
 
77
78
  self.ner_df = CTakesNER(df=None).load(
78
- filename=self.ner_data_file) if self.ner_data_file else None
79
+ filename=ner_data_file) if ner_data_file else None
79
80
 
80
81
  columns = kwargs['columns']
81
82
  if columns:
@@ -169,7 +170,7 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
169
170
 
170
171
  # for key in filtered_ner_df.keys():
171
172
  for key in self.ner_columns:
172
- threshold = self.ner_threshold.get(key, 0)
173
+ threshold = self._ner_threshold.get(key, 0) if self._ner_threshold else 0
173
174
  # threshold = 0
174
175
  # if key.startswith('tui'):
175
176
  # threshold = self.tui_threshold
@@ -1,6 +1,6 @@
1
1
  import os
2
2
  import pathlib
3
- from typing import List, Optional, Tuple
3
+ from typing import Any, List, Optional, Tuple
4
4
  from ddi_fw.datasets.core import BaseDataset, TextDatasetMixin, generate_sim_matrices_new, generate_vectors
5
5
  from ddi_fw.datasets.db_utils import create_connection
6
6
  import numpy as np
@@ -105,6 +105,7 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
105
105
  tui_threshold: float | None = None
106
106
  cui_threshold: float | None = None
107
107
  entities_threshold: float | None = None
108
+ _ner_threshold: dict[str,Any] |None= None
108
109
 
109
110
  # @model_validator
110
111
 
@@ -122,14 +123,14 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
122
123
  # self.additional_config = kwargs.get('dataset_additional_config', {})
123
124
  if self.additional_config:
124
125
  ner = self.additional_config.get('ner', {})
125
- self.ner_data_file = ner.get('data_file', None)
126
- self.ner_threshold = ner.get('thresholds', None)
126
+ ner_data_file = ner.get('data_file', None)
127
+ self._ner_threshold = ner.get('thresholds', None)
127
128
  # if self.ner_threshold:
128
129
  # for k, v in self.ner_threshold.items():
129
130
  # kwargs[k] = v
130
131
 
131
132
  self.ner_df = CTakesNER(df=None).load(
132
- filename=self.ner_data_file) if self.ner_data_file else None
133
+ filename=ner_data_file) if ner_data_file else None
133
134
 
134
135
  columns = kwargs['columns']
135
136
  if columns:
@@ -218,6 +219,120 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
218
219
  self.ddis_df.to_sql('event', conn, if_exists='replace', index=False)
219
220
  ZipHelper().zip_single_file(
220
221
  file_path=db_path, output_path=HERE, zip_name='mdf-sa-ddi')
222
+
223
+ def prep(self):
224
+ # self.load_drugs_and_events()
225
+ if self.drugs_df is None or self.ddis_df is None:
226
+ raise Exception("There is no data")
227
+
228
+ drug_ids = self.drugs_df['id'].to_list()
229
+
230
+ filtered_df = self.drugs_df
231
+ combined_df = filtered_df.copy()
232
+
233
+ if self.ner_df is not None and not self.ner_df.empty:
234
+ filtered_ner_df = self.ner_df[self.ner_df['drugbank_id'].isin(
235
+ drug_ids)]
236
+ filtered_ner_df = self.ner_df.copy()
237
+
238
+ # TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
239
+
240
+ # idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
241
+ idf_calc = IDF(filtered_ner_df, self.ner_columns)
242
+ idf_calc.calculate()
243
+ idf_scores_df = idf_calc.to_dataframe()
244
+
245
+ # for key in filtered_ner_df.keys():
246
+ for key in self.ner_columns:
247
+ threshold = self._ner_threshold.get(key, 0) if self._ner_threshold else 0
248
+ # threshold = 0
249
+ # if key.startswith('tui'):
250
+ # threshold = self.tui_threshold
251
+ # if key.startswith('cui'):
252
+ # threshold = self.cui_threshold
253
+ # if key.startswith('entities'):
254
+ # threshold = self.entities_threshold
255
+ combined_df[key] = filtered_ner_df[key]
256
+ valid_codes = idf_scores_df[idf_scores_df[key]
257
+ > threshold].index
258
+
259
+ # print(f'{key}: valid code size = {len(valid_codes)}')
260
+ combined_df[key] = combined_df[key].apply(lambda items:
261
+ [item for item in items if item in valid_codes])
262
+
263
+ moved_columns = ['id']
264
+ moved_columns.extend(self.__similarity_related_columns__)
265
+ chemical_properties_df = combined_df[moved_columns]
266
+
267
+ chemical_properties_df = chemical_properties_df.fillna("").apply(list)
268
+
269
+ # generate vectors dictionary içinde ndarray dönecek
270
+ generated_vectors = generate_vectors(
271
+ chemical_properties_df, self.__similarity_related_columns__)
272
+
273
+ # TODO if necessary
274
+ similarity_matrices = generate_sim_matrices_new(
275
+ chemical_properties_df, generated_vectors, self.__similarity_related_columns__, key_column="id")
276
+
277
+ event_categories = self.ddis_df['event_category']
278
+ labels = event_categories.tolist()
279
+ lb = LabelBinarizer()
280
+ lb.fit(labels)
281
+ classes = lb.transform(labels)
282
+
283
+ def similarity_lambda_fnc(row, value):
284
+ if row['id1'] in value:
285
+ return value[row['id1']]
286
+
287
+ def lambda_fnc(row: pd.Series, value) -> Optional[np.float16]:
288
+ if row['id1'] in value and row['id2'] in value:
289
+ return np.float16(np.hstack(
290
+ (value[row['id1']], value[row['id2']])))
291
+ return None
292
+ # return np.hstack(
293
+ # (value[row['id1']], value[row['id2']]), dtype=np.float16)
294
+
295
+ def x_fnc(row, embeddings_after_pooling):
296
+ if row['id1'] in embeddings_after_pooling:
297
+ v1 = embeddings_after_pooling[row['id1']]
298
+ else:
299
+ v1 = np.zeros(self.embedding_size)
300
+ if row['id2'] in embeddings_after_pooling:
301
+ v2 = embeddings_after_pooling[row['id2']]
302
+ else:
303
+ v2 = np.zeros(self.embedding_size)
304
+ return np.float16(np.hstack(
305
+ (v1, v2)))
306
+
307
+ for key, value in similarity_matrices.items():
308
+
309
+ print(f'sim matrix: {key}')
310
+ self.ddis_df[key] = self.ddis_df.apply(
311
+ lambda_fnc, args=(value,), axis=1)
312
+ self.columns.append(key)
313
+ print(self.ddis_df[key].head())
314
+ if isinstance(self, TextDatasetMixin):
315
+ if self.embedding_dict is not None:
316
+ for embedding_column in self.embedding_columns:
317
+ print(f"concat {embedding_column} embeddings")
318
+ embeddings_after_pooling = {k: self.pooling_strategy.apply(
319
+ v) for k, v in self.embedding_dict[embedding_column].items()}
320
+ # column_embeddings_dict = embedding_values[embedding_column]
321
+ self.ddis_df[embedding_column+'_embedding'] = self.ddis_df.apply(
322
+ x_fnc, args=(embeddings_after_pooling,), axis=1)
323
+ self.columns.append(embedding_column+'_embedding')
324
+
325
+ dataframe = self.ddis_df.copy()
326
+ if not isinstance(classes, (list, pd.Series, np.ndarray)):
327
+ raise TypeError(
328
+ "classes must be an iterable (list, Series, or ndarray)")
329
+
330
+ if len(classes) != len(dataframe):
331
+ raise ValueError(
332
+ "Length of classes must match the number of rows in the DataFrame")
333
+
334
+ dataframe[self.class_column] = list(classes)
335
+ self.set_dataframe(dataframe)
221
336
 
222
337
 
223
338
  def select_all_drugs(conn):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.218
3
+ Version: 0.0.219
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
File without changes
File without changes
File without changes