ddi-fw 0.0.218__tar.gz → 0.0.219__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/PKG-INFO +1 -1
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/pyproject.toml +1 -1
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/base.py +6 -5
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +119 -4
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/PKG-INFO +1 -1
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/README.md +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/setup.cfg +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/core.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/dataset_splitter.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/db_utils.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/debug.log +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/base.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/setup_._py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/drugbank/event_extractor.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/embeddings.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/langchain/storage.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/evaluation_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/ml_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/model_wrapper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ml/tracking_service.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ner/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ner/mmlrestclient.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/ner/ner.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/multi_pipeline_org.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/pipeline/pipeline.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/categorical_data_encoding_checker.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/enums.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/json_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/kaggle.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/numpy_utils.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/package_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/py7zr_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/utils.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/utils/zip_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/vectorization/__init__.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/vectorization/feature_vector_generation.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/vectorization/idf_helper.py +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/requires.txt +0 -0
- {ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,5 +1,5 @@
|
|
1
1
|
import pathlib
|
2
|
-
from typing import List, Optional, Tuple
|
2
|
+
from typing import Any, List, Optional, Tuple
|
3
3
|
from ddi_fw.datasets.core import BaseDataset, TextDatasetMixin, generate_sim_matrices_new, generate_vectors
|
4
4
|
from ddi_fw.datasets.db_utils import create_connection
|
5
5
|
import numpy as np
|
@@ -51,6 +51,7 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
|
|
51
51
|
tui_threshold: float | None = None
|
52
52
|
cui_threshold: float | None = None
|
53
53
|
entities_threshold: float | None = None
|
54
|
+
_ner_threshold: dict[str,Any] |None = None
|
54
55
|
|
55
56
|
# @model_validator
|
56
57
|
|
@@ -68,14 +69,14 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
|
|
68
69
|
# self.additional_config = kwargs.get('dataset_additional_config', {})
|
69
70
|
if self.additional_config:
|
70
71
|
ner = self.additional_config.get('ner', {})
|
71
|
-
|
72
|
-
self.
|
72
|
+
ner_data_file = ner.get('data_file', None)
|
73
|
+
self._ner_threshold = ner.get('thresholds', None)
|
73
74
|
# if self.ner_threshold:
|
74
75
|
# for k, v in self.ner_threshold.items():
|
75
76
|
# kwargs[k] = v
|
76
77
|
|
77
78
|
self.ner_df = CTakesNER(df=None).load(
|
78
|
-
filename=
|
79
|
+
filename=ner_data_file) if ner_data_file else None
|
79
80
|
|
80
81
|
columns = kwargs['columns']
|
81
82
|
if columns:
|
@@ -169,7 +170,7 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
|
|
169
170
|
|
170
171
|
# for key in filtered_ner_df.keys():
|
171
172
|
for key in self.ner_columns:
|
172
|
-
threshold = self.
|
173
|
+
threshold = self._ner_threshold.get(key, 0) if self._ner_threshold else 0
|
173
174
|
# threshold = 0
|
174
175
|
# if key.startswith('tui'):
|
175
176
|
# threshold = self.tui_threshold
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import os
|
2
2
|
import pathlib
|
3
|
-
from typing import List, Optional, Tuple
|
3
|
+
from typing import Any, List, Optional, Tuple
|
4
4
|
from ddi_fw.datasets.core import BaseDataset, TextDatasetMixin, generate_sim_matrices_new, generate_vectors
|
5
5
|
from ddi_fw.datasets.db_utils import create_connection
|
6
6
|
import numpy as np
|
@@ -105,6 +105,7 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
105
105
|
tui_threshold: float | None = None
|
106
106
|
cui_threshold: float | None = None
|
107
107
|
entities_threshold: float | None = None
|
108
|
+
_ner_threshold: dict[str,Any] |None= None
|
108
109
|
|
109
110
|
# @model_validator
|
110
111
|
|
@@ -122,14 +123,14 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
122
123
|
# self.additional_config = kwargs.get('dataset_additional_config', {})
|
123
124
|
if self.additional_config:
|
124
125
|
ner = self.additional_config.get('ner', {})
|
125
|
-
|
126
|
-
self.
|
126
|
+
ner_data_file = ner.get('data_file', None)
|
127
|
+
self._ner_threshold = ner.get('thresholds', None)
|
127
128
|
# if self.ner_threshold:
|
128
129
|
# for k, v in self.ner_threshold.items():
|
129
130
|
# kwargs[k] = v
|
130
131
|
|
131
132
|
self.ner_df = CTakesNER(df=None).load(
|
132
|
-
filename=
|
133
|
+
filename=ner_data_file) if ner_data_file else None
|
133
134
|
|
134
135
|
columns = kwargs['columns']
|
135
136
|
if columns:
|
@@ -218,6 +219,120 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
218
219
|
self.ddis_df.to_sql('event', conn, if_exists='replace', index=False)
|
219
220
|
ZipHelper().zip_single_file(
|
220
221
|
file_path=db_path, output_path=HERE, zip_name='mdf-sa-ddi')
|
222
|
+
|
223
|
+
def prep(self):
|
224
|
+
# self.load_drugs_and_events()
|
225
|
+
if self.drugs_df is None or self.ddis_df is None:
|
226
|
+
raise Exception("There is no data")
|
227
|
+
|
228
|
+
drug_ids = self.drugs_df['id'].to_list()
|
229
|
+
|
230
|
+
filtered_df = self.drugs_df
|
231
|
+
combined_df = filtered_df.copy()
|
232
|
+
|
233
|
+
if self.ner_df is not None and not self.ner_df.empty:
|
234
|
+
filtered_ner_df = self.ner_df[self.ner_df['drugbank_id'].isin(
|
235
|
+
drug_ids)]
|
236
|
+
filtered_ner_df = self.ner_df.copy()
|
237
|
+
|
238
|
+
# TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
|
239
|
+
|
240
|
+
# idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
|
241
|
+
idf_calc = IDF(filtered_ner_df, self.ner_columns)
|
242
|
+
idf_calc.calculate()
|
243
|
+
idf_scores_df = idf_calc.to_dataframe()
|
244
|
+
|
245
|
+
# for key in filtered_ner_df.keys():
|
246
|
+
for key in self.ner_columns:
|
247
|
+
threshold = self._ner_threshold.get(key, 0) if self._ner_threshold else 0
|
248
|
+
# threshold = 0
|
249
|
+
# if key.startswith('tui'):
|
250
|
+
# threshold = self.tui_threshold
|
251
|
+
# if key.startswith('cui'):
|
252
|
+
# threshold = self.cui_threshold
|
253
|
+
# if key.startswith('entities'):
|
254
|
+
# threshold = self.entities_threshold
|
255
|
+
combined_df[key] = filtered_ner_df[key]
|
256
|
+
valid_codes = idf_scores_df[idf_scores_df[key]
|
257
|
+
> threshold].index
|
258
|
+
|
259
|
+
# print(f'{key}: valid code size = {len(valid_codes)}')
|
260
|
+
combined_df[key] = combined_df[key].apply(lambda items:
|
261
|
+
[item for item in items if item in valid_codes])
|
262
|
+
|
263
|
+
moved_columns = ['id']
|
264
|
+
moved_columns.extend(self.__similarity_related_columns__)
|
265
|
+
chemical_properties_df = combined_df[moved_columns]
|
266
|
+
|
267
|
+
chemical_properties_df = chemical_properties_df.fillna("").apply(list)
|
268
|
+
|
269
|
+
# generate vectors dictionary içinde ndarray dönecek
|
270
|
+
generated_vectors = generate_vectors(
|
271
|
+
chemical_properties_df, self.__similarity_related_columns__)
|
272
|
+
|
273
|
+
# TODO if necessary
|
274
|
+
similarity_matrices = generate_sim_matrices_new(
|
275
|
+
chemical_properties_df, generated_vectors, self.__similarity_related_columns__, key_column="id")
|
276
|
+
|
277
|
+
event_categories = self.ddis_df['event_category']
|
278
|
+
labels = event_categories.tolist()
|
279
|
+
lb = LabelBinarizer()
|
280
|
+
lb.fit(labels)
|
281
|
+
classes = lb.transform(labels)
|
282
|
+
|
283
|
+
def similarity_lambda_fnc(row, value):
|
284
|
+
if row['id1'] in value:
|
285
|
+
return value[row['id1']]
|
286
|
+
|
287
|
+
def lambda_fnc(row: pd.Series, value) -> Optional[np.float16]:
|
288
|
+
if row['id1'] in value and row['id2'] in value:
|
289
|
+
return np.float16(np.hstack(
|
290
|
+
(value[row['id1']], value[row['id2']])))
|
291
|
+
return None
|
292
|
+
# return np.hstack(
|
293
|
+
# (value[row['id1']], value[row['id2']]), dtype=np.float16)
|
294
|
+
|
295
|
+
def x_fnc(row, embeddings_after_pooling):
|
296
|
+
if row['id1'] in embeddings_after_pooling:
|
297
|
+
v1 = embeddings_after_pooling[row['id1']]
|
298
|
+
else:
|
299
|
+
v1 = np.zeros(self.embedding_size)
|
300
|
+
if row['id2'] in embeddings_after_pooling:
|
301
|
+
v2 = embeddings_after_pooling[row['id2']]
|
302
|
+
else:
|
303
|
+
v2 = np.zeros(self.embedding_size)
|
304
|
+
return np.float16(np.hstack(
|
305
|
+
(v1, v2)))
|
306
|
+
|
307
|
+
for key, value in similarity_matrices.items():
|
308
|
+
|
309
|
+
print(f'sim matrix: {key}')
|
310
|
+
self.ddis_df[key] = self.ddis_df.apply(
|
311
|
+
lambda_fnc, args=(value,), axis=1)
|
312
|
+
self.columns.append(key)
|
313
|
+
print(self.ddis_df[key].head())
|
314
|
+
if isinstance(self, TextDatasetMixin):
|
315
|
+
if self.embedding_dict is not None:
|
316
|
+
for embedding_column in self.embedding_columns:
|
317
|
+
print(f"concat {embedding_column} embeddings")
|
318
|
+
embeddings_after_pooling = {k: self.pooling_strategy.apply(
|
319
|
+
v) for k, v in self.embedding_dict[embedding_column].items()}
|
320
|
+
# column_embeddings_dict = embedding_values[embedding_column]
|
321
|
+
self.ddis_df[embedding_column+'_embedding'] = self.ddis_df.apply(
|
322
|
+
x_fnc, args=(embeddings_after_pooling,), axis=1)
|
323
|
+
self.columns.append(embedding_column+'_embedding')
|
324
|
+
|
325
|
+
dataframe = self.ddis_df.copy()
|
326
|
+
if not isinstance(classes, (list, pd.Series, np.ndarray)):
|
327
|
+
raise TypeError(
|
328
|
+
"classes must be an iterable (list, Series, or ndarray)")
|
329
|
+
|
330
|
+
if len(classes) != len(dataframe):
|
331
|
+
raise ValueError(
|
332
|
+
"Length of classes must match the number of rows in the DataFrame")
|
333
|
+
|
334
|
+
dataframe[self.class_column] = list(classes)
|
335
|
+
self.set_dataframe(dataframe)
|
221
336
|
|
222
337
|
|
223
338
|
def select_all_drugs(conn):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt
RENAMED
File without changes
|
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt
RENAMED
File without changes
|
{ddi_fw-0.0.218 → ddi_fw-0.0.219}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|