ddi-fw 0.0.222__py3-none-any.whl → 0.0.224__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/ddi_mdl/base.py +2 -2
- ddi_fw/datasets/mdf_sa_ddi/base.py +23 -22
- ddi_fw/pipeline/pipeline.py +3 -1
- {ddi_fw-0.0.222.dist-info → ddi_fw-0.0.224.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.222.dist-info → ddi_fw-0.0.224.dist-info}/RECORD +7 -7
- {ddi_fw-0.0.222.dist-info → ddi_fw-0.0.224.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.222.dist-info → ddi_fw-0.0.224.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/ddi_mdl/base.py
CHANGED
@@ -65,8 +65,8 @@ class DDIMDLDataset(BaseDataset,TextDatasetMixin):
|
|
65
65
|
def __init__(self, **kwargs):
|
66
66
|
|
67
67
|
super().__init__(**kwargs)
|
68
|
-
self.index_path
|
69
|
-
|
68
|
+
self.index_path = str(
|
69
|
+
pathlib.Path(__file__).resolve().parent.joinpath('indexes'))
|
70
70
|
# self.additional_config = kwargs.get('dataset_additional_config', {})
|
71
71
|
if self.additional_config:
|
72
72
|
ner = self.additional_config.get('ner', {})
|
@@ -35,7 +35,8 @@ LIST_OF_NER_COLUMNS = ['tui', 'cui', 'entities']
|
|
35
35
|
|
36
36
|
HERE = pathlib.Path(__file__).resolve().parent
|
37
37
|
|
38
|
-
|
38
|
+
|
39
|
+
class MDFSADDIDataset(BaseDataset, TextDatasetMixin):
|
39
40
|
# def __init__(self, embedding_size,
|
40
41
|
# embedding_dict,
|
41
42
|
# embeddings_pooling_strategy: PoolingStrategy,
|
@@ -64,7 +65,6 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
64
65
|
# else:
|
65
66
|
# raise Exception(f"{column} is not related this dataset")
|
66
67
|
|
67
|
-
|
68
68
|
# super().__init__(embedding_size=embedding_size,
|
69
69
|
# embedding_dict=embedding_dict,
|
70
70
|
# embeddings_pooling_strategy=embeddings_pooling_strategy,
|
@@ -88,7 +88,7 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
88
88
|
# kwargs['index_path'] = str(HERE.joinpath('indexes'))
|
89
89
|
|
90
90
|
# self.index_path = kwargs.get('index_path')
|
91
|
-
|
91
|
+
|
92
92
|
dataset_name: str = "MDFSADDIDataset"
|
93
93
|
# index_path: str = Field(default_factory=lambda: str(
|
94
94
|
# pathlib.Path(__file__).resolve().parent.joinpath('indexes')))
|
@@ -105,7 +105,7 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
105
105
|
tui_threshold: float | None = None
|
106
106
|
cui_threshold: float | None = None
|
107
107
|
entities_threshold: float | None = None
|
108
|
-
_ner_threshold: dict[str,Any] |None= None
|
108
|
+
_ner_threshold: dict[str, Any] | None = None
|
109
109
|
|
110
110
|
# @model_validator
|
111
111
|
|
@@ -119,9 +119,9 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
119
119
|
def __init__(self, **kwargs):
|
120
120
|
|
121
121
|
super().__init__(**kwargs)
|
122
|
-
self.index_path
|
123
|
-
|
124
|
-
|
122
|
+
self.index_path = str(
|
123
|
+
pathlib.Path(__file__).resolve().parent.joinpath('indexes'))
|
124
|
+
|
125
125
|
# self.additional_config = kwargs.get('dataset_additional_config', {})
|
126
126
|
if self.additional_config:
|
127
127
|
ner = self.additional_config.get('ner', {})
|
@@ -130,10 +130,10 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
130
130
|
# if self.ner_threshold:
|
131
131
|
# for k, v in self.ner_threshold.items():
|
132
132
|
# kwargs[k] = v
|
133
|
-
|
133
|
+
|
134
134
|
self.ner_df = CTakesNER(df=None).load(
|
135
135
|
filename=ner_data_file) if ner_data_file else None
|
136
|
-
|
136
|
+
|
137
137
|
columns = kwargs['columns']
|
138
138
|
if columns:
|
139
139
|
chemical_property_columns = []
|
@@ -148,13 +148,12 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
148
148
|
ner_columns.append(column)
|
149
149
|
else:
|
150
150
|
raise Exception(f"{column} is not related this dataset")
|
151
|
-
|
151
|
+
|
152
152
|
self.chemical_property_columns = chemical_property_columns
|
153
|
-
self.embedding_columns = embedding_columns
|
154
|
-
self.ner_columns = ner_columns
|
155
|
-
self.columns = []
|
156
|
-
|
157
|
-
|
153
|
+
self.embedding_columns = embedding_columns
|
154
|
+
self.ner_columns = ner_columns
|
155
|
+
self.columns = [] # these variable is modified in prep method
|
156
|
+
|
158
157
|
db_zip_path = HERE.joinpath('mdf-sa-ddi.zip')
|
159
158
|
db_path = HERE.joinpath('mdf-sa-ddi.db')
|
160
159
|
if not os.path.exists(db_zip_path):
|
@@ -166,8 +165,7 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
166
165
|
self.drugs_df = select_all_drugs_as_dataframe(conn)
|
167
166
|
self.ddis_df = select_all_events_as_dataframe(conn)
|
168
167
|
# kwargs = {'index_path': str(HERE.joinpath('indexes'))}
|
169
|
-
|
170
|
-
|
168
|
+
|
171
169
|
self.class_column = 'event_category'
|
172
170
|
|
173
171
|
self.__similarity_related_columns__ = []
|
@@ -217,11 +215,13 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
217
215
|
self.ddis_df['id2'] = self.ddis_df['name2'].apply(
|
218
216
|
lambda_fnc1) # , axis=1
|
219
217
|
if conn:
|
220
|
-
self.drugs_df.to_sql(
|
221
|
-
|
218
|
+
self.drugs_df.to_sql(
|
219
|
+
'drug', conn, if_exists='replace', index=False)
|
220
|
+
self.ddis_df.to_sql(
|
221
|
+
'event', conn, if_exists='replace', index=False)
|
222
222
|
ZipHelper().zip_single_file(
|
223
223
|
file_path=db_path, output_path=HERE, zip_name='mdf-sa-ddi')
|
224
|
-
|
224
|
+
|
225
225
|
def prep(self):
|
226
226
|
# self.load_drugs_and_events()
|
227
227
|
if self.drugs_df is None or self.ddis_df is None:
|
@@ -246,7 +246,8 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
246
246
|
|
247
247
|
# for key in filtered_ner_df.keys():
|
248
248
|
for key in self.ner_columns:
|
249
|
-
threshold = self._ner_threshold.get(
|
249
|
+
threshold = self._ner_threshold.get(
|
250
|
+
key, 0) if self._ner_threshold else 0
|
250
251
|
# threshold = 0
|
251
252
|
# if key.startswith('tui'):
|
252
253
|
# threshold = self.tui_threshold
|
@@ -272,7 +273,7 @@ class MDFSADDIDataset(BaseDataset,TextDatasetMixin):
|
|
272
273
|
generated_vectors = generate_vectors(
|
273
274
|
chemical_properties_df, self.__similarity_related_columns__)
|
274
275
|
|
275
|
-
# TODO if necessary
|
276
|
+
# TODO if necessary
|
276
277
|
similarity_matrices = generate_sim_matrices_new(
|
277
278
|
chemical_properties_df, generated_vectors, self.__similarity_related_columns__, key_column="id")
|
278
279
|
|
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -105,8 +105,10 @@ class Pipeline(BaseModel):
|
|
105
105
|
# X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
|
106
106
|
|
107
107
|
dataset.load()
|
108
|
-
|
108
|
+
|
109
109
|
self._dataset = dataset
|
110
|
+
self._train_idx_arr = dataset.train_idx_arr
|
111
|
+
self._val_idx_arr = dataset.val_idx_arr
|
110
112
|
|
111
113
|
dataframe = dataset.dataframe
|
112
114
|
|
@@ -3,7 +3,7 @@ ddi_fw/datasets/core.py,sha256=PX6MX4hmeYxIWAKAx7NnJr1fpzR11xA8g8vAjYcQNN8,16936
|
|
3
3
|
ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
6
|
-
ddi_fw/datasets/ddi_mdl/base.py,sha256=
|
6
|
+
ddi_fw/datasets/ddi_mdl/base.py,sha256=1bFubHRi5idP6SYGyB3tXZPV8aUzETbrzGqnXsQFbJU,11347
|
7
7
|
ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
|
8
8
|
ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
|
9
9
|
ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
|
@@ -46,7 +46,7 @@ ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt,sha256=fFJbN0DbKH4mve
|
|
46
46
|
ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt,sha256=NhiLF_5INQCpjOlE-RIxDKy7rYwksLdx60L6HCmDKoY,81247
|
47
47
|
ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt,sha256=bPvMCJVy7jtcaYbR-5bmdB6s7gT8NSfK2wDC7iJ0O10,81308
|
48
48
|
ddi_fw/datasets/mdf_sa_ddi/__init__.py,sha256=UEFBM92y2aJjlMJw4Jx405tOAwJ88r_nHAVgAszSjuo,68
|
49
|
-
ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=
|
49
|
+
ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=27TwErhK7mjd4YzVhQgjWjlVgX4PpNq54zoNJLXp-bE,15993
|
50
50
|
ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv,sha256=EOOLF_0vVVzShoofcGYlOzpztlM1m9jJdftepHicix4,25787699
|
51
51
|
ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv,sha256=lpuMz5KxPsG6MKNuIIUmT5cZquWHQiIao8tXlmOHzq8,381321
|
52
52
|
ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip,sha256=DfN8mczGvWba2y45cPqtWtXjUDXy49VOtRfpcb0tn8c,4382827
|
@@ -87,7 +87,7 @@ ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJ
|
|
87
87
|
ddi_fw/pipeline/multi_pipeline.py,sha256=npJUXYT31fxD6kpJKSeixjbH5jNfPUwIVG7lRdBszRg,9852
|
88
88
|
ddi_fw/pipeline/multi_pipeline_org.py,sha256=AbErwu05-3YIPnCcXRsj-jxPJG8HG2H7cMZlGjzaYa8,9037
|
89
89
|
ddi_fw/pipeline/ner_pipeline.py,sha256=yp-Met2794EKcgr8_3gqt03l4v2efOdaZuAcIXTubvQ,5780
|
90
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
90
|
+
ddi_fw/pipeline/pipeline.py,sha256=q1kMkW9-fOlrA4BOGUku40U_PuEYfcbtH2EvlRM4uTM,6243
|
91
91
|
ddi_fw/utils/__init__.py,sha256=WNxkQXk-694roG50D355TGLXstfdWVb_tUyr-PM-8rg,537
|
92
92
|
ddi_fw/utils/categorical_data_encoding_checker.py,sha256=T1X70Rh4atucAuqyUZmz-iFULllY9dY0NRyV9-jTjJ0,3438
|
93
93
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
@@ -101,7 +101,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
101
101
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
102
102
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
|
103
103
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
104
|
-
ddi_fw-0.0.
|
105
|
-
ddi_fw-0.0.
|
106
|
-
ddi_fw-0.0.
|
107
|
-
ddi_fw-0.0.
|
104
|
+
ddi_fw-0.0.224.dist-info/METADATA,sha256=5dFfDc76jiVD68bVFOsMw8lUXIaklRzuWIA6HB781Ls,2631
|
105
|
+
ddi_fw-0.0.224.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
106
|
+
ddi_fw-0.0.224.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
107
|
+
ddi_fw-0.0.224.dist-info/RECORD,,
|
File without changes
|
File without changes
|