ddi-fw 0.0.145__py3-none-any.whl → 0.0.147__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/mdf_sa_ddi/base.py +49 -22
- {ddi_fw-0.0.145.dist-info → ddi_fw-0.0.147.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.145.dist-info → ddi_fw-0.0.147.dist-info}/RECORD +5 -5
- {ddi_fw-0.0.145.dist-info → ddi_fw-0.0.147.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.145.dist-info → ddi_fw-0.0.147.dist-info}/top_level.txt +0 -0
@@ -6,22 +6,63 @@ import pandas as pd
|
|
6
6
|
|
7
7
|
from ddi_fw.utils import ZipHelper
|
8
8
|
|
9
|
-
from ..
|
9
|
+
from .. import BaseDataset
|
10
|
+
from ddi_fw.langchain.embeddings import PoolingStrategy
|
11
|
+
from ..db_utils import create_connection
|
10
12
|
# from ..db_utils import create_connection, select_all_drugs_as_dataframe, select_events_with_category
|
11
13
|
|
12
14
|
HERE = pathlib.Path(__file__).resolve().parent
|
13
|
-
|
14
|
-
|
15
|
+
list_of_embedding_columns = ['all_text', 'description',
|
16
|
+
'synthesis_reference', 'indication',
|
17
|
+
'pharmacodynamics', 'mechanism_of_action',
|
18
|
+
'toxicity', 'metabolism',
|
19
|
+
'absorption', 'half_life',
|
20
|
+
'protein_binding', 'route_of_elimination',
|
21
|
+
'volume_of_distribution', 'clearance']
|
22
|
+
|
23
|
+
list_of_chemical_property_columns = ['enzyme',
|
24
|
+
'target',
|
25
|
+
'smile']
|
26
|
+
|
27
|
+
list_of_ner_columns = ['tui', 'cui', 'entities']
|
15
28
|
class MDFSADDIDataset(BaseDataset):
|
16
|
-
def __init__(self,
|
29
|
+
def __init__(self, embedding_size,
|
30
|
+
embedding_dict,
|
31
|
+
embeddings_pooling_strategy: PoolingStrategy,
|
32
|
+
ner_df,
|
33
|
+
chemical_property_columns=['enzyme',
|
17
34
|
'target',
|
18
35
|
'smile'],
|
19
36
|
embedding_columns=[],
|
20
37
|
ner_columns=[],
|
21
38
|
**kwargs):
|
22
39
|
|
23
|
-
|
24
|
-
|
40
|
+
columns = kwargs['columns']
|
41
|
+
if columns:
|
42
|
+
chemical_property_columns = []
|
43
|
+
embedding_columns=[]
|
44
|
+
ner_columns=[]
|
45
|
+
for column in columns:
|
46
|
+
if column in list_of_chemical_property_columns:
|
47
|
+
chemical_property_columns.append(column)
|
48
|
+
elif column in list_of_embedding_columns:
|
49
|
+
embedding_columns.append(column)
|
50
|
+
elif column in list_of_ner_columns:
|
51
|
+
ner_columns.append(column)
|
52
|
+
# elif column == 'smile_2':
|
53
|
+
# continue
|
54
|
+
else:
|
55
|
+
raise Exception(f"{column} is not related this dataset")
|
56
|
+
|
57
|
+
|
58
|
+
super().__init__(embedding_size=embedding_size,
|
59
|
+
embedding_dict=embedding_dict,
|
60
|
+
embeddings_pooling_strategy=embeddings_pooling_strategy,
|
61
|
+
ner_df=ner_df,
|
62
|
+
chemical_property_columns=chemical_property_columns,
|
63
|
+
embedding_columns=embedding_columns,
|
64
|
+
ner_columns=ner_columns,
|
65
|
+
**kwargs)
|
25
66
|
|
26
67
|
db_zip_path = HERE.joinpath('mdf-sa-ddi.zip')
|
27
68
|
db_path = HERE.joinpath('mdf-sa-ddi.db')
|
@@ -33,7 +74,8 @@ class MDFSADDIDataset(BaseDataset):
|
|
33
74
|
conn = create_connection(db_path)
|
34
75
|
self.drugs_df = select_all_drugs_as_dataframe(conn)
|
35
76
|
self.ddis_df = select_all_events_as_dataframe(conn)
|
36
|
-
kwargs = {'index_path': str(HERE.joinpath('indexes'))}
|
77
|
+
# kwargs = {'index_path': str(HERE.joinpath('indexes'))}
|
78
|
+
kwargs['index_path'] = str(HERE.joinpath('indexes'))
|
37
79
|
|
38
80
|
self.index_path = kwargs.get('index_path')
|
39
81
|
|
@@ -83,21 +125,6 @@ class MDFSADDIDataset(BaseDataset):
|
|
83
125
|
file_path=db_path, output_path=HERE, name='mdf-sa-ddi')
|
84
126
|
|
85
127
|
|
86
|
-
def create_connection(db_file=r"mdf-sa-ddi.db"):
|
87
|
-
""" create a database connection to the SQLite database
|
88
|
-
specified by db_file
|
89
|
-
:param db_file: database file
|
90
|
-
:return: Connection object or None
|
91
|
-
"""
|
92
|
-
conn = None
|
93
|
-
try:
|
94
|
-
conn = sqlite3.connect(db_file)
|
95
|
-
except Error as e:
|
96
|
-
print(e)
|
97
|
-
|
98
|
-
return conn
|
99
|
-
|
100
|
-
|
101
128
|
def select_all_drugs(conn):
|
102
129
|
cur = conn.cursor()
|
103
130
|
cur.execute(
|
@@ -47,7 +47,7 @@ ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt,sha256=fFJbN0DbKH4mve
|
|
47
47
|
ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt,sha256=NhiLF_5INQCpjOlE-RIxDKy7rYwksLdx60L6HCmDKoY,81247
|
48
48
|
ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt,sha256=bPvMCJVy7jtcaYbR-5bmdB6s7gT8NSfK2wDC7iJ0O10,81308
|
49
49
|
ddi_fw/datasets/mdf_sa_ddi/__init__.py,sha256=UEFBM92y2aJjlMJw4Jx405tOAwJ88r_nHAVgAszSjuo,68
|
50
|
-
ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=
|
50
|
+
ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=kYNmtg-s0V7mP-wjLMaAstNCG3vckMPQSE651RA_LAE,6502
|
51
51
|
ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv,sha256=EOOLF_0vVVzShoofcGYlOzpztlM1m9jJdftepHicix4,25787699
|
52
52
|
ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv,sha256=lpuMz5KxPsG6MKNuIIUmT5cZquWHQiIao8tXlmOHzq8,381321
|
53
53
|
ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip,sha256=DfN8mczGvWba2y45cPqtWtXjUDXy49VOtRfpcb0tn8c,4382827
|
@@ -106,7 +106,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
|
|
106
106
|
ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
|
107
107
|
ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
|
108
108
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
109
|
-
ddi_fw-0.0.
|
110
|
-
ddi_fw-0.0.
|
111
|
-
ddi_fw-0.0.
|
112
|
-
ddi_fw-0.0.
|
109
|
+
ddi_fw-0.0.147.dist-info/METADATA,sha256=tP6mvgGgtbBX-YxyZOzIaEf2K8vTWh0K0BjC_3c0DPs,1965
|
110
|
+
ddi_fw-0.0.147.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
111
|
+
ddi_fw-0.0.147.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
112
|
+
ddi_fw-0.0.147.dist-info/RECORD,,
|
File without changes
|
File without changes
|