ddi-fw 0.0.145__py3-none-any.whl → 0.0.147__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -6,22 +6,63 @@ import pandas as pd
6
6
 
7
7
  from ddi_fw.utils import ZipHelper
8
8
 
9
- from ..core import BaseDataset
9
+ from .. import BaseDataset
10
+ from ddi_fw.langchain.embeddings import PoolingStrategy
11
+ from ..db_utils import create_connection
10
12
  # from ..db_utils import create_connection, select_all_drugs_as_dataframe, select_events_with_category
11
13
 
12
14
  HERE = pathlib.Path(__file__).resolve().parent
13
-
14
-
15
+ list_of_embedding_columns = ['all_text', 'description',
16
+ 'synthesis_reference', 'indication',
17
+ 'pharmacodynamics', 'mechanism_of_action',
18
+ 'toxicity', 'metabolism',
19
+ 'absorption', 'half_life',
20
+ 'protein_binding', 'route_of_elimination',
21
+ 'volume_of_distribution', 'clearance']
22
+
23
+ list_of_chemical_property_columns = ['enzyme',
24
+ 'target',
25
+ 'smile']
26
+
27
+ list_of_ner_columns = ['tui', 'cui', 'entities']
15
28
  class MDFSADDIDataset(BaseDataset):
16
- def __init__(self, chemical_property_columns=['enzyme',
29
+ def __init__(self, embedding_size,
30
+ embedding_dict,
31
+ embeddings_pooling_strategy: PoolingStrategy,
32
+ ner_df,
33
+ chemical_property_columns=['enzyme',
17
34
  'target',
18
35
  'smile'],
19
36
  embedding_columns=[],
20
37
  ner_columns=[],
21
38
  **kwargs):
22
39
 
23
- super().__init__(chemical_property_columns, embedding_columns,
24
- ner_columns, **kwargs)
40
+ columns = kwargs['columns']
41
+ if columns:
42
+ chemical_property_columns = []
43
+ embedding_columns=[]
44
+ ner_columns=[]
45
+ for column in columns:
46
+ if column in list_of_chemical_property_columns:
47
+ chemical_property_columns.append(column)
48
+ elif column in list_of_embedding_columns:
49
+ embedding_columns.append(column)
50
+ elif column in list_of_ner_columns:
51
+ ner_columns.append(column)
52
+ # elif column == 'smile_2':
53
+ # continue
54
+ else:
55
+ raise Exception(f"{column} is not related this dataset")
56
+
57
+
58
+ super().__init__(embedding_size=embedding_size,
59
+ embedding_dict=embedding_dict,
60
+ embeddings_pooling_strategy=embeddings_pooling_strategy,
61
+ ner_df=ner_df,
62
+ chemical_property_columns=chemical_property_columns,
63
+ embedding_columns=embedding_columns,
64
+ ner_columns=ner_columns,
65
+ **kwargs)
25
66
 
26
67
  db_zip_path = HERE.joinpath('mdf-sa-ddi.zip')
27
68
  db_path = HERE.joinpath('mdf-sa-ddi.db')
@@ -33,7 +74,8 @@ class MDFSADDIDataset(BaseDataset):
33
74
  conn = create_connection(db_path)
34
75
  self.drugs_df = select_all_drugs_as_dataframe(conn)
35
76
  self.ddis_df = select_all_events_as_dataframe(conn)
36
- kwargs = {'index_path': str(HERE.joinpath('indexes'))}
77
+ # kwargs = {'index_path': str(HERE.joinpath('indexes'))}
78
+ kwargs['index_path'] = str(HERE.joinpath('indexes'))
37
79
 
38
80
  self.index_path = kwargs.get('index_path')
39
81
 
@@ -83,21 +125,6 @@ class MDFSADDIDataset(BaseDataset):
83
125
  file_path=db_path, output_path=HERE, name='mdf-sa-ddi')
84
126
 
85
127
 
86
- def create_connection(db_file=r"mdf-sa-ddi.db"):
87
- """ create a database connection to the SQLite database
88
- specified by db_file
89
- :param db_file: database file
90
- :return: Connection object or None
91
- """
92
- conn = None
93
- try:
94
- conn = sqlite3.connect(db_file)
95
- except Error as e:
96
- print(e)
97
-
98
- return conn
99
-
100
-
101
128
  def select_all_drugs(conn):
102
129
  cur = conn.cursor()
103
130
  cur.execute(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.145
3
+ Version: 0.0.147
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -47,7 +47,7 @@ ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt,sha256=fFJbN0DbKH4mve
47
47
  ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt,sha256=NhiLF_5INQCpjOlE-RIxDKy7rYwksLdx60L6HCmDKoY,81247
48
48
  ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt,sha256=bPvMCJVy7jtcaYbR-5bmdB6s7gT8NSfK2wDC7iJ0O10,81308
49
49
  ddi_fw/datasets/mdf_sa_ddi/__init__.py,sha256=UEFBM92y2aJjlMJw4Jx405tOAwJ88r_nHAVgAszSjuo,68
50
- ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=e56uJEU6ZZlWh9ZZCO-H7Pcqm8gQE9SQtm7ujeDKgIU,4893
50
+ ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=kYNmtg-s0V7mP-wjLMaAstNCG3vckMPQSE651RA_LAE,6502
51
51
  ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv,sha256=EOOLF_0vVVzShoofcGYlOzpztlM1m9jJdftepHicix4,25787699
52
52
  ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv,sha256=lpuMz5KxPsG6MKNuIIUmT5cZquWHQiIao8tXlmOHzq8,381321
53
53
  ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip,sha256=DfN8mczGvWba2y45cPqtWtXjUDXy49VOtRfpcb0tn8c,4382827
@@ -106,7 +106,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
106
106
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
107
107
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
108
108
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
109
- ddi_fw-0.0.145.dist-info/METADATA,sha256=sXEGur4T8IymB0S1rMjVy6I4Z5Lyk3JM7djW2WERe48,1965
110
- ddi_fw-0.0.145.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
111
- ddi_fw-0.0.145.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
112
- ddi_fw-0.0.145.dist-info/RECORD,,
109
+ ddi_fw-0.0.147.dist-info/METADATA,sha256=tP6mvgGgtbBX-YxyZOzIaEf2K8vTWh0K0BjC_3c0DPs,1965
110
+ ddi_fw-0.0.147.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
111
+ ddi_fw-0.0.147.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
112
+ ddi_fw-0.0.147.dist-info/RECORD,,