ddi-fw 0.0.119__tar.gz → 0.0.121__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/core.py +0 -1
  4. ddi_fw-0.0.121/src/ddi_fw/datasets/ddi_mdl/base.py +149 -0
  5. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  6. ddi_fw-0.0.119/src/ddi_fw/datasets/ddi_mdl/base.py +0 -738
  7. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/README.md +0 -0
  8. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/setup.cfg +0 -0
  9. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/__init__.py +0 -0
  10. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/db_utils.py +0 -0
  11. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  12. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  13. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  14. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  15. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  16. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  17. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  18. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  19. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  20. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  21. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  22. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  23. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  24. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  25. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  26. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  27. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  28. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  29. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  30. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  31. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  32. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  33. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  34. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  35. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  36. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  37. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  38. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  39. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/idf_helper.py +0 -0
  40. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  41. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  42. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  43. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  44. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  45. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  46. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  47. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  48. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  49. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  50. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  51. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  52. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  53. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  54. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  55. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  56. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  57. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/datasets/setup_._py +0 -0
  58. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/drugbank/__init__.py +0 -0
  59. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  60. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  61. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  62. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  63. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  64. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/langchain/__init__.py +0 -0
  65. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/langchain/embeddings.py +0 -0
  66. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  67. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/langchain/storage.py +0 -0
  68. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ml/__init__.py +0 -0
  69. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  70. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ml/ml_helper.py +0 -0
  71. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ml/model_wrapper.py +0 -0
  72. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  73. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
  74. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ner/__init__.py +0 -0
  75. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  76. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/ner/ner.py +0 -0
  77. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/pipeline/__init__.py +0 -0
  78. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  79. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  80. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  81. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/pipeline/pipeline.py +0 -0
  82. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/basic_test.py +0 -0
  83. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/combination_test.py +0 -0
  84. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/compress_json_test.py +0 -0
  85. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/date_test.py +0 -0
  86. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/idf_score.py +0 -0
  87. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  88. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/mlfow_test.py +0 -0
  89. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  90. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/test.py +0 -0
  91. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  92. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/test/type_guarding_test.py +0 -0
  93. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/__init__.py +0 -0
  94. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/enums.py +0 -0
  95. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/kaggle.py +0 -0
  96. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/package_helper.py +0 -0
  97. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  98. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/utils.py +0 -0
  99. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw/utils/zip_helper.py +0 -0
  100. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  101. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  102. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw.egg-info/requires.txt +0 -0
  103. {ddi_fw-0.0.119 → ddi_fw-0.0.121}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.119
3
+ Version: 0.0.121
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.119"
8
+ version = "0.0.121"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -57,7 +57,6 @@ class BaseDataset(ABC):
57
57
  items = []
58
58
  y_train_label, y_test_label = stack(self.y_train), stack(self.y_test)
59
59
  self.__similarity_related_columns__.append("smile_2") #TODO
60
- self.__similarity_related_columns__.append("smile_3") #TODO
61
60
  for column in self.__similarity_related_columns__:
62
61
  train_data, test_data = stack(
63
62
  self.X_train[column]), stack(self.X_test[column])
@@ -0,0 +1,149 @@
1
+ import pathlib
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+ from ddi_fw.datasets.feature_vector_generation import SimilarityMatrixGenerator
7
+ from ddi_fw.langchain.embeddings import PoolingStrategy
8
+ from .. import BaseDataset
9
+ from ..db_utils import create_connection
10
+
11
# Directory that contains this module; the bundled SQLite database and the
# index files are resolved relative to it.
HERE = pathlib.Path(__file__).resolve().parent

# Column names that DDIMDLDataset routes to its `embedding_columns` argument
# when a `columns` selection is supplied.
list_of_embedding_columns = [
    'all_text', 'description',
    'synthesis_reference', 'indication',
    'pharmacodynamics', 'mechanism_of_action',
    'toxicity', 'metabolism',
    'absorption', 'half_life',
    'protein_binding', 'route_of_elimination',
    'volume_of_distribution', 'clearance',
]

# Column names that DDIMDLDataset routes to `chemical_property_columns`.
list_of_chemical_property_columns = [
    'enzyme',
    'target',
    'pathway',
    'smile',
]

# Column names that DDIMDLDataset routes to `ner_columns`.
list_of_ner_columns = ['tui', 'cui', 'entities']
25
+
26
+
27
def indices_to_binary_vector(indices, vector_length=881):
    """Return a 0/1 list of ``vector_length`` items with the given positions set.

    :param indices: iterable of integer positions to switch on.
    :param vector_length: length of the returned list (defaults to 881).
    :return: list of ints where ``result[i] == 1`` for every ``i`` in
        ``indices`` that satisfies ``0 <= i < vector_length`` and ``0``
        everywhere else.
    """
    binary_vector = [0] * vector_length

    for index in indices:
        # Positions outside the vector (negative values included) are skipped
        # on purpose rather than raising or wrapping around.
        if 0 <= index < vector_length:
            binary_vector[index] = 1

    return binary_vector
38
+
39
class DDIMDLDataset(BaseDataset):
    """Dataset that loads drugs and interaction events from ``data/event.db``.

    The database file and the ``indexes`` directory are both resolved relative
    to this module. After construction the instance exposes:

    * ``drugs_df``   - one row per drug (see ``__select_all_drugs_as_dataframe__``)
    * ``ddis_df``    - one row per extracted event (see ``__select_all_events__``)
    * ``index_path`` - string path of the ``indexes`` directory next to this module
    """

    def __init__(self, embedding_size,
                 embedding_dict,
                 embeddings_pooling_strategy: PoolingStrategy,
                 ner_df,
                 chemical_property_columns=None,
                 embedding_columns=None,
                 ner_columns=None,
                 **kwargs):
        """Classify the requested columns, initialise the base class, load the DB.

        :param embedding_size: forwarded unchanged to ``BaseDataset.__init__``.
        :param embedding_dict: forwarded unchanged to ``BaseDataset.__init__``.
        :param embeddings_pooling_strategy: forwarded unchanged to
            ``BaseDataset.__init__``.
        :param ner_df: forwarded unchanged to ``BaseDataset.__init__``.
        :param chemical_property_columns: defaults to
            ``['enzyme', 'target', 'pathway', 'smile']`` when omitted.
        :param embedding_columns: defaults to an empty list when omitted.
        :param ner_columns: defaults to an empty list when omitted.
        :param kwargs: forwarded to ``BaseDataset.__init__``. If it contains a
            non-empty ``columns`` entry, the three ``*_columns`` arguments are
            ignored and rebuilt by sorting every name in ``columns`` into the
            matching group.
        :raises ValueError: if a name in ``columns`` belongs to none of the
            known column groups.
        """
        # `.get` so that callers which do not pass `columns` fall back to the
        # explicit *_columns arguments instead of failing with KeyError.
        columns = kwargs.get('columns')
        if columns:
            chemical_property_columns = []
            embedding_columns = []
            ner_columns = []
            for column in columns:
                if column in list_of_chemical_property_columns:
                    chemical_property_columns.append(column)
                elif column in list_of_embedding_columns:
                    embedding_columns.append(column)
                elif column in list_of_ner_columns:
                    ner_columns.append(column)
                else:
                    raise ValueError(f"{column} is not related this dataset")
        else:
            # None sentinels replace the former list defaults so that every
            # instance gets its own list objects rather than sharing one
            # default list across all constructions.
            if chemical_property_columns is None:
                chemical_property_columns = list(
                    list_of_chemical_property_columns)
            if embedding_columns is None:
                embedding_columns = []
            if ner_columns is None:
                ner_columns = []

        super().__init__(embedding_size=embedding_size,
                         embedding_dict=embedding_dict,
                         embeddings_pooling_strategy=embeddings_pooling_strategy,
                         ner_df=ner_df,
                         chemical_property_columns=chemical_property_columns,
                         embedding_columns=embedding_columns,
                         ner_columns=ner_columns,
                         **kwargs)

        db = HERE.joinpath('data/event.db')
        conn = create_connection(db)
        try:
            print("db prep")
            self.drugs_df = self.__select_all_drugs_as_dataframe__(conn)
            self.ddis_df = self.__select_all_events__(conn)
            print("db bitti")
        finally:
            # The connection is only needed for the two reads above; release
            # it even when one of them raises.
            if conn is not None:
                conn.close()

        # Assigned after the base-class initialiser so this value is the one
        # that ends up on the instance.
        self.index_path = str(HERE.joinpath('indexes'))

        # NOTE: a Jaccard-similarity feature named "smile_2" (built from the
        # drugs' `smile` column via SimilarityMatrixGenerator) used to be
        # sketched here as commented-out code; it is not computed.

    def __select_all_drugs_as_dataframe__(self, conn):
        """Read the ``drug`` table into a DataFrame.

        :param conn: open DB connection providing ``cursor()``.
        :return: DataFrame with columns ``index, id, name, target, enzyme,
            pathway, smile``; the last four hold lists obtained by splitting
            the stored text on ``'|'``.
        """
        headers = ['index', 'id', 'name',
                   'target', 'enzyme', 'pathway', 'smile']
        cur = conn.cursor()
        cur.execute(
            '''select "index", id, name, target, enzyme, pathway, smile from drug''')
        rows = cur.fetchall()
        df = pd.DataFrame(columns=headers, data=rows)

        # These columns are stored as '|'-separated text; expose them as lists.
        for column in ('enzyme', 'target', 'pathway', 'smile'):
            df[column] = df[column].apply(lambda x: x.split('|'))

        return df

    def __select_all_events__(self, conn):
        """Read every ``extraction`` row joined with both participating drugs.

        Each ``extraction`` row is joined to ``drug`` twice, matching
        ``drugA`` and ``drugB`` by drug name.

        :param conn: open DB connection providing ``cursor()``.
        :return: DataFrame with columns ``index, id1, name1, id2, name2,
            event_category`` where ``event_category`` is ``mechanism`` and
            ``action`` joined by a single space.
        """
        cur = conn.cursor()
        cur.execute('''
            select ex."index", d1.id, d1.name, d2.id, d2.name, mechanism || ' ' ||action from extraction ex
            join drug d1 on d1.name = ex.drugA
            join drug d2 on d2.name = ex.drugB
        ''')

        rows = cur.fetchall()

        headers = ["index", "id1", "name1", "id2", "name2", "event_category"]
        return pd.DataFrame(columns=headers, data=rows)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.119
3
+ Version: 0.0.121
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>