ddi-fw 0.0.44__tar.gz → 0.0.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/core.py +30 -16
  4. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/base.py +4 -2
  5. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  6. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/README.md +0 -0
  7. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/setup.cfg +0 -0
  8. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/__init__.py +0 -0
  9. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/db_utils.py +0 -0
  10. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  11. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  12. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  13. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  14. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  15. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  16. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  17. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  18. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  19. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  20. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  21. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  22. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  23. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  24. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  25. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  26. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  27. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  28. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  29. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  30. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  31. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  32. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  33. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  34. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  35. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  36. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  37. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/embedding_generator_new.py +0 -0
  38. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  39. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/idf_helper.py +0 -0
  40. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  41. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  42. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  43. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  44. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  45. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  46. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  47. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  48. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  49. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  50. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  51. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  52. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  53. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  54. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  55. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  56. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  57. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/datasets/setup_._py +0 -0
  58. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/drugbank/__init__.py +0 -0
  59. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  60. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  61. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  62. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  63. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  64. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/experiments/__init__.py +0 -0
  65. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/experiments/custom_torch_model.py +0 -0
  66. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/experiments/evaluation_helper.py +0 -0
  67. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/experiments/tensorflow_helper.py +0 -0
  68. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/experiments/test.py +0 -0
  69. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/ner/__init__.py +0 -0
  70. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  71. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/ner/ner.py +0 -0
  72. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/basic_test.py +0 -0
  73. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/combination_test.py +0 -0
  74. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/compress_json_test.py +0 -0
  75. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/date_test.py +0 -0
  76. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/idf_score.py +0 -0
  77. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  78. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/mlfow_test.py +0 -0
  79. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  80. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/test.py +0 -0
  81. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  82. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/test/type_guarding_test.py +0 -0
  83. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/utils/__init__.py +0 -0
  84. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/utils/enums.py +0 -0
  85. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  86. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/utils/utils.py +0 -0
  87. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw/utils/zip_helper.py +0 -0
  88. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  89. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  90. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw.egg-info/requires.txt +0 -0
  91. {ddi_fw-0.0.44 → ddi_fw-0.0.46}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.44
3
+ Version: 0.0.46
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.44"
8
+ version = "0.0.46"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -22,7 +22,14 @@ def stack(df_column):
22
22
 
23
23
 
24
24
  class BaseDataset(ABC):
25
- def __init__(self,embedding_size,embedding_dict, embeddings_pooling_strategy:PoolingStrategy, ner_df, chemical_property_columns, embedding_columns, ner_columns,
25
+ def __init__(self,
26
+ embedding_size,
27
+ embedding_dict,
28
+ embeddings_pooling_strategy: PoolingStrategy,
29
+ ner_df,
30
+ chemical_property_columns,
31
+ embedding_columns,
32
+ ner_columns,
26
33
  **kwargs):
27
34
  self.embedding_size = embedding_size
28
35
  self.embedding_dict = embedding_dict
@@ -61,7 +68,7 @@ class BaseDataset(ABC):
61
68
  y_train_label, test_data, y_test_label])
62
69
  return items
63
70
 
64
- ##remove this function
71
+ # remove this function
65
72
  def generate_sim_matrices(self, chemical_properties_df, two_d_dict):
66
73
 
67
74
  jaccard_sim_dict = {}
@@ -125,7 +132,7 @@ class BaseDataset(ABC):
125
132
 
126
133
  return two_d_dict
127
134
 
128
- #todo dictionary içinde ndarray dönsün
135
+ # todo dictionary içinde ndarray dönsün
129
136
  def generate_vectors(self, chemical_properties_df):
130
137
  self.stopwatch.reset()
131
138
  self.stopwatch.start()
@@ -144,19 +151,23 @@ class BaseDataset(ABC):
144
151
  print(f'vector_generation: {self.stopwatch.elapsed}')
145
152
 
146
153
 
147
- ##remove this function
148
- def sim(self,chemical_properties_df):
154
+ # remove this function
155
+
156
+
157
+ def sim(self, chemical_properties_df):
149
158
  self.stopwatch.reset()
150
159
  self.stopwatch.start()
151
- from scipy.spatial.distance import pdist
160
+ from scipy.spatial.distance import pdist
152
161
  sim_matrix_gen = SimilarityMatrixGenerator()
153
162
 
154
163
  drugbank_ids = chemical_properties_df['id'].to_list()
155
164
  similarity_matrices = {}
156
165
  for column in self.__similarity_related_columns__:
157
- df = pd.DataFrame(np.stack(chemical_properties_df[f'{column}_vectors'].values), index = drugbank_ids)
166
+ df = pd.DataFrame(np.stack(
167
+ chemical_properties_df[f'{column}_vectors'].values), index=drugbank_ids)
158
168
  # similarity_matrices[column] = 1 - pdist(df.to_numpy(), metric='jaccard')
159
- similarity_matrices[column] = sim_matrix_gen.create_jaccard_similarity_matrices(df.to_numpy())
169
+ similarity_matrices[column] = sim_matrix_gen.create_jaccard_similarity_matrices(
170
+ df.to_numpy())
160
171
  self.stopwatch.stop()
161
172
  print(f'sim: {self.stopwatch.elapsed}')
162
173
  return similarity_matrices
@@ -177,12 +188,14 @@ class BaseDataset(ABC):
177
188
  X = self.dataframe.drop('class', axis=1)
178
189
  y = self.dataframe['class']
179
190
  X_train, X_test, y_train, y_test = train_test_split(
180
- X, y, shuffle=shuffle, test_size=test_size, stratify=np.argmax(np.vstack(y.to_numpy()),axis = 1))
191
+ X, y, shuffle=shuffle, test_size=test_size, stratify=np.argmax(np.vstack(y.to_numpy()), axis=1))
181
192
  # k_fold = KFold(n_splits=fold_size, shuffle=shuffle, random_state=1)
182
193
  # folds = k_fold.split(X_train)
183
194
 
184
- k_fold = StratifiedKFold(n_splits=fold_size, shuffle=shuffle, random_state=1)
185
- folds = k_fold.split(X_train, np.argmax(np.vstack(y_train.to_numpy()),axis = 1))
195
+ k_fold = StratifiedKFold(
196
+ n_splits=fold_size, shuffle=shuffle, random_state=1)
197
+ folds = k_fold.split(X_train, np.argmax(
198
+ np.vstack(y_train.to_numpy()), axis=1))
186
199
  train_idx_arr = []
187
200
  val_idx_arr = []
188
201
  for i, (train_index, val_index) in enumerate(folds):
@@ -269,7 +282,7 @@ class BaseDataset(ABC):
269
282
  # lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
270
283
  # x = {k: v for l in d.values.tolist() for k, v in l.items()}
271
284
  # embedding_dict[embedding_column] = x
272
-
285
+
273
286
  # self.ner_df = CTakesNER().load()
274
287
  drug_names = self.drugs_df['name'].to_list()
275
288
  drug_ids = self.drugs_df['id'].to_list()
@@ -354,7 +367,7 @@ class BaseDataset(ABC):
354
367
  # def similarity_lambda_fnc(row, value):
355
368
  # if row['id1'] in value and row['id2'] in value:
356
369
  # return value[row['id1']][row['id2']]
357
-
370
+
358
371
  def similarity_lambda_fnc(row, value):
359
372
  if row['id1'] in value:
360
373
  return value[row['id1']]
@@ -366,7 +379,7 @@ class BaseDataset(ABC):
366
379
  # return np.hstack(
367
380
  # (value[row['id1']], value[row['id2']]), dtype=np.float16)
368
381
 
369
- def x_fnc(row, embedding_column,embeddings_after_pooling):
382
+ def x_fnc(row, embedding_column, embeddings_after_pooling):
370
383
  if row['id1'] in self.embedding_dict[embedding_column]:
371
384
  v1 = embeddings_after_pooling[embedding_column][row['id1']]
372
385
  else:
@@ -387,10 +400,11 @@ class BaseDataset(ABC):
387
400
 
388
401
  for embedding_column in self.embedding_columns:
389
402
  print(f"concat {embedding_column} embeddings")
390
- embeddings_after_pooling = {k: self.embeddings_pooling_strategy.apply(v) for k,v in self.embedding_dict[embedding_column].items()}
403
+ embeddings_after_pooling = {k: self.embeddings_pooling_strategy.apply(
404
+ v) for k, v in self.embedding_dict[embedding_column].items()}
391
405
  # column_embeddings_dict = embedding_values[embedding_column]
392
406
  self.ddis_df[embedding_column+'_embedding'] = self.ddis_df.apply(
393
- x_fnc, args=(embedding_column,embeddings_after_pooling), axis=1)
407
+ x_fnc, args=(embedding_column, embeddings_after_pooling), axis=1)
394
408
 
395
409
  self.dataframe = self.ddis_df.copy()
396
410
  self.dataframe['class'] = list(classes)
@@ -1,6 +1,8 @@
1
1
  import pathlib
2
2
 
3
3
  import pandas as pd
4
+
5
+ from ddi_fw.datasets.embedding_generator_new import PoolingStrategy
4
6
  from .. import BaseDataset
5
7
  from ..db_utils import create_connection
6
8
 
@@ -8,7 +10,7 @@ HERE = pathlib.Path(__file__).resolve().parent
8
10
 
9
11
 
10
12
  class DDIMDLDataset(BaseDataset):
11
- def __init__(self, embedding_size, embedding_dict, ner_df, chemical_property_columns=['enzyme',
13
+ def __init__(self, embedding_size, embedding_dict,embeddings_pooling_strategy:PoolingStrategy, ner_df, chemical_property_columns=['enzyme',
12
14
  'target',
13
15
  'pathway',
14
16
  'smile'],
@@ -16,7 +18,7 @@ class DDIMDLDataset(BaseDataset):
16
18
  ner_columns=[],
17
19
  **kwargs):
18
20
 
19
- super().__init__(embedding_size, embedding_dict,ner_df, chemical_property_columns, embedding_columns,
21
+ super().__init__(embedding_size, embedding_dict,ner_df,embeddings_pooling_strategy, chemical_property_columns, embedding_columns,
20
22
  ner_columns, **kwargs)
21
23
 
22
24
  # kwargs = {'index_path': str(HERE.joinpath('indexes'))}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.44
3
+ Version: 0.0.46
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
File without changes
File without changes
File without changes
File without changes