ddi-fw 0.0.23__tar.gz → 0.0.24__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/core.py +19 -24
  4. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/base.py +2 -2
  5. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/utils/zip_helper.py +37 -7
  6. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  7. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/README.md +0 -0
  8. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/setup.cfg +0 -0
  9. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/__init__.py +0 -0
  10. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/db_utils.py +0 -0
  11. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  12. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  13. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  14. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  15. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  16. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  17. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  18. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  19. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  20. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  21. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  22. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  23. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  24. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  25. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  26. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  27. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  28. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  29. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  30. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  31. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  32. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  33. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  34. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  35. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  36. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  37. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  38. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/embedding_generator_new.py +0 -0
  39. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  40. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/idf_helper.py +0 -0
  41. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  42. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  43. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  44. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  45. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  46. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  47. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  48. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  49. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  50. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  51. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  52. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  53. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  54. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  55. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  56. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  57. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  58. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/datasets/setup_._py +0 -0
  59. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/drugbank/__init__.py +0 -0
  60. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  61. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  62. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  63. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  64. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  65. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/experiments/__init__.py +0 -0
  66. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/experiments/custom_torch_model.py +0 -0
  67. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/experiments/evaluation_helper.py +0 -0
  68. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/experiments/tensorflow_helper.py +0 -0
  69. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/experiments/test.py +0 -0
  70. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/ner/__init__.py +0 -0
  71. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  72. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/ner/ner.py +0 -0
  73. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/basic_test.py +0 -0
  74. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/combination_test.py +0 -0
  75. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/compress_json_test.py +0 -0
  76. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/date_test.py +0 -0
  77. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/idf_score.py +0 -0
  78. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  79. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/mlfow_test.py +0 -0
  80. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  81. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/test.py +0 -0
  82. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  83. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/test/type_guarding_test.py +0 -0
  84. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/utils/__init__.py +0 -0
  85. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/utils/enums.py +0 -0
  86. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw/utils/utils.py +0 -0
  87. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  88. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  89. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw.egg-info/requires.txt +0 -0
  90. {ddi_fw-0.0.23 → ddi_fw-0.0.24}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.23
3
+ Version: 0.0.24
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.23"
8
+ version = "0.0.24"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -21,8 +21,9 @@ def stack(df_column):
21
21
 
22
22
 
23
23
  class BaseDataset(ABC):
24
- def __init__(self, chemical_property_columns, embedding_columns, ner_columns,
24
+ def __init__(self,embedding_dict, chemical_property_columns, embedding_columns, ner_columns,
25
25
  **kwargs):
26
+ self.embedding_dict = embedding_dict
26
27
  self.__similarity_related_columns__ = []
27
28
  self.__similarity_related_columns__.extend(chemical_property_columns)
28
29
  self.__similarity_related_columns__.extend(ner_columns)
@@ -250,21 +251,21 @@ class BaseDataset(ABC):
250
251
 
251
252
  # her bir metin tipi için embedding oluşturursan burayı düzenle
252
253
  def prep(self):
253
- if self.embedding_columns:
254
- zip_helper = ZipHelper()
255
- zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
256
- str(HERE.joinpath('zips/embeddings')))
257
-
258
- embedding_dict = dict()
259
- for embedding_column in self.embedding_columns:
260
- embedding_file = HERE.joinpath(
261
- f'zips/embeddings/{embedding_column}_embeddings.pkl')
262
- embedding_values = pd.read_pickle(embedding_file)
263
- d = embedding_values.apply(
264
- lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
265
- x = {k: v for l in d.values.tolist() for k, v in l.items()}
266
- embedding_dict[embedding_column] = x
267
-
254
+ # if self.embedding_columns:
255
+ # zip_helper = ZipHelper()
256
+ # zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
257
+ # str(HERE.joinpath('zips/embeddings')))
258
+
259
+ # embedding_dict = dict()
260
+ # for embedding_column in self.embedding_columns:
261
+ # embedding_file = HERE.joinpath(
262
+ # f'zips/embeddings/{embedding_column}_embeddings.pkl')
263
+ # embedding_values = pd.read_pickle(embedding_file)
264
+ # d = embedding_values.apply(
265
+ # lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
266
+ # x = {k: v for l in d.values.tolist() for k, v in l.items()}
267
+ # embedding_dict[embedding_column] = x
268
+
268
269
  self.ner_df = CTakesNER().load()
269
270
  drug_names = self.drugs_df['name'].to_list()
270
271
  drug_ids = self.drugs_df['id'].to_list()
@@ -362,14 +363,8 @@ class BaseDataset(ABC):
362
363
  # (value[row['id1']], value[row['id2']]), dtype=np.float16)
363
364
 
364
365
  def x_fnc(row, embedding_values, embedding_column):
365
- # first = embedding_values[embedding_values.id == row['id1']]
366
- # second = embedding_values[embedding_values.id == row['id2']]
367
- # v1 = first.iloc[0][embedding_column+'_embedding']
368
- # v2 = second.iloc[0][embedding_column+'_embedding']
369
- v1 = embedding_dict[embedding_column][row['id1']]
370
- v2 = embedding_dict[embedding_column][row['id2']]
371
- # v1 = embedding_dict[row['id1']][embedding_column+'_embedding']
372
- # v2 = embedding_dict[row['id2']][embedding_column+'_embedding']
366
+ v1 = self.embedding_dict[embedding_column][row['id1']]
367
+ v2 = self.embedding_dict[embedding_column][row['id2']]
373
368
  return np.float16(np.hstack(
374
369
  (v1, v2)))
375
370
 
@@ -8,7 +8,7 @@ HERE = pathlib.Path(__file__).resolve().parent
8
8
 
9
9
 
10
10
  class DDIMDLDataset(BaseDataset):
11
- def __init__(self, chemical_property_columns=['enzyme',
11
+ def __init__(self, embedding_dict, chemical_property_columns=['enzyme',
12
12
  'target',
13
13
  'pathway',
14
14
  'smile'],
@@ -16,7 +16,7 @@ class DDIMDLDataset(BaseDataset):
16
16
  ner_columns=[],
17
17
  **kwargs):
18
18
 
19
- super().__init__(chemical_property_columns, embedding_columns,
19
+ super().__init__(embedding_dict, chemical_property_columns, embedding_columns,
20
20
  ner_columns, **kwargs)
21
21
 
22
22
  # kwargs = {'index_path': str(HERE.joinpath('indexes'))}
@@ -7,21 +7,51 @@ import math
7
7
  from ddi_fw.utils.utils import create_folder_if_not_exists
8
8
 
9
9
 
10
+ def get_file_name_and_folder(file_path):
11
+ file_path_components = file_path.split('/')
12
+ file_name = file_path_components[-1]
13
+ file_path = file_path[:len(file_name)*-1-1]
14
+ return file_name, file_path
15
+
16
+
10
17
  class ZipHelper:
11
18
  def __init__(self):
12
19
  pass
13
20
 
14
- def zip_single_file(self, name, file_path, output_path):
21
+ def __zipdir__(self, file_path, zipf):
22
+ # ziph is zipfile handle
23
+ for root, dirs, files in os.walk(file_path):
24
+ for file in files:
25
+ zipf.write(os.path.join(root, file),
26
+ os.path.relpath(os.path.join(root, file),
27
+ os.path.join(file_path, '..')))
28
+
29
+ def zip_dir(self, zip_name, file_path, output_path):
30
+ create_folder_if_not_exists(output_path)
31
+ with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', z.ZIP_DEFLATED) as zipf:
32
+ self.__zipdir__(file_path, zipf)
33
+
34
+ def zip_single_file(self, zip_name, file_path, output_path):
15
35
  if not os.path.exists(output_path):
16
36
  os.makedirs(output_path)
17
- with z.ZipFile(f'{output_path}/{name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
37
+ with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
18
38
  zipObj.write(file_path, basename(file_path))
19
39
 
20
- def zip_as_multipart(self, zip_name, folder, file_name, output_path, chunk_size):
21
- file_path = folder+'/'+file_name
22
- parts_path=f"{folder}/parts"
23
- self.zip_single_file(zip_name, file_path, output_path)
40
+ def zip_as_multipart(self, zip_name, file_path, output_path, chunk_size):
41
+ parent_folder = os.path.dirname(file_path)
42
+
43
+ parts_path = f"{parent_folder}/parts"
24
44
  create_folder_if_not_exists(parts_path)
45
+ # file_name, file_extension = os.path.splitext(file_path)
46
+ # file_name = os.path.basename(file_path)
47
+ file_name, folder = get_file_name_and_folder(file_path)
48
+
49
+ if os.path.isdir(file_path):
50
+ self.zip_dir(zip_name, file_path, output_path)
51
+ elif os.path.isfile(file_path):
52
+ self.zip_single_file(zip_name, file_path, output_path)
53
+ else:
54
+ return
25
55
  with open(file_path, 'rb') as f:
26
56
  chunk_number = 1
27
57
  while True:
@@ -67,7 +97,7 @@ class ZipHelper:
67
97
  z1.extractall(path=output_path)
68
98
  print(f'{file_path} has been extracted')
69
99
 
70
- def extract_multiparts(self, output_path, parts_path,output_file):
100
+ def extract_multiparts(self, output_path, parts_path, output_file):
71
101
  input_parts = [parts_path+'/' + p for p in os.listdir(parts_path)]
72
102
  with open(f"{output_path}/{output_file}", 'wb') as outfile:
73
103
  for part in input_parts:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.23
3
+ Version: 0.0.24
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
File without changes
File without changes
File without changes
File without changes