ddi-fw 0.0.23__py3-none-any.whl → 0.0.25__py3-none-any.whl

This diff shows the differences between the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -21,8 +21,9 @@ def stack(df_column):
21
21
 
22
22
 
23
23
  class BaseDataset(ABC):
24
- def __init__(self, chemical_property_columns, embedding_columns, ner_columns,
24
+ def __init__(self,embedding_dict, chemical_property_columns, embedding_columns, ner_columns,
25
25
  **kwargs):
26
+ self.embedding_dict = embedding_dict
26
27
  self.__similarity_related_columns__ = []
27
28
  self.__similarity_related_columns__.extend(chemical_property_columns)
28
29
  self.__similarity_related_columns__.extend(ner_columns)
@@ -250,21 +251,21 @@ class BaseDataset(ABC):
250
251
 
251
252
  # her bir metin tipi için embedding oluşturursan burayı düzenle
252
253
  def prep(self):
253
- if self.embedding_columns:
254
- zip_helper = ZipHelper()
255
- zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
256
- str(HERE.joinpath('zips/embeddings')))
257
-
258
- embedding_dict = dict()
259
- for embedding_column in self.embedding_columns:
260
- embedding_file = HERE.joinpath(
261
- f'zips/embeddings/{embedding_column}_embeddings.pkl')
262
- embedding_values = pd.read_pickle(embedding_file)
263
- d = embedding_values.apply(
264
- lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
265
- x = {k: v for l in d.values.tolist() for k, v in l.items()}
266
- embedding_dict[embedding_column] = x
267
-
254
+ # if self.embedding_columns:
255
+ # zip_helper = ZipHelper()
256
+ # zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
257
+ # str(HERE.joinpath('zips/embeddings')))
258
+
259
+ # embedding_dict = dict()
260
+ # for embedding_column in self.embedding_columns:
261
+ # embedding_file = HERE.joinpath(
262
+ # f'zips/embeddings/{embedding_column}_embeddings.pkl')
263
+ # embedding_values = pd.read_pickle(embedding_file)
264
+ # d = embedding_values.apply(
265
+ # lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
266
+ # x = {k: v for l in d.values.tolist() for k, v in l.items()}
267
+ # embedding_dict[embedding_column] = x
268
+
268
269
  self.ner_df = CTakesNER().load()
269
270
  drug_names = self.drugs_df['name'].to_list()
270
271
  drug_ids = self.drugs_df['id'].to_list()
@@ -362,14 +363,8 @@ class BaseDataset(ABC):
362
363
  # (value[row['id1']], value[row['id2']]), dtype=np.float16)
363
364
 
364
365
  def x_fnc(row, embedding_values, embedding_column):
365
- # first = embedding_values[embedding_values.id == row['id1']]
366
- # second = embedding_values[embedding_values.id == row['id2']]
367
- # v1 = first.iloc[0][embedding_column+'_embedding']
368
- # v2 = second.iloc[0][embedding_column+'_embedding']
369
- v1 = embedding_dict[embedding_column][row['id1']]
370
- v2 = embedding_dict[embedding_column][row['id2']]
371
- # v1 = embedding_dict[row['id1']][embedding_column+'_embedding']
372
- # v2 = embedding_dict[row['id2']][embedding_column+'_embedding']
366
+ v1 = self.embedding_dict[embedding_column][row['id1']]
367
+ v2 = self.embedding_dict[embedding_column][row['id2']]
373
368
  return np.float16(np.hstack(
374
369
  (v1, v2)))
375
370
 
@@ -8,7 +8,7 @@ HERE = pathlib.Path(__file__).resolve().parent
8
8
 
9
9
 
10
10
  class DDIMDLDataset(BaseDataset):
11
- def __init__(self, chemical_property_columns=['enzyme',
11
+ def __init__(self, embedding_dict, chemical_property_columns=['enzyme',
12
12
  'target',
13
13
  'pathway',
14
14
  'smile'],
@@ -16,7 +16,7 @@ class DDIMDLDataset(BaseDataset):
16
16
  ner_columns=[],
17
17
  **kwargs):
18
18
 
19
- super().__init__(chemical_property_columns, embedding_columns,
19
+ super().__init__(embedding_dict, chemical_property_columns, embedding_columns,
20
20
  ner_columns, **kwargs)
21
21
 
22
22
  # kwargs = {'index_path': str(HERE.joinpath('indexes'))}
@@ -7,30 +7,62 @@ import math
7
7
  from ddi_fw.utils.utils import create_folder_if_not_exists
8
8
 
9
9
 
10
+ def get_file_name_and_folder(file_path):
11
+ file_path_components = file_path.split('/')
12
+ file_name = file_path_components[-1]
13
+ file_path = file_path[:len(file_name)*-1-1]
14
+ return file_name, file_path
15
+
16
+
10
17
  class ZipHelper:
11
18
  def __init__(self):
12
19
  pass
13
20
 
14
- def zip_single_file(self, name, file_path, output_path):
21
+ def __zipdir__(self, file_path, zipf):
22
+ # ziph is zipfile handle
23
+ for root, dirs, files in os.walk(file_path):
24
+ for file in files:
25
+ zipf.write(os.path.join(root, file),
26
+ os.path.relpath(os.path.join(root, file),
27
+ os.path.join(file_path, '..')))
28
+
29
+ def zip_dir(self, zip_name, file_path, output_path):
30
+ create_folder_if_not_exists(output_path)
31
+ with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', z.ZIP_DEFLATED) as zipf:
32
+ self.__zipdir__(file_path, zipf)
33
+
34
+ def zip_single_file(self, zip_name, file_path, output_path):
15
35
  if not os.path.exists(output_path):
16
36
  os.makedirs(output_path)
17
- with z.ZipFile(f'{output_path}/{name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
37
+ with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
18
38
  zipObj.write(file_path, basename(file_path))
19
39
 
20
- def zip_as_multipart(self, zip_name, folder, file_name, output_path, chunk_size):
21
- file_path = folder+'/'+file_name
22
- parts_path=f"{folder}/parts"
23
- self.zip_single_file(zip_name, file_path, output_path)
24
- create_folder_if_not_exists(parts_path)
25
- with open(file_path, 'rb') as f:
40
+ def zip_as_multipart(self, zip_name, file_path, output_path, chunk_size):
41
+ parent_folder = os.path.dirname(file_path)
42
+
43
+ # parts_path = f"{parent_folder}/parts"
44
+ # create_folder_if_not_exists(parts_path)
45
+ # file_name, file_extension = os.path.splitext(file_path)
46
+ # file_name = os.path.basename(file_path)
47
+ file_name, folder = get_file_name_and_folder(file_path)
48
+
49
+ if os.path.isdir(file_path):
50
+ self.zip_dir(zip_name, file_path, output_path)
51
+ elif os.path.isfile(file_path):
52
+ self.zip_single_file(zip_name, file_path, output_path)
53
+ else:
54
+ return
55
+ with open(output_path+'/'+zip_name+'.zip', 'rb') as f:
26
56
  chunk_number = 1
27
57
  while True:
28
58
  chunk = f.read(chunk_size)
29
59
  if not chunk:
30
60
  break
31
- with open(f"{parts_path}/{file_name}.part{chunk_number:03}", 'wb') as chunk_file:
61
+ with open(f"{output_path}/{zip_name}.zip.part{chunk_number:03}", 'wb') as chunk_file:
32
62
  chunk_file.write(chunk)
33
63
  chunk_number += 1
64
+ if os.path.exists(output_path+'/'+zip_name+'.zip'):
65
+ os.remove(output_path+'/'+zip_name+'.zip')
34
66
 
35
67
  def zip(self, zip_prefix, input_path, output_path, chunk_size):
36
68
  files_paths = [input_path+'/' + p for p in os.listdir(input_path)]
@@ -67,7 +99,7 @@ class ZipHelper:
67
99
  z1.extractall(path=output_path)
68
100
  print(f'{file_path} has been extracted')
69
101
 
70
- def extract_multiparts(self, output_path, parts_path,output_file):
102
+ def extract_multiparts(self, output_path, parts_path, output_file):
71
103
  input_parts = [parts_path+'/' + p for p in os.listdir(parts_path)]
72
104
  with open(f"{output_path}/{output_file}", 'wb') as outfile:
73
105
  for part in input_parts:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.23
3
+ Version: 0.0.25
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,12 +1,12 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=gkzHCU9-BL_bOU-RvvwdOIp_DhKRfXKU5SvgfQLVTds,505
2
- ddi_fw/datasets/core.py,sha256=hWvDxptCTOazcDdALdHS2siHgPB1RNUa-lfVDV0snAA,18425
2
+ ddi_fw/datasets/core.py,sha256=Iv8l2WRM2rNyshdXd8LIz06qIGpz3EwCImEIc_Qr00w,18084
3
3
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
4
4
  ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
5
5
  ddi_fw/datasets/embedding_generator_new.py,sha256=GExjmBysPWkmFxTZQPs2yEmDdFllZ-qC9lhZeRQAfbQ,4320
6
6
  ddi_fw/datasets/feature_vector_generation.py,sha256=dxTHvp6uTkao9PdThs116Q3bWw_WTo9T8WigVL4G01s,3245
7
7
  ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
8
8
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
9
- ddi_fw/datasets/ddi_mdl/base.py,sha256=Uz1ZZS9LvxDR3EO_FqaZCKP3idQb3yytkwRzgBAYGAA,2422
9
+ ddi_fw/datasets/ddi_mdl/base.py,sha256=_R9-CE2P2kNoxpkDWWIHfSvsRidmYqTj-Ldp4HZpoPg,2453
10
10
  ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
11
11
  ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
12
12
  ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt,sha256=XVlDqYATckrQwNSXqMSKVBqyoN_Hg8SK6CL-XMdLADY,102176
@@ -78,8 +78,8 @@ ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TG
78
78
  ddi_fw/utils/__init__.py,sha256=nhNU_sEp55xsZ5VtvhozjKg6r4GWP6SJI13v8F_jbCg,217
79
79
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
80
80
  ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
81
- ddi_fw/utils/zip_helper.py,sha256=KnOFjf7MAw7jTjYuNJKzQL_VBGDRZiWy1a72T8_Uslg,3763
82
- ddi_fw-0.0.23.dist-info/METADATA,sha256=SkMnhWTRE0XsOT-PMUejmGVFdbNy8EBM-OWrkDErTgc,1541
83
- ddi_fw-0.0.23.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
84
- ddi_fw-0.0.23.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
85
- ddi_fw-0.0.23.dist-info/RECORD,,
81
+ ddi_fw/utils/zip_helper.py,sha256=CF6Th3ntXcx_F2bzoPhUWkVE27YqZqPNrxHgDewArfs,5123
82
+ ddi_fw-0.0.25.dist-info/METADATA,sha256=iGtI4gPTWiD9l5XKMCduX_kEY0ErYFX3kiLf8fujuFA,1541
83
+ ddi_fw-0.0.25.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
84
+ ddi_fw-0.0.25.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
85
+ ddi_fw-0.0.25.dist-info/RECORD,,