ddi-fw 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to their registry. It is provided for informational purposes only and reflects the changes between those versions.
ddi_fw/datasets/core.py CHANGED
@@ -21,8 +21,9 @@ def stack(df_column):
 
 
 class BaseDataset(ABC):
-    def __init__(self, chemical_property_columns, embedding_columns, ner_columns,
+    def __init__(self,embedding_dict, chemical_property_columns, embedding_columns, ner_columns,
                  **kwargs):
+        self.embedding_dict = embedding_dict
         self.__similarity_related_columns__ = []
         self.__similarity_related_columns__.extend(chemical_property_columns)
         self.__similarity_related_columns__.extend(ner_columns)
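In 0.0.24 the embedding lookup is injected through the constructor instead of being built inside prep(). Judging from how it is indexed later in this diff (first by embedding column, then by drug id), the expected shape is a nested mapping; an illustrative toy value (column name, ids and vectors are made up, not part of the package API):

import numpy as np

embedding_dict = {
    # one inner mapping per embedding column: drug id -> embedding vector
    'description': {
        'DB00001': np.array([0.12, -0.03, 0.40], dtype=np.float16),
        'DB00002': np.array([0.48, 0.11, -0.27], dtype=np.float16),
    },
}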
@@ -250,21 +251,21 @@ class BaseDataset(ABC):
 
     # edit this part if you create an embedding for each text type
     def prep(self):
-        if self.embedding_columns:
-            zip_helper = ZipHelper()
-            zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
-                               str(HERE.joinpath('zips/embeddings')))
-
-            embedding_dict = dict()
-            for embedding_column in self.embedding_columns:
-                embedding_file = HERE.joinpath(
-                    f'zips/embeddings/{embedding_column}_embeddings.pkl')
-                embedding_values = pd.read_pickle(embedding_file)
-                d = embedding_values.apply(
-                    lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
-                x = {k: v for l in d.values.tolist() for k, v in l.items()}
-                embedding_dict[embedding_column] = x
-
+        # if self.embedding_columns:
+        #     zip_helper = ZipHelper()
+        #     zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
+        #                        str(HERE.joinpath('zips/embeddings')))
+
+        #     embedding_dict = dict()
+        #     for embedding_column in self.embedding_columns:
+        #         embedding_file = HERE.joinpath(
+        #             f'zips/embeddings/{embedding_column}_embeddings.pkl')
+        #         embedding_values = pd.read_pickle(embedding_file)
+        #         d = embedding_values.apply(
+        #             lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
+        #         x = {k: v for l in d.values.tolist() for k, v in l.items()}
+        #         embedding_dict[embedding_column] = x
+
         self.ner_df = CTakesNER().load()
         drug_names = self.drugs_df['name'].to_list()
         drug_ids = self.drugs_df['id'].to_list()
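The block removed above built embedding_dict inside prep() from the pickles bundled under zips/embeddings; in 0.0.24 the caller supplies it to the constructor instead, and x_fnc (further down in this diff) reads self.embedding_dict[embedding_column][drug_id]. A minimal sketch of building a compatible dict outside the dataset, mirroring the removed logic (the file layout is the one the old code used; the helper name is ours, not part of the package):

import pandas as pd

def build_embedding_dict(embedding_columns, embeddings_dir):
    # expected shape: {embedding_column: {drug_id: embedding_vector}}
    embedding_dict = {}
    for column in embedding_columns:
        # one pickled DataFrame per column, with an 'id' column and a
        # f'{column}_embedding' column, as in the removed prep() code
        df = pd.read_pickle(f"{embeddings_dir}/{column}_embeddings.pkl")
        embedding_dict[column] = dict(zip(df['id'], df[f'{column}_embedding']))
    return embedding_dict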
@@ -362,14 +363,8 @@ class BaseDataset(ABC):
             # (value[row['id1']], value[row['id2']]), dtype=np.float16)
 
         def x_fnc(row, embedding_values, embedding_column):
-            # first = embedding_values[embedding_values.id == row['id1']]
-            # second = embedding_values[embedding_values.id == row['id2']]
-            # v1 = first.iloc[0][embedding_column+'_embedding']
-            # v2 = second.iloc[0][embedding_column+'_embedding']
-            v1 = embedding_dict[embedding_column][row['id1']]
-            v2 = embedding_dict[embedding_column][row['id2']]
-            # v1 = embedding_dict[row['id1']][embedding_column+'_embedding']
-            # v2 = embedding_dict[row['id2']][embedding_column+'_embedding']
+            v1 = self.embedding_dict[embedding_column][row['id1']]
+            v2 = self.embedding_dict[embedding_column][row['id2']]
             return np.float16(np.hstack(
                 (v1, v2)))
 
ddi_fw/datasets/ddi_mdl/base.py CHANGED
@@ -8,7 +8,7 @@ HERE = pathlib.Path(__file__).resolve().parent
 
 
 class DDIMDLDataset(BaseDataset):
-    def __init__(self, chemical_property_columns=['enzyme',
+    def __init__(self, embedding_dict, chemical_property_columns=['enzyme',
                                                    'target',
                                                    'pathway',
                                                    'smile'],
@@ -16,7 +16,7 @@ class DDIMDLDataset(BaseDataset):
                                                    ner_columns=[],
                                                    **kwargs):
 
-        super().__init__(chemical_property_columns, embedding_columns,
+        super().__init__(embedding_dict, chemical_property_columns, embedding_columns,
                          ner_columns, **kwargs)
 
         # kwargs = {'index_path': str(HERE.joinpath('indexes'))}
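With both constructors updated, the dataset is given its embeddings up front. A hedged usage sketch (the column name is illustrative, build_embedding_dict is the illustrative helper above, and embedding_columns is assumed to remain a keyword parameter of DDIMDLDataset, which this hunk does not show):

embedding_dict = build_embedding_dict(['description'], 'zips/embeddings')
dataset = DDIMDLDataset(embedding_dict,
                        embedding_columns=['description'],
                        ner_columns=[])
dataset.prep()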
ddi_fw/utils/zip_helper.py CHANGED
@@ -4,27 +4,61 @@ from os.path import basename
 from collections import defaultdict
 import math
 
+from ddi_fw.utils.utils import create_folder_if_not_exists
+
+
+def get_file_name_and_folder(file_path):
+    file_path_components = file_path.split('/')
+    file_name = file_path_components[-1]
+    file_path = file_path[:len(file_name)*-1-1]
+    return file_name, file_path
+
 
 class ZipHelper:
     def __init__(self):
         pass
 
-    def zip_single_file(self, name, file_path, output_path):
+    def __zipdir__(self, file_path, zipf):
+        # ziph is zipfile handle
+        for root, dirs, files in os.walk(file_path):
+            for file in files:
+                zipf.write(os.path.join(root, file),
+                           os.path.relpath(os.path.join(root, file),
+                                           os.path.join(file_path, '..')))
+
+    def zip_dir(self, zip_name, file_path, output_path):
+        create_folder_if_not_exists(output_path)
+        with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', z.ZIP_DEFLATED) as zipf:
+            self.__zipdir__(file_path, zipf)
+
+    def zip_single_file(self, zip_name, file_path, output_path):
         if not os.path.exists(output_path):
             os.makedirs(output_path)
-        with z.ZipFile(f'{output_path}/{name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
+        with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
             zipObj.write(file_path, basename(file_path))
 
-    def zip_as_multipart(self, name, folder, file_name, output_path, chunk_size):
-        file_path = folder+'/'+file_name
-        self.zip_single_file(name, file_path, output_path)
+    def zip_as_multipart(self, zip_name, file_path, output_path, chunk_size):
+        parent_folder = os.path.dirname(file_path)
+
+        parts_path = f"{parent_folder}/parts"
+        create_folder_if_not_exists(parts_path)
+        # file_name, file_extension = os.path.splitext(file_path)
+        # file_name = os.path.basename(file_path)
+        file_name, folder = get_file_name_and_folder(file_path)
+
+        if os.path.isdir(file_path):
+            self.zip_dir(zip_name, file_path, output_path)
+        elif os.path.isfile(file_path):
+            self.zip_single_file(zip_name, file_path, output_path)
+        else:
+            return
         with open(file_path, 'rb') as f:
             chunk_number = 1
             while True:
                 chunk = f.read(chunk_size)
                 if not chunk:
                     break
-                with open(f"{folder}/path/{file_name}.part{chunk_number:03}", 'wb') as chunk_file:
+                with open(f"{parts_path}/{file_name}.part{chunk_number:03}", 'wb') as chunk_file:
                     chunk_file.write(chunk)
                 chunk_number += 1
 
@@ -63,9 +97,9 @@ class ZipHelper:
             z1.extractall(path=output_path)
             print(f'{file_path} has been extracted')
 
-    def extract_multiparts(self, output_path, parts_path):
+    def extract_multiparts(self, output_path, parts_path, output_file):
         input_parts = [parts_path+'/' + p for p in os.listdir(parts_path)]
-        with open(output_file, 'wb') as outfile:
+        with open(f"{output_path}/{output_file}", 'wb') as outfile:
             for part in input_parts:
                 with open(part, 'rb') as infile:
                     outfile.write(infile.read())
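On the ZipHelper side, zip_as_multipart now takes a single file_path instead of a folder/file_name pair, delegates to the new zip_dir (ZIP_DEFLATED) for directories and to zip_single_file (ZIP_LZMA) for regular files, and writes the chunks into a parts/ folder next to the input rather than the previously hard-coded path/ subfolder; extract_multiparts gains the output_file parameter it used to reference without defining. A hedged round-trip sketch (paths and chunk size are illustrative; the extraction output directory must already exist, since open() will not create it):

helper = ZipHelper()
# writes output/embeddings.zip and splits data/embeddings.pkl into
# data/parts/embeddings.pkl.part001, .part002, ...
helper.zip_as_multipart(zip_name='embeddings',
                        file_path='data/embeddings.pkl',
                        output_path='output',
                        chunk_size=50 * 1024 * 1024)  # 50 MB per part

# reassembles the parts (in os.listdir order) into restored/embeddings.pkl
helper.extract_multiparts(output_path='restored',
                          parts_path='data/parts',
                          output_file='embeddings.pkl')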
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ddi_fw
-Version: 0.0.22
+Version: 0.0.24
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,12 +1,12 @@
 ddi_fw/datasets/__init__.py,sha256=gkzHCU9-BL_bOU-RvvwdOIp_DhKRfXKU5SvgfQLVTds,505
-ddi_fw/datasets/core.py,sha256=hWvDxptCTOazcDdALdHS2siHgPB1RNUa-lfVDV0snAA,18425
+ddi_fw/datasets/core.py,sha256=Iv8l2WRM2rNyshdXd8LIz06qIGpz3EwCImEIc_Qr00w,18084
 ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
 ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
 ddi_fw/datasets/embedding_generator_new.py,sha256=GExjmBysPWkmFxTZQPs2yEmDdFllZ-qC9lhZeRQAfbQ,4320
 ddi_fw/datasets/feature_vector_generation.py,sha256=dxTHvp6uTkao9PdThs116Q3bWw_WTo9T8WigVL4G01s,3245
 ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
 ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
-ddi_fw/datasets/ddi_mdl/base.py,sha256=Uz1ZZS9LvxDR3EO_FqaZCKP3idQb3yytkwRzgBAYGAA,2422
+ddi_fw/datasets/ddi_mdl/base.py,sha256=_R9-CE2P2kNoxpkDWWIHfSvsRidmYqTj-Ldp4HZpoPg,2453
 ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
 ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
 ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt,sha256=XVlDqYATckrQwNSXqMSKVBqyoN_Hg8SK6CL-XMdLADY,102176
@@ -78,8 +78,8 @@ ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TG
 ddi_fw/utils/__init__.py,sha256=nhNU_sEp55xsZ5VtvhozjKg6r4GWP6SJI13v8F_jbCg,217
 ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
 ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
-ddi_fw/utils/zip_helper.py,sha256=D0pYHifqfKSKEG2oTpGh_0eNwx5fYZIuR6CuQ4BorSg,3576
-ddi_fw-0.0.22.dist-info/METADATA,sha256=wD31gaXsInwk3ERDHuhAHDqFlwGjtOLosWU7_3nu68M,1541
-ddi_fw-0.0.22.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
-ddi_fw-0.0.22.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
-ddi_fw-0.0.22.dist-info/RECORD,,
+ddi_fw/utils/zip_helper.py,sha256=oOnXlMVaH1Bj5EZpcOIxQMfKnLcuwamQCEOIuN288HQ,4976
+ddi_fw-0.0.24.dist-info/METADATA,sha256=eKPBSBXAX-ooVfz_FTeKNPsV7aU3byQ7IDS_FVAOK78,1541
+ddi_fw-0.0.24.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+ddi_fw-0.0.24.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.24.dist-info/RECORD,,