ddi-fw 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +19 -24
- ddi_fw/datasets/ddi_mdl/base.py +2 -2
- ddi_fw/utils/zip_helper.py +37 -7
- {ddi_fw-0.0.23.dist-info → ddi_fw-0.0.24.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.23.dist-info → ddi_fw-0.0.24.dist-info}/RECORD +7 -7
- {ddi_fw-0.0.23.dist-info → ddi_fw-0.0.24.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.23.dist-info → ddi_fw-0.0.24.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/core.py
CHANGED
@@ -21,8 +21,9 @@ def stack(df_column):
|
|
21
21
|
|
22
22
|
|
23
23
|
class BaseDataset(ABC):
|
24
|
-
def __init__(self, chemical_property_columns, embedding_columns, ner_columns,
|
24
|
+
def __init__(self,embedding_dict, chemical_property_columns, embedding_columns, ner_columns,
|
25
25
|
**kwargs):
|
26
|
+
self.embedding_dict = embedding_dict
|
26
27
|
self.__similarity_related_columns__ = []
|
27
28
|
self.__similarity_related_columns__.extend(chemical_property_columns)
|
28
29
|
self.__similarity_related_columns__.extend(ner_columns)
|
@@ -250,21 +251,21 @@ class BaseDataset(ABC):
|
|
250
251
|
|
251
252
|
# her bir metin tipi için embedding oluşturursan burayı düzenle
|
252
253
|
def prep(self):
|
253
|
-
if self.embedding_columns:
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
for embedding_column in self.embedding_columns:
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
254
|
+
# if self.embedding_columns:
|
255
|
+
# zip_helper = ZipHelper()
|
256
|
+
# zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
|
257
|
+
# str(HERE.joinpath('zips/embeddings')))
|
258
|
+
|
259
|
+
# embedding_dict = dict()
|
260
|
+
# for embedding_column in self.embedding_columns:
|
261
|
+
# embedding_file = HERE.joinpath(
|
262
|
+
# f'zips/embeddings/{embedding_column}_embeddings.pkl')
|
263
|
+
# embedding_values = pd.read_pickle(embedding_file)
|
264
|
+
# d = embedding_values.apply(
|
265
|
+
# lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
|
266
|
+
# x = {k: v for l in d.values.tolist() for k, v in l.items()}
|
267
|
+
# embedding_dict[embedding_column] = x
|
268
|
+
|
268
269
|
self.ner_df = CTakesNER().load()
|
269
270
|
drug_names = self.drugs_df['name'].to_list()
|
270
271
|
drug_ids = self.drugs_df['id'].to_list()
|
@@ -362,14 +363,8 @@ class BaseDataset(ABC):
|
|
362
363
|
# (value[row['id1']], value[row['id2']]), dtype=np.float16)
|
363
364
|
|
364
365
|
def x_fnc(row, embedding_values, embedding_column):
|
365
|
-
|
366
|
-
|
367
|
-
# v1 = first.iloc[0][embedding_column+'_embedding']
|
368
|
-
# v2 = second.iloc[0][embedding_column+'_embedding']
|
369
|
-
v1 = embedding_dict[embedding_column][row['id1']]
|
370
|
-
v2 = embedding_dict[embedding_column][row['id2']]
|
371
|
-
# v1 = embedding_dict[row['id1']][embedding_column+'_embedding']
|
372
|
-
# v2 = embedding_dict[row['id2']][embedding_column+'_embedding']
|
366
|
+
v1 = self.embedding_dict[embedding_column][row['id1']]
|
367
|
+
v2 = self.embedding_dict[embedding_column][row['id2']]
|
373
368
|
return np.float16(np.hstack(
|
374
369
|
(v1, v2)))
|
375
370
|
|
ddi_fw/datasets/ddi_mdl/base.py
CHANGED
@@ -8,7 +8,7 @@ HERE = pathlib.Path(__file__).resolve().parent
|
|
8
8
|
|
9
9
|
|
10
10
|
class DDIMDLDataset(BaseDataset):
|
11
|
-
def __init__(self,
|
11
|
+
def __init__(self, embedding_dict, chemical_property_columns=['enzyme',
|
12
12
|
'target',
|
13
13
|
'pathway',
|
14
14
|
'smile'],
|
@@ -16,7 +16,7 @@ class DDIMDLDataset(BaseDataset):
|
|
16
16
|
ner_columns=[],
|
17
17
|
**kwargs):
|
18
18
|
|
19
|
-
super().__init__(chemical_property_columns, embedding_columns,
|
19
|
+
super().__init__(embedding_dict, chemical_property_columns, embedding_columns,
|
20
20
|
ner_columns, **kwargs)
|
21
21
|
|
22
22
|
# kwargs = {'index_path': str(HERE.joinpath('indexes'))}
|
ddi_fw/utils/zip_helper.py
CHANGED
@@ -7,21 +7,51 @@ import math
|
|
7
7
|
from ddi_fw.utils.utils import create_folder_if_not_exists
|
8
8
|
|
9
9
|
|
10
|
+
def get_file_name_and_folder(file_path):
|
11
|
+
file_path_components = file_path.split('/')
|
12
|
+
file_name = file_path_components[-1]
|
13
|
+
file_path = file_path[:len(file_name)*-1-1]
|
14
|
+
return file_name, file_path
|
15
|
+
|
16
|
+
|
10
17
|
class ZipHelper:
|
11
18
|
def __init__(self):
|
12
19
|
pass
|
13
20
|
|
14
|
-
def
|
21
|
+
def __zipdir__(self, file_path, zipf):
|
22
|
+
# ziph is zipfile handle
|
23
|
+
for root, dirs, files in os.walk(file_path):
|
24
|
+
for file in files:
|
25
|
+
zipf.write(os.path.join(root, file),
|
26
|
+
os.path.relpath(os.path.join(root, file),
|
27
|
+
os.path.join(file_path, '..')))
|
28
|
+
|
29
|
+
def zip_dir(self, zip_name, file_path, output_path):
|
30
|
+
create_folder_if_not_exists(output_path)
|
31
|
+
with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', z.ZIP_DEFLATED) as zipf:
|
32
|
+
self.__zipdir__(file_path, zipf)
|
33
|
+
|
34
|
+
def zip_single_file(self, zip_name, file_path, output_path):
|
15
35
|
if not os.path.exists(output_path):
|
16
36
|
os.makedirs(output_path)
|
17
|
-
with z.ZipFile(f'{output_path}/{
|
37
|
+
with z.ZipFile(f'{output_path}/{zip_name}.zip', 'w', compression=z.ZIP_LZMA, compresslevel=z.ZIP_LZMA) as zipObj:
|
18
38
|
zipObj.write(file_path, basename(file_path))
|
19
39
|
|
20
|
-
def zip_as_multipart(self, zip_name,
|
21
|
-
|
22
|
-
|
23
|
-
|
40
|
+
def zip_as_multipart(self, zip_name, file_path, output_path, chunk_size):
|
41
|
+
parent_folder = os.path.dirname(file_path)
|
42
|
+
|
43
|
+
parts_path = f"{parent_folder}/parts"
|
24
44
|
create_folder_if_not_exists(parts_path)
|
45
|
+
# file_name, file_extension = os.path.splitext(file_path)
|
46
|
+
# file_name = os.path.basename(file_path)
|
47
|
+
file_name, folder = get_file_name_and_folder(file_path)
|
48
|
+
|
49
|
+
if os.path.isdir(file_path):
|
50
|
+
self.zip_dir(zip_name, file_path, output_path)
|
51
|
+
elif os.path.isfile(file_path):
|
52
|
+
self.zip_single_file(zip_name, file_path, output_path)
|
53
|
+
else:
|
54
|
+
return
|
25
55
|
with open(file_path, 'rb') as f:
|
26
56
|
chunk_number = 1
|
27
57
|
while True:
|
@@ -67,7 +97,7 @@ class ZipHelper:
|
|
67
97
|
z1.extractall(path=output_path)
|
68
98
|
print(f'{file_path} has been extracted')
|
69
99
|
|
70
|
-
def extract_multiparts(self, output_path, parts_path,output_file):
|
100
|
+
def extract_multiparts(self, output_path, parts_path, output_file):
|
71
101
|
input_parts = [parts_path+'/' + p for p in os.listdir(parts_path)]
|
72
102
|
with open(f"{output_path}/{output_file}", 'wb') as outfile:
|
73
103
|
for part in input_parts:
|
@@ -1,12 +1,12 @@
|
|
1
1
|
ddi_fw/datasets/__init__.py,sha256=gkzHCU9-BL_bOU-RvvwdOIp_DhKRfXKU5SvgfQLVTds,505
|
2
|
-
ddi_fw/datasets/core.py,sha256=
|
2
|
+
ddi_fw/datasets/core.py,sha256=Iv8l2WRM2rNyshdXd8LIz06qIGpz3EwCImEIc_Qr00w,18084
|
3
3
|
ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
|
4
4
|
ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
|
5
5
|
ddi_fw/datasets/embedding_generator_new.py,sha256=GExjmBysPWkmFxTZQPs2yEmDdFllZ-qC9lhZeRQAfbQ,4320
|
6
6
|
ddi_fw/datasets/feature_vector_generation.py,sha256=dxTHvp6uTkao9PdThs116Q3bWw_WTo9T8WigVL4G01s,3245
|
7
7
|
ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
8
8
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
9
|
-
ddi_fw/datasets/ddi_mdl/base.py,sha256=
|
9
|
+
ddi_fw/datasets/ddi_mdl/base.py,sha256=_R9-CE2P2kNoxpkDWWIHfSvsRidmYqTj-Ldp4HZpoPg,2453
|
10
10
|
ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
|
11
11
|
ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
|
12
12
|
ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt,sha256=XVlDqYATckrQwNSXqMSKVBqyoN_Hg8SK6CL-XMdLADY,102176
|
@@ -78,8 +78,8 @@ ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TG
|
|
78
78
|
ddi_fw/utils/__init__.py,sha256=nhNU_sEp55xsZ5VtvhozjKg6r4GWP6SJI13v8F_jbCg,217
|
79
79
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
80
80
|
ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
|
81
|
-
ddi_fw/utils/zip_helper.py,sha256=
|
82
|
-
ddi_fw-0.0.
|
83
|
-
ddi_fw-0.0.
|
84
|
-
ddi_fw-0.0.
|
85
|
-
ddi_fw-0.0.
|
81
|
+
ddi_fw/utils/zip_helper.py,sha256=oOnXlMVaH1Bj5EZpcOIxQMfKnLcuwamQCEOIuN288HQ,4976
|
82
|
+
ddi_fw-0.0.24.dist-info/METADATA,sha256=eKPBSBXAX-ooVfz_FTeKNPsV7aU3byQ7IDS_FVAOK78,1541
|
83
|
+
ddi_fw-0.0.24.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
84
|
+
ddi_fw-0.0.24.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
85
|
+
ddi_fw-0.0.24.dist-info/RECORD,,
|
File without changes
|
File without changes
|