ddi-fw 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/ner/ner.py ADDED
@@ -0,0 +1,340 @@
1
+ from collections import defaultdict
2
+ import glob
3
+ import json
4
+ from pathlib import Path
5
+ import pathlib
6
+ from time import sleep
7
+ import pandas as pd
8
+
9
+ from tqdm import tqdm
10
+ import os
11
+ import requests
12
+ # from mmlrestclient as metamapliteclient
13
+ from enum import Enum
14
+ from ddi_fw.utils import create_folder_if_not_exists
15
+
16
+
17
+ # data = '''
18
+ # Lepirudin is a recombinant hirudin formed by 65 amino acids that acts as a highly specific and direct thrombin inhibitor.
19
+ # [L41539,L41569] Natural hirudin is an endogenous anticoagulant found in _Hirudo medicinalis_ leeches.
20
+ # [L41539] Lepirudin is produced in yeast cells and is identical to natural hirudin except for the absence of sulfate
21
+ # on the tyrosine residue at position 63 and the substitution of leucine for isoleucine at position 1 (N-terminal end).[A246609]
22
+
23
+ # Lepirudin is used as an anticoagulant in patients with heparin-induced thrombocytopenia (HIT),
24
+ # an immune reaction associated with a high risk of thromboembolic complications.[A3, L41539]
25
+ # HIT is caused by the expression of immunoglobulin G (IgG) antibodies that bind to the complex formed by heparin and platelet factor 4.
26
+ # This activates endothelial cells and platelets and enhances the formation of thrombi.
27
+ # [A246609] Bayer ceased the production of lepirudin (Refludan) effective May 31, 2012.[L41574]'''
28
+
29
+ # response = requests.post(url, data=data)
30
+
31
+ # print(response.content)
32
+
33
# Absolute directory containing this module; CTakesNER.load uses it to
# resolve the default location of the pickled NER output.
HERE = pathlib.Path(__file__).resolve().parent
34
+
35
+
36
class CTakesNER:
    """Run Apache cTAKES named-entity recognition over DrugBank text columns.

    For each configured column, ``run`` posts every drug's text to the cTAKES
    REST endpoint and stores the JSON response as
    ``{output_path}/{column}/{drugbank_id}.json``. ``create_dataframe`` folds
    those per-drug files into one pickled dataframe, which ``load`` reads back
    and optionally groups into set-valued ``tui``/``cui``/``entities`` columns.
    """

    def __init__(self, drugs_df=None,
                 api_url='http://localhost:8080/ctakes-web-rest/service/analyze?pipeline=Default',
                 output_path='ner-output/ctakes', ids=None,
                 columns=None):
        """
        Args:
            drugs_df: dataframe with a 'drugbank_id' column plus the text columns.
            api_url: cTAKES REST analyze endpoint.
            output_path: root folder for per-column JSON output.
            ids: drugbank_ids to EXCLUDE from processing (see run()).
            columns: names of the text columns to process.
        """
        # `ids` and `columns` default to None instead of the original shared
        # mutable list defaults ([]), which leak state across instances.
        self.drugs_df = drugs_df
        self.api_url = api_url
        self.columns = [] if columns is None else columns
        self.ids = [] if ids is None else ids
        self.output_path = output_path

    def run(self, run_for=None):
        """Send each drug's text to cTAKES and persist one JSON file per drug.

        Args:
            run_for: subset of self.columns to actually process; columns not
                listed are skipped (their output folders are still created).
        """
        run_for = [] if run_for is None else run_for

        # Ensure every column's output folder exists up front.
        for column in self.columns:
            os.makedirs(self.output_path + "/" + column, exist_ok=True)

        for column in self.columns:
            column_output_path = self.output_path + '/' + column
            if column not in run_for:
                continue
            # Drugs listed in self.ids are excluded, not included.
            if self.ids:
                self.drugs_df = self.drugs_df[~self.drugs_df['drugbank_id'].isin(
                    self.ids)]
            for index, row in self.drugs_df.iterrows():
                drugbank_id = row['drugbank_id']
                data = row[column]
                # Missing/blank text: write an empty result so downstream
                # loading still finds one file per drug.
                if data is None or pd.isna(data) or (isinstance(data, str) and len(data.strip()) == 0):
                    with open(f'{column_output_path}/{drugbank_id}.json', 'w', encoding='utf-8') as f:
                        json.dump([], f, ensure_ascii=False, indent=4)
                    continue
                response = requests.post(self.api_url, data=data.encode())

                with open(f'{column_output_path}/{drugbank_id}.json', 'w', encoding='utf-8') as f:
                    try:
                        obj = json.loads(response.text)
                        json.dump(obj, f, ensure_ascii=False, indent=4)
                    except ValueError:
                        # Narrowed from a bare `except:`; an unparsable
                        # response is recorded as an empty result.
                        json.dump([], f, ensure_ascii=False, indent=4)
                    continue

    def load(self, filename=None, group=True):
        """Load the pickled NER dataframe.

        Args:
            filename: pickle path; defaults to HERE/'output/ctakes/ctakes_ner.pkl'.
            group: when True, union the per-column 'tui*'/'cui*'/'entities*'
                list columns into three set-valued columns 'tui', 'cui',
                'entities'.

        Returns:
            The loaded (and optionally grouped) dataframe.
        """
        file_path = filename if filename else HERE.joinpath('output/ctakes/ctakes_ner.pkl')
        df = pd.read_pickle(file_path)

        if group:
            keys = list(df.columns.values)

            tui_columns = [key for key in keys if key.startswith('tui')]
            cui_columns = [key for key in keys if key.startswith('cui')]
            entities_columns = [key for key in keys if key.startswith('entities')]

            def _union(items):
                # Flatten a list of per-column lists into one set of codes.
                return {i for item in items for i in item}

            df['tui'] = df[tui_columns].values.tolist()
            df['tui'] = df['tui'].apply(_union)

            df['cui'] = df[cui_columns].values.tolist()
            df['cui'] = df['cui'].apply(_union)

            df['entities'] = df[entities_columns].values.tolist()
            df['entities'] = df['entities'].apply(_union)

        return df

    def create_dataframe(self, override=False):
        """Assemble the per-drug JSON files into one pickled dataframe.

        Args:
            override: when False and the pickle already exists, load and
                return it instead of rebuilding.

        Returns:
            Dataframe with one row per drug: 'drugbank_id' plus
            'cui_{column}', 'tui_{column}', 'entities_{column}' list columns.
        """
        filename = 'ctakes_ner.pkl'
        pickle_path = self.output_path + "/" + filename
        if not override and os.path.exists(pickle_path):
            return self.load(pickle_path)

        # Fix: the original called create_folder_if_not_exists on the pickle
        # *path itself*, creating a directory named 'ctakes_ner.pkl' and making
        # the later df.to_pickle to that same path fail. Only the parent
        # output folder must exist.
        create_folder_if_not_exists(self.output_path)
        dict_of_dict = defaultdict(dict)
        for column in self.columns:
            all_json_files = f'{self.output_path}/{column}/' + '*.json*'
            for filepath in tqdm(glob.glob(all_json_files)):
                with open(filepath, 'r', encoding="utf8") as f:
                    file_name = Path(f.name).stem
                    t = dict_of_dict[file_name]
                    data = json.load(f)
                    if not data:
                        # Empty result file: record empty lists for this column.
                        t['drugbank_id'] = file_name
                        t[f'cui_{column}'] = []
                        t[f'tui_{column}'] = []
                        t[f'entities_{column}'] = []
                        dict_of_dict[file_name] = t
                        continue
                    entities = []
                    cuis = []
                    tuis = []
                    # NOTE(review): each iteration REPLACES the lists, so only
                    # the response's last key survives. Preserved as-is;
                    # confirm whether accumulation across keys was intended.
                    for key, value in data.items():
                        entities = [v['text'] for v in value]
                        cuis = [attr['cui']
                                for v in value for attr in v['conceptAttributes']]
                        tuis = [attr['tui']
                                for v in value for attr in v['conceptAttributes']]
                        # codingScheme is also available but unused.

                    if 'drugbank_id' not in t:
                        t['drugbank_id'] = file_name
                    t[f'cui_{column}'] = cuis
                    t[f'tui_{column}'] = tuis
                    t[f'entities_{column}'] = entities
                    dict_of_dict[file_name] = t

        df = pd.DataFrame(dict_of_dict.values())
        df.to_pickle(pickle_path)
        return df
158
+
159
+
160
+ # no module named 'mmlrestclient'
161
+ # class MMSLiteNER:
162
+
163
+ # # https://ii.nlm.nih.gov/metamaplite/js/formControls.js
164
+
165
+ # class Groups(Enum):
166
+
167
+ # activities_group = ['acty', 'bhvr', 'dora',
168
+ # 'evnt', 'gora', 'inbe', 'mcha', 'ocac', 'socb']
169
+
170
+ # anatomy_group = ['anst', 'blor', 'bpoc', 'bsoj', 'bdsu',
171
+ # 'bdsy', 'cell', 'celc', 'emst', 'ffas', 'tisu']
172
+
173
+ # checmicals_and_drugs_group = ['aapp', 'antb', 'bacs', 'bodm', 'carb', 'chem', 'chvf', 'chvs',
174
+ # 'clnd', 'eico', 'elii', 'enzy', 'hops', 'horm', 'imft',
175
+ # 'irda', 'inch', 'lipd', 'nsba', 'nnon', 'orch', 'opco',
176
+ # 'phsu', 'rcpt', 'strd', 'vita']
177
+
178
+ # concept_and_ideas_group = ['clas', 'cnce', 'ftcn', 'grpa', 'idcn', 'inpr', 'lang',
179
+ # 'qlco', 'rnlw', 'spco', 'tmco']
180
+
181
+ # devices_group = ['drdd', 'medd', 'resd']
182
+
183
+ # disorders_group = ['acab', 'anab', 'bact', 'comd', 'cgab', 'dsyn',
184
+ # 'emod', 'fndg', 'inpo', 'mobd', 'patf', 'sosy']
185
+
186
+ # # abbreviated disorders group, finding and congenital abnormality removed
187
+ # disorders_abbrev_group = ['acab', 'anab', 'bact', 'cgab', 'dsyn',
188
+ # 'emod', 'inpo', 'mobd', 'patf', 'sosy']
189
+
190
+ # genes_and_molecular_sequences = [
191
+ # 'amas', 'crbs', 'gngm', 'mosq', 'nusq']
192
+
193
+ # geographic_areas = ['geoa']
194
+
195
+ # living_being = ['aggp', 'amph', 'anim', 'arch', 'bact', 'bird', 'euka', 'fish',
196
+ # 'fngs', 'grup', 'humn', 'mamm', 'orgm', 'podg',
197
+ # 'plnt', 'popg', 'prog', 'rept', 'vtbt', 'virs']
198
+
199
+ # objects = ['enty', 'food', 'mnob', 'sbst']
200
+
201
+ # occupations = ['bmod', 'ocdi']
202
+
203
+ # organizations = ['hcro', 'orgt', 'pros', 'shro']
204
+
205
+ # phenomena = ['eehu' 'hcpp', 'lbtr', 'npop', 'phpr']
206
+
207
+ # physiology = ['celf', 'clna', 'clnd']
208
+
209
+ # procedures = ['diap', 'edac', 'hlca', 'lbpr', 'mbrt', 'resa', 'topp']
210
+
211
+ # def __init__(self, drugs_df, input_path='drugbank/output', output_path='ner-output/metamaplite', ids=[],
212
+ # columns=[],
213
+ # included_groups: Groups = [],
214
+ # excluded_groups: Groups = [],
215
+ # ):
216
+
217
+ # self.drugs_df = drugs_df
218
+ # self.columns = columns
219
+ # self.ids = ids
220
+ # self.output_path = output_path
221
+ # self.included_groups = set()
222
+ # for i, g in enumerate(included_groups):
223
+ # for v in g.value:
224
+ # self.included_groups.add(v)
225
+
226
+ # self.excluded_groups = set()
227
+ # for i, g in enumerate(excluded_groups):
228
+ # for v in g.value:
229
+ # self.excluded_groups.add(v)
230
+
231
+ # for column in columns:
232
+ # if not os.path.exists(output_path+"/"+column):
233
+ # os.makedirs(output_path+"/"+column)
234
+
235
+ # def run_ner(self):
236
+ # # # url = 'https://ii-public1.nlm.nih.gov/metamaplite/rest/annotate'
237
+ # base_url = 'https://ii.nlm.nih.gov/metamaplite/rest/annotate'
238
+ # acceptfmt = 'text/plain'
239
+ # for column in self.columns:
240
+ # column_output_path = self.output_path+'/'+column
241
+
242
+ # if self.ids:
243
+ # self.drugs_df = self.drugs_df[~self.drugs_df['drugbank_id'].isin(
244
+ # self.ids)]
245
+ # for index, row in self.drugs_df.iterrows():
246
+ # drugbank_id = row['drugbank_id']
247
+ # input_text = row[column]
248
+ # params = [('inputtext', input_text), ('docformat', 'freetext'),
249
+ # ('resultformat', 'json'), ('sourceString', 'all'),
250
+ # ('semanticTypeString', 'all')]
251
+ # resp = metamapliteclient.handle_request(
252
+ # base_url, acceptfmt, params)
253
+
254
+ # with open(f'{column_output_path}/{drugbank_id}.json', 'w', encoding='utf-8') as f:
255
+ # obj = json.loads(resp.text)
256
+ # json.dump(obj, f, ensure_ascii=False, indent=4)
257
+
258
+ # if index % 10 == 0:
259
+ # sleep(10)
260
+
261
+ # def __dict_of_semantic_types__(self, path):
262
+ # m = dict()
263
+ # with open(path, 'r', encoding='utf-8') as f:
264
+ # data = f.read()
265
+ # rows = data.split("\n")
266
+ # for row in rows:
267
+ # if row != "":
268
+ # arr = row.split("|")
269
+ # m[arr[0]] = arr[1]
270
+ # return m
271
+
272
+ # def load(self, semantic_type_path: str, dataframe_columns=[]):
273
+ # semantic_type_dict = self.__dict_of_semantic_types__(
274
+ # semantic_type_path)
275
+
276
+ # cui_dict = defaultdict(dict)
277
+ # tui_dict = defaultdict(dict)
278
+ # for column in self.columns:
279
+ # all_json_files = f'{self.output_path}/{column}/'+'*.json*'
280
+ # for filepath in tqdm(glob.glob(all_json_files)):
281
+ # with open(filepath, 'r', encoding="utf8") as f:
282
+ # file_name = Path(f.name).stem
283
+ # data = json.load(f)
284
+ # filtered_obj = [o for o in data if len(o['evlist']) == 1]
285
+ # # filtered_obj = [o for o in data if len(o['evlist']) == 1 and set(
286
+ # # checmicals_and_drugs_group).intersection(set(o['evlist'][0]['conceptinfo']['semantictypes']))]
287
+
288
+ # if self.included_groups:
289
+ # evaluation = [o['evlist'][0]['conceptinfo'] for o in filtered_obj if len(o['evlist']) == 1
290
+ # and
291
+ # self.included_groups.intersection(
292
+ # set(o['evlist'][0]['conceptinfo']['semantictypes']))]
293
+ # # cuis = [o['evlist'][0]['conceptinfo']['cui'] for o in filtered_obj if len(o['evlist']) == 1
294
+ # # and
295
+ # # self.included_groups.intersection(
296
+ # # set(o['evlist'][0]['conceptinfo']['semantictypes']))]
297
+ # elif self.excluded_groups:
298
+ # evaluation = cuis = [o['evlist'][0]['conceptinfo'] for o in filtered_obj if len(o['evlist']) == 1
299
+ # and
300
+ # not self.excluded_groups.intersection(
301
+ # set(o['evlist'][0]['conceptinfo']['semantictypes']))]
302
+ # # cuis = [o['evlist'][0]['conceptinfo']['cui'] for o in filtered_obj if len(o['evlist']) == 1
303
+ # # and
304
+ # # not self.excluded_groups.intersection(
305
+ # # set(o['evlist'][0]['conceptinfo']['semantictypes']))]
306
+ # else:
307
+ # evaluation = [o['evlist'][0]['conceptinfo']
308
+ # for o in filtered_obj if len(o['evlist']) == 1]
309
+ # # cuis = [o['evlist'][0]['conceptinfo']['cui']
310
+ # # for o in filtered_obj if len(o['evlist']) == 1]
311
+
312
+ # # cuis = [o['evlist'][0]['conceptinfo']['cui'] for o in filtered_obj if len(o['evlist']) == 1 and set(
313
+ # # checmicals_and_drugs_group).intersection(set(o['evlist'][0]['conceptinfo']['semantictypes']))]
314
+ # cuis = [ev['cui'] for ev in evaluation]
315
+ # semantic_types = [ev['semantictypes'] for ev in evaluation]
316
+ # tuis = [semantic_type_dict[s]
317
+ # for semantic_type in semantic_types for s in semantic_type]
318
+
319
+ # d = cui_dict[file_name]
320
+ # d['drugbank_id'] = file_name
321
+ # d[column] = set(cuis)
322
+
323
+ # t = tui_dict[file_name]
324
+ # t['drugbank_id'] = file_name
325
+ # t[column] = set(tuis)
326
+ # tui_dict[file_name] = t
327
+
328
+ # columns = self.columns
329
+ # columns.insert(0, 'drugbank_id')
330
+ # df = pd.DataFrame(tui_dict.values(),
331
+ # # orient='index',
332
+ # columns=columns
333
+ # )
334
+
335
+ # dataframe_columns.insert(0, 'drugbank_id')
336
+
337
+ # new_columns = {columns[i]: dataframe_columns[i]
338
+ # for i in range(len(columns))}
339
+ # df.rename(columns=new_columns, inplace=True)
340
+ # return df
@@ -0,0 +1,3 @@
1
+ from .utils import create_folder_if_not_exists, utc_time_as_string,utc_time_as_string_simple_format, compress_and_save_data
2
+ from .zip_helper import ZipHelper
3
+ from .enums import UMLSCodeTypes, DrugBankTextDataTypes
ddi_fw/utils/enums.py ADDED
@@ -0,0 +1,23 @@
1
+ from enum import Enum
2
+
3
+
4
class UMLSCodeTypes(Enum):
    """Categories of UMLS-derived NER output columns.

    NOTE(review): the trailing comma on each assignment makes every member's
    value a 1-tuple, e.g. ``UMLSCodeTypes.TUI.value == ('tui',)`` — not a
    plain string. Callers may rely on indexing ``.value[0]``; confirm before
    removing the commas.
    """
    TUI = 'tui',
    CUI = 'cui',
    ENTITIES = 'entities',
8
+
9
+
10
class DrugBankTextDataTypes(Enum):
    """Names of the DrugBank free-text fields processed by the NER step.

    NOTE(review): as with UMLSCodeTypes, the trailing commas make each
    member's value a 1-tuple (e.g. ``('description',)``), not a string;
    confirm caller expectations before changing.
    """
    DESCRIPTION = 'description',
    INDICATION = 'indication',
    SYNTHESIS_REFERENCE = 'synthesis_reference',
    PHARMACODYNAMICS = 'pharmacodynamics',
    MECHANISM_OF_ACTION = 'mechanism_of_action',
    TOXICITY = 'toxicity',
    METABOLISM = 'metabolism',
    ABSORPTION = 'absorption',
    HALF_LIFE = 'half_life',
    PROTEIN_BINDING = 'protein_binding',
    ROUTE_OF_ELIMINATION = 'route_of_elimination',
    VOLUME_OF_DISTRIBUTION = 'volume_of_distribution',
    CLEARANCE = 'clearance',
ddi_fw/utils/utils.py ADDED
@@ -0,0 +1,103 @@
1
+ import gzip
2
+ import json
3
+ import os
4
+
5
+ from datetime import datetime, timezone
6
+
7
+ from matplotlib import pyplot as plt
8
+
9
+
10
def create_folder_if_not_exists(path):
    """Create *path* (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` instead of the previous ``os.path.exists`` guard,
    which was vulnerable to a check-then-create (TOCTOU) race when two
    processes created the same folder concurrently.
    """
    os.makedirs(path, exist_ok=True)
13
+
14
+
15
def utc_time_as_string():
    """Return the current UTC time as 'YYYY-MM-DDTHH:MM:SS.mmm'."""
    now_utc = datetime.now(timezone.utc)
    # strftime emits 6-digit microseconds; drop the last three digits to
    # keep millisecond precision.
    return now_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
19
+
20
def utc_time_as_string_simple_format():
    """Return the current UTC date as a compact 'YYYYMMDD' string."""
    return datetime.now(timezone.utc).strftime('%Y%m%d')
24
+
25
+ # https://gist.github.com/LouisAmon/4bd79b8ab80d3851601f3f9016300ac4
26
+
27
+
28
def compress_data(data):
    """Serialize *data* to compact JSON and return the gzip-compressed bytes."""
    # Compact separators: no whitespace after ',' or ':'.
    payload = json.dumps(data, separators=(',', ":")).encode('UTF-8')
    return gzip.compress(payload)
37
+
38
+
39
def compress_and_save_data(data, path, file_name):
    """Gzip *data* via compress_data and write it to ``path/file_name``.

    Note: writing through ``gzip.open`` adds a second gzip layer on top of
    the one produced by compress_data; ``decompress`` strips both layers.
    """
    payload = compress_data(data)
    create_folder_if_not_exists(path)
    target = f'{path}/{file_name}'
    with gzip.open(target, 'wb') as out:
        out.write(payload)
44
+
45
def decompress(gzip_file):
    """Read *gzip_file*, strip both gzip layers, and return the parsed JSON.

    Counterpart of compress_and_save_data, which writes doubly-compressed
    payloads (compress_data output written through gzip.open).
    """
    with gzip.open(gzip_file, 'rb') as fin:
        inner = fin.read()              # outer layer removed by gzip.open
    raw = gzip.decompress(inner)        # inner layer from compress_data
    return json.loads(raw.decode('UTF-8'))
52
+
53
+
54
if __name__ == "__main__":
    # Ad-hoc developer smoke test: decompress a locally downloaded metrics
    # archive and plot its micro-averaged precision-recall curve. The paths
    # below are hard-coded to one developer's Windows machine, so this block
    # fails anywhere else; it is not part of the library API.
    # json_file = f'C:\\Users\\kivanc\\Downloads\\metrics.json'
    # file_data = open(json_file, "r", 1).read()
    # a = json.loads(file_data)  # store in json structure
    # # a = {'key1':1, 'key2':2}
    # compressed = compress_data(a)
    # with gzip.open('deneme.gzip', 'wb') as f:
    #     f.write(compressed)

    # with gzip.open('deneme.gzip', 'r') as fin:        # 4. gzip
    #     json_bytes = fin.read()                       # 3. bytes (i.e. UTF-8)
    #     json_bytes = gzip.decompress(json_bytes)
    #     json_str = json_bytes.decode('UTF-8')         # 2. string (i.e. JSON)
    #     data = json.loads(json_str)
    # print(data)

    gzip_file = f'C:\\Users\\kivanc\\Downloads\\metrics (2).gzip'
    stored_file = f'C:\\Users\\kivanc\\Downloads\\save.png'
    # decompress() undoes the double gzip layer written by compress_and_save_data.
    metrics = decompress(gzip_file)
    # print(metrics)

    # Plot Precision-Recall curves for each class and micro-average
    fig = plt.figure()
    plt.step(metrics['recall']['micro_event'], metrics['precision']['micro_event'],
             color='b', alpha=0.2, where='post')
    plt.fill_between(
        metrics['recall']["micro_event"], metrics['precision']["micro_event"], step='post', alpha=0.2, color='b')

    # for i in range(65):
    #     plt.step( metrics['recall'][str(i)], metrics['precision'][str(i)], where='post',
    #              label='Class {0} (AUC={1:0.2f})'.format(i, metrics['roc_aupr'][str(i)]))

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title(
        'Micro-average Precision-Recall curve: AUC={0:0.2f}'.format(metrics['roc_aupr']["micro"]))
    plt.legend(loc='best')
    plt.savefig(stored_file)
    # plt.show()

    # Unrelated plotly radar-chart experiment left in by the author;
    # requires the third-party `plotly` package at runtime.
    import plotly.express as px
    import pandas as pd
    df = pd.DataFrame(dict(
        r=[1, 5, 2, 2, 3],
        theta=['processing cost','mechanical properties','chemical stability',
               'thermal stability', 'device integration']))
    fig = px.line_polar(df, r='r', theta='theta', line_close=True)
    fig.show()
@@ -0,0 +1,66 @@
1
+ import zipfile as z
2
+ import os
3
+ from os.path import basename
4
+ from collections import defaultdict
5
+ import math
6
+
7
+
8
class ZipHelper:
    """Zip files into fixed-size LZMA chunks and extract them back."""

    def __init__(self):
        pass

    def zip_single_file(self, name, file_path, output_path):
        """Zip one file into ``{output_path}/{name}.zip`` (LZMA).

        Fix: the original passed ``compresslevel=z.ZIP_LZMA`` — the
        compression-*method* constant (14) used as a compression level.
        ``compresslevel`` is ignored for ZIP_LZMA, so the argument is dropped.
        """
        os.makedirs(output_path, exist_ok=True)
        with z.ZipFile(f'{output_path}/{name}.zip', 'w', compression=z.ZIP_LZMA) as zip_obj:
            zip_obj.write(file_path, basename(file_path))

    def zip(self, zip_prefix, input_path, output_path, chunk_size):
        """Zip all files under *input_path* into chunks of *chunk_size* files.

        Archives are named ``{zip_prefix}.{NNN}.zip`` with the part number
        zero-padded to the chunk count's width plus two digits of headroom.
        """
        files_paths = [input_path + '/' + p for p in os.listdir(input_path)]
        count_of_chunks = math.ceil(len(files_paths) / chunk_size)
        zero_padding_length = len(str(int(count_of_chunks))) + 2

        os.makedirs(output_path, exist_ok=True)

        # Assign each file to its chunk key, then write one archive per key.
        part = 1
        files_in_part = 0
        zip_dict = defaultdict(list)
        for file_path in files_paths:
            padded_part = f'{part}'.zfill(zero_padding_length)
            zip_dict[f'{zip_prefix}.{padded_part}'].append(file_path)
            files_in_part += 1
            if files_in_part % chunk_size == 0:
                files_in_part = 0
                part += 1

        for key, chunk_files in zip_dict.items():
            with z.ZipFile(f'{output_path}/{key}.zip', 'w', compression=z.ZIP_LZMA) as zip_obj:
                for file_path in chunk_files:
                    zip_obj.write(file_path, basename(file_path))

    def extract(self, input_path, output_path):
        """Extract every ``*.zip`` directly under *input_path* into *output_path*."""
        files_paths = [input_path + '/' + p for p in os.listdir(input_path)]
        os.makedirs(output_path, exist_ok=True)
        for file_path in files_paths:
            if file_path.endswith('zip'):
                with z.ZipFile(file_path, 'r') as zip_obj:
                    zip_obj.extractall(path=output_path)
                print(f'{file_path} has been extracted')
52
+
53
+
54
+ # if __name__ == "__main__":
55
+ # helper = ZipHelper()
56
+ # helper.zip(zip_prefix='drugs', input_path='drugbank/drugs',
57
+ # output_path='drugbank/drugs-zips', chunk_size=1000)
58
+ # helper.extract(input_path='drugbank/drugs-zips',
59
+ # output_path='drugbank/drugs-extracted')
60
+ # path = ''
61
+ # import pandas as pd
62
+ # d = {'col1': [1, 2], 'col2': [3, 4]}
63
+ # df = pd.DataFrame(data=d)
64
+ # df.to_pickle('test/dataframe.pickle')
65
+ # helper.zip_single_file(file_path='test/dataframe.pickle',output_path='test/output', name='zip')
66
+ # helper.extract(input_path='test/output', output_path='test/output')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi-fw
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: Do not use :)
5
5
  Home-page: UNKNOWN
6
6
  Author: Kıvanç Bayraktar
@@ -0,0 +1,28 @@
1
+ ddi_fw/datasets/__init__.py,sha256=WmupqKInz9XMorCAUFS_iUZoSB56xasTrC8eb0UlCVk,540
2
+ ddi_fw/datasets/core.py,sha256=4b8ai37BpQwFeE1x1saD-YP_RN_h8-LrG5o9Sh4lQik,18418
3
+ ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
4
+ ddi_fw/datasets/embedding_generator.py,sha256=Jqrlv88RCu0Lg812KsA12X0cSaZuxbckJ4LNRKNy_qw,2173
5
+ ddi_fw/datasets/embedding_generator_new.py,sha256=GExjmBysPWkmFxTZQPs2yEmDdFllZ-qC9lhZeRQAfbQ,4320
6
+ ddi_fw/datasets/feature_vector_generation.py,sha256=dxTHvp6uTkao9PdThs116Q3bWw_WTo9T8WigVL4G01s,3245
7
+ ddi_fw/datasets/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
8
+ ddi_fw/drugbank/__init__.py,sha256=4_eKdZsnXUSJyr-TZpHwIn13JC6PqS5imeLJJbgt2-A,94
9
+ ddi_fw/drugbank/drugbank_parser.py,sha256=Zc-WXCqE6cjaPpXmDSFYZWZgckTrhEHl5PW9ZNxtbcs,5457
10
+ ddi_fw/drugbank/drugbank_processor.py,sha256=guH586c92wBjVZlfpYQlJ4KxAewTLvte-vcpUSlqWZY,17685
11
+ ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
12
+ ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
13
+ ddi_fw/experiments/__init__.py,sha256=UJwd2i3QcuaI1YjC_2yGCiLuEMTT5Yo7rDFxw89chIw,108
14
+ ddi_fw/experiments/custom_torch_model.py,sha256=iQ_R_EApzD2JCcASN8cie6D21oh7VCxaOQ45_dkiGwc,2576
15
+ ddi_fw/experiments/evaluation_helper.py,sha256=pY69cezV3WzrXw1bduIwRJfah1w3wXJ2YyTNim1J7ko,9349
16
+ ddi_fw/experiments/tensorflow_helper.py,sha256=FnwF0UHb-5gUDC8FsXognq8gFyRaWMz9khASAUIe0Hk,12641
17
+ ddi_fw/experiments/test.py,sha256=rf7UB2SUZR2-UL_IVOm8_8NOY9__2dVGlUbct5tqf-0,1981
18
+ ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
19
+ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
20
+ ddi_fw/ner/ner.py,sha256=BEs9AFljAxOQrC2BEP1raSzRoypcfELS5UTdl4bjTqw,15863
21
+ ddi_fw/utils/__init__.py,sha256=nhNU_sEp55xsZ5VtvhozjKg6r4GWP6SJI13v8F_jbCg,217
22
+ ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
23
+ ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
24
+ ddi_fw/utils/zip_helper.py,sha256=DjtwcGBoYw8zOP-Ye5OxzeR1OgN3WfNkVx85nb0wbJA,2635
25
+ ddi_fw-0.0.3.dist-info/METADATA,sha256=EpE1WfQ2EviFaJolYzdkAlBlmAMH-RDrrzx3SKk_Jws,391
26
+ ddi_fw-0.0.3.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
27
+ ddi_fw-0.0.3.dist-info/top_level.txt,sha256=Lfsqipq5Jm60ALnmFA_cdNfpVfzBJlKM0GiQ_sB8KGE,75
28
+ ddi_fw-0.0.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ ddi_fw/datasets
2
+ ddi_fw/drugbank
3
+ ddi_fw/experiments
4
+ ddi_fw/ner
5
+ ddi_fw/utils
@@ -1,4 +0,0 @@
1
- ddi_fw-0.0.1.dist-info/METADATA,sha256=G-yvgvniNCT677dkbA9kmW6mgFh43bIb71E6zxhqnmA,391
2
- ddi_fw-0.0.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
3
- ddi_fw-0.0.1.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
4
- ddi_fw-0.0.1.dist-info/RECORD,,
@@ -1 +0,0 @@
1
-
File without changes