ddi-fw 0.0.17__py3-none-any.whl → 0.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/drugbank/drugbank_processor.py +31 -19
- {ddi_fw-0.0.17.dist-info → ddi_fw-0.0.18.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.17.dist-info → ddi_fw-0.0.18.dist-info}/RECORD +5 -5
- {ddi_fw-0.0.17.dist-info → ddi_fw-0.0.18.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.17.dist-info → ddi_fw-0.0.18.dist-info}/top_level.txt +0 -0
@@ -40,7 +40,7 @@ class DrugBankProcessor():
|
|
40
40
|
zip_helper = ZipHelper()
|
41
41
|
zip_helper.extract(input_path=input_path, output_path=output_path)
|
42
42
|
|
43
|
-
def get_external_identifiers(self,input_path='drugs'):
|
43
|
+
def get_external_identifiers(self, input_path='drugs'):
|
44
44
|
external_identifier_list = []
|
45
45
|
all_json_files = input_path+'/*.json*'
|
46
46
|
|
@@ -50,14 +50,15 @@ class DrugBankProcessor():
|
|
50
50
|
data = json.load(f)
|
51
51
|
drug_1 = data['name']
|
52
52
|
drug_1_id = [d['value']
|
53
|
-
|
53
|
+
for d in data['drugbank-id'] if d['primary'] == True][0]
|
54
54
|
external_identifiers = data['external-identifiers'] if "external-identifiers" in data else None
|
55
55
|
external_identifiers_dict = {}
|
56
56
|
external_identifiers_dict['name'] = drug_1
|
57
57
|
external_identifiers_dict['drugbank_id'] = drug_1_id
|
58
58
|
if external_identifiers is not None:
|
59
59
|
for p in external_identifiers['external-identifier']:
|
60
|
-
external_identifiers_dict[p['resource'].lower().replace(
|
60
|
+
external_identifiers_dict[p['resource'].lower().replace(
|
61
|
+
" ", "_")] = p['identifier']
|
61
62
|
# external_identifiers_dict = dict(
|
62
63
|
# [(p['resource'].lower().replace(" ","_"), p['identifier']) for p in external_identifiers['external-identifier']])
|
63
64
|
# external_identifiers_dict['name'] = drug_1
|
@@ -65,11 +66,16 @@ class DrugBankProcessor():
|
|
65
66
|
external_identifier_list.append(external_identifiers_dict)
|
66
67
|
return external_identifier_list
|
67
68
|
|
68
|
-
|
69
|
-
|
69
|
+
def process(self,
|
70
|
+
input_path='drugs',
|
71
|
+
output_path='output',
|
72
|
+
save_as_sql=True,
|
73
|
+
db_path=r"./drugbank.db",
|
74
|
+
zip_outputs=True,
|
75
|
+
ner_data_path):
|
70
76
|
if not os.path.exists(output_path):
|
71
77
|
os.makedirs(output_path)
|
72
|
-
ner_df =CTakesNER().load()
|
78
|
+
ner_df = CTakesNER().load(ner_data_path)
|
73
79
|
drugs_pickle_path = output_path+'/drugs.pkl'
|
74
80
|
drugs_csv_path = output_path+'/drugs.gzip'
|
75
81
|
ddi_pickle_path = output_path + '/ddi.pkl'
|
@@ -158,12 +164,12 @@ class DrugBankProcessor():
|
|
158
164
|
pathways = [
|
159
165
|
d['smpdb-id'] for d in data['pathways']['pathway']]
|
160
166
|
|
161
|
-
|
162
167
|
if external_identifiers is not None:
|
163
168
|
external_identifiers_dict = dict(
|
164
169
|
[(p['resource'], p['identifier']) for p in external_identifiers['external-identifier']])
|
165
170
|
external_identifiers_dict['drugbank_id'] = drug_1_id
|
166
|
-
external_identifier_list.append(
|
171
|
+
external_identifier_list.append(
|
172
|
+
external_identifiers_dict)
|
167
173
|
# add note column
|
168
174
|
smiles = None
|
169
175
|
morgan_hashed = None
|
@@ -182,19 +188,22 @@ class DrugBankProcessor():
|
|
182
188
|
morgan_hashed = np.zeros(881).tolist()
|
183
189
|
|
184
190
|
# TODO cui, tui, entities other types of texts, test it
|
185
|
-
tuis_description = ner_df[ner_df['drugbank_id']
|
191
|
+
tuis_description = ner_df[ner_df['drugbank_id']
|
192
|
+
== drug_1_id]['tui_description'].values
|
186
193
|
if len(tuis_description) > 0:
|
187
194
|
tuis_description = tuis_description[0]
|
188
195
|
else:
|
189
196
|
tuis_description = None
|
190
197
|
|
191
|
-
cuis_description = ner_df[ner_df['drugbank_id']
|
198
|
+
cuis_description = ner_df[ner_df['drugbank_id']
|
199
|
+
== drug_1_id]['cui_description'].values
|
192
200
|
if len(cuis_description) > 0:
|
193
201
|
cuis_description = cuis_description[0]
|
194
202
|
else:
|
195
203
|
cuis_description = None
|
196
204
|
|
197
|
-
entities_description = ner_df[ner_df['drugbank_id']
|
205
|
+
entities_description = ner_df[ner_df['drugbank_id']
|
206
|
+
== drug_1_id]['entities_description'].values
|
198
207
|
if len(entities_description) > 0:
|
199
208
|
entities_description = entities_description[0]
|
200
209
|
else:
|
@@ -225,10 +234,10 @@ class DrugBankProcessor():
|
|
225
234
|
'enzymes_polypeptides': '|'.join(enzymes_polypeptides) if enzymes_polypeptides is not None else None,
|
226
235
|
'targets_polypeptides': '|'.join(targets_polypeptides) if targets_polypeptides is not None else None,
|
227
236
|
'pathways': '|'.join(pathways) if pathways is not None else None,
|
228
|
-
'tuis_description':'|'.join(tuis_description) if tuis_description is not None else None,
|
229
|
-
'cuis_description':'|'.join(cuis_description) if cuis_description is not None else None,
|
230
|
-
'entities_description':'|'.join(entities_description) if entities_description is not None else None
|
231
|
-
|
237
|
+
'tuis_description': '|'.join(tuis_description) if tuis_description is not None else None,
|
238
|
+
'cuis_description': '|'.join(cuis_description) if cuis_description is not None else None,
|
239
|
+
'entities_description': '|'.join(entities_description) if entities_description is not None else None
|
240
|
+
# 'external_identifiers': external_identifiers_dict
|
232
241
|
}
|
233
242
|
drug_rows.append(row)
|
234
243
|
|
@@ -282,10 +291,13 @@ class DrugBankProcessor():
|
|
282
291
|
|
283
292
|
if save_as_sql:
|
284
293
|
conn = sqlite3.connect(db_path)
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
294
|
+
self.drugs_df.to_sql(
|
295
|
+
'_Drugs', conn, if_exists='replace', index=True)
|
296
|
+
self.ddis_df.to_sql('_Interactions', conn,
|
297
|
+
if_exists='replace', index=True)
|
298
|
+
ext_id_df = pd.DataFrame.from_records(external_identifier_list)
|
299
|
+
ext_id_df.to_sql('_ExternalIdentifiers', conn,
|
300
|
+
if_exists='replace', index=True)
|
289
301
|
|
290
302
|
zip_helper.zip_single_file(
|
291
303
|
file_path=db_path, output_path=output_path+'/zips', name='db')
|
@@ -53,7 +53,7 @@ ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt,sha256=STKdpjRqA0Vi63RJ
|
|
53
53
|
ddi_fw/drugbank/__init__.py,sha256=4_eKdZsnXUSJyr-TZpHwIn13JC6PqS5imeLJJbgt2-A,94
|
54
54
|
ddi_fw/drugbank/drugbank.xsd,sha256=y-WzMHANyzmf5T_0ywlMOq63ft9uW41B5I1WfBKbh9c,43306
|
55
55
|
ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVsRvzrw,5610
|
56
|
-
ddi_fw/drugbank/drugbank_processor.py,sha256=
|
56
|
+
ddi_fw/drugbank/drugbank_processor.py,sha256=7I6YYnIagqm7tZGrRHAKTq9QoCxg5R3oS0CgjsyzMBY,18127
|
57
57
|
ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
|
58
58
|
ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
|
59
59
|
ddi_fw/experiments/__init__.py,sha256=UJwd2i3QcuaI1YjC_2yGCiLuEMTT5Yo7rDFxw89chIw,108
|
@@ -79,7 +79,7 @@ ddi_fw/utils/__init__.py,sha256=nhNU_sEp55xsZ5VtvhozjKg6r4GWP6SJI13v8F_jbCg,217
|
|
79
79
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
80
80
|
ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
|
81
81
|
ddi_fw/utils/zip_helper.py,sha256=DjtwcGBoYw8zOP-Ye5OxzeR1OgN3WfNkVx85nb0wbJA,2635
|
82
|
-
ddi_fw-0.0.
|
83
|
-
ddi_fw-0.0.
|
84
|
-
ddi_fw-0.0.
|
85
|
-
ddi_fw-0.0.
|
82
|
+
ddi_fw-0.0.18.dist-info/METADATA,sha256=tE1vK0fU12Rww6VOIqBShgWFPDqf-tndbEXF6YNkHlk,1508
|
83
|
+
ddi_fw-0.0.18.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
84
|
+
ddi_fw-0.0.18.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
85
|
+
ddi_fw-0.0.18.dist-info/RECORD,,
|
File without changes
|
File without changes
|