ddi-fw 0.0.17__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,7 +40,7 @@ class DrugBankProcessor():
40
40
  zip_helper = ZipHelper()
41
41
  zip_helper.extract(input_path=input_path, output_path=output_path)
42
42
 
43
- def get_external_identifiers(self,input_path='drugs'):
43
+ def get_external_identifiers(self, input_path='drugs'):
44
44
  external_identifier_list = []
45
45
  all_json_files = input_path+'/*.json*'
46
46
 
@@ -50,14 +50,15 @@ class DrugBankProcessor():
50
50
  data = json.load(f)
51
51
  drug_1 = data['name']
52
52
  drug_1_id = [d['value']
53
- for d in data['drugbank-id'] if d['primary'] == True][0]
53
+ for d in data['drugbank-id'] if d['primary'] == True][0]
54
54
  external_identifiers = data['external-identifiers'] if "external-identifiers" in data else None
55
55
  external_identifiers_dict = {}
56
56
  external_identifiers_dict['name'] = drug_1
57
57
  external_identifiers_dict['drugbank_id'] = drug_1_id
58
58
  if external_identifiers is not None:
59
59
  for p in external_identifiers['external-identifier']:
60
- external_identifiers_dict[p['resource'].lower().replace(" ","_")] = p['identifier']
60
+ external_identifiers_dict[p['resource'].lower().replace(
61
+ " ", "_")] = p['identifier']
61
62
  # external_identifiers_dict = dict(
62
63
  # [(p['resource'].lower().replace(" ","_"), p['identifier']) for p in external_identifiers['external-identifier']])
63
64
  # external_identifiers_dict['name'] = drug_1
@@ -65,11 +66,16 @@ class DrugBankProcessor():
65
66
  external_identifier_list.append(external_identifiers_dict)
66
67
  return external_identifier_list
67
68
 
68
-
69
- def process(self, input_path='drugs', output_path='output', save_as_sql=True, db_path = r"./drugbank.db", zip_outputs=True):
69
+ def process(self,
70
+ input_path='drugs',
71
+ output_path='output',
72
+ save_as_sql=True,
73
+ db_path=r"./drugbank.db",
74
+ zip_outputs=True,
75
+ ner_data_path):
70
76
  if not os.path.exists(output_path):
71
77
  os.makedirs(output_path)
72
- ner_df =CTakesNER().load()
78
+ ner_df = CTakesNER().load(ner_data_path)
73
79
  drugs_pickle_path = output_path+'/drugs.pkl'
74
80
  drugs_csv_path = output_path+'/drugs.gzip'
75
81
  ddi_pickle_path = output_path + '/ddi.pkl'
@@ -158,12 +164,12 @@ class DrugBankProcessor():
158
164
  pathways = [
159
165
  d['smpdb-id'] for d in data['pathways']['pathway']]
160
166
 
161
-
162
167
  if external_identifiers is not None:
163
168
  external_identifiers_dict = dict(
164
169
  [(p['resource'], p['identifier']) for p in external_identifiers['external-identifier']])
165
170
  external_identifiers_dict['drugbank_id'] = drug_1_id
166
- external_identifier_list.append(external_identifiers_dict)
171
+ external_identifier_list.append(
172
+ external_identifiers_dict)
167
173
  # add note column
168
174
  smiles = None
169
175
  morgan_hashed = None
@@ -182,19 +188,22 @@ class DrugBankProcessor():
182
188
  morgan_hashed = np.zeros(881).tolist()
183
189
 
184
190
  # TODO cui, tui, entities other types of texts, test it
185
- tuis_description = ner_df[ner_df['drugbank_id'] == drug_1_id]['tui_description'].values
191
+ tuis_description = ner_df[ner_df['drugbank_id']
192
+ == drug_1_id]['tui_description'].values
186
193
  if len(tuis_description) > 0:
187
194
  tuis_description = tuis_description[0]
188
195
  else:
189
196
  tuis_description = None
190
197
 
191
- cuis_description = ner_df[ner_df['drugbank_id'] == drug_1_id]['cui_description'].values
198
+ cuis_description = ner_df[ner_df['drugbank_id']
199
+ == drug_1_id]['cui_description'].values
192
200
  if len(cuis_description) > 0:
193
201
  cuis_description = cuis_description[0]
194
202
  else:
195
203
  cuis_description = None
196
204
 
197
- entities_description = ner_df[ner_df['drugbank_id'] == drug_1_id]['entities_description'].values
205
+ entities_description = ner_df[ner_df['drugbank_id']
206
+ == drug_1_id]['entities_description'].values
198
207
  if len(entities_description) > 0:
199
208
  entities_description = entities_description[0]
200
209
  else:
@@ -225,10 +234,10 @@ class DrugBankProcessor():
225
234
  'enzymes_polypeptides': '|'.join(enzymes_polypeptides) if enzymes_polypeptides is not None else None,
226
235
  'targets_polypeptides': '|'.join(targets_polypeptides) if targets_polypeptides is not None else None,
227
236
  'pathways': '|'.join(pathways) if pathways is not None else None,
228
- 'tuis_description':'|'.join(tuis_description) if tuis_description is not None else None,
229
- 'cuis_description':'|'.join(cuis_description) if cuis_description is not None else None,
230
- 'entities_description':'|'.join(entities_description) if entities_description is not None else None
231
- # 'external_identifiers': external_identifiers_dict
237
+ 'tuis_description': '|'.join(tuis_description) if tuis_description is not None else None,
238
+ 'cuis_description': '|'.join(cuis_description) if cuis_description is not None else None,
239
+ 'entities_description': '|'.join(entities_description) if entities_description is not None else None
240
+ # 'external_identifiers': external_identifiers_dict
232
241
  }
233
242
  drug_rows.append(row)
234
243
 
@@ -282,10 +291,13 @@ class DrugBankProcessor():
282
291
 
283
292
  if save_as_sql:
284
293
  conn = sqlite3.connect(db_path)
285
- # self.drugs_df.to_sql('_Drugs', conn, if_exists='replace', index=True)
286
- # self.ddis_df.to_sql('_Interactions', conn, if_exists='replace', index=True)
287
- ext_id_df= pd.DataFrame.from_records(external_identifier_list)
288
- ext_id_df.to_sql('_ExternalIdentifiers', conn, if_exists='replace', index=True)
294
+ self.drugs_df.to_sql(
295
+ '_Drugs', conn, if_exists='replace', index=True)
296
+ self.ddis_df.to_sql('_Interactions', conn,
297
+ if_exists='replace', index=True)
298
+ ext_id_df = pd.DataFrame.from_records(external_identifier_list)
299
+ ext_id_df.to_sql('_ExternalIdentifiers', conn,
300
+ if_exists='replace', index=True)
289
301
 
290
302
  zip_helper.zip_single_file(
291
303
  file_path=db_path, output_path=output_path+'/zips', name='db')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.17
3
+ Version: 0.0.18
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -53,7 +53,7 @@ ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt,sha256=STKdpjRqA0Vi63RJ
53
53
  ddi_fw/drugbank/__init__.py,sha256=4_eKdZsnXUSJyr-TZpHwIn13JC6PqS5imeLJJbgt2-A,94
54
54
  ddi_fw/drugbank/drugbank.xsd,sha256=y-WzMHANyzmf5T_0ywlMOq63ft9uW41B5I1WfBKbh9c,43306
55
55
  ddi_fw/drugbank/drugbank_parser.py,sha256=lxUuhB0s8ef_aPNDs0V8ClKF7-KIWugNIV9gVsRvzrw,5610
56
- ddi_fw/drugbank/drugbank_processor.py,sha256=4Hs6cPmG_d0wDXA6979WMZQmFcDrGbCGLja-Nhs7gyQ,17692
56
+ ddi_fw/drugbank/drugbank_processor.py,sha256=7I6YYnIagqm7tZGrRHAKTq9QoCxg5R3oS0CgjsyzMBY,18127
57
57
  ddi_fw/drugbank/drugbank_processor_org.py,sha256=eO5Yset50P91qkic79RUXPoEuxRxQKFkKW0l4G29Mas,13322
58
58
  ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHrBXsI8,4600
59
59
  ddi_fw/experiments/__init__.py,sha256=UJwd2i3QcuaI1YjC_2yGCiLuEMTT5Yo7rDFxw89chIw,108
@@ -79,7 +79,7 @@ ddi_fw/utils/__init__.py,sha256=nhNU_sEp55xsZ5VtvhozjKg6r4GWP6SJI13v8F_jbCg,217
79
79
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
80
80
  ddi_fw/utils/utils.py,sha256=Na6Y8mY-CFbQjrgd9xC8agcrjVvTj_7KIXqFm1H_3qU,3549
81
81
  ddi_fw/utils/zip_helper.py,sha256=DjtwcGBoYw8zOP-Ye5OxzeR1OgN3WfNkVx85nb0wbJA,2635
82
- ddi_fw-0.0.17.dist-info/METADATA,sha256=IbofIWh6wWq0LO-YT5qKARTnySunykPDHsK48OTc7lA,1508
83
- ddi_fw-0.0.17.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
84
- ddi_fw-0.0.17.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
85
- ddi_fw-0.0.17.dist-info/RECORD,,
82
+ ddi_fw-0.0.18.dist-info/METADATA,sha256=tE1vK0fU12Rww6VOIqBShgWFPDqf-tndbEXF6YNkHlk,1508
83
+ ddi_fw-0.0.18.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
84
+ ddi_fw-0.0.18.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
85
+ ddi_fw-0.0.18.dist-info/RECORD,,