gsrap 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. gsrap/.ipynb_checkpoints/__init__-checkpoint.py +6 -5
  2. gsrap/__init__.py +6 -5
  3. gsrap/assets/kegg_compound_to_others.pickle +0 -0
  4. gsrap/assets/kegg_reaction_to_others.pickle +0 -0
  5. gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +3 -0
  6. gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +168 -93
  7. gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +55 -51
  8. gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +7 -1
  9. gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +8 -8
  10. gsrap/commons/biomass.py +3 -0
  11. gsrap/commons/downloads.py +168 -93
  12. gsrap/commons/escherutils.py +55 -51
  13. gsrap/commons/excelhub.py +7 -1
  14. gsrap/commons/metrics.py +8 -8
  15. gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +2 -2
  16. gsrap/mkmodel/mkmodel.py +2 -2
  17. gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +43 -18
  18. gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +2 -1
  19. gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +132 -63
  20. gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +23 -3
  21. gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +59 -49
  22. gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +90 -53
  23. gsrap/parsedb/annotation.py +43 -18
  24. gsrap/parsedb/completeness.py +2 -1
  25. gsrap/parsedb/introduce.py +132 -63
  26. gsrap/parsedb/manual.py +22 -2
  27. gsrap/parsedb/parsedb.py +59 -49
  28. gsrap/parsedb/repeating.py +90 -53
  29. gsrap/runsims/.ipynb_checkpoints/runsims-checkpoint.py +2 -1
  30. gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +0 -1
  31. gsrap/runsims/runsims.py +2 -1
  32. gsrap/runsims/simplegrowth.py +0 -1
  33. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/METADATA +5 -2
  34. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/RECORD +37 -37
  35. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/WHEEL +1 -1
  36. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/entry_points.txt +0 -0
  37. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info/licenses}/LICENSE.txt +0 -0
@@ -1,22 +1,27 @@
1
+ import sys
2
+
1
3
  import pandas as pnd
2
4
  import cobra
3
5
 
4
-
5
6
  from .repeating import check_author
6
7
  from .repeating import check_rstring_arrow
7
8
  from .repeating import check_gpr
8
9
  from .repeating import add_reaction
10
+ from .repeating import get_curator_notes
9
11
 
10
12
  from .manual import get_manual_sinks
11
13
  from .manual import get_manual_demands
12
14
 
13
15
 
14
16
 
15
- def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to_others, goodbefore, onlyauthor):
17
+ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to_others, outdir, goodbefore, onlyauthor):
16
18
  goodbefore_reached = False
19
+
20
+
21
+ logger.info("Parsing metabolites ('M' sheet)...")
17
22
 
18
23
 
19
- logger.debug("Checking duplicated metabolite IDs...")
24
+ # check duplicated puremids:
20
25
  if len(set(db['M']['pure_mid'].to_list())) != len(db['M']):
21
26
  pure_mids = db['M']['pure_mid'].to_list()
22
27
  duplicates = list(set([item for item in pure_mids if pure_mids.count(item) > 1]))
@@ -24,11 +29,12 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
24
29
  return 1
25
30
 
26
31
 
27
- # parse M:
28
- logger.debug("Parsing metabolites...")
32
+ # parse M (row by row):
29
33
  db['M'] = db['M'].set_index('pure_mid', drop=True, verify_integrity=True)
30
34
  kc_ids_modeled = set() # account for kc codes modeled
31
- for pure_mid, row in db['M'].iterrows():
35
+ cnt = 0 # counter for parsed records
36
+ msg = '' # to be cleared
37
+ for iteration, (pure_mid, row) in enumerate(db['M'].iterrows()):
32
38
 
33
39
 
34
40
  # skip empty lines!
@@ -41,11 +47,11 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
41
47
  # manage goodbefore/onlyauthor
42
48
  if goodbefore != None and goodbefore_reached:
43
49
  if onlyauthor == None:
44
- logger.info(f"Skipping metabolite '{pure_mid}' as requested with --goodbefore[0] '{goodbefore}'.")
50
+ logger.warning(f"Skipping metabolite '{pure_mid}' as requested with --goodbefore[0] '{goodbefore}'.")
45
51
  continue
46
52
 
47
53
 
48
- # parse author
54
+ # parse and get curators
49
55
  response = check_author(logger, pure_mid, row, db, 'M')
50
56
  if type(response) == int: return 1
51
57
  else: authors = response
@@ -55,7 +61,7 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
55
61
  if goodbefore != None and goodbefore_reached:
56
62
  if onlyauthor != None and onlyauthor not in authors:
57
63
  authors_string = '; '.join(authors)
58
- logger.info(f"Skipping metabolite '{pure_mid}' (authors '{authors_string}') as requested with --goodbefore[0] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
64
+ logger.warning(f"Skipping metabolite '{pure_mid}' (authors '{authors_string}') as requested with --goodbefore[0] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
59
65
  continue
60
66
 
61
67
 
@@ -71,7 +77,7 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
71
77
  return 1
72
78
 
73
79
 
74
- # check if 'kc' codes are real:
80
+ # parse kc:
75
81
  if pnd.isna(row['kc']):
76
82
  logger.error(f"Metabolite '{pure_mid}' has missing KEGG annotation (kc): '{row['kc']}'.")
77
83
  return 1
@@ -79,11 +85,16 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
79
85
  kc_ids = [i.strip() for i in kc_ids]
80
86
  for kc_id in kc_ids:
81
87
  if kc_id == 'CXXXXX': # not in KEGG; could be knowledge gap (e.g. methyl group acceptor in R10404)
82
- logger.debug(f"Metabolite '{pure_mid}' is not in KEGG ('{kc_id}')!")
88
+ with open(f"{outdir}/logs/M.notkegg.txt", 'a') as f:
89
+ print(f"Metabolite '{pure_mid}' is not in KEGG ('{kc_id}')!", file=f)
83
90
  continue
91
+ #
92
+ # check if 'kc' codes are real:
84
93
  if kc_id not in idcollection_dict['kc']:
85
94
  logger.error(f"Metabolite '{pure_mid}' has invalid KEGG annotation (kc): '{kc_id}'.")
86
95
  return 1
96
+ #
97
+ # check if 'kc' was already used:
87
98
  if kc_id in kc_ids_modeled:
88
99
  logger.error(f"KEGG annotation (kc) '{kc_id}' used in metabolite '{pure_mid}' is duplicated.")
89
100
  return 1
@@ -95,6 +106,7 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
95
106
  if pnd.isna(row['inchikey']):
96
107
  logger.error(f"Metabolite '{pure_mid}' has missing inchikey: '{row['inchikey']}'.")
97
108
  return 1
109
+
98
110
  # check inchikey format:
99
111
  if len(row['inchikey']) != 27 or row['inchikey'][14] != '-' or row['inchikey'][25] != '-':
100
112
  logger.error(f"Metabolite '{pure_mid}' has badly formatted inchikey: '{row['inchikey']}'.")
@@ -109,7 +121,8 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
109
121
  for eqbiggid in kegg_compound_to_others[kc_id]['bigg.metabolite']:
110
122
  eqbiggids.add(eqbiggid)
111
123
  if pure_mid not in eqbiggids and eqbiggids != set():
112
- logger.debug(f"Metabolites '{'; '.join(kc_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{pure_mid}').")
124
+ with open(f"{outdir}/logs/M.inbigg.txt", 'a') as f:
125
+ print(f"Metabolites '{'; '.join(kc_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{pure_mid}').", file=f)
113
126
 
114
127
 
115
128
  # add metabolite to model
@@ -122,6 +135,10 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
122
135
  m.compartment='c'
123
136
 
124
137
 
138
+ # write curators as annotations
139
+ m.annotation['curator_codes'] = authors
140
+
141
+
125
142
  # add annotations to model (same order of Memote)
126
143
  ankeys = [
127
144
  'pubchem.compound', 'kegg.compound', 'seed.compound',
@@ -132,53 +149,73 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
132
149
  for ankey in ankeys:
133
150
  if ankey == 'kegg.compound': m.annotation[ankey] = set(kc_ids) - set(['CXXXXX'])
134
151
  else: m.annotation[ankey] = set()
152
+ #
135
153
  # populate sets:
136
154
  for kc_id in kc_ids:
137
155
  if kc_id != 'CXXXXX':
138
156
  if kc_id in kegg_compound_to_others.keys():
139
157
  for ankey in ankeys:
140
158
  m.annotation[ankey].update(kegg_compound_to_others[kc_id][ankey])
159
+ #
141
160
  # save as list:
142
161
  for ankey in ankeys:
143
162
  m.annotation[ankey] = list(m.annotation[ankey])
144
163
 
145
164
 
146
- # replace inchikey with manually-curated
165
+ # # force the manual-curated version of the inchikey
147
166
  if m.annotation['inchikey'] != [] and m.annotation['inchikey'] != [row['inchikey']]:
148
- logger.debug(f"Metabolite '{pure_mid}': manual-curated inchikey ({[row['inchikey']]}) is diferent from the one derived from MNX ({m.annotation['inchikey']}).")
149
- m.annotation['inchikey'] = [row['inchikey']] # force the manual-curated version
167
+ with open(f"{outdir}/logs/M.diffinchi.txt", 'a') as f:
168
+ print(f"Metabolite '{pure_mid}': manual-curated inchikey ({[row['inchikey']]}) is different from the one derived from MNX ({m.annotation['inchikey']}).", file=f)
169
+ m.annotation['inchikey'] = [row['inchikey']]
170
+ #
171
+ # remove inchikey if unknown:
150
172
  if m.annotation['inchikey'] == ['XXXXXXXXXXXXXX-XXXXXXXXXX-X']:
151
173
  m.annotation['inchikey'] = []
152
174
 
153
175
 
154
176
  # add SBO annotation
155
177
  m.annotation['sbo'] = ['SBO:0000247'] # generic metabolite
178
+
179
+
180
+ # add curator_notes
181
+ m.annotation['curator_notes'] = get_curator_notes(logger, row)
182
+
183
+
184
+ # communicate progress:
185
+ cnt += 1
186
+ msg = f"Done {cnt}/{len(db['M'])}!"
187
+ print(msg, file=sys.stderr, end='\r')
188
+ print(''.join([' ' for i in range(len(msg))]), file=sys.stderr, end='\r')
156
189
 
157
190
 
158
-
191
+ # check goodbefore reaching:
159
192
  if goodbefore != None and goodbefore_reached == False:
160
- logger.info(f"Metabolite '{goodbefore}' never reached. Are you sure about your --goodbefore?")
193
+ logger.warning(f"Metabolite '{goodbefore}' never reached. Are you sure about your --goodbefore?")
161
194
 
162
195
 
163
196
  return model
164
197
 
165
198
 
166
199
 
167
- def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_others, goodbefore, onlyauthor):
200
+ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_others, outdir, goodbefore, onlyauthor):
168
201
  goodbefore_reached = False
202
+
203
+
204
+ logger.info("Parsing non-transport reactions ('R' sheet)...")
169
205
 
170
206
 
171
- logger.debug("Checking duplicated reaction IDs...")
207
+ # check duplicated rids:
172
208
  if len(set(db['R']['rid'].to_list())) != len(db['R']):
173
- pure_mids = db['R']['rid'].to_list()
174
- duplicates = list(set([item for item in pure_mids if pure_mids.count(item) > 1]))
209
+ rids = db['R']['rid'].to_list()
210
+ duplicates = list(set([item for item in rids if rids.count(item) > 1]))
175
211
  logger.error(f"Sheet 'R' has duplicated reactions: {duplicates}.")
176
212
  return 1
177
213
 
178
214
 
179
- # parse R:
180
- logger.debug("Parsing reactions...")
215
+ # parse R (row by row):
181
216
  db['R'] = db['R'].set_index('rid', drop=True, verify_integrity=True)
217
+ cnt = 0 # counter for parsed records
218
+ msg = '' # to be cleared
182
219
  for rid, row in db['R'].iterrows():
183
220
 
184
221
 
@@ -192,11 +229,11 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
192
229
  # manage goodbefore/onlyauthor
193
230
  if goodbefore != None and goodbefore_reached:
194
231
  if onlyauthor == None:
195
- logger.info(f"Skipping reaction '{rid}' as requested with --goodbefore[1] '{goodbefore}'.")
232
+ logger.warning(f"Skipping reaction '{rid}' as requested with --goodbefore[1] '{goodbefore}'.")
196
233
  continue
197
234
 
198
235
 
199
- # parse author
236
+ # parse and get curators
200
237
  response = check_author(logger, rid, row, db, 'R')
201
238
  if type(response) == int: return 1
202
239
  else: authors = response
@@ -206,7 +243,7 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
206
243
  if goodbefore != None and goodbefore_reached:
207
244
  if onlyauthor != None and onlyauthor not in authors:
208
245
  authors_string = '; '.join(authors)
209
- logger.info(f"Skipping reaction '{rid}' (authors '{authors_string}') as requested with --goodbefore[1] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
246
+ logger.warning(f"Skipping reaction '{rid}' (authors '{authors_string}') as requested with --goodbefore[1] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
210
247
  continue
211
248
 
212
249
 
@@ -215,7 +252,7 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
215
252
  if response == 1: return 1
216
253
 
217
254
 
218
- # check if 'kr' codes are real:
255
+ # parse 'kr':
219
256
  if pnd.isna(row['kr']):
220
257
  logger.error(f"Reaction '{rid}' has missing KEGG annotation (kr): '{row['kr']}'.")
221
258
  return 1
@@ -223,15 +260,18 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
223
260
  kr_ids = [i.strip() for i in kr_ids]
224
261
  for kr_id in kr_ids:
225
262
  if kr_id == 'RXXXXX': # not in KEGG; could be knowledge gap
226
- logger.debug(f"Reaction '{rid}' is not in KEGG ('{kr_id}')!")
263
+ with open(f"{outdir}/logs/R.notkegg.txt", 'a') as f:
264
+ print(f"Reaction '{rid}' is not in KEGG ('{kr_id}')!", file=f)
227
265
  continue
266
+ #
267
+ # check if 'kr' codes are real:
228
268
  if kr_id not in idcollection_dict['kr']:
229
269
  logger.error(f"Reaction '{rid}' has invalid KEGG annotation (kr): '{kr_id}'.")
230
270
  return 1
231
271
 
232
272
 
233
273
  # check GPR:
234
- response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'R')
274
+ response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'R', outdir)
235
275
  if response == 1: return 1
236
276
 
237
277
 
@@ -243,28 +283,40 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
243
283
  for eqbiggid in kegg_reaction_to_others[kr_id]['bigg.reaction']:
244
284
  eqbiggids.add(eqbiggid)
245
285
  if rid not in eqbiggids and eqbiggids != set():
246
- logger.debug(f"Reactions '{'; '.join(kr_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{rid}').")
286
+ with open(f"{outdir}/logs/R.inbigg.txt", 'a') as f:
287
+ print(f"Reactions '{'; '.join(kr_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{rid}').", file=f)
247
288
 
248
289
 
249
290
  # add reaction to model
250
- response = add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, 'R')
291
+ response = add_reaction(logger, model, rid, authors, row, kr_ids, kegg_reaction_to_others, 'R', outdir)
251
292
  if response == 1: return 1
293
+
294
+
295
+ # communicate progress:
296
+ cnt += 1
297
+ msg = f"Done {cnt}/{len(db['R'])}!"
298
+ print(msg, file=sys.stderr, end='\r')
299
+ print(''.join([' ' for i in range(len(msg))]), file=sys.stderr, end='\r')
252
300
 
253
301
 
302
+ # check goodbefore reaching:
254
303
  if goodbefore != None and goodbefore_reached == False:
255
- logger.info(f"Reaction '{goodbefore}' never reached. Are you sure about your --goodbefore?")
304
+ logger.warning(f"Reaction '{goodbefore}' never reached. Are you sure about your --goodbefore?")
256
305
 
257
306
 
258
307
  return model
259
308
 
260
309
 
261
310
 
262
- def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_to_others, goodbefore, onlyauthor):
311
+ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_to_others, outdir, goodbefore, onlyauthor):
263
312
  goodbefore_reached = False
264
-
313
+
314
+
315
+ logger.info("Parsing transport reactions ('T' sheet)...")
265
316
 
266
317
 
267
318
  def clone_to_external(model, mid_c, mid_e):
319
+ # given an existing '_c' M, create its '_e' equivalent
268
320
 
269
321
  m = cobra.Metabolite(f'{mid_e}')
270
322
  model.add_metabolites([m])
@@ -281,10 +333,12 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
281
333
 
282
334
 
283
335
  def add_exchange_reaction(model, mid_e):
336
+ # given an existing '_e' M, create the corresponding EX_change reaction
284
337
 
285
338
  r = cobra.Reaction(f'EX_{mid_e}')
286
339
  model.add_reactions([r])
287
340
  r = model.reactions.get_by_id(f'EX_{mid_e}')
341
+
288
342
  r.name = f"Exchange for {model.metabolites.get_by_id(mid_e).name}"
289
343
  r.build_reaction_from_string(f'{mid_e} --> ')
290
344
  r.bounds = (0, 1000)
@@ -294,22 +348,15 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
294
348
 
295
349
 
296
350
 
297
-
298
- # get all already inserted metabolites
351
+ # get all already inserted metabolites and reactions
299
352
  mids_parsed = [m.id for m in model.metabolites]
300
353
  rids_parsed = [r.id for r in model.reactions]
301
354
 
302
355
 
303
- # protons may not have an explicit transporter
304
- clone_to_external(model, 'h_c', 'h_e')
305
- mids_parsed.append('h_e')
306
- add_exchange_reaction(model, 'h_e')
307
- rids_parsed.append(f'EX_h_e')
308
-
309
-
310
- # parse T:
311
- logger.debug("Parsing transporters...")
356
+ # parse T (row by row):
312
357
  db['T'] = db['T'].set_index('rid', drop=True, verify_integrity=True)
358
+ cnt = 0 # counter for parsed records
359
+ msg = '' # to be cleared
313
360
  for rid, row in db['T'].iterrows():
314
361
 
315
362
 
@@ -320,10 +367,16 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
320
367
  goodbefore_reached = True
321
368
 
322
369
 
370
+ # avoid duplicates!
371
+ if rid in rids_parsed:
372
+ logger.error(f"Tranport '{rid}' has ID identical to previously added reaction!")
373
+ return 1
374
+
375
+
323
376
  # manage goodbefore/onlyauthor
324
377
  if goodbefore != None and goodbefore_reached:
325
378
  if onlyauthor == None:
326
- logger.info(f"Skipping transport '{rid}' as requested with --goodbefore[2] '{goodbefore}'.")
379
+ logger.warning(f"Skipping transport '{rid}' as requested with --goodbefore[2] '{goodbefore}'.")
327
380
  continue
328
381
 
329
382
 
@@ -337,7 +390,7 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
337
390
  if goodbefore != None and goodbefore_reached:
338
391
  if onlyauthor != None and onlyauthor not in authors:
339
392
  authors_string = '; '.join(authors)
340
- logger.info(f"Skipping transport '{rid}' (authors '{authors_string}') as requested with --goodbefore[2] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
393
+ logger.warning(f"Skipping transport '{rid}' (authors '{authors_string}') as requested with --goodbefore[2] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
341
394
  continue
342
395
 
343
396
 
@@ -346,7 +399,7 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
346
399
  if response == 1: return 1
347
400
 
348
401
 
349
- # check if 'kr' codes are real:
402
+ # parse 'kr':
350
403
  if pnd.isna(row['kr']):
351
404
  logger.error(f"Reaction '{rid}' has missing KEGG annotation (kr): '{row['kr']}'.")
352
405
  return 1
@@ -354,37 +407,39 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
354
407
  kr_ids = row['kr'].split(';')
355
408
  kr_ids = [i.strip() for i in kr_ids]
356
409
  for kr_id in kr_ids:
410
+ #
411
+ # check if 'kr' codes are real
357
412
  if kr_id not in idcollection_dict['kr']:
358
413
  logger.error(f"Reaction '{rid}' has invalid KEGG annotation (kr): '{kr_id}'.")
359
414
  return 1
360
- else: kr_ids = []
415
+ else:
416
+ # no 'kr' for the majority of transport reactions!
417
+ kr_ids = []
361
418
 
362
419
 
363
420
  # check GPR:
364
- response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'T')
421
+ response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'T', outdir)
365
422
  if response == 1: return 1
366
423
 
367
424
 
368
- # get involved metabolites:
369
- involved_mids = row['rstring'].split(' ')
370
- involved_mids = [i for i in involved_mids if i not in ['-->', '<=>']]
371
-
372
-
373
- # the external metabolite must be already modeled as cytosolic
425
+ # iterate the involved metabolites
426
+ involved_mids = row['rstring'].split(' ') # dirty (arrows, coefficients are included)
374
427
  for mid in involved_mids:
375
428
  if mid.endswith('_e'):
376
429
  mid_e = mid
377
430
  mid_c = mid.rsplit('_', 1)[0] + '_c'
431
+ #
432
+ # the cytosolic counterpart must be already modeled:
378
433
  if mid_c not in mids_parsed:
379
434
  logger.error(f"{rid}: the metabolite '{mid_c}', counterpart of '{mid_e}', was not previously modeled.")
380
435
  return 1
381
-
382
- # add external metabolite to model
436
+ #
437
+ # clone to add external metabolite to model
383
438
  if mid_e not in mids_parsed:
384
439
  clone_to_external(model, mid_c, mid_e)
385
440
  mids_parsed.append(mid_e)
386
-
387
- # add exchange reaction to model
441
+ #
442
+ # add corresponding exchange reaction to model
388
443
  if f'EX_{mid_e}' not in rids_parsed:
389
444
  add_exchange_reaction(model, mid_e)
390
445
  rids_parsed.append(f'EX_{mid_e}')
@@ -398,16 +453,26 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
398
453
  for eqbiggid in kegg_reaction_to_others[kr_id]['bigg.reaction']:
399
454
  eqbiggids.add(eqbiggid)
400
455
  if rid not in eqbiggids and eqbiggids != set():
401
- logger.debug(f"Reactions '{'; '.join(kr_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{rid}').")
456
+ with open(f"{outdir}/logs/T.inbigg.txt", 'a') as f:
457
+ print(f"Reactions '{'; '.join(kr_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{rid}').", file=f)
402
458
 
403
459
 
404
460
  # add reaction to model
405
- response = add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, 'T')
461
+ response = add_reaction(logger, model, rid, authors, row, kr_ids, kegg_reaction_to_others, 'T', outdir)
406
462
  if response == 1: return 1
463
+ rids_parsed.append(rid) # update list of rids in model
464
+
407
465
 
466
+ # communicate progress:
467
+ cnt += 1
468
+ msg = f"Done {cnt}/{len(db['T'])}!"
469
+ print(msg, file=sys.stderr, end='\r')
470
+ print(''.join([' ' for i in range(len(msg))]), file=sys.stderr, end='\r')
408
471
 
472
+
473
+ # check goodbefore reaching:
409
474
  if goodbefore != None and goodbefore_reached == False:
410
- logger.info(f"Transport '{goodbefore}' never reached. Are you sure about your --goodbefore?")
475
+ logger.warning(f"Transport '{goodbefore}' never reached. Are you sure about your --goodbefore?")
411
476
 
412
477
 
413
478
  return model
@@ -416,6 +481,10 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
416
481
 
417
482
  def introduce_sinks_demands(logger, model):
418
483
 
484
+
485
+ logger.debug("Introducing sinks and demands...")
486
+
487
+
419
488
  sinks = get_manual_sinks()
420
489
  demands = get_manual_demands()
421
490
 
gsrap/parsedb/manual.py CHANGED
@@ -15,8 +15,27 @@ def get_krs_to_exclude():
15
15
  return set([
16
16
  'R12328', 'R05190', # general forms of fatty acid biosynthesis
17
17
  'R01347', 'R01348', 'R04121', # general forms of fatty acid degradation
18
+ 'R11671', # multi-step fatty acids reactions
19
+ 'R07860', 'R01317', 'R07064', # aspecific fatty acid reactions
20
+ 'R11311', 'R11256', 'R11308', 'R08772', 'R08770', # polymer reactions
21
+
22
+ # inconclusive due to simplification
23
+ 'R12425',
24
+
25
+ # "incomplete reaction" / "unclear reaction"
26
+ 'R08414', 'R13037', 'R13034', 'R13036', 'R02825', 'R11178', 'R13325', 'R12855', 'R12856', 'R09809',
27
+ 'R09808', 'R08035', 'R08034', 'R11470', 'R09360', 'R08139', 'R08318', 'R07859', 'R09361', 'R09349',
28
+ 'R13149', 'R13066', 'R11467', 'R11255', 'R08986', 'R13156', 'R13074', 'R13150', 'R11302', 'R11388',
29
+ 'R08341', 'R13147', 'R13155', 'R08339', 'R11466', 'R08272', 'R09348', 'R09362', 'R11107', 'R08340',
30
+ 'R07940', 'R11120', 'R11245', 'R08269', 'R11131', 'R07943', 'R08342', 'R06766', 'R12584', 'R09852',
31
+ 'R08268', 'R11129', 'R06702', 'R08866', 'R12555', 'R08927', 'R08343', 'R13067', 'R13069', 'R13068',
32
+ 'R05670', 'R06694', 'R09851', 'R11465', 'R08928', 'R11389', 'R11464', 'R13087', 'R12586', 'R11304',
33
+ 'R08984', 'R11254', 'R13165', 'R12884', 'R08865', 'R13151', 'R08132', 'R08929', 'R06701', 'R08345',
34
+ 'R11365', 'R11303', 'R06670', 'R11364', 'R09347', 'R08293', 'R11362', 'R03872', 'R06339', 'R10481',
35
+ 'R10480', 'R13341', 'R06505', 'R06504', 'R06326', 'R06470', 'R06467', 'R06327', 'R06503', 'R09847',
36
+ 'R13479', 'R13447', 'R13478', 'R07510', 'R04546', 'R06468', 'R05624', 'R10706', 'R13454', 'R13556',
37
+ 'R13455', 'R12691',
18
38
  ])
19
-
20
39
 
21
40
 
22
41
 
@@ -29,12 +48,13 @@ def get_rids_with_mancheck_gpr():
29
48
  return rids_mancheck_gpr
30
49
 
31
50
 
51
+
32
52
  def get_rids_with_mancheck_balancing():
33
53
  rids_mancheck_bal = [ # same reactions involving ATP can be reversible
34
54
 
35
55
  # SECTION "reversible both in KEGG and MetaCyc"
36
56
  'PGK', 'SUCOAS', 'ADK1', 'GK1', 'NNATr', 'CYTK1', 'ACKr',
37
- 'DGK1', 'PPAKr', 'ATPSr', 'NDPK10',
57
+ 'DGK1', 'PPAKr', 'ATPSr', 'NDPK10', 'BUTKr',
38
58
 
39
59
  ### SECTION "reversible in KEGG but not in MetaCyc" ###
40
60
  'CYTK2', # clearly reversible in KEGG but not in MetaCyc (RXN-7913)