gsrap 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py +6 -5
- gsrap/__init__.py +6 -5
- gsrap/assets/kegg_compound_to_others.pickle +0 -0
- gsrap/assets/kegg_reaction_to_others.pickle +0 -0
- gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +3 -0
- gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +168 -93
- gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +55 -51
- gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +7 -1
- gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +8 -8
- gsrap/commons/biomass.py +3 -0
- gsrap/commons/downloads.py +168 -93
- gsrap/commons/escherutils.py +55 -51
- gsrap/commons/excelhub.py +7 -1
- gsrap/commons/metrics.py +8 -8
- gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +2 -2
- gsrap/mkmodel/mkmodel.py +2 -2
- gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +43 -18
- gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +2 -1
- gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +132 -63
- gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +23 -3
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +59 -49
- gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +90 -53
- gsrap/parsedb/annotation.py +43 -18
- gsrap/parsedb/completeness.py +2 -1
- gsrap/parsedb/introduce.py +132 -63
- gsrap/parsedb/manual.py +22 -2
- gsrap/parsedb/parsedb.py +59 -49
- gsrap/parsedb/repeating.py +90 -53
- gsrap/runsims/.ipynb_checkpoints/runsims-checkpoint.py +2 -1
- gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +0 -1
- gsrap/runsims/runsims.py +2 -1
- gsrap/runsims/simplegrowth.py +0 -1
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/METADATA +5 -2
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/RECORD +37 -37
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/WHEEL +1 -1
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/entry_points.txt +0 -0
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info/licenses}/LICENSE.txt +0 -0
gsrap/parsedb/introduce.py
CHANGED
|
@@ -1,22 +1,27 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
1
3
|
import pandas as pnd
|
|
2
4
|
import cobra
|
|
3
5
|
|
|
4
|
-
|
|
5
6
|
from .repeating import check_author
|
|
6
7
|
from .repeating import check_rstring_arrow
|
|
7
8
|
from .repeating import check_gpr
|
|
8
9
|
from .repeating import add_reaction
|
|
10
|
+
from .repeating import get_curator_notes
|
|
9
11
|
|
|
10
12
|
from .manual import get_manual_sinks
|
|
11
13
|
from .manual import get_manual_demands
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to_others, goodbefore, onlyauthor):
|
|
17
|
+
def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to_others, outdir, goodbefore, onlyauthor):
|
|
16
18
|
goodbefore_reached = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
logger.info("Parsing metabolites ('M' sheet)...")
|
|
17
22
|
|
|
18
23
|
|
|
19
|
-
|
|
24
|
+
# check duplicated puremids:
|
|
20
25
|
if len(set(db['M']['pure_mid'].to_list())) != len(db['M']):
|
|
21
26
|
pure_mids = db['M']['pure_mid'].to_list()
|
|
22
27
|
duplicates = list(set([item for item in pure_mids if pure_mids.count(item) > 1]))
|
|
@@ -24,11 +29,12 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
24
29
|
return 1
|
|
25
30
|
|
|
26
31
|
|
|
27
|
-
# parse M:
|
|
28
|
-
logger.debug("Parsing metabolites...")
|
|
32
|
+
# parse M (row by row):
|
|
29
33
|
db['M'] = db['M'].set_index('pure_mid', drop=True, verify_integrity=True)
|
|
30
34
|
kc_ids_modeled = set() # account for kc codes modeled
|
|
31
|
-
|
|
35
|
+
cnt = 0 # counter for parsed records
|
|
36
|
+
msg = '' # to be cleared
|
|
37
|
+
for iteration, (pure_mid, row) in enumerate(db['M'].iterrows()):
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
# skip empty lines!
|
|
@@ -41,11 +47,11 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
41
47
|
# manage goodbefore/onlyauthor
|
|
42
48
|
if goodbefore != None and goodbefore_reached:
|
|
43
49
|
if onlyauthor == None:
|
|
44
|
-
logger.
|
|
50
|
+
logger.warning(f"Skipping metabolite '{pure_mid}' as requested with --goodbefore[0] '{goodbefore}'.")
|
|
45
51
|
continue
|
|
46
52
|
|
|
47
53
|
|
|
48
|
-
# parse
|
|
54
|
+
# parse and get curators
|
|
49
55
|
response = check_author(logger, pure_mid, row, db, 'M')
|
|
50
56
|
if type(response) == int: return 1
|
|
51
57
|
else: authors = response
|
|
@@ -55,7 +61,7 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
55
61
|
if goodbefore != None and goodbefore_reached:
|
|
56
62
|
if onlyauthor != None and onlyauthor not in authors:
|
|
57
63
|
authors_string = '; '.join(authors)
|
|
58
|
-
logger.
|
|
64
|
+
logger.warning(f"Skipping metabolite '{pure_mid}' (authors '{authors_string}') as requested with --goodbefore[0] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
|
|
59
65
|
continue
|
|
60
66
|
|
|
61
67
|
|
|
@@ -71,7 +77,7 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
71
77
|
return 1
|
|
72
78
|
|
|
73
79
|
|
|
74
|
-
#
|
|
80
|
+
# parse kc:
|
|
75
81
|
if pnd.isna(row['kc']):
|
|
76
82
|
logger.error(f"Metabolite '{pure_mid}' has missing KEGG annotation (kc): '{row['kc']}'.")
|
|
77
83
|
return 1
|
|
@@ -79,11 +85,16 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
79
85
|
kc_ids = [i.strip() for i in kc_ids]
|
|
80
86
|
for kc_id in kc_ids:
|
|
81
87
|
if kc_id == 'CXXXXX': # not in KEGG; could be knowledge gap (e.g. methyl group acceptor in R10404)
|
|
82
|
-
|
|
88
|
+
with open(f"{outdir}/logs/M.notkegg.txt", 'a') as f:
|
|
89
|
+
print(f"Metabolite '{pure_mid}' is not in KEGG ('{kc_id}')!", file=f)
|
|
83
90
|
continue
|
|
91
|
+
#
|
|
92
|
+
# check if 'kc' codes are real:
|
|
84
93
|
if kc_id not in idcollection_dict['kc']:
|
|
85
94
|
logger.error(f"Metabolite '{pure_mid}' has invalid KEGG annotation (kc): '{kc_id}'.")
|
|
86
95
|
return 1
|
|
96
|
+
#
|
|
97
|
+
# check if 'kc' was already used:
|
|
87
98
|
if kc_id in kc_ids_modeled:
|
|
88
99
|
logger.error(f"KEGG annotation (kc) '{kc_id}' used in metabolite '{pure_mid}' is duplicated.")
|
|
89
100
|
return 1
|
|
@@ -95,6 +106,7 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
95
106
|
if pnd.isna(row['inchikey']):
|
|
96
107
|
logger.error(f"Metabolite '{pure_mid}' has missing inchikey: '{row['inchikey']}'.")
|
|
97
108
|
return 1
|
|
109
|
+
|
|
98
110
|
# check inchikey format:
|
|
99
111
|
if len(row['inchikey']) != 27 or row['inchikey'][14] != '-' or row['inchikey'][25] != '-':
|
|
100
112
|
logger.error(f"Metabolite '{pure_mid}' has badly formatted inchikey: '{row['inchikey']}'.")
|
|
@@ -109,7 +121,8 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
109
121
|
for eqbiggid in kegg_compound_to_others[kc_id]['bigg.metabolite']:
|
|
110
122
|
eqbiggids.add(eqbiggid)
|
|
111
123
|
if pure_mid not in eqbiggids and eqbiggids != set():
|
|
112
|
-
|
|
124
|
+
with open(f"{outdir}/logs/M.inbigg.txt", 'a') as f:
|
|
125
|
+
print(f"Metabolites '{'; '.join(kc_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{pure_mid}').", file=f)
|
|
113
126
|
|
|
114
127
|
|
|
115
128
|
# add metabolite to model
|
|
@@ -122,6 +135,10 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
122
135
|
m.compartment='c'
|
|
123
136
|
|
|
124
137
|
|
|
138
|
+
# write curators as annotations
|
|
139
|
+
m.annotation['curator_codes'] = authors
|
|
140
|
+
|
|
141
|
+
|
|
125
142
|
# add annotations to model (same order of Memote)
|
|
126
143
|
ankeys = [
|
|
127
144
|
'pubchem.compound', 'kegg.compound', 'seed.compound',
|
|
@@ -132,53 +149,73 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
132
149
|
for ankey in ankeys:
|
|
133
150
|
if ankey == 'kegg.compound': m.annotation[ankey] = set(kc_ids) - set(['CXXXXX'])
|
|
134
151
|
else: m.annotation[ankey] = set()
|
|
152
|
+
#
|
|
135
153
|
# populate sets:
|
|
136
154
|
for kc_id in kc_ids:
|
|
137
155
|
if kc_id != 'CXXXXX':
|
|
138
156
|
if kc_id in kegg_compound_to_others.keys():
|
|
139
157
|
for ankey in ankeys:
|
|
140
158
|
m.annotation[ankey].update(kegg_compound_to_others[kc_id][ankey])
|
|
159
|
+
#
|
|
141
160
|
# save as list:
|
|
142
161
|
for ankey in ankeys:
|
|
143
162
|
m.annotation[ankey] = list(m.annotation[ankey])
|
|
144
163
|
|
|
145
164
|
|
|
146
|
-
#
|
|
165
|
+
# force the manually curated version of the inchikey
|
|
147
166
|
if m.annotation['inchikey'] != [] and m.annotation['inchikey'] != [row['inchikey']]:
|
|
148
|
-
|
|
149
|
-
|
|
167
|
+
with open(f"{outdir}/logs/M.diffinchi.txt", 'a') as f:
|
|
168
|
+
print(f"Metabolite '{pure_mid}': manual-curated inchikey ({[row['inchikey']]}) is different from the one derived from MNX ({m.annotation['inchikey']}).", file=f)
|
|
169
|
+
m.annotation['inchikey'] = [row['inchikey']]
|
|
170
|
+
#
|
|
171
|
+
# remove inchikey if unknown:
|
|
150
172
|
if m.annotation['inchikey'] == ['XXXXXXXXXXXXXX-XXXXXXXXXX-X']:
|
|
151
173
|
m.annotation['inchikey'] = []
|
|
152
174
|
|
|
153
175
|
|
|
154
176
|
# add SBO annotation
|
|
155
177
|
m.annotation['sbo'] = ['SBO:0000247'] # generic metabolite
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
# add curator_notes
|
|
181
|
+
m.annotation['curator_notes'] = get_curator_notes(logger, row)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# communicate progress:
|
|
185
|
+
cnt += 1
|
|
186
|
+
msg = f"Done {cnt}/{len(db['M'])}!"
|
|
187
|
+
print(msg, file=sys.stderr, end='\r')
|
|
188
|
+
print(''.join([' ' for i in range(len(msg))]), file=sys.stderr, end='\r')
|
|
156
189
|
|
|
157
190
|
|
|
158
|
-
|
|
191
|
+
# check goodbefore reaching:
|
|
159
192
|
if goodbefore != None and goodbefore_reached == False:
|
|
160
|
-
logger.
|
|
193
|
+
logger.warning(f"Metabolite '{goodbefore}' never reached. Are you sure about your --goodbefore?")
|
|
161
194
|
|
|
162
195
|
|
|
163
196
|
return model
|
|
164
197
|
|
|
165
198
|
|
|
166
199
|
|
|
167
|
-
def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_others, goodbefore, onlyauthor):
|
|
200
|
+
def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_others, outdir, goodbefore, onlyauthor):
|
|
168
201
|
goodbefore_reached = False
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
logger.info("Parsing non-transport reactions ('R' sheet)...")
|
|
169
205
|
|
|
170
206
|
|
|
171
|
-
|
|
207
|
+
# check duplicated rids:
|
|
172
208
|
if len(set(db['R']['rid'].to_list())) != len(db['R']):
|
|
173
|
-
|
|
174
|
-
duplicates = list(set([item for item in
|
|
209
|
+
rids = db['R']['rid'].to_list()
|
|
210
|
+
duplicates = list(set([item for item in rids if rids.count(item) > 1]))
|
|
175
211
|
logger.error(f"Sheet 'R' has duplicated reactions: {duplicates}.")
|
|
176
212
|
return 1
|
|
177
213
|
|
|
178
214
|
|
|
179
|
-
# parse R:
|
|
180
|
-
logger.debug("Parsing reactions...")
|
|
215
|
+
# parse R (row by row):
|
|
181
216
|
db['R'] = db['R'].set_index('rid', drop=True, verify_integrity=True)
|
|
217
|
+
cnt = 0 # counter for parsed records
|
|
218
|
+
msg = '' # to be cleared
|
|
182
219
|
for rid, row in db['R'].iterrows():
|
|
183
220
|
|
|
184
221
|
|
|
@@ -192,11 +229,11 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
|
|
|
192
229
|
# manage goodbefore/onlyauthor
|
|
193
230
|
if goodbefore != None and goodbefore_reached:
|
|
194
231
|
if onlyauthor == None:
|
|
195
|
-
logger.
|
|
232
|
+
logger.warning(f"Skipping reaction '{rid}' as requested with --goodbefore[1] '{goodbefore}'.")
|
|
196
233
|
continue
|
|
197
234
|
|
|
198
235
|
|
|
199
|
-
# parse
|
|
236
|
+
# parse and get curators
|
|
200
237
|
response = check_author(logger, rid, row, db, 'R')
|
|
201
238
|
if type(response) == int: return 1
|
|
202
239
|
else: authors = response
|
|
@@ -206,7 +243,7 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
|
|
|
206
243
|
if goodbefore != None and goodbefore_reached:
|
|
207
244
|
if onlyauthor != None and onlyauthor not in authors:
|
|
208
245
|
authors_string = '; '.join(authors)
|
|
209
|
-
logger.
|
|
246
|
+
logger.warning(f"Skipping reaction '{rid}' (authors '{authors_string}') as requested with --goodbefore[1] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
|
|
210
247
|
continue
|
|
211
248
|
|
|
212
249
|
|
|
@@ -215,7 +252,7 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
|
|
|
215
252
|
if response == 1: return 1
|
|
216
253
|
|
|
217
254
|
|
|
218
|
-
#
|
|
255
|
+
# parse 'kr':
|
|
219
256
|
if pnd.isna(row['kr']):
|
|
220
257
|
logger.error(f"Reaction '{rid}' has missing KEGG annotation (kr): '{row['kr']}'.")
|
|
221
258
|
return 1
|
|
@@ -223,15 +260,18 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
|
|
|
223
260
|
kr_ids = [i.strip() for i in kr_ids]
|
|
224
261
|
for kr_id in kr_ids:
|
|
225
262
|
if kr_id == 'RXXXXX': # not in KEGG; could be knowledge gap
|
|
226
|
-
|
|
263
|
+
with open(f"{outdir}/logs/R.notkegg.txt", 'a') as f:
|
|
264
|
+
print(f"Reaction '{rid}' is not in KEGG ('{kr_id}')!", file=f)
|
|
227
265
|
continue
|
|
266
|
+
#
|
|
267
|
+
# check if 'kr' codes are real:
|
|
228
268
|
if kr_id not in idcollection_dict['kr']:
|
|
229
269
|
logger.error(f"Reaction '{rid}' has invalid KEGG annotation (kr): '{kr_id}'.")
|
|
230
270
|
return 1
|
|
231
271
|
|
|
232
272
|
|
|
233
273
|
# check GPR:
|
|
234
|
-
response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'R')
|
|
274
|
+
response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'R', outdir)
|
|
235
275
|
if response == 1: return 1
|
|
236
276
|
|
|
237
277
|
|
|
@@ -243,28 +283,40 @@ def introduce_reactions(logger, db, model, idcollection_dict, kegg_reaction_to_o
|
|
|
243
283
|
for eqbiggid in kegg_reaction_to_others[kr_id]['bigg.reaction']:
|
|
244
284
|
eqbiggids.add(eqbiggid)
|
|
245
285
|
if rid not in eqbiggids and eqbiggids != set():
|
|
246
|
-
|
|
286
|
+
with open(f"{outdir}/logs/R.inbigg.txt", 'a') as f:
|
|
287
|
+
print(f"Reactions '{'; '.join(kr_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{rid}').", file=f)
|
|
247
288
|
|
|
248
289
|
|
|
249
290
|
# add reaction to model
|
|
250
|
-
response = add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, 'R')
|
|
291
|
+
response = add_reaction(logger, model, rid, authors, row, kr_ids, kegg_reaction_to_others, 'R', outdir)
|
|
251
292
|
if response == 1: return 1
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
# communicate progress:
|
|
296
|
+
cnt += 1
|
|
297
|
+
msg = f"Done {cnt}/{len(db['R'])}!"
|
|
298
|
+
print(msg, file=sys.stderr, end='\r')
|
|
299
|
+
print(''.join([' ' for i in range(len(msg))]), file=sys.stderr, end='\r')
|
|
252
300
|
|
|
253
301
|
|
|
302
|
+
# check goodbefore reaching:
|
|
254
303
|
if goodbefore != None and goodbefore_reached == False:
|
|
255
|
-
logger.
|
|
304
|
+
logger.warning(f"Reaction '{goodbefore}' never reached. Are you sure about your --goodbefore?")
|
|
256
305
|
|
|
257
306
|
|
|
258
307
|
return model
|
|
259
308
|
|
|
260
309
|
|
|
261
310
|
|
|
262
|
-
def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_to_others, goodbefore, onlyauthor):
|
|
311
|
+
def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_to_others, outdir, goodbefore, onlyauthor):
|
|
263
312
|
goodbefore_reached = False
|
|
264
|
-
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
logger.info("Parsing transport reactions ('T' sheet)...")
|
|
265
316
|
|
|
266
317
|
|
|
267
318
|
def clone_to_external(model, mid_c, mid_e):
|
|
319
|
+
# given an existing '_c' M, create its '_e' equivalent
|
|
268
320
|
|
|
269
321
|
m = cobra.Metabolite(f'{mid_e}')
|
|
270
322
|
model.add_metabolites([m])
|
|
@@ -281,10 +333,12 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
281
333
|
|
|
282
334
|
|
|
283
335
|
def add_exchange_reaction(model, mid_e):
|
|
336
|
+
# given an existing '_e' M, create the corresponding EX_change reaction
|
|
284
337
|
|
|
285
338
|
r = cobra.Reaction(f'EX_{mid_e}')
|
|
286
339
|
model.add_reactions([r])
|
|
287
340
|
r = model.reactions.get_by_id(f'EX_{mid_e}')
|
|
341
|
+
|
|
288
342
|
r.name = f"Exchange for {model.metabolites.get_by_id(mid_e).name}"
|
|
289
343
|
r.build_reaction_from_string(f'{mid_e} --> ')
|
|
290
344
|
r.bounds = (0, 1000)
|
|
@@ -294,22 +348,15 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
294
348
|
|
|
295
349
|
|
|
296
350
|
|
|
297
|
-
|
|
298
|
-
# get all already inserted metabolites
|
|
351
|
+
# get all already inserted metabolites and reactions
|
|
299
352
|
mids_parsed = [m.id for m in model.metabolites]
|
|
300
353
|
rids_parsed = [r.id for r in model.reactions]
|
|
301
354
|
|
|
302
355
|
|
|
303
|
-
#
|
|
304
|
-
clone_to_external(model, 'h_c', 'h_e')
|
|
305
|
-
mids_parsed.append('h_e')
|
|
306
|
-
add_exchange_reaction(model, 'h_e')
|
|
307
|
-
rids_parsed.append(f'EX_h_e')
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
# parse T:
|
|
311
|
-
logger.debug("Parsing transporters...")
|
|
356
|
+
# parse T (row by row):
|
|
312
357
|
db['T'] = db['T'].set_index('rid', drop=True, verify_integrity=True)
|
|
358
|
+
cnt = 0 # counter for parsed records
|
|
359
|
+
msg = '' # to be cleared
|
|
313
360
|
for rid, row in db['T'].iterrows():
|
|
314
361
|
|
|
315
362
|
|
|
@@ -320,10 +367,16 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
320
367
|
goodbefore_reached = True
|
|
321
368
|
|
|
322
369
|
|
|
370
|
+
# avoid duplicates!
|
|
371
|
+
if rid in rids_parsed:
|
|
372
|
+
logger.error(f"Tranport '{rid}' has ID identical to previously added reaction!")
|
|
373
|
+
return 1
|
|
374
|
+
|
|
375
|
+
|
|
323
376
|
# manage goodbefore/onlyauthor
|
|
324
377
|
if goodbefore != None and goodbefore_reached:
|
|
325
378
|
if onlyauthor == None:
|
|
326
|
-
logger.
|
|
379
|
+
logger.warning(f"Skipping transport '{rid}' as requested with --goodbefore[2] '{goodbefore}'.")
|
|
327
380
|
continue
|
|
328
381
|
|
|
329
382
|
|
|
@@ -337,7 +390,7 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
337
390
|
if goodbefore != None and goodbefore_reached:
|
|
338
391
|
if onlyauthor != None and onlyauthor not in authors:
|
|
339
392
|
authors_string = '; '.join(authors)
|
|
340
|
-
logger.
|
|
393
|
+
logger.warning(f"Skipping transport '{rid}' (authors '{authors_string}') as requested with --goodbefore[2] '{goodbefore}' and --onlyauthor '{onlyauthor}'.")
|
|
341
394
|
continue
|
|
342
395
|
|
|
343
396
|
|
|
@@ -346,7 +399,7 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
346
399
|
if response == 1: return 1
|
|
347
400
|
|
|
348
401
|
|
|
349
|
-
#
|
|
402
|
+
# parse 'kr':
|
|
350
403
|
if pnd.isna(row['kr']):
|
|
351
404
|
logger.error(f"Reaction '{rid}' has missing KEGG annotation (kr): '{row['kr']}'.")
|
|
352
405
|
return 1
|
|
@@ -354,37 +407,39 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
354
407
|
kr_ids = row['kr'].split(';')
|
|
355
408
|
kr_ids = [i.strip() for i in kr_ids]
|
|
356
409
|
for kr_id in kr_ids:
|
|
410
|
+
#
|
|
411
|
+
# check if 'kr' codes are real
|
|
357
412
|
if kr_id not in idcollection_dict['kr']:
|
|
358
413
|
logger.error(f"Reaction '{rid}' has invalid KEGG annotation (kr): '{kr_id}'.")
|
|
359
414
|
return 1
|
|
360
|
-
else:
|
|
415
|
+
else:
|
|
416
|
+
# no 'kr' for the majority of transport reactions!
|
|
417
|
+
kr_ids = []
|
|
361
418
|
|
|
362
419
|
|
|
363
420
|
# check GPR:
|
|
364
|
-
response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'T')
|
|
421
|
+
response = check_gpr(logger, rid, row, kr_ids, idcollection_dict, 'T', outdir)
|
|
365
422
|
if response == 1: return 1
|
|
366
423
|
|
|
367
424
|
|
|
368
|
-
#
|
|
369
|
-
involved_mids = row['rstring'].split(' ')
|
|
370
|
-
involved_mids = [i for i in involved_mids if i not in ['-->', '<=>']]
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
# the external metabolite must be already modeled as cytosolic
|
|
425
|
+
# iterate the involved metabolites
|
|
426
|
+
involved_mids = row['rstring'].split(' ') # dirty (arrows, coefficints are included)
|
|
374
427
|
for mid in involved_mids:
|
|
375
428
|
if mid.endswith('_e'):
|
|
376
429
|
mid_e = mid
|
|
377
430
|
mid_c = mid.rsplit('_', 1)[0] + '_c'
|
|
431
|
+
#
|
|
432
|
+
# the cytosolic counterpart must be already modeled:
|
|
378
433
|
if mid_c not in mids_parsed:
|
|
379
434
|
logger.error(f"{rid}: the metabolite '{mid_c}', counterpart of '{mid_e}', was not previously modeled.")
|
|
380
435
|
return 1
|
|
381
|
-
|
|
382
|
-
# add external metabolite to model
|
|
436
|
+
#
|
|
437
|
+
# clone to add external metabolite to model
|
|
383
438
|
if mid_e not in mids_parsed:
|
|
384
439
|
clone_to_external(model, mid_c, mid_e)
|
|
385
440
|
mids_parsed.append(mid_e)
|
|
386
|
-
|
|
387
|
-
# add exchange reaction to model
|
|
441
|
+
#
|
|
442
|
+
# add corresponding exchange reaction to model
|
|
388
443
|
if f'EX_{mid_e}' not in rids_parsed:
|
|
389
444
|
add_exchange_reaction(model, mid_e)
|
|
390
445
|
rids_parsed.append(f'EX_{mid_e}')
|
|
@@ -398,16 +453,26 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
398
453
|
for eqbiggid in kegg_reaction_to_others[kr_id]['bigg.reaction']:
|
|
399
454
|
eqbiggids.add(eqbiggid)
|
|
400
455
|
if rid not in eqbiggids and eqbiggids != set():
|
|
401
|
-
|
|
456
|
+
with open(f"{outdir}/logs/T.inbigg.txt", 'a') as f:
|
|
457
|
+
print(f"Reactions '{'; '.join(kr_ids)}' already in BiGG as {eqbiggids} ({authors} gave '{rid}').", file=f)
|
|
402
458
|
|
|
403
459
|
|
|
404
460
|
# add reaction to model
|
|
405
|
-
response = add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, 'T')
|
|
461
|
+
response = add_reaction(logger, model, rid, authors, row, kr_ids, kegg_reaction_to_others, 'T', outdir)
|
|
406
462
|
if response == 1: return 1
|
|
463
|
+
rids_parsed.append(rid) # update list of rids in model
|
|
464
|
+
|
|
407
465
|
|
|
466
|
+
# communicate progress:
|
|
467
|
+
cnt += 1
|
|
468
|
+
msg = f"Done {cnt}/{len(db['T'])}!"
|
|
469
|
+
print(msg, file=sys.stderr, end='\r')
|
|
470
|
+
print(''.join([' ' for i in range(len(msg))]), file=sys.stderr, end='\r')
|
|
408
471
|
|
|
472
|
+
|
|
473
|
+
# check goodbefore reaching:
|
|
409
474
|
if goodbefore != None and goodbefore_reached == False:
|
|
410
|
-
logger.
|
|
475
|
+
logger.warning(f"Transport '{goodbefore}' never reached. Are you sure about your --goodbefore?")
|
|
411
476
|
|
|
412
477
|
|
|
413
478
|
return model
|
|
@@ -416,6 +481,10 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
|
|
|
416
481
|
|
|
417
482
|
def introduce_sinks_demands(logger, model):
|
|
418
483
|
|
|
484
|
+
|
|
485
|
+
logger.debug("Introducing sinks and demands...")
|
|
486
|
+
|
|
487
|
+
|
|
419
488
|
sinks = get_manual_sinks()
|
|
420
489
|
demands = get_manual_demands()
|
|
421
490
|
|
gsrap/parsedb/manual.py
CHANGED
|
@@ -15,8 +15,27 @@ def get_krs_to_exclude():
|
|
|
15
15
|
return set([
|
|
16
16
|
'R12328', 'R05190', # general forms of fatty acid biosynthesis
|
|
17
17
|
'R01347', 'R01348', 'R04121', # general forms of fatty acid degradation
|
|
18
|
+
'R11671', # multi-step fatty acids reactions
|
|
19
|
+
'R07860', 'R01317', 'R07064', # aspecific fatty acid reactions
|
|
20
|
+
'R11311', 'R11256', 'R11308', 'R08772', 'R08770', # polymer reactions
|
|
21
|
+
|
|
22
|
+
# inconclusive due to simplification
|
|
23
|
+
'R12425',
|
|
24
|
+
|
|
25
|
+
# "incomplete reaction" / "unclear reaction"
|
|
26
|
+
'R08414', 'R13037', 'R13034', 'R13036', 'R02825', 'R11178', 'R13325', 'R12855', 'R12856', 'R09809',
|
|
27
|
+
'R09808', 'R08035', 'R08034', 'R11470', 'R09360', 'R08139', 'R08318', 'R07859', 'R09361', 'R09349',
|
|
28
|
+
'R13149', 'R13066', 'R11467', 'R11255', 'R08986', 'R13156', 'R13074', 'R13150', 'R11302', 'R11388',
|
|
29
|
+
'R08341', 'R13147', 'R13155', 'R08339', 'R11466', 'R08272', 'R09348', 'R09362', 'R11107', 'R08340',
|
|
30
|
+
'R07940', 'R11120', 'R11245', 'R08269', 'R11131', 'R07943', 'R08342', 'R06766', 'R12584', 'R09852',
|
|
31
|
+
'R08268', 'R11129', 'R06702', 'R08866', 'R12555', 'R08927', 'R08343', 'R13067', 'R13069', 'R13068',
|
|
32
|
+
'R05670', 'R06694', 'R09851', 'R11465', 'R08928', 'R11389', 'R11464', 'R13087', 'R12586', 'R11304',
|
|
33
|
+
'R08984', 'R11254', 'R13165', 'R12884', 'R08865', 'R13151', 'R08132', 'R08929', 'R06701', 'R08345',
|
|
34
|
+
'R11365', 'R11303', 'R06670', 'R11364', 'R09347', 'R08293', 'R11362', 'R03872', 'R06339', 'R10481',
|
|
35
|
+
'R10480', 'R13341', 'R06505', 'R06504', 'R06326', 'R06470', 'R06467', 'R06327', 'R06503', 'R09847',
|
|
36
|
+
'R13479', 'R13447', 'R13478', 'R07510', 'R04546', 'R06468', 'R05624', 'R10706', 'R13454', 'R13556',
|
|
37
|
+
'R13455', 'R12691',
|
|
18
38
|
])
|
|
19
|
-
|
|
20
39
|
|
|
21
40
|
|
|
22
41
|
|
|
@@ -29,12 +48,13 @@ def get_rids_with_mancheck_gpr():
|
|
|
29
48
|
return rids_mancheck_gpr
|
|
30
49
|
|
|
31
50
|
|
|
51
|
+
|
|
32
52
|
def get_rids_with_mancheck_balancing():
|
|
33
53
|
rids_mancheck_bal = [ # same reactions involving ATP can be reversible
|
|
34
54
|
|
|
35
55
|
# SECTION "reversible both in KEGG and MetaCyc"
|
|
36
56
|
'PGK', 'SUCOAS', 'ADK1', 'GK1', 'NNATr', 'CYTK1', 'ACKr',
|
|
37
|
-
'DGK1', 'PPAKr', 'ATPSr', 'NDPK10',
|
|
57
|
+
'DGK1', 'PPAKr', 'ATPSr', 'NDPK10', 'BUTKr',
|
|
38
58
|
|
|
39
59
|
### SECTION "reversible in KEGG but not in MetaCyc" ###
|
|
40
60
|
'CYTK2', # clearly reversible in KEGG but not in MetaCyc (RXN-7913)
|