gsrap 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py +6 -5
- gsrap/__init__.py +6 -5
- gsrap/assets/kegg_compound_to_others.pickle +0 -0
- gsrap/assets/kegg_reaction_to_others.pickle +0 -0
- gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +3 -0
- gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +168 -93
- gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +55 -51
- gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +7 -1
- gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +8 -8
- gsrap/commons/biomass.py +3 -0
- gsrap/commons/downloads.py +168 -93
- gsrap/commons/escherutils.py +55 -51
- gsrap/commons/excelhub.py +7 -1
- gsrap/commons/metrics.py +8 -8
- gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +2 -2
- gsrap/mkmodel/mkmodel.py +2 -2
- gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +43 -18
- gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +2 -1
- gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +132 -63
- gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +23 -3
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +59 -49
- gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +90 -53
- gsrap/parsedb/annotation.py +43 -18
- gsrap/parsedb/completeness.py +2 -1
- gsrap/parsedb/introduce.py +132 -63
- gsrap/parsedb/manual.py +22 -2
- gsrap/parsedb/parsedb.py +59 -49
- gsrap/parsedb/repeating.py +90 -53
- gsrap/runsims/.ipynb_checkpoints/runsims-checkpoint.py +2 -1
- gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +0 -1
- gsrap/runsims/runsims.py +2 -1
- gsrap/runsims/simplegrowth.py +0 -1
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/METADATA +5 -2
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/RECORD +37 -37
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/WHEEL +1 -1
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/entry_points.txt +0 -0
- {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info/licenses}/LICENSE.txt +0 -0
|
@@ -14,9 +14,28 @@ def get_deprecated_kos():
|
|
|
14
14
|
def get_krs_to_exclude():
|
|
15
15
|
return set([
|
|
16
16
|
'R12328', 'R05190', # general forms of fatty acid biosynthesis
|
|
17
|
-
'R01347', 'R04121', # general forms of fatty acid degradation
|
|
17
|
+
'R01347', 'R01348', 'R04121', # general forms of fatty acid degradation
|
|
18
|
+
'R11671', # multi-step fatty acids reactions
|
|
19
|
+
'R07860', 'R01317', 'R07064', # aspecific fatty acid reactions
|
|
20
|
+
'R11311', 'R11256', 'R11308', 'R08772', 'R08770', # polymer reactions
|
|
21
|
+
|
|
22
|
+
# inconclusive due to simplification
|
|
23
|
+
'R12425',
|
|
24
|
+
|
|
25
|
+
# "incomplete reaction" / "unclear reaction"
|
|
26
|
+
'R08414', 'R13037', 'R13034', 'R13036', 'R02825', 'R11178', 'R13325', 'R12855', 'R12856', 'R09809',
|
|
27
|
+
'R09808', 'R08035', 'R08034', 'R11470', 'R09360', 'R08139', 'R08318', 'R07859', 'R09361', 'R09349',
|
|
28
|
+
'R13149', 'R13066', 'R11467', 'R11255', 'R08986', 'R13156', 'R13074', 'R13150', 'R11302', 'R11388',
|
|
29
|
+
'R08341', 'R13147', 'R13155', 'R08339', 'R11466', 'R08272', 'R09348', 'R09362', 'R11107', 'R08340',
|
|
30
|
+
'R07940', 'R11120', 'R11245', 'R08269', 'R11131', 'R07943', 'R08342', 'R06766', 'R12584', 'R09852',
|
|
31
|
+
'R08268', 'R11129', 'R06702', 'R08866', 'R12555', 'R08927', 'R08343', 'R13067', 'R13069', 'R13068',
|
|
32
|
+
'R05670', 'R06694', 'R09851', 'R11465', 'R08928', 'R11389', 'R11464', 'R13087', 'R12586', 'R11304',
|
|
33
|
+
'R08984', 'R11254', 'R13165', 'R12884', 'R08865', 'R13151', 'R08132', 'R08929', 'R06701', 'R08345',
|
|
34
|
+
'R11365', 'R11303', 'R06670', 'R11364', 'R09347', 'R08293', 'R11362', 'R03872', 'R06339', 'R10481',
|
|
35
|
+
'R10480', 'R13341', 'R06505', 'R06504', 'R06326', 'R06470', 'R06467', 'R06327', 'R06503', 'R09847',
|
|
36
|
+
'R13479', 'R13447', 'R13478', 'R07510', 'R04546', 'R06468', 'R05624', 'R10706', 'R13454', 'R13556',
|
|
37
|
+
'R13455', 'R12691',
|
|
18
38
|
])
|
|
19
|
-
|
|
20
39
|
|
|
21
40
|
|
|
22
41
|
|
|
@@ -29,12 +48,13 @@ def get_rids_with_mancheck_gpr():
|
|
|
29
48
|
return rids_mancheck_gpr
|
|
30
49
|
|
|
31
50
|
|
|
51
|
+
|
|
32
52
|
def get_rids_with_mancheck_balancing():
|
|
33
53
|
rids_mancheck_bal = [ # same reactions involving ATP can be reversible
|
|
34
54
|
|
|
35
55
|
# SECTION "reversible both in KEGG and MetaCyc"
|
|
36
56
|
'PGK', 'SUCOAS', 'ADK1', 'GK1', 'NNATr', 'CYTK1', 'ACKr',
|
|
37
|
-
'DGK1', 'PPAKr', 'ATPSr', 'NDPK10',
|
|
57
|
+
'DGK1', 'PPAKr', 'ATPSr', 'NDPK10', 'BUTKr',
|
|
38
58
|
|
|
39
59
|
### SECTION "reversible in KEGG but not in MetaCyc" ###
|
|
40
60
|
'CYTK2', # clearly reversible in KEGG but not in MetaCyc (RXN-7913)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import pickle
|
|
3
3
|
from importlib import resources
|
|
4
|
+
import shutil
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
import cobra
|
|
@@ -15,7 +16,6 @@ from ..commons import introduce_universal_biomass
|
|
|
15
16
|
from ..commons import write_excel_model
|
|
16
17
|
from ..commons import show_contributions
|
|
17
18
|
from ..commons import adjust_biomass_precursors
|
|
18
|
-
from ..commons import count_undrawn_rids
|
|
19
19
|
from ..commons import count_undrawn_rids_focus
|
|
20
20
|
|
|
21
21
|
from ..commons import format_expansion
|
|
@@ -49,13 +49,20 @@ from .cycles import verify_egc_all
|
|
|
49
49
|
def main(args, logger):
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
|
|
52
|
+
|
|
53
|
+
###### PRE-PARSING
|
|
54
|
+
|
|
53
55
|
# adjust out folder path
|
|
54
56
|
while args.outdir.endswith('/'):
|
|
55
57
|
args.outdir = args.outdir[:-1]
|
|
56
58
|
os.makedirs(f'{args.outdir}/', exist_ok=True)
|
|
57
59
|
|
|
58
60
|
|
|
61
|
+
# prepare empty logs folder
|
|
62
|
+
shutil.rmtree(f'{args.outdir}/logs', ignore_errors=True)
|
|
63
|
+
os.makedirs(f'{args.outdir}/logs', exist_ok=True)
|
|
64
|
+
|
|
65
|
+
|
|
59
66
|
# check compatibility of input parameters
|
|
60
67
|
if args.progress==False and args.module==True:
|
|
61
68
|
logger.error(f"You cannot ask --module without --progress (see --help).")
|
|
@@ -81,8 +88,6 @@ def main(args, logger):
|
|
|
81
88
|
if args.onlyauthor == '-': args.onlyauthor = None
|
|
82
89
|
|
|
83
90
|
|
|
84
|
-
|
|
85
|
-
###### LOAD LOCAL RESOURCES
|
|
86
91
|
# check and extract the required 'gsrap.maps' file
|
|
87
92
|
if os.path.exists(f'{args.inmaps}') == False:
|
|
88
93
|
logger.error(f"File 'gsrap.maps' not found at {args.inmaps}.")
|
|
@@ -105,152 +110,157 @@ def main(args, logger):
|
|
|
105
110
|
with open(asset_path, 'rb') as handle:
|
|
106
111
|
kegg_reaction_to_others = pickle.load(handle)
|
|
107
112
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
###### FORMAT/CHECK FOCUSING ARGS
|
|
113
|
+
|
|
111
114
|
# format the --eggnog param
|
|
112
115
|
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
113
116
|
|
|
117
|
+
|
|
114
118
|
# check the --taxon param
|
|
115
119
|
if args.taxon != '-':
|
|
116
120
|
response = check_taxon(logger, args.taxon, idcollection_dict)
|
|
117
121
|
if response == 1: return 1
|
|
118
122
|
|
|
123
|
+
|
|
119
124
|
# get the kegg organism if requested
|
|
120
125
|
if args.keggorg != '-':
|
|
121
126
|
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
122
127
|
if response == 1: return 1
|
|
123
128
|
|
|
124
129
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
logger.info("Downloading gsrap database...")
|
|
129
|
-
response = get_databases(logger)
|
|
130
|
+
# download dbuni, dbexp and lastmap:
|
|
131
|
+
logger.info("Downloading updated gsrap assets...")
|
|
132
|
+
response = get_databases(logger, map_id=args.focus)
|
|
130
133
|
if type(response)==int: return 1
|
|
131
134
|
else: dbuni, dbexp, lastmap = response
|
|
132
135
|
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
###### PARSING
|
|
133
139
|
|
|
134
140
|
# show simple statistics of contributions
|
|
135
141
|
response = show_contributions(logger, dbuni, args.goodbefore)
|
|
136
142
|
if response == 1: return 1
|
|
137
|
-
|
|
138
|
-
|
|
139
143
|
|
|
140
|
-
|
|
144
|
+
|
|
141
145
|
# create the model
|
|
142
146
|
universe = cobra.Model('universe')
|
|
143
|
-
logger.info("Parsing gsrap database...")
|
|
144
147
|
|
|
145
|
-
|
|
146
|
-
|
|
148
|
+
|
|
149
|
+
# introduce M
|
|
150
|
+
universe = introduce_metabolites(logger, dbuni, universe, idcollection_dict, kegg_compound_to_others, args.outdir, args.goodbefore[0], args.onlyauthor)
|
|
147
151
|
if type(universe)==int: return 1
|
|
148
|
-
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# introduce R
|
|
155
|
+
universe = introduce_reactions(logger, dbuni, universe, idcollection_dict, kegg_reaction_to_others, args.outdir, args.goodbefore[1], args.onlyauthor)
|
|
149
156
|
if type(universe)==int: return 1
|
|
150
|
-
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# introduce T
|
|
160
|
+
universe = introduce_transporters(logger, dbuni, universe, idcollection_dict, kegg_reaction_to_others, args.outdir, args.goodbefore[2], args.onlyauthor)
|
|
151
161
|
if type(universe)==int: return 1
|
|
152
162
|
|
|
163
|
+
|
|
153
164
|
# introduce sinks / demands (exchanges were included during T)
|
|
154
165
|
universe = introduce_sinks_demands(logger, universe)
|
|
155
166
|
if type(universe)==int: return 1
|
|
156
167
|
|
|
157
|
-
|
|
168
|
+
|
|
169
|
+
# introduce universal biomass
|
|
158
170
|
universe = introduce_universal_biomass(logger, dbexp, universe)
|
|
159
171
|
if type(universe)==int: return 1
|
|
160
172
|
|
|
161
173
|
|
|
162
|
-
|
|
163
|
-
###### ANNOTATION
|
|
164
174
|
# translate Gs to symbols and annotate them (EC, COG, GO, ...)
|
|
165
175
|
universe = translate_annotate_genes(logger, universe, idcollection_dict)
|
|
166
176
|
if type(universe)==int: return 1
|
|
167
177
|
|
|
168
|
-
|
|
178
|
+
|
|
179
|
+
# introduce collections (groups of Rs as maps/modules)
|
|
169
180
|
universe = set_up_groups(logger, universe, idcollection_dict)
|
|
170
181
|
if type(universe)==int: return 1
|
|
171
182
|
|
|
172
183
|
|
|
173
184
|
|
|
174
|
-
|
|
185
|
+
###### POST-PARSING
|
|
186
|
+
|
|
187
|
+
# log metrics
|
|
175
188
|
log_metrics(logger, universe)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
# check absence of unbalancing above the threshold
|
|
176
192
|
log_unbalances(logger, universe)
|
|
177
193
|
|
|
178
194
|
|
|
179
|
-
|
|
180
|
-
###### CHECKS 1
|
|
181
195
|
# check universe completeness
|
|
182
196
|
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.taxon, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
183
197
|
if type(df_C)==int: return 1
|
|
184
198
|
|
|
185
199
|
|
|
186
|
-
|
|
187
|
-
###### POLISHING 1
|
|
188
200
|
# remove disconnected metabolites
|
|
189
201
|
if args.keepdisconn == False:
|
|
190
202
|
universe = remove_disconnected(logger, universe) # can be commented when using booster.py
|
|
191
203
|
|
|
192
204
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
# check erroneous EGCs
|
|
196
|
-
verify_egc_all(logger, universe, args.outdir)
|
|
197
|
-
|
|
205
|
+
# avoid time-consuming activities
|
|
206
|
+
if not args.justparse:
|
|
198
207
|
|
|
199
208
|
|
|
200
|
-
|
|
209
|
+
# check erroneous EGCs
|
|
210
|
+
verify_egc_all(logger, universe, args.outdir)
|
|
211
|
+
|
|
201
212
|
|
|
202
|
-
###### CHECKS 3
|
|
203
213
|
# check growth on minimal media
|
|
204
|
-
df_G = grow_on_media(logger, universe, dbexp, args.media,
|
|
214
|
+
df_G = grow_on_media(logger, universe, dbexp, args.media, fva=False, universe_in_parsedb=True)
|
|
205
215
|
if type(df_G)==int: return 1
|
|
206
216
|
|
|
217
|
+
|
|
207
218
|
# check blocked biomass precursors
|
|
208
219
|
cond_col_dict = adjust_biomass_precursors(logger, universe, universe, 1.0)
|
|
209
220
|
df_E = precursors_on_media(logger, universe, universe, dbexp, args.media, cond_col_dict, args.precursors)
|
|
210
221
|
if type(df_E)==int: return 1
|
|
211
222
|
|
|
223
|
+
|
|
212
224
|
# check blocked metabolites / dead-ends
|
|
213
225
|
df_S = biosynthesis_on_media(logger, universe, dbexp, args.media, args.biosynth)
|
|
214
226
|
if type(df_S)==int: return 1
|
|
215
227
|
|
|
216
228
|
|
|
217
|
-
|
|
218
|
-
###### POLISHING 2
|
|
219
|
-
# reset growth environment before saving the model
|
|
229
|
+
# reset growth environment before saving the model (changed during growth sims)
|
|
220
230
|
gempipe.reset_growth_env(universe)
|
|
221
231
|
|
|
232
|
+
|
|
222
233
|
# initialize model
|
|
223
234
|
response = initialize_model(logger, universe, dbexp, args.initialize, args.media)
|
|
224
235
|
if response==1: return 1
|
|
225
236
|
|
|
226
237
|
|
|
227
|
-
|
|
228
|
-
###### CHECKS 4
|
|
229
238
|
# compute Memote metrics
|
|
230
239
|
memote_results_dict = get_memote_results_dict(logger, universe)
|
|
231
240
|
|
|
232
241
|
|
|
233
|
-
|
|
234
|
-
# output the universe (even when --justparse)
|
|
242
|
+
# write JSON
|
|
235
243
|
logger.info("Writing universal model...")
|
|
236
244
|
cobra.io.save_json_model(universe, f'{args.outdir}/universe.json')
|
|
237
245
|
logger.info(f"'{args.outdir}/universe.json' created!")
|
|
238
246
|
|
|
239
247
|
|
|
248
|
+
# avoid time-consuming activities
|
|
240
249
|
if not args.justparse:
|
|
241
250
|
|
|
242
|
-
|
|
251
|
+
|
|
252
|
+
# write XML
|
|
243
253
|
cobra.io.write_sbml_model(universe, f'{args.outdir}/universe.xml') # groups are saved only to SBML
|
|
244
254
|
logger.info(f"'{args.outdir}/universe.xml' created!")
|
|
245
255
|
force_id_on_sbml(f'{args.outdir}/universe.xml', 'universe') # force introduction of the 'id=""' field
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
# write XLSX
|
|
246
259
|
sheets_dict = write_excel_model(universe, f'{args.outdir}/universe.parsedb.xlsx', args.nofigs, memote_results_dict, df_E, None, None, df_S, df_C)
|
|
247
260
|
logger.info(f"'{args.outdir}/universe.parsedb.xlsx' created!")
|
|
248
261
|
|
|
249
|
-
|
|
250
262
|
|
|
251
|
-
|
|
252
|
-
# check if universal escher map is updated:
|
|
253
|
-
count_undrawn_rids(logger, universe, lastmap, args.focus)
|
|
263
|
+
# check if escher map is updated:
|
|
254
264
|
if args.focus != '-':
|
|
255
265
|
count_undrawn_rids_focus(logger, universe, lastmap, args.focus, args.outdir)
|
|
256
266
|
|
|
@@ -8,9 +8,13 @@ from .manual import get_rids_with_mancheck_balancing
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype
|
|
11
|
+
def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype, outdir):
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
# define the itemtype:
|
|
15
|
+
if addtype=='R':
|
|
16
|
+
itemtype = 'Reaction'
|
|
17
|
+
else: itemtype = 'Transporter'
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
# check presence of the GPR
|
|
@@ -53,7 +57,8 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
|
|
|
53
57
|
if ko_id not in ko_for_rid and ko_id != 'spontaneous' and ko_id != 'orphan':
|
|
54
58
|
if kr_id != 'RXXXXX':
|
|
55
59
|
if rid not in get_rids_with_mancheck_gpr():
|
|
56
|
-
|
|
60
|
+
with open(f"{outdir}/logs/R.orthlink.txt", 'a') as f:
|
|
61
|
+
print(f"Ortholog '{ko_id}' should not be linked to reaction '{rid}' (available for {kr_ids}: {ko_for_rid}).", file=f)
|
|
57
62
|
|
|
58
63
|
|
|
59
64
|
# check if some ko_ids are missing from this reaction:
|
|
@@ -68,12 +73,79 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
|
|
|
68
73
|
|
|
69
74
|
|
|
70
75
|
|
|
71
|
-
def
|
|
72
|
-
|
|
76
|
+
def check_rstring_arrow(logger, rid, row, addtype='R'):
|
|
73
77
|
|
|
74
78
|
itemtype = 'Reaction' if addtype=='R' else 'Transporter'
|
|
75
79
|
|
|
76
80
|
|
|
81
|
+
if pnd.isna(row['rstring']):
|
|
82
|
+
logger.error(f"{itemtype} '{rid}' has no definition (rstring).")
|
|
83
|
+
return 1
|
|
84
|
+
if ' --> ' not in row['rstring'] and ' <=> ' not in row['rstring']:
|
|
85
|
+
logger.error(f"{itemtype} '{rid}' has invalid arrow: '{row['rstring']}'.")
|
|
86
|
+
return 1
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
return 0
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def check_author(logger, mrid, row, db, addtype='R'):
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# define itemtype:
|
|
97
|
+
if addtype=='M':
|
|
98
|
+
itemtype = 'Metabolite'
|
|
99
|
+
elif addtype=='R' :
|
|
100
|
+
itemtype = 'Reaction'
|
|
101
|
+
else: itemtype = 'Transporter'
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# check if author was indicated:
|
|
105
|
+
if pnd.isna(row['curator']):
|
|
106
|
+
logger.error(f"{itemtype} '{mrid}' has no curator.")
|
|
107
|
+
return 1
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# check if they are valid authors
|
|
111
|
+
authors = set()
|
|
112
|
+
for author in row['curator'].split(';'):
|
|
113
|
+
author = author.strip()
|
|
114
|
+
authors.add(author)
|
|
115
|
+
if author not in db['curators']['username'].to_list():
|
|
116
|
+
logger.error(f"{itemtype} '{mrid}' has invalid curator: '{author}'.")
|
|
117
|
+
return 1
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
return list(authors)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_curator_notes(logger, row):
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# notes are separated by ';'
|
|
128
|
+
notes = []
|
|
129
|
+
if pnd.isna(row['notes']) == False:
|
|
130
|
+
for i in row['notes'].strip().split(';'):
|
|
131
|
+
notes.append(i.strip())
|
|
132
|
+
if notes == ['-']:
|
|
133
|
+
notes = []
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
return notes
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def add_reaction(logger, model, rid, authors, row, kr_ids, kegg_reaction_to_others, addtype, outdir):
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# define the itemtype:
|
|
144
|
+
if addtype=='R':
|
|
145
|
+
itemtype = 'Reaction'
|
|
146
|
+
else: itemtype = 'Transporter'
|
|
147
|
+
|
|
148
|
+
|
|
77
149
|
# create a fresh reaction
|
|
78
150
|
r = cobra.Reaction(rid)
|
|
79
151
|
model.add_reactions([r])
|
|
@@ -95,7 +167,8 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
|
|
|
95
167
|
# handle GPR
|
|
96
168
|
r.gene_reaction_rule = row['gpr_manual'].strip()
|
|
97
169
|
if r.gene_reaction_rule == 'orphan':
|
|
98
|
-
|
|
170
|
+
# don't want 'orphan' as artificial gene in addition to 'spontaneous'!
|
|
171
|
+
r.gene_reaction_rule = ''
|
|
99
172
|
r.update_genes_from_gpr()
|
|
100
173
|
|
|
101
174
|
|
|
@@ -105,22 +178,29 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
|
|
|
105
178
|
logger.error(f"Metabolite '{m.id}' appears in '{r.id}' but was not previously defined.")
|
|
106
179
|
return 1
|
|
107
180
|
|
|
181
|
+
|
|
182
|
+
# write curators as annotations
|
|
183
|
+
r.annotation['curator_codes'] = authors
|
|
184
|
+
|
|
108
185
|
|
|
109
186
|
# add annotations to model (same order of Memote)
|
|
110
187
|
ankeys = [
|
|
111
188
|
'rhea', 'kegg.reaction', 'seed.reaction', 'metanetx.reaction',
|
|
112
189
|
'bigg.reaction', 'reactome', 'ec-code', 'brenda', 'biocyc',
|
|
113
190
|
]
|
|
191
|
+
#
|
|
114
192
|
# initialize sets:
|
|
115
193
|
for ankey in ankeys:
|
|
116
194
|
if ankey == 'kegg.reaction': r.annotation[ankey] = set(kr_ids) - set(['RXXXXX'])
|
|
117
195
|
else: r.annotation[ankey] = set()
|
|
196
|
+
#
|
|
118
197
|
# populate sets:
|
|
119
198
|
for kr_id in kr_ids:
|
|
120
199
|
if kr_id != 'RXXXXX':
|
|
121
200
|
if kr_id in kegg_reaction_to_others.keys():
|
|
122
201
|
for ankey in ankeys:
|
|
123
202
|
r.annotation[ankey].update(kegg_reaction_to_others[kr_id][ankey])
|
|
203
|
+
#
|
|
124
204
|
# save as list:
|
|
125
205
|
for ankey in ankeys:
|
|
126
206
|
r.annotation[ankey] = list(r.annotation[ankey])
|
|
@@ -133,6 +213,10 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
|
|
|
133
213
|
r.annotation['sbo'] = ['SBO:0000185'] # transport reaction
|
|
134
214
|
|
|
135
215
|
|
|
216
|
+
# add curator notes
|
|
217
|
+
r.annotation['curator_notes'] = get_curator_notes(logger, row)
|
|
218
|
+
|
|
219
|
+
|
|
136
220
|
# check if unbalanced
|
|
137
221
|
if r.check_mass_balance() != {}:
|
|
138
222
|
logger.error(f"{itemtype} '{r.id}' is unbalanced: {r.check_mass_balance()}.")
|
|
@@ -148,50 +232,3 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
|
|
|
148
232
|
|
|
149
233
|
|
|
150
234
|
return 0
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def check_rstring_arrow(logger, rid, row, addtype='R'):
|
|
155
|
-
|
|
156
|
-
itemtype = 'Reaction' if addtype=='R' else 'Transporter'
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
if pnd.isna(row['rstring']):
|
|
160
|
-
logger.error(f"{itemtype} '{rid}' has no definition (rstring).")
|
|
161
|
-
return 1
|
|
162
|
-
if ' --> ' not in row['rstring'] and ' <=> ' not in row['rstring']:
|
|
163
|
-
logger.error(f"{itemtype} '{rid}' has invalid arrow: '{row['rstring']}'.")
|
|
164
|
-
return 1
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
return 0
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def check_author(logger, mrid, row, db, addtype='R'):
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
if addtype=='M':
|
|
175
|
-
itemtype = 'Metabolite'
|
|
176
|
-
elif addtype=='R' :
|
|
177
|
-
itemtype = 'Reaction'
|
|
178
|
-
else: itemtype = 'Transporter'
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
if pnd.isna(row['author']):
|
|
182
|
-
logger.error(f"{itemtype} '{mrid}' has no author.")
|
|
183
|
-
return 1
|
|
184
|
-
|
|
185
|
-
authors = set()
|
|
186
|
-
for author in row['author'].split(';'):
|
|
187
|
-
author = author.strip()
|
|
188
|
-
authors.add(author)
|
|
189
|
-
if author not in db['authors']['username'].to_list():
|
|
190
|
-
logger.error(f"{itemtype} '{mrid}' has invalid author: '{author}'.")
|
|
191
|
-
return 1
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
return list(authors)
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
gsrap/parsedb/annotation.py
CHANGED
|
@@ -1,23 +1,30 @@
|
|
|
1
|
-
import
|
|
1
|
+
import threading
|
|
2
2
|
|
|
3
|
+
import cobra
|
|
3
4
|
|
|
4
5
|
from .manual import get_deprecated_kos
|
|
5
6
|
from .manual import get_custom_groups
|
|
6
7
|
|
|
8
|
+
from ..commons.downloads import SimpleLoadingWheel
|
|
9
|
+
|
|
7
10
|
|
|
8
11
|
|
|
9
12
|
def translate_annotate_genes(logger, model, idcollection_dict):
|
|
10
13
|
|
|
11
|
-
|
|
12
|
-
|
|
14
|
+
|
|
15
|
+
logger.info("Translating and annotating orthologs...")
|
|
16
|
+
|
|
17
|
+
|
|
13
18
|
ko_to_name = idcollection_dict['ko_to_name']
|
|
14
19
|
ko_to_symbols = idcollection_dict['ko_to_symbols']
|
|
15
20
|
ko_to_ecs = idcollection_dict['ko_to_ecs']
|
|
16
21
|
ko_to_cogs = idcollection_dict['ko_to_cogs']
|
|
17
22
|
ko_to_gos = idcollection_dict['ko_to_gos']
|
|
23
|
+
ko_to_taxa = idcollection_dict['ko_to_taxa']
|
|
18
24
|
|
|
19
25
|
|
|
20
|
-
# translation dicts:
|
|
26
|
+
# create the translation dicts (ko_to_sym):
|
|
27
|
+
# assign to each KO a symbol that is unique in the universe model.
|
|
21
28
|
ko_to_sym = {}
|
|
22
29
|
sym_to_ko = {}
|
|
23
30
|
cnt = 0
|
|
@@ -26,20 +33,23 @@ def translate_annotate_genes(logger, model, idcollection_dict):
|
|
|
26
33
|
continue
|
|
27
34
|
ko = g.id
|
|
28
35
|
cnt += 1
|
|
29
|
-
|
|
36
|
+
#
|
|
37
|
+
# if the ko is deprecated, it was not included in 'ko_to_symbols'
|
|
30
38
|
if ko in get_deprecated_kos():
|
|
31
|
-
# if the ko is deprecated, it was not included in 'ko_to_symbols'
|
|
32
39
|
ko_to_sym[ko] = ko
|
|
33
40
|
sym_to_ko[ko] = ko
|
|
34
41
|
continue
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
42
|
+
#
|
|
43
|
+
# iterate the available symbols for this KO
|
|
44
|
+
for symbol in ko_to_symbols[ko]:
|
|
45
|
+
# take the first available (not yet used)
|
|
46
|
+
if symbol not in sym_to_ko.keys():
|
|
38
47
|
ko_to_sym[ko] = symbol
|
|
39
48
|
sym_to_ko[symbol] = ko
|
|
40
49
|
break
|
|
41
|
-
|
|
42
|
-
|
|
50
|
+
#
|
|
51
|
+
# no symbol was assigned (symbol was already taken by another KO)
|
|
52
|
+
if cnt != len(ko_to_sym):
|
|
43
53
|
cnt_dups = 2
|
|
44
54
|
symbol = list(ko_to_symbols[ko])[0] + f'_{cnt_dups}' # generate a new symbol
|
|
45
55
|
while cnt != len(ko_to_sym): # until a symbol is assigned
|
|
@@ -50,7 +60,6 @@ def translate_annotate_genes(logger, model, idcollection_dict):
|
|
|
50
60
|
symbol = list(ko_to_symbols[ko])[0] + f'_{cnt_dups}' # retry with the next one
|
|
51
61
|
|
|
52
62
|
|
|
53
|
-
|
|
54
63
|
|
|
55
64
|
# insert annotations
|
|
56
65
|
for g in model.genes:
|
|
@@ -67,16 +76,30 @@ def translate_annotate_genes(logger, model, idcollection_dict):
|
|
|
67
76
|
g.annotation['cog'] = list(ko_to_cogs[ko])
|
|
68
77
|
g.annotation['go'] = list(ko_to_gos[ko])
|
|
69
78
|
|
|
79
|
+
# add taxa information
|
|
80
|
+
g.annotation['kingdom'] = list(ko_to_taxa[ko]['kingdom'])
|
|
81
|
+
g.annotation['phylum'] = list(ko_to_taxa[ko]['phylum'])
|
|
82
|
+
|
|
83
|
+
|
|
70
84
|
# add SBO annotation
|
|
71
85
|
g.annotation['sbo'] = ['SBO:0000243'] # demand reaction
|
|
72
86
|
|
|
73
87
|
|
|
74
88
|
|
|
75
|
-
#
|
|
89
|
+
# handle orphan and spontaneous
|
|
76
90
|
translation_dict = ko_to_sym
|
|
77
91
|
translation_dict['orphan'] = 'orphan'
|
|
78
92
|
translation_dict['spontaneous'] = 'spontaneous'
|
|
79
|
-
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# finally apply translations of IDs in a dedicated Thread
|
|
96
|
+
t1 = threading.Thread(target = cobra.manipulation.rename_genes, args=(
|
|
97
|
+
model, translation_dict))
|
|
98
|
+
t1.start()
|
|
99
|
+
slw = SimpleLoadingWheel(msg="Please wait... ")
|
|
100
|
+
while t1.is_alive():
|
|
101
|
+
slw.proceed()
|
|
102
|
+
slw.clear()
|
|
80
103
|
|
|
81
104
|
|
|
82
105
|
return model
|
|
@@ -85,6 +108,8 @@ def translate_annotate_genes(logger, model, idcollection_dict):
|
|
|
85
108
|
|
|
86
109
|
def set_up_groups(logger, model, idcollection_dict):
|
|
87
110
|
|
|
111
|
+
|
|
112
|
+
logger.debug("Introducing groups...")
|
|
88
113
|
|
|
89
114
|
|
|
90
115
|
kr_to_maps = idcollection_dict['kr_to_maps']
|
|
@@ -140,12 +165,12 @@ def set_up_groups(logger, model, idcollection_dict):
|
|
|
140
165
|
custom_groups = get_custom_groups()
|
|
141
166
|
#
|
|
142
167
|
# create a group for transporters on-the-fly
|
|
143
|
-
custom_groups['
|
|
168
|
+
custom_groups['gr_transport'] = []
|
|
144
169
|
for r in model.reactions:
|
|
145
|
-
if len(r.metabolites) == 1: # exchanges
|
|
146
|
-
custom_groups['
|
|
170
|
+
if len(r.metabolites) == 1 and list(r.metabolites)[0].id.rsplit('_',1)[-1] != 'c': # just exchanges (esclude sinks/demands)
|
|
171
|
+
custom_groups['gr_transport'].append(r.id)
|
|
147
172
|
if len(set([m.id.rsplit('_', 1)[-1] for m in r.metabolites])) > 1: # transport reactions
|
|
148
|
-
custom_groups['
|
|
173
|
+
custom_groups['gr_transport'].append(r.id)
|
|
149
174
|
#
|
|
150
175
|
for group_id in custom_groups.keys():
|
|
151
176
|
actual_group = cobra.core.Group(
|
gsrap/parsedb/completeness.py
CHANGED
|
@@ -118,6 +118,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
|
|
|
118
118
|
kr_uni_missing = (kr_uni - kr_ids_modeled) - get_krs_to_exclude()
|
|
119
119
|
kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
|
|
120
120
|
logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
|
|
121
|
+
#logger.warning(f"Copy these: {kr_uni_missing}")
|
|
121
122
|
|
|
122
123
|
|
|
123
124
|
# define the map?????, containing krs not included in maps
|
|
@@ -144,7 +145,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
|
|
|
144
145
|
|
|
145
146
|
# check if 'focus' exist
|
|
146
147
|
if focus != '-' and focus not in map_ids and focus not in md_ids:
|
|
147
|
-
if focus == '
|
|
148
|
+
if focus == 'gr_transport':
|
|
148
149
|
df_coverage = None
|
|
149
150
|
return df_coverage # just the jeneration of 'transport.json' for Escher drawing is needed here
|
|
150
151
|
else:
|