gsrap 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. gsrap/.ipynb_checkpoints/__init__-checkpoint.py +6 -5
  2. gsrap/__init__.py +6 -5
  3. gsrap/assets/kegg_compound_to_others.pickle +0 -0
  4. gsrap/assets/kegg_reaction_to_others.pickle +0 -0
  5. gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +3 -0
  6. gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +168 -93
  7. gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +55 -51
  8. gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +7 -1
  9. gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +8 -8
  10. gsrap/commons/biomass.py +3 -0
  11. gsrap/commons/downloads.py +168 -93
  12. gsrap/commons/escherutils.py +55 -51
  13. gsrap/commons/excelhub.py +7 -1
  14. gsrap/commons/metrics.py +8 -8
  15. gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +2 -2
  16. gsrap/mkmodel/mkmodel.py +2 -2
  17. gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +43 -18
  18. gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +2 -1
  19. gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +132 -63
  20. gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +23 -3
  21. gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +59 -49
  22. gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +90 -53
  23. gsrap/parsedb/annotation.py +43 -18
  24. gsrap/parsedb/completeness.py +2 -1
  25. gsrap/parsedb/introduce.py +132 -63
  26. gsrap/parsedb/manual.py +22 -2
  27. gsrap/parsedb/parsedb.py +59 -49
  28. gsrap/parsedb/repeating.py +90 -53
  29. gsrap/runsims/.ipynb_checkpoints/runsims-checkpoint.py +2 -1
  30. gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +0 -1
  31. gsrap/runsims/runsims.py +2 -1
  32. gsrap/runsims/simplegrowth.py +0 -1
  33. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/METADATA +5 -2
  34. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/RECORD +37 -37
  35. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/WHEEL +1 -1
  36. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info}/entry_points.txt +0 -0
  37. {gsrap-0.9.0.dist-info → gsrap-0.10.1.dist-info/licenses}/LICENSE.txt +0 -0
@@ -14,9 +14,28 @@ def get_deprecated_kos():
 def get_krs_to_exclude():
     return set([
         'R12328', 'R05190', # general forms of fatty acid biosynthesis
-        'R01347', 'R04121', # general forms of fatty acid degradation
+        'R01347', 'R01348', 'R04121', # general forms of fatty acid degradation
+        'R11671', # multi-step fatty acids reactions
+        'R07860', 'R01317', 'R07064', # aspecific fatty acid reactions
+        'R11311', 'R11256', 'R11308', 'R08772', 'R08770', # polymer reactions
+
+        # inconclusive due to semplification
+        'R12425',
+
+        # "incomplete reaction" / "unclear reaction"
+        'R08414', 'R13037', 'R13034', 'R13036', 'R02825', 'R11178', 'R13325', 'R12855', 'R12856', 'R09809',
+        'R09808', 'R08035', 'R08034', 'R11470', 'R09360', 'R08139', 'R08318', 'R07859', 'R09361', 'R09349',
+        'R13149', 'R13066', 'R11467', 'R11255', 'R08986', 'R13156', 'R13074', 'R13150', 'R11302', 'R11388',
+        'R08341', 'R13147', 'R13155', 'R08339', 'R11466', 'R08272', 'R09348', 'R09362', 'R11107', 'R08340',
+        'R07940', 'R11120', 'R11245', 'R08269', 'R11131', 'R07943', 'R08342', 'R06766', 'R12584', 'R09852',
+        'R08268', 'R11129', 'R06702', 'R08866', 'R12555', 'R08927', 'R08343', 'R13067', 'R13069', 'R13068',
+        'R05670', 'R06694', 'R09851', 'R11465', 'R08928', 'R11389', 'R11464', 'R13087', 'R12586', 'R11304',
+        'R08984', 'R11254', 'R13165', 'R12884', 'R08865', 'R13151', 'R08132', 'R08929', 'R06701', 'R08345',
+        'R11365', 'R11303', 'R06670', 'R11364', 'R09347', 'R08293', 'R11362', 'R03872', 'R06339', 'R10481',
+        'R10480', 'R13341', 'R06505', 'R06504', 'R06326', 'R06470', 'R06467', 'R06327', 'R06503', 'R09847',
+        'R13479', 'R13447', 'R13478', 'R07510', 'R04546', 'R06468', 'R05624', 'R10706', 'R13454', 'R13556',
+        'R13455', 'R12691',
     ])
-

 
 
@@ -29,12 +48,13 @@ def get_rids_with_mancheck_gpr():
     return rids_mancheck_gpr
 
 
+
 def get_rids_with_mancheck_balancing():
     rids_mancheck_bal = [ # same reactions involving ATP can be reversible
 
         # SECTION "reversible both in KEGG and MetaCyc"
         'PGK', 'SUCOAS', 'ADK1', 'GK1', 'NNATr', 'CYTK1', 'ACKr',
-        'DGK1', 'PPAKr', 'ATPSr', 'NDPK10',
+        'DGK1', 'PPAKr', 'ATPSr', 'NDPK10', 'BUTKr',
 
         ### SECTION "reversible in KEGG but not in MetaCyc" ###
         'CYTK2', # clearly reversible in KEGG but not in MetaCyc (RXN-7913)
@@ -1,6 +1,7 @@
 import os
 import pickle
 from importlib import resources
+import shutil
 
 
 import cobra
@@ -15,7 +16,6 @@ from ..commons import introduce_universal_biomass
 from ..commons import write_excel_model
 from ..commons import show_contributions
 from ..commons import adjust_biomass_precursors
-from ..commons import count_undrawn_rids
 from ..commons import count_undrawn_rids_focus
 
 from ..commons import format_expansion
@@ -49,13 +49,20 @@ from .cycles import verify_egc_all
 def main(args, logger):
 
 
-    ###### FORMAT ARGS NOT REQUIRING RESOURCES
+
+    ###### PRE-PARSING
+
     # adjust out folder path
     while args.outdir.endswith('/'):
         args.outdir = args.outdir[:-1]
     os.makedirs(f'{args.outdir}/', exist_ok=True)
 
 
+    # prepare empty logs folder
+    shutil.rmtree(f'{args.outdir}/logs', ignore_errors=True)
+    os.makedirs(f'{args.outdir}/logs', exist_ok=True)
+
+
     # check compatibility of input parameters
     if args.progress==False and args.module==True:
         logger.error(f"You cannot ask --module without --progress (see --help).")
@@ -81,8 +88,6 @@ def main(args, logger):
     if args.onlyauthor == '-': args.onlyauthor = None
 
 
-
-    ###### LOAD LOCAL RESOURCES
     # check and extract the required 'gsrap.maps' file
     if os.path.exists(f'{args.inmaps}') == False:
         logger.error(f"File 'gsrap.maps' not found at {args.inmaps}.")
@@ -105,152 +110,157 @@ def main(args, logger):
     with open(asset_path, 'rb') as handle:
         kegg_reaction_to_others = pickle.load(handle)
 
-
-
-    ###### FORMAT/CHECK FOCUSING ARGS
+
     # format the --eggnog param
     args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
 
+
     # check the --taxon param
     if args.taxon != '-':
         response = check_taxon(logger, args.taxon, idcollection_dict)
         if response == 1: return 1
 
+
     # get the kegg organism if requested
     if args.keggorg != '-':
         response = download_keggorg(logger, args.keggorg, args.outdir)
         if response == 1: return 1
 
 
-
-    # DOWNLOAD ONLINE RESOURCES
-    # get dbuni and dbexp:
-    logger.info("Downloading gsrap database...")
-    response = get_databases(logger)
+    # download dbuni, dbexp and lastmap:
+    logger.info("Downloading updated gsrap assets...")
+    response = get_databases(logger, map_id=args.focus)
     if type(response)==int: return 1
     else: dbuni, dbexp, lastmap = response
 
+
+
+    ###### PARSING
 
     # show simple statistics of contributions
     response = show_contributions(logger, dbuni, args.goodbefore)
     if response == 1: return 1
-
-
 
-    ###### RECONSTRUCTION
+
     # create the model
     universe = cobra.Model('universe')
-    logger.info("Parsing gsrap database...")
 
-    # introduce M / R / T
-    universe = introduce_metabolites(logger, dbuni, universe, idcollection_dict, kegg_compound_to_others, args.goodbefore[0], args.onlyauthor)
+
+    # introduce M
+    universe = introduce_metabolites(logger, dbuni, universe, idcollection_dict, kegg_compound_to_others, args.outdir, args.goodbefore[0], args.onlyauthor)
     if type(universe)==int: return 1
-    universe = introduce_reactions(logger, dbuni, universe, idcollection_dict, kegg_reaction_to_others, args.goodbefore[1], args.onlyauthor)
+
+
+    # introduce R
+    universe = introduce_reactions(logger, dbuni, universe, idcollection_dict, kegg_reaction_to_others, args.outdir, args.goodbefore[1], args.onlyauthor)
     if type(universe)==int: return 1
-    universe = introduce_transporters(logger, dbuni, universe, idcollection_dict, kegg_reaction_to_others, args.goodbefore[2], args.onlyauthor)
+
+
+    # introduce T
+    universe = introduce_transporters(logger, dbuni, universe, idcollection_dict, kegg_reaction_to_others, args.outdir, args.goodbefore[2], args.onlyauthor)
     if type(universe)==int: return 1
 
+
     # introduce sinks / demands (exchanges where included during T)
     universe = introduce_sinks_demands(logger, universe)
     if type(universe)==int: return 1
 
-    # introducce biomass
+
+    # introducce universal biomass
    universe = introduce_universal_biomass(logger, dbexp, universe)
    if type(universe)==int: return 1
 
 
-
-    ###### ANNOTATION
     # translate Gs to symbols and annotate them (EC, COG, GO, ...)
     universe = translate_annotate_genes(logger, universe, idcollection_dict)
     if type(universe)==int: return 1
 
-    # introduce collectionas (groups of Rs as maps/modules)
+
+    # introduce collections (groups of Rs as maps/modules)
     universe = set_up_groups(logger, universe, idcollection_dict)
     if type(universe)==int: return 1
 
 
 
-    # # # # # PARSING ENDS HERE # # # # #
+    ###### POST-PARSING
+
+    # log metrics
     log_metrics(logger, universe)
+
+
+    # check absence of unbalancing above the threshold
     log_unbalances(logger, universe)
 
 
-
-    ###### CHECKS 1
     # check universe completness
     df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.taxon, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
     if type(df_C)==int: return 1
 
 
-
-    ###### POLISHING 1
     # remove disconnected metabolites
     if args.keepdisconn == False:
         universe = remove_disconnected(logger, universe) # can be commented when using booster.py
 
 
-
-    ###### CHECKS 2
-    # check erroneous EGCs
-    verify_egc_all(logger, universe, args.outdir)
-
+    # avoid time-consuming activities
+    if not args.justparse:
 
 
-    if not args.justparse:
+
+        # check erroneous EGCs
+        verify_egc_all(logger, universe, args.outdir)
+
 
-        ###### CHECKS 3
         # check growth on minmal media
-        df_G = grow_on_media(logger, universe, dbexp, args.media, '-', True)
+        df_G = grow_on_media(logger, universe, dbexp, args.media, fva=False, universe_in_parsedb=True)
         if type(df_G)==int: return 1
 
+
         # check blocked biomass precursors
         cond_col_dict = adjust_biomass_precursors(logger, universe, universe, 1.0)
         df_E = precursors_on_media(logger, universe, universe, dbexp, args.media, cond_col_dict, args.precursors)
         if type(df_E)==int: return 1
 
+
         # check blocked metabolites / dead-ends
         df_S = biosynthesis_on_media(logger, universe, dbexp, args.media, args.biosynth)
         if type(df_S)==int: return 1
 
 
-
-        ###### POLISHING 2
-        # reset growth environment befor saving the model
+        # reset growth environment befor saving the model (changed during growth sims)
         gempipe.reset_growth_env(universe)
 
+
         # initialize model
         response = initialize_model(logger, universe, dbexp, args.initialize, args.media)
         if response==1: return 1
 
 
-
-        ###### CHECKS 4
         # compute Memote metrics
         memote_results_dict = get_memote_results_dict(logger, universe)
 
 
-
-    # output the universe (even when --justparse)
+    # write JSON
     logger.info("Writing universal model...")
     cobra.io.save_json_model(universe, f'{args.outdir}/universe.json')
     logger.info(f"'{args.outdir}/universe.json' created!")
 
 
+    # avoid time-consuming activities
     if not args.justparse:
 
-        # outptu in the remaining formats:
+
+        # write XML
         cobra.io.write_sbml_model(universe, f'{args.outdir}/universe.xml') # groups are saved only to SBML
         logger.info(f"'{args.outdir}/universe.xml' created!")
         force_id_on_sbml(f'{args.outdir}/universe.xml', 'universe') # force introduction of the 'id=""' field
+
+
+        # write XLSX
         sheets_dict = write_excel_model(universe, f'{args.outdir}/universe.parsedb.xlsx', args.nofigs, memote_results_dict, df_E, None, None, df_S, df_C)
         logger.info(f"'{args.outdir}/universe.parsedb.xlsx' created!")
 
-

-        ###### CHECKS 4
-        # check if universal escher map is updated:
-        count_undrawn_rids(logger, universe, lastmap, args.focus)
+        # check if escher map is updated:
         if args.focus != '-':
             count_undrawn_rids_focus(logger, universe, lastmap, args.focus, args.outdir)
 
@@ -8,9 +8,13 @@ from .manual import get_rids_with_mancheck_balancing
 
 
 
-def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
+def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype, outdir):
 
-    itemtype = 'Reaction' if addtype=='R' else 'Transporter'
+
+    # define the itemtype:
+    if addtype=='R':
+        itemtype = 'Reaction'
+    else: itemtype = 'Transporter'
 
 
     # check presence of the GPR
@@ -53,7 +57,8 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
     if ko_id not in ko_for_rid and ko_id != 'spontaneous' and ko_id != 'orphan':
         if kr_id != 'RXXXXX':
             if rid not in get_rids_with_mancheck_gpr():
-                logger.debug(f"Ortholog '{ko_id}' should not be linked to reaction '{rid}' (available for {kr_ids}: {ko_for_rid}).")
+                with open(f"{outdir}/logs/R.orthlink.txt", 'a') as f:
+                    print(f"Ortholog '{ko_id}' should not be linked to reaction '{rid}' (available for {kr_ids}: {ko_for_rid}).", file=f)
 
 
     # check if some ko_ids are missing from this reaction:
@@ -68,12 +73,79 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
 
 
 
-def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addtype='R'):
-
+def check_rstring_arrow(logger, rid, row, addtype='R'):
 
     itemtype = 'Reaction' if addtype=='R' else 'Transporter'
 
 
+    if pnd.isna(row['rstring']):
+        logger.error(f"{itemtype} '{rid}' has no definition (rstring).")
+        return 1
+    if ' --> ' not in row['rstring'] and ' <=> ' not in row['rstring']:
+        logger.error(f"{itemtype} '{rid}' has invalid arrow: '{row['rstring']}'.")
+        return 1
+
+
+    return 0
+
+
+
+def check_author(logger, mrid, row, db, addtype='R'):
+
+
+    # define itemtype:
+    if addtype=='M':
+        itemtype = 'Metabolite'
+    elif addtype=='R' :
+        itemtype = 'Reaction'
+    else: itemtype = 'Transporter'
+
+
+    # check if author was indicated:
+    if pnd.isna(row['curator']):
+        logger.error(f"{itemtype} '{mrid}' has no curator.")
+        return 1
+
+
+    # check if the are valid authors
+    authors = set()
+    for author in row['curator'].split(';'):
+        author = author.strip()
+        authors.add(author)
+        if author not in db['curators']['username'].to_list():
+            logger.error(f"{itemtype} '{mrid}' has invalid curator: '{author}'.")
+            return 1
+
+
+    return list(authors)
+
+
+
+def get_curator_notes(logger, row):
+
+
+    # notes are separated by ';'
+    notes = []
+    if pnd.isna(row['notes']) == False:
+        for i in row['notes'].strip().split(';'):
+            notes.append(i.strip())
+        if notes == ['-']:
+            notes = []
+
+
+    return notes
+
+
+
+def add_reaction(logger, model, rid, authors, row, kr_ids, kegg_reaction_to_others, addtype, outdir):
+
+
+    # define the itemtype:
+    if addtype=='R':
+        itemtype = 'Reaction'
+    else: itemtype = 'Transporter'
+
+
     # create a frash reaction
     r = cobra.Reaction(rid)
     model.add_reactions([r])
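The new `check_author` and `get_curator_notes` helpers both parse ';'-separated cells from a database row. A self-contained sketch of the same parsing/validation idea with invented data (the `db['curators']` table layout is taken from the diff; the example row is not):

import pandas as pd

# invented curators table and reaction row, mimicking db['curators'] and one db row
db = {'curators': pd.DataFrame({'username': ['alice', 'bob']})}
row = pd.Series({'curator': 'alice; bob', 'notes': 'checked balance; added GPR'})

# check_author idea: split on ';', strip, validate against the curators table
authors = []
for author in row['curator'].split(';'):
    author = author.strip()
    if author not in db['curators']['username'].to_list():
        raise ValueError(f"invalid curator: '{author}'")
    authors.append(author)

# get_curator_notes idea: ';'-separated notes, a lone '-' means no notes
notes = [n.strip() for n in row['notes'].split(';')] if pd.notna(row['notes']) else []
if notes == ['-']:
    notes = []

print(authors, notes)   # ['alice', 'bob'] ['checked balance', 'added GPR']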
@@ -95,7 +167,8 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
     # handle GPR
     r.gene_reaction_rule = row['gpr_manual'].strip()
     if r.gene_reaction_rule == 'orphan':
-        r.gene_reaction_rule = '' # don't want 'orphan' as artificial gene in adition to 'spontaneous'!
+        # don't want 'orphan' as artificial gene in adition to 'spontaneous'!
+        r.gene_reaction_rule = ''
     r.update_genes_from_gpr()
 
 
@@ -105,22 +178,29 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
         logger.error(f"Metabolite '{m.id}' appears in '{r.id}' but was not previously defined.")
         return 1
 
+
+    # write curators as annotations
+    r.annotation['curator_codes'] = authors
+
 
     # add annotations to model (same order of Memote)
     ankeys = [
         'rhea', 'kegg.reaction', 'seed.reaction', 'metanetx.reaction',
         'bigg.reaction', 'reactome', 'ec-code', 'brenda', 'biocyc',
     ]
+    #
     # initialize sets:
     for ankey in ankeys:
         if ankey == 'kegg.reaction': r.annotation[ankey] = set(kr_ids) - set(['RXXXXX'])
         else: r.annotation[ankey] = set()
+    #
     # populate sets:
     for kr_id in kr_ids:
         if kr_id != 'RXXXXX':
             if kr_id in kegg_reaction_to_others.keys():
                 for ankey in ankeys:
                     r.annotation[ankey].update(kegg_reaction_to_others[kr_id][ankey])
+    #
     # save as list:
     for ankey in ankeys:
         r.annotation[ankey] = list(r.annotation[ankey])
@@ -133,6 +213,10 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
         r.annotation['sbo'] = ['SBO:0000185'] # transport reaction
 
 
+    # add curator notes
+    r.annotation['curator_notes'] = get_curator_notes(logger, row)
+
+
     # check if unbalanced
     if r.check_mass_balance() != {}:
         logger.error(f"{itemtype} '{r.id}' is unbalanced: {r.check_mass_balance()}.")
@@ -148,50 +232,3 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
 
 
     return 0
-
-
-
-def check_rstring_arrow(logger, rid, row, addtype='R'):
-
-    itemtype = 'Reaction' if addtype=='R' else 'Transporter'
-
-
-    if pnd.isna(row['rstring']):
-        logger.error(f"{itemtype} '{rid}' has no definition (rstring).")
-        return 1
-    if ' --> ' not in row['rstring'] and ' <=> ' not in row['rstring']:
-        logger.error(f"{itemtype} '{rid}' has invalid arrow: '{row['rstring']}'.")
-        return 1
-
-
-    return 0
-
-
-
-def check_author(logger, mrid, row, db, addtype='R'):
-
-
-    if addtype=='M':
-        itemtype = 'Metabolite'
-    elif addtype=='R' :
-        itemtype = 'Reaction'
-    else: itemtype = 'Transporter'
-
-
-    if pnd.isna(row['author']):
-        logger.error(f"{itemtype} '{mrid}' has no author.")
-        return 1
-
-    authors = set()
-    for author in row['author'].split(';'):
-        author = author.strip()
-        authors.add(author)
-        if author not in db['authors']['username'].to_list():
-            logger.error(f"{itemtype} '{mrid}' has invalid author: '{author}'.")
-            return 1
-
-
-    return list(authors)
-
-
-
@@ -1,23 +1,30 @@
-import cobra
+import threading
 
+import cobra
 
 from .manual import get_deprecated_kos
 from .manual import get_custom_groups
 
+from ..commons.downloads import SimpleLoadingWheel
+
 
 
 def translate_annotate_genes(logger, model, idcollection_dict):
 
-
-
+
+    logger.info("Translating and annotating orthologs...")
+
+
     ko_to_name = idcollection_dict['ko_to_name']
     ko_to_symbols = idcollection_dict['ko_to_symbols']
     ko_to_ecs = idcollection_dict['ko_to_ecs']
     ko_to_cogs = idcollection_dict['ko_to_cogs']
     ko_to_gos = idcollection_dict['ko_to_gos']
+    ko_to_taxa = idcollection_dict['ko_to_taxa']
 
 
-    # translation dicts: assign to each KO a symbol that is unique in the universe model.
+    # create the translation dicts (ko_to_sym):
+    # assign to each KO a symbol that is unique in the universe model.
     ko_to_sym = {}
     sym_to_ko = {}
     cnt = 0
@@ -26,20 +33,23 @@ def translate_annotate_genes(logger, model, idcollection_dict):
             continue
         ko = g.id
         cnt += 1
-
+        #
+        # if the ko is deprecated, it was not included in 'ko_to_symbols'
         if ko in get_deprecated_kos():
-            # if the ko is deprecated, it was not included in 'ko_to_symbols'
             ko_to_sym[ko] = ko
             sym_to_ko[ko] = ko
             continue
-
-        for symbol in ko_to_symbols[ko]: # iterate the available symbols for this KO
-            if symbol not in sym_to_ko.keys(): # take the first available (not yet used)
+        #
+        # iterate the available symbols for this KO
+        for symbol in ko_to_symbols[ko]:
+            # take the first available (not yet used)
+            if symbol not in sym_to_ko.keys():
                 ko_to_sym[ko] = symbol
                 sym_to_ko[symbol] = ko
                 break
-
-        if cnt != len(ko_to_sym): # no symbol was assigned (symbol was already taken by another KO)
+        #
+        # no symbol was assigned (symbol was already taken by another KO)
+        if cnt != len(ko_to_sym):
             cnt_dups = 2
             symbol = list(ko_to_symbols[ko])[0] + f'_{cnt_dups}' # generate a new symbol
             while cnt != len(ko_to_sym): # until a symbol is assigned
@@ -50,7 +60,6 @@ def translate_annotate_genes(logger, model, idcollection_dict):
                 symbol = list(ko_to_symbols[ko])[0] + f'_{cnt_dups}' # retry with the next one
 
 
-
 
     # insert annotations
     for g in model.genes:
@@ -67,16 +76,30 @@ def translate_annotate_genes(logger, model, idcollection_dict):
         g.annotation['cog'] = list(ko_to_cogs[ko])
         g.annotation['go'] = list(ko_to_gos[ko])
 
+        # add taxa information
+        g.annotation['kingdom'] = list(ko_to_taxa[ko]['kingdom'])
+        g.annotation['phylum'] = list(ko_to_taxa[ko]['phylum'])
+
+
         # add SBO annotation
         g.annotation['sbo'] = ['SBO:0000243'] # demand reaction
 
 
 
-    # finally apply translations of IDs
+    # handle orphan and spontaneous
     translation_dict = ko_to_sym
     translation_dict['orphan'] = 'orphan'
     translation_dict['spontaneous'] = 'spontaneous'
-    cobra.manipulation.rename_genes(model, translation_dict)
+
+
+    # finally apply translations of IDs in a dedicated Thread
+    t1 = threading.Thread(target = cobra.manipulation.rename_genes, args=(
+        model, translation_dict))
+    t1.start()
+    slw = SimpleLoadingWheel(msg="Please wait... ")
+    while t1.is_alive():
+        slw.proceed()
+    slw.clear()
 
 
     return model
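The hunk above moves the long `cobra.manipulation.rename_genes` call into a `threading.Thread` and spins gsrap's `SimpleLoadingWheel` until the thread finishes. A self-contained sketch of the same pattern with a stand-in spinner (the `TinySpinner` class and `slow_task` are assumptions, not gsrap code):

import sys
import time
import threading

class TinySpinner:
    # minimal stand-in for a loading wheel: prints a rotating cursor in place
    FRAMES = '|/-\\'

    def __init__(self, msg="Please wait... "):
        self.msg = msg
        self.i = 0

    def proceed(self):
        sys.stdout.write(f'\r{self.msg}{self.FRAMES[self.i % len(self.FRAMES)]}')
        sys.stdout.flush()
        self.i += 1
        time.sleep(0.1)

    def clear(self):
        sys.stdout.write('\r' + ' ' * (len(self.msg) + 1) + '\r')
        sys.stdout.flush()

def slow_task(seconds):
    time.sleep(seconds)   # placeholder for rename_genes(model, translation_dict)

t1 = threading.Thread(target=slow_task, args=(2,))
t1.start()
slw = TinySpinner()
while t1.is_alive():      # same loop shape as in the hunk above
    slw.proceed()
slw.clear()
t1.join()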
@@ -85,6 +108,8 @@ def translate_annotate_genes(logger, model, idcollection_dict):
 
 
 def set_up_groups(logger, model, idcollection_dict):
+
+    logger.debug("Introducing groups...")
 
 
     kr_to_maps = idcollection_dict['kr_to_maps']
@@ -140,12 +165,12 @@ def set_up_groups(logger, model, idcollection_dict):
     custom_groups = get_custom_groups()
     #
     # create a group for transporters on-the-fly
-    custom_groups['transport'] = []
+    custom_groups['gr_transport'] = []
     for r in model.reactions:
-        if len(r.metabolites) == 1: # exchanges / sinks/ demands
-            custom_groups['transport'].append(r.id)
+        if len(r.metabolites) == 1 and list(r.metabolites)[0].id.rsplit('_',1)[-1] != 'c': # just exchanges (esclude sinks/demands)
+            custom_groups['gr_transport'].append(r.id)
         if len(set([m.id.rsplit('_', 1)[-1] for m in r.metabolites])) > 1: # transport reactions
-            custom_groups['transport'].append(r.id)
+            custom_groups['gr_transport'].append(r.id)
     #
     for group_id in custom_groups.keys():
         actual_group = cobra.core.Group(
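In the renamed `gr_transport` group, membership now depends on the compartment suffix after the last `_` in each metabolite ID: single-metabolite reactions qualify only when their metabolite sits outside the cytosol (suffix other than `c`), while reactions spanning more than one compartment always qualify. A small sketch of that suffix-based classification with invented IDs:

# invented reactions mapped to their metabolite IDs (compartment = suffix after last '_')
reactions = {
    'EX_glc__D_e': ['glc__D_e'],              # exchange: one metabolite, not in 'c'
    'SK_ala__L_c': ['ala__L_c'],              # sink/demand: one metabolite in 'c' -> excluded
    'GLCt2':       ['glc__D_e', 'glc__D_c'],  # transport: spans two compartments
    'PGI':         ['g6p_c', 'f6p_c'],        # purely cytosolic -> excluded
}

gr_transport = []
for rid, mets in reactions.items():
    compartments = {m.rsplit('_', 1)[-1] for m in mets}
    if len(mets) == 1 and mets[0].rsplit('_', 1)[-1] != 'c':
        gr_transport.append(rid)              # keep exchanges
    elif len(compartments) > 1:
        gr_transport.append(rid)              # keep cross-compartment transporters

print(gr_transport)   # ['EX_glc__D_e', 'GLCt2']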
@@ -118,6 +118,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
     kr_uni_missing = (kr_uni - kr_ids_modeled) - get_krs_to_exclude()
     kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
     logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
+    #logger.warning(f"Copy these: {kr_uni_missing}")
 
 
     # define the map?????, containing krs not included in maps
@@ -144,7 +145,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
 
     # check if 'focus' exist
     if focus != '-' and focus not in map_ids and focus not in md_ids:
-        if focus == 'transport':
+        if focus == 'gr_transport':
             df_coverage = None
             return df_coverage # just the jeneration of 'transport.json' for Escher drawing is needed here
         else: