gsrap 0.8.3__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -82,6 +82,7 @@ def main():
82
82
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
83
83
  parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
84
84
  parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
85
+ parsedb_parser.add_argument("-d", "--keepdisconn", action='store_true', help="Do not remove disconnected metabolites.")
85
86
 
86
87
 
87
88
 
gsrap/__init__.py CHANGED
@@ -82,6 +82,7 @@ def main():
82
82
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
83
83
  parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
84
84
  parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
85
+ parsedb_parser.add_argument("-d", "--keepdisconn", action='store_true', help="Do not remove disconnected metabolites.")
85
86
 
86
87
 
87
88
 
Binary file
Binary file
@@ -275,6 +275,67 @@ def check_taxon(logger, taxon, idcollection_dict):
275
275
  if name not in avail_taxa_at_level:
276
276
  logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
277
277
  return 1
278
-
278
+
279
+
280
+ """
281
+ sorted(list(df.query("kingdom == 'Bacteria'")['phylum'].unique()))
282
+ ['Acidobacteriota',
283
+ 'Actinomycetota',
284
+ 'Alphaproteobacteria',
285
+ 'Aquificota',
286
+ 'Armatimonadota',
287
+ 'Atribacterota',
288
+ 'Bacilli',
289
+ 'Bacteria incertae sedis',
290
+ 'Bacteroidota',
291
+ 'Balneolota',
292
+ 'Bdellovibrionota',
293
+ 'Betaproteobacteria',
294
+ 'Caldisericota',
295
+ 'Calditrichota',
296
+ 'Campylobacterota',
297
+ 'Chlamydiota',
298
+ 'Chlorobiota',
299
+ 'Chloroflexota',
300
+ 'Chrysiogenota',
301
+ 'Cloacimonadota',
302
+ 'Clostridia',
303
+ 'Coprothermobacterota',
304
+ 'Cyanobacteriota',
305
+ 'Deferribacterota',
306
+ 'Deinococcota',
307
+ 'Deltaproteobacteria',
308
+ 'Dictyoglomota',
309
+ 'Elusimicrobiota',
310
+ 'Enterobacteria',
311
+ 'Fibrobacterota',
312
+ 'Fidelibacterota',
313
+ 'Fusobacteriota',
314
+ 'Gemmatimonadota',
315
+ 'Ignavibacteriota',
316
+ 'Kiritimatiellota',
317
+ 'Lentisphaerota',
318
+ 'Melainabacteria',
319
+ 'Mycoplasmatota',
320
+ 'Myxococcota',
321
+ 'Nitrospinota',
322
+ 'Nitrospirota',
323
+ 'Omnitrophota',
324
+ 'Planctomycetota',
325
+ 'Rhodothermota',
326
+ 'Spirochaetota',
327
+ 'Synergistota',
328
+ 'Thermodesulfobacteriota',
329
+ 'Thermodesulfobiota',
330
+ 'Thermomicrobiota',
331
+ 'Thermosulfidibacterota',
332
+ 'Thermotogota',
333
+ 'Verrucomicrobiota',
334
+ 'Vulcanimicrobiota',
335
+ 'other Bacillota',
336
+ 'other Gammaproteobacteria',
337
+ 'other Pseudomonadota',
338
+ 'unclassified Bacteria']
339
+ """
279
340
 
280
341
  return 0
@@ -1,3 +1,9 @@
1
+ import warnings
2
+ import logging
3
+
4
+
5
+ import cobra
6
+
1
7
 
2
8
 
3
9
  def print_json_tree(data, level=0, max_level=2):
@@ -17,7 +23,7 @@ def print_json_tree(data, level=0, max_level=2):
17
23
 
18
24
 
19
25
 
20
- def count_undrawn_rids(logger, universe, lastmap):
26
+ def count_undrawn_rids(logger, universe, lastmap, focus):
21
27
 
22
28
 
23
29
  rids = set([r.id for r in universe.reactions])
@@ -32,6 +38,71 @@ def count_undrawn_rids(logger, universe, lastmap):
32
38
  logger.debug(f"Last universal map version detected: '{filename}'.")
33
39
  if len(remainings) > 0:
34
40
  logger.warning(f"Our universal map is {len(remainings)} reactions behind. Please draw!")
41
+ if focus == '-':
42
+ logger.warning(f"Drawing is eased when using '--focus'...")
35
43
  else:
36
44
  logger.info(f"Our universal map is {len(remainings)} reactions behind. Thank you ♥")
45
+
46
+
47
+
48
def count_undrawn_rids_focus(logger, universe, lastmap, focus, outdir):
    """Report which reactions of the `focus` group are still missing from the Escher map.

    The reaction IDs belonging to the group named `focus` are compared with the
    'bigg_id' values found in `lastmap['json'][1]['reactions']`. When some are
    missing, a warning is logged and a copy of the universe restricted to the
    group's reactions is written to '{outdir}/{focus}.json'.

    Args:
        logger: object exposing `warning` and `info`.
        universe: model exposing `groups`, `reactions`, `copy()`.
        lastmap: dict whose 'json' entry holds the Escher map content.
        focus: ID of the group to inspect.
        outdir: directory receiving the subset model.

    Returns:
        None.
    """
    # get modeled rids for this --focus:
    try:
        group = universe.groups.get_by_id(focus)
    except KeyError:
        # only a missing group is expected here; any other failure must surface
        logger.warning(f"Group '{focus}' not found!")
        return
    rids = {member.id for member in group.members}

    # get rids on Escher:
    drawn_rids = {
        drawn['bigg_id'] for drawn in lastmap['json'][1]['reactions'].values()
    }

    # get remaining rids for this map:
    remainings = rids - drawn_rids
    if not remainings:
        logger.info(f"Focusing on '{focus}', our universal map is {len(remainings)} reactions behind. Thank you ♥")
        return

    if focus != 'transport':
        remainings_krs = set()
        for rid in remainings:
            reaction = universe.reactions.get_by_id(rid)
            # a reaction without KEGG annotation must not abort the report
            krs = reaction.annotation.get('kegg.reaction', [])
            if isinstance(krs, str):
                # a lone ID stored as a plain string would otherwise be split into characters
                krs = [krs]
            remainings_krs.update(krs)
        # sorted so that the message is reproducible between runs
        logger.warning(f"Focusing on '{focus}', our universal map is {len(remainings)} reactions behind: {' '.join(sorted(remainings_krs))}.")
    else:
        # usually no kegg codes for transport reactions, so none are looked up
        logger.warning(f"Focusing on '{focus}', our universal map is {len(remainings)} reactions behind.")

    # subset the universe to ease the drawing:
    universe_focus = universe.copy()
    to_remove = [r for r in universe_focus.reactions if r.id not in rids]

    # trick to avoid the WARNING "cobra/core/group.py:147: UserWarning: need to pass in a list"
    # triggered when trying to remove reactions that are included in groups.
    with warnings.catch_warnings():  # temporarily suppress warnings for this block
        warnings.simplefilter("ignore")
        cobra_logger = logging.getLogger("cobra.util.solver")
        old_level = cobra_logger.level
        cobra_logger.setLevel(logging.ERROR)
        try:
            universe_focus.remove_reactions(to_remove, remove_orphans=True)
        finally:
            # restore original behaviour even if the removal fails
            cobra_logger.setLevel(old_level)

    # save the subset for drawing in Escher!
    logger.info(f"Writing '{outdir}/{focus}.json' to ease your drawing workflow...")
    cobra.io.save_json_model(universe_focus, f'{outdir}/{focus}.json')
37
108
 
@@ -275,6 +275,67 @@ def check_taxon(logger, taxon, idcollection_dict):
275
275
  if name not in avail_taxa_at_level:
276
276
  logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
277
277
  return 1
278
-
278
+
279
+
280
+ """
281
+ sorted(list(df.query("kingdom == 'Bacteria'")['phylum'].unique()))
282
+ ['Acidobacteriota',
283
+ 'Actinomycetota',
284
+ 'Alphaproteobacteria',
285
+ 'Aquificota',
286
+ 'Armatimonadota',
287
+ 'Atribacterota',
288
+ 'Bacilli',
289
+ 'Bacteria incertae sedis',
290
+ 'Bacteroidota',
291
+ 'Balneolota',
292
+ 'Bdellovibrionota',
293
+ 'Betaproteobacteria',
294
+ 'Caldisericota',
295
+ 'Calditrichota',
296
+ 'Campylobacterota',
297
+ 'Chlamydiota',
298
+ 'Chlorobiota',
299
+ 'Chloroflexota',
300
+ 'Chrysiogenota',
301
+ 'Cloacimonadota',
302
+ 'Clostridia',
303
+ 'Coprothermobacterota',
304
+ 'Cyanobacteriota',
305
+ 'Deferribacterota',
306
+ 'Deinococcota',
307
+ 'Deltaproteobacteria',
308
+ 'Dictyoglomota',
309
+ 'Elusimicrobiota',
310
+ 'Enterobacteria',
311
+ 'Fibrobacterota',
312
+ 'Fidelibacterota',
313
+ 'Fusobacteriota',
314
+ 'Gemmatimonadota',
315
+ 'Ignavibacteriota',
316
+ 'Kiritimatiellota',
317
+ 'Lentisphaerota',
318
+ 'Melainabacteria',
319
+ 'Mycoplasmatota',
320
+ 'Myxococcota',
321
+ 'Nitrospinota',
322
+ 'Nitrospirota',
323
+ 'Omnitrophota',
324
+ 'Planctomycetota',
325
+ 'Rhodothermota',
326
+ 'Spirochaetota',
327
+ 'Synergistota',
328
+ 'Thermodesulfobacteriota',
329
+ 'Thermodesulfobiota',
330
+ 'Thermomicrobiota',
331
+ 'Thermosulfidibacterota',
332
+ 'Thermotogota',
333
+ 'Verrucomicrobiota',
334
+ 'Vulcanimicrobiota',
335
+ 'other Bacillota',
336
+ 'other Gammaproteobacteria',
337
+ 'other Pseudomonadota',
338
+ 'unclassified Bacteria']
339
+ """
279
340
 
280
341
  return 0
@@ -1,3 +1,9 @@
1
+ import warnings
2
+ import logging
3
+
4
+
5
+ import cobra
6
+
1
7
 
2
8
 
3
9
  def print_json_tree(data, level=0, max_level=2):
@@ -17,7 +23,7 @@ def print_json_tree(data, level=0, max_level=2):
17
23
 
18
24
 
19
25
 
20
- def count_undrawn_rids(logger, universe, lastmap):
26
+ def count_undrawn_rids(logger, universe, lastmap, focus):
21
27
 
22
28
 
23
29
  rids = set([r.id for r in universe.reactions])
@@ -32,6 +38,71 @@ def count_undrawn_rids(logger, universe, lastmap):
32
38
  logger.debug(f"Last universal map version detected: '{filename}'.")
33
39
  if len(remainings) > 0:
34
40
  logger.warning(f"Our universal map is {len(remainings)} reactions behind. Please draw!")
41
+ if focus == '-':
42
+ logger.warning(f"Drawing is eased when using '--focus'...")
35
43
  else:
36
44
  logger.info(f"Our universal map is {len(remainings)} reactions behind. Thank you ♥")
45
+
46
+
47
+
48
def count_undrawn_rids_focus(logger, universe, lastmap, focus, outdir):
    """Report which reactions of the `focus` group are still missing from the Escher map.

    The reaction IDs belonging to the group named `focus` are compared with the
    'bigg_id' values found in `lastmap['json'][1]['reactions']`. When some are
    missing, a warning is logged and a copy of the universe restricted to the
    group's reactions is written to '{outdir}/{focus}.json'.

    Args:
        logger: object exposing `warning` and `info`.
        universe: model exposing `groups`, `reactions`, `copy()`.
        lastmap: dict whose 'json' entry holds the Escher map content.
        focus: ID of the group to inspect.
        outdir: directory receiving the subset model.

    Returns:
        None.
    """
    # get modeled rids for this --focus:
    try:
        group = universe.groups.get_by_id(focus)
    except KeyError:
        # only a missing group is expected here; any other failure must surface
        logger.warning(f"Group '{focus}' not found!")
        return
    rids = {member.id for member in group.members}

    # get rids on Escher:
    drawn_rids = {
        drawn['bigg_id'] for drawn in lastmap['json'][1]['reactions'].values()
    }

    # get remaining rids for this map:
    remainings = rids - drawn_rids
    if not remainings:
        logger.info(f"Focusing on '{focus}', our universal map is {len(remainings)} reactions behind. Thank you ♥")
        return

    if focus != 'transport':
        remainings_krs = set()
        for rid in remainings:
            reaction = universe.reactions.get_by_id(rid)
            # a reaction without KEGG annotation must not abort the report
            krs = reaction.annotation.get('kegg.reaction', [])
            if isinstance(krs, str):
                # a lone ID stored as a plain string would otherwise be split into characters
                krs = [krs]
            remainings_krs.update(krs)
        # sorted so that the message is reproducible between runs
        logger.warning(f"Focusing on '{focus}', our universal map is {len(remainings)} reactions behind: {' '.join(sorted(remainings_krs))}.")
    else:
        # usually no kegg codes for transport reactions, so none are looked up
        logger.warning(f"Focusing on '{focus}', our universal map is {len(remainings)} reactions behind.")

    # subset the universe to ease the drawing:
    universe_focus = universe.copy()
    to_remove = [r for r in universe_focus.reactions if r.id not in rids]

    # trick to avoid the WARNING "cobra/core/group.py:147: UserWarning: need to pass in a list"
    # triggered when trying to remove reactions that are included in groups.
    with warnings.catch_warnings():  # temporarily suppress warnings for this block
        warnings.simplefilter("ignore")
        cobra_logger = logging.getLogger("cobra.util.solver")
        old_level = cobra_logger.level
        cobra_logger.setLevel(logging.ERROR)
        try:
            universe_focus.remove_reactions(to_remove, remove_orphans=True)
        finally:
            # restore original behaviour even if the removal fails
            cobra_logger.setLevel(old_level)

    # save the subset for drawing in Escher!
    logger.info(f"Writing '{outdir}/{focus}.json' to ease your drawing workflow...")
    cobra.io.save_json_model(universe_focus, f'{outdir}/{focus}.json')
37
108
 
@@ -138,6 +138,15 @@ def set_up_groups(logger, model, idcollection_dict):
138
138
 
139
139
  # insert custom groups:
140
140
  custom_groups = get_custom_groups()
141
+ #
142
+ # create a group for transporters on-the-fly
143
+ custom_groups['transport'] = []
144
+ for r in model.reactions:
145
+ if len(r.metabolites) == 1: # exchanges / sinks/ demands
146
+ custom_groups['transport'].append(r.id)
147
+ if len(set([m.id.rsplit('_', 1)[-1] for m in r.metabolites])) > 1: # transport reactions
148
+ custom_groups['transport'].append(r.id)
149
+ #
141
150
  for group_id in custom_groups.keys():
142
151
  actual_group = cobra.core.Group(
143
152
  group_id,
@@ -6,6 +6,9 @@ import os
6
6
  import pandas as pnd
7
7
 
8
8
 
9
+ from .manual import get_krs_to_exclude
10
+
11
+
9
12
 
10
13
  def parse_eggnog(model, eggnog, idcollection_dict):
11
14
 
@@ -112,7 +115,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
112
115
  if 'kegg.reaction' in r.annotation.keys():
113
116
  for kr_id in r.annotation['kegg.reaction']:
114
117
  kr_ids_modeled.add(kr_id)
115
- kr_uni_missing = kr_uni - kr_ids_modeled
118
+ kr_uni_missing = (kr_uni - kr_ids_modeled) - get_krs_to_exclude()
116
119
  kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
117
120
  logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
118
121
 
@@ -141,8 +144,12 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
141
144
 
142
145
  # check if 'focus' exist
143
146
  if focus != '-' and focus not in map_ids and focus not in md_ids:
144
- logger.error(f"The ID provided with --focus does not exist: {focus}.")
145
- return 1
147
+ if focus == 'transport':
148
+ df_coverage = None
149
+ return df_coverage # just the jeneration of 'transport.json' for Escher drawing is needed here
150
+ else:
151
+ logger.error(f"The ID provided with --focus does not exist: {focus}.")
152
+ return 1
146
153
  if focus.startswith('map'):
147
154
  logger.debug(f"With --focus {focus}, --module will switch to False.")
148
155
  module = False
@@ -175,7 +182,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
175
182
 
176
183
  # check if this map was (at least partially) covered:
177
184
  map_krs = set([kr for kr in i['kr_ids'] if kr in kr_uni])
178
- missing = map_krs - kr_ids_modeled
185
+ missing = (map_krs - kr_ids_modeled) - get_krs_to_exclude()
179
186
  present = kr_ids_modeled.intersection(map_krs)
180
187
  if focus == map_id:
181
188
  missing_logger = (map_id, missing)
@@ -287,7 +294,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
287
294
 
288
295
  # check if this module was (at least partially) covered:
289
296
  md_krs = set([kr for kr in z['kr_ids_md'] if kr in kr_uni])
290
- missing = md_krs - kr_ids_modeled
297
+ missing = (md_krs - kr_ids_modeled) - get_krs_to_exclude()
291
298
  present = kr_ids_modeled.intersection(md_krs)
292
299
  if focus == md_id:
293
300
  missing_logger = (md_id, missing)
@@ -336,7 +343,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
336
343
  if module and focus=='-':
337
344
  logger.info(f"{spacer}Modules of {right_item['map_id']}: completed {len(mds_completed)} - partial {len(mds_partial)} - missing {len(mds_missing)} - noreac {len(mds_noreac)}")
338
345
  if focus != '-':
339
- logger.info(f"Missing reactions focusing on {missing_logger[0]}: {' '.join(list(missing_logger[1]))}.")
346
+ logger.info(f"Missing reactions focusing on '{missing_logger[0]}': {' '.join(list(missing_logger[1]))}.")
340
347
  if progress:
341
348
  logger.info(f"Maps: finished {len(maps_finished)} - partial {len(maps_partial)} - missing {len(maps_missing)} - noreac {len(maps_noreac)}")
342
349
 
@@ -5,11 +5,21 @@ def get_deprecated_kos():
5
5
  deprecated_kos = [
6
6
  'K11189', # should be K02784
7
7
  'K07011', # linked to lp_1215(cps3A) and lp_1216(cps3B) during 2018 and not replaced
8
+ #'K24301', # to be introduced in GPRs
8
9
  ]
9
10
  return deprecated_kos
10
11
 
11
12
 
12
13
 
14
def get_krs_to_exclude():
    """Return the set of KEGG reaction IDs flagged for exclusion.

    The IDs are the general (non-specific) forms of fatty acid reactions.
    """
    biosynthesis = ('R12328', 'R05190')  # general forms of fatty acid biosynthesis
    degradation = ('R01347', 'R04121')  # general forms of fatty acid degradation
    return {*biosynthesis, *degradation}
19
+
20
+
21
+
22
+
13
23
  def get_rids_with_mancheck_gpr():
14
24
  rids_mancheck_gpr = [ # reactions with manually checked GPRs
15
25
  'SUCD1', 'ALKP', 'PFK_3', 'TCMPTS', 'PPA', 'APSR',
@@ -16,6 +16,8 @@ from ..commons import write_excel_model
16
16
  from ..commons import show_contributions
17
17
  from ..commons import adjust_biomass_precursors
18
18
  from ..commons import count_undrawn_rids
19
+ from ..commons import count_undrawn_rids_focus
20
+
19
21
  from ..commons import format_expansion
20
22
  from ..commons import check_taxon
21
23
  from ..commons import download_keggorg
@@ -184,7 +186,8 @@ def main(args, logger):
184
186
 
185
187
  ###### POLISHING 1
186
188
  # remove disconnected metabolites
187
- universe = remove_disconnected(logger, universe)
189
+ if args.keepdisconn == False:
190
+ universe = remove_disconnected(logger, universe) # can be commented when using booster.py
188
191
 
189
192
 
190
193
 
@@ -193,9 +196,9 @@ def main(args, logger):
193
196
  verify_egc_all(logger, universe, args.outdir)
194
197
 
195
198
 
199
+
196
200
  if not args.justparse:
197
201
 
198
-
199
202
  ###### CHECKS 3
200
203
  # check growth on minimal media
201
204
  df_G = grow_on_media(logger, universe, dbexp, args.media, '-', True)
@@ -228,10 +231,15 @@ def main(args, logger):
228
231
 
229
232
 
230
233
 
231
- # output the universe
232
- logger.info("Writing universal model...")
233
- cobra.io.save_json_model(universe, f'{args.outdir}/universe.json')
234
- logger.info(f"'{args.outdir}/universe.json' created!")
234
+ # output the universe (even when --justparse)
235
+ logger.info("Writing universal model...")
236
+ cobra.io.save_json_model(universe, f'{args.outdir}/universe.json')
237
+ logger.info(f"'{args.outdir}/universe.json' created!")
238
+
239
+
240
+ if not args.justparse:
241
+
242
+ # output in the remaining formats:
235
243
  cobra.io.write_sbml_model(universe, f'{args.outdir}/universe.xml') # groups are saved only to SBML
236
244
  logger.info(f"'{args.outdir}/universe.xml' created!")
237
245
  force_id_on_sbml(f'{args.outdir}/universe.xml', 'universe') # force introduction of the 'id=""' field
@@ -242,7 +250,9 @@ def main(args, logger):
242
250
 
243
251
  ###### CHECKS 4
244
252
  # check if universal escher map is updated:
245
- count_undrawn_rids(logger, universe, lastmap)
253
+ count_undrawn_rids(logger, universe, lastmap, args.focus)
254
+ if args.focus != '-':
255
+ count_undrawn_rids_focus(logger, universe, lastmap, args.focus, args.outdir)
246
256
 
247
257
 
248
258
  return 0
@@ -45,7 +45,7 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
45
45
  pass
46
46
  elif ko_id not in idcollection_dict['ko'] and ko_id != 'spontaneous' and ko_id != 'orphan':
47
47
  logger.error(f"{itemtype} '{rid}' has an invalid KEGG Ortholog: '{ko_id}'.")
48
- return 1
48
+ return 1 # can be commented when migrating to new kegg release
49
49
 
50
50
 
51
51
  # check if these ko_ids are really assigned to this reaction:
@@ -61,7 +61,7 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
61
61
  missing_ko_ids = ko_for_rid - (set(ko_ids_parsed) - set(['spontaneous', 'orphan']))
62
62
  if len(missing_ko_ids) > 0:
63
63
  logger.error(f"Orthologs {missing_ko_ids} are missing from reaction '{rid}' ({kr_ids}).")
64
- return 1
64
+ return 1 # can be commented when migrating to new kegg release
65
65
 
66
66
 
67
67
  return 0
@@ -138,6 +138,15 @@ def set_up_groups(logger, model, idcollection_dict):
138
138
 
139
139
  # insert custom groups:
140
140
  custom_groups = get_custom_groups()
141
+ #
142
+ # create a group for transporters on-the-fly
143
+ custom_groups['transport'] = []
144
+ for r in model.reactions:
145
+ if len(r.metabolites) == 1: # exchanges / sinks/ demands
146
+ custom_groups['transport'].append(r.id)
147
+ if len(set([m.id.rsplit('_', 1)[-1] for m in r.metabolites])) > 1: # transport reactions
148
+ custom_groups['transport'].append(r.id)
149
+ #
141
150
  for group_id in custom_groups.keys():
142
151
  actual_group = cobra.core.Group(
143
152
  group_id,
@@ -6,6 +6,9 @@ import os
6
6
  import pandas as pnd
7
7
 
8
8
 
9
+ from .manual import get_krs_to_exclude
10
+
11
+
9
12
 
10
13
  def parse_eggnog(model, eggnog, idcollection_dict):
11
14
 
@@ -112,7 +115,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
112
115
  if 'kegg.reaction' in r.annotation.keys():
113
116
  for kr_id in r.annotation['kegg.reaction']:
114
117
  kr_ids_modeled.add(kr_id)
115
- kr_uni_missing = kr_uni - kr_ids_modeled
118
+ kr_uni_missing = (kr_uni - kr_ids_modeled) - get_krs_to_exclude()
116
119
  kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
117
120
  logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
118
121
 
@@ -141,8 +144,12 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
141
144
 
142
145
  # check if 'focus' exist
143
146
  if focus != '-' and focus not in map_ids and focus not in md_ids:
144
- logger.error(f"The ID provided with --focus does not exist: {focus}.")
145
- return 1
147
+ if focus == 'transport':
148
+ df_coverage = None
149
+ return df_coverage # just the jeneration of 'transport.json' for Escher drawing is needed here
150
+ else:
151
+ logger.error(f"The ID provided with --focus does not exist: {focus}.")
152
+ return 1
146
153
  if focus.startswith('map'):
147
154
  logger.debug(f"With --focus {focus}, --module will switch to False.")
148
155
  module = False
@@ -175,7 +182,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
175
182
 
176
183
  # check if this map was (at least partially) covered:
177
184
  map_krs = set([kr for kr in i['kr_ids'] if kr in kr_uni])
178
- missing = map_krs - kr_ids_modeled
185
+ missing = (map_krs - kr_ids_modeled) - get_krs_to_exclude()
179
186
  present = kr_ids_modeled.intersection(map_krs)
180
187
  if focus == map_id:
181
188
  missing_logger = (map_id, missing)
@@ -287,7 +294,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
287
294
 
288
295
  # check if this module was (at least partially) covered:
289
296
  md_krs = set([kr for kr in z['kr_ids_md'] if kr in kr_uni])
290
- missing = md_krs - kr_ids_modeled
297
+ missing = (md_krs - kr_ids_modeled) - get_krs_to_exclude()
291
298
  present = kr_ids_modeled.intersection(md_krs)
292
299
  if focus == md_id:
293
300
  missing_logger = (md_id, missing)
@@ -336,7 +343,7 @@ def check_completeness(logger, model, progress, module, focus, taxon, eggnog, ke
336
343
  if module and focus=='-':
337
344
  logger.info(f"{spacer}Modules of {right_item['map_id']}: completed {len(mds_completed)} - partial {len(mds_partial)} - missing {len(mds_missing)} - noreac {len(mds_noreac)}")
338
345
  if focus != '-':
339
- logger.info(f"Missing reactions focusing on {missing_logger[0]}: {' '.join(list(missing_logger[1]))}.")
346
+ logger.info(f"Missing reactions focusing on '{missing_logger[0]}': {' '.join(list(missing_logger[1]))}.")
340
347
  if progress:
341
348
  logger.info(f"Maps: finished {len(maps_finished)} - partial {len(maps_partial)} - missing {len(maps_missing)} - noreac {len(maps_noreac)}")
342
349
 
gsrap/parsedb/manual.py CHANGED
@@ -5,11 +5,21 @@ def get_deprecated_kos():
5
5
  deprecated_kos = [
6
6
  'K11189', # should be K02784
7
7
  'K07011', # linked to lp_1215(cps3A) and lp_1216(cps3B) during 2018 and not replaced
8
+ #'K24301', # to be introduced in GPRs
8
9
  ]
9
10
  return deprecated_kos
10
11
 
11
12
 
12
13
 
14
def get_krs_to_exclude():
    """Return the set of KEGG reaction IDs flagged for exclusion.

    The IDs are the general (non-specific) forms of fatty acid reactions.
    """
    biosynthesis = ('R12328', 'R05190')  # general forms of fatty acid biosynthesis
    degradation = ('R01347', 'R01348', 'R04121')  # general forms of fatty acid degradation
    return {*biosynthesis, *degradation}
19
+
20
+
21
+
22
+
13
23
  def get_rids_with_mancheck_gpr():
14
24
  rids_mancheck_gpr = [ # reactions with manually checked GPRs
15
25
  'SUCD1', 'ALKP', 'PFK_3', 'TCMPTS', 'PPA', 'APSR',
gsrap/parsedb/parsedb.py CHANGED
@@ -16,6 +16,8 @@ from ..commons import write_excel_model
16
16
  from ..commons import show_contributions
17
17
  from ..commons import adjust_biomass_precursors
18
18
  from ..commons import count_undrawn_rids
19
+ from ..commons import count_undrawn_rids_focus
20
+
19
21
  from ..commons import format_expansion
20
22
  from ..commons import check_taxon
21
23
  from ..commons import download_keggorg
@@ -184,7 +186,8 @@ def main(args, logger):
184
186
 
185
187
  ###### POLISHING 1
186
188
  # remove disconnected metabolites
187
- universe = remove_disconnected(logger, universe)
189
+ if args.keepdisconn == False:
190
+ universe = remove_disconnected(logger, universe) # can be commented when using booster.py
188
191
 
189
192
 
190
193
 
@@ -193,9 +196,9 @@ def main(args, logger):
193
196
  verify_egc_all(logger, universe, args.outdir)
194
197
 
195
198
 
199
+
196
200
  if not args.justparse:
197
201
 
198
-
199
202
  ###### CHECKS 3
200
203
  # check growth on minimal media
201
204
  df_G = grow_on_media(logger, universe, dbexp, args.media, '-', True)
@@ -228,10 +231,15 @@ def main(args, logger):
228
231
 
229
232
 
230
233
 
231
- # output the universe
232
- logger.info("Writing universal model...")
233
- cobra.io.save_json_model(universe, f'{args.outdir}/universe.json')
234
- logger.info(f"'{args.outdir}/universe.json' created!")
234
+ # output the universe (even when --justparse)
235
+ logger.info("Writing universal model...")
236
+ cobra.io.save_json_model(universe, f'{args.outdir}/universe.json')
237
+ logger.info(f"'{args.outdir}/universe.json' created!")
238
+
239
+
240
+ if not args.justparse:
241
+
242
+ # output in the remaining formats:
235
243
  cobra.io.write_sbml_model(universe, f'{args.outdir}/universe.xml') # groups are saved only to SBML
236
244
  logger.info(f"'{args.outdir}/universe.xml' created!")
237
245
  force_id_on_sbml(f'{args.outdir}/universe.xml', 'universe') # force introduction of the 'id=""' field
@@ -242,7 +250,9 @@ def main(args, logger):
242
250
 
243
251
  ###### CHECKS 4
244
252
  # check if universal escher map is updated:
245
- count_undrawn_rids(logger, universe, lastmap)
253
+ count_undrawn_rids(logger, universe, lastmap, args.focus)
254
+ if args.focus != '-':
255
+ count_undrawn_rids_focus(logger, universe, lastmap, args.focus, args.outdir)
246
256
 
247
257
 
248
258
  return 0
@@ -45,7 +45,7 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
45
45
  pass
46
46
  elif ko_id not in idcollection_dict['ko'] and ko_id != 'spontaneous' and ko_id != 'orphan':
47
47
  logger.error(f"{itemtype} '{rid}' has an invalid KEGG Ortholog: '{ko_id}'.")
48
- return 1
48
+ return 1 # can be commented when migrating to new kegg release
49
49
 
50
50
 
51
51
  # check if these ko_ids are really assigned to this reaction:
@@ -61,7 +61,7 @@ def check_gpr(logger, rid, row, kr_ids, idcollection_dict, addtype='R'):
61
61
  missing_ko_ids = ko_for_rid - (set(ko_ids_parsed) - set(['spontaneous', 'orphan']))
62
62
  if len(missing_ko_ids) > 0:
63
63
  logger.error(f"Orthologs {missing_ko_ids} are missing from reaction '{rid}' ({kr_ids}).")
64
- return 1
64
+ return 1 # can be commented when migrating to new kegg release
65
65
 
66
66
 
67
67
  return 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: gsrap
3
- Version: 0.8.3
3
+ Version: 0.9.0
4
4
  Summary:
5
5
  License: GNU General Public License v3.0
6
6
  Author: Gioele Lazzari
@@ -1,5 +1,5 @@
1
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py,sha256=Epw4X8B1O9kWnfC9v-X4jvDXE4X-G0XlRfkMdtNvMq0,15459
2
- gsrap/__init__.py,sha256=Epw4X8B1O9kWnfC9v-X4jvDXE4X-G0XlRfkMdtNvMq0,15459
1
+ gsrap/.ipynb_checkpoints/__init__-checkpoint.py,sha256=8or3yeJRjjuaKUSWD5w3-I27LWKUF5hz9ECtzdQ_PKU,15583
2
+ gsrap/__init__.py,sha256=8or3yeJRjjuaKUSWD5w3-I27LWKUF5hz9ECtzdQ_PKU,15583
3
3
  gsrap/assets/.ipynb_checkpoints/PM1-checkpoint.csv,sha256=0qjaMVG_t9aFxbHbxON6ecmEUnWPwN9nhmxc61QFeCU,8761
4
4
  gsrap/assets/.ipynb_checkpoints/PM2A-checkpoint.csv,sha256=rjYTdwe8lpRS552BYiUP3J71juG2ywVdR5Sux6fjZTY,8816
5
5
  gsrap/assets/.ipynb_checkpoints/PM3B-checkpoint.csv,sha256=42IGX_2O5bRYSiHoMuVKT-T-bzVj0cSRZBvGOrbnQMA,8130
@@ -9,13 +9,13 @@ gsrap/assets/PM2A.csv,sha256=rjYTdwe8lpRS552BYiUP3J71juG2ywVdR5Sux6fjZTY,8816
9
9
  gsrap/assets/PM3B.csv,sha256=42IGX_2O5bRYSiHoMuVKT-T-bzVj0cSRZBvGOrbnQMA,8130
10
10
  gsrap/assets/PM4A.csv,sha256=f_5__0Ap_T0KYje5h9veW29I2qB4yU0h7Hr7WpaHjSc,9081
11
11
  gsrap/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- gsrap/assets/kegg_compound_to_others.pickle,sha256=pz1897cfQ7PLsYZiBVcoMQPzvRzT-nHUdgphBe0g5ZQ,8233860
13
- gsrap/assets/kegg_reaction_to_others.pickle,sha256=AGW8CGN5hKeXZoYn3JRF4Xu832WyNrTlMcLw7luttlc,1703146
12
+ gsrap/assets/kegg_compound_to_others.pickle,sha256=ddo1bdFSQOSmXJBuvsWWKSsFDWT_XOfV6ZG1BBj4-ew,8498114
13
+ gsrap/assets/kegg_reaction_to_others.pickle,sha256=x53Ch8GjNWXHeqCF5wCPdANmi0ZxmhusYGNfDlk2ZpQ,1582568
14
14
  gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py,sha256=9lrb0sBFSWEgV_e5FYzSgjTbam8b959rW_8VuxQHt1M,268
15
15
  gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py,sha256=4u7WBaUgo42tBoXDU1D0VUjICatb44e0jfswZrBeHYs,17987
16
16
  gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py,sha256=qI3_GuqHkeA2KbK9pYdkqJaFwYemAVZJGLRR4QtHt6w,19182
17
- gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py,sha256=VnIFC6Y8QZ4oPyi04J_rtC2imzk5yOe0i5SpTP3MA6M,9713
18
- gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py,sha256=_y0TgM0-Im0RT8W8z5rr4vlnGK55iRFds6DlDsjGD-8,1151
17
+ gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py,sha256=JrVk-UbQzelefGvOz5k4j8Ofib58u_KD_zlUzTcuv6g,11170
18
+ gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py,sha256=VQ1FXyby6Cbfw3UNHzRoePH8M8owJ8E2rzB-11vBweA,3732
19
19
  gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py,sha256=_LtaWripY_D99f14Yk-3v9VIov2cUGzJ53AXgmNFpUk,11457
20
20
  gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py,sha256=IRHSQXrCi4SQoISEfNB0rDhvUzbjcgsPi9zUSefsRto,4316
21
21
  gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py,sha256=jgC3-vI9Tbjvqohh2mJwFra4rl_pbUzHWrSa_QAxVO4,1262
@@ -28,8 +28,8 @@ gsrap/commons/.ipynb_checkpoints/sbmlutils-checkpoint.py,sha256=gkY02qbGXrbYStn2
28
28
  gsrap/commons/__init__.py,sha256=9lrb0sBFSWEgV_e5FYzSgjTbam8b959rW_8VuxQHt1M,268
29
29
  gsrap/commons/biomass.py,sha256=4u7WBaUgo42tBoXDU1D0VUjICatb44e0jfswZrBeHYs,17987
30
30
  gsrap/commons/coeffs.py,sha256=qI3_GuqHkeA2KbK9pYdkqJaFwYemAVZJGLRR4QtHt6w,19182
31
- gsrap/commons/downloads.py,sha256=VnIFC6Y8QZ4oPyi04J_rtC2imzk5yOe0i5SpTP3MA6M,9713
32
- gsrap/commons/escherutils.py,sha256=_y0TgM0-Im0RT8W8z5rr4vlnGK55iRFds6DlDsjGD-8,1151
31
+ gsrap/commons/downloads.py,sha256=JrVk-UbQzelefGvOz5k4j8Ofib58u_KD_zlUzTcuv6g,11170
32
+ gsrap/commons/escherutils.py,sha256=VQ1FXyby6Cbfw3UNHzRoePH8M8owJ8E2rzB-11vBweA,3732
33
33
  gsrap/commons/excelhub.py,sha256=_LtaWripY_D99f14Yk-3v9VIov2cUGzJ53AXgmNFpUk,11457
34
34
  gsrap/commons/figures.py,sha256=IRHSQXrCi4SQoISEfNB0rDhvUzbjcgsPi9zUSefsRto,4316
35
35
  gsrap/commons/fluxbal.py,sha256=jgC3-vI9Tbjvqohh2mJwFra4rl_pbUzHWrSa_QAxVO4,1262
@@ -60,21 +60,21 @@ gsrap/mkmodel/mkmodel.py,sha256=zm-JA2sXwqTLalCc0L5POw2iRI56QK0UJMUgorHQrLw,1083
60
60
  gsrap/mkmodel/polishing.py,sha256=R1UdFPxN8N27Iu0jsYW2N_1BkWEbBHaMYW6NkCYZK_k,3256
61
61
  gsrap/mkmodel/pruner.py,sha256=FAZid-0H6j66wR2dVKRAaMaDREVt1edflmZXbX7blXg,9836
62
62
  gsrap/parsedb/.ipynb_checkpoints/__init__-checkpoint.py,sha256=1k2K1gz4lIdXAwHEdJ0OhdkPu83woGv0Z4TpT1kGrTk,97
63
- gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py,sha256=Y02_zXJj_tS1GyBdfuLBy9YJjMgx3mjX6tqr1KhQ-9Q,4810
64
- gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py,sha256=Op7VwmmwHmt1nhcl_0ISAejtLz-F9IkmnTrcJvO0BGc,12829
63
+ gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py,sha256=r0sYkSqA6b8mZOGD1tgk7I__ZFkH8r4aqxVA6EUspEY,5205
64
+ gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py,sha256=IziYv6xShUnKVxnHUwJ_I2TfiKplSXoiHQoUakI1xFI,13141
65
65
  gsrap/parsedb/.ipynb_checkpoints/cycles-checkpoint.py,sha256=HJ58LcHQseQ1eploysfXd5Y8Rip8n62qhje4pmL22VM,4761
66
66
  gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py,sha256=UuwGWGB2saG9VDMoboumeRBWhHOO68bs5_1r2RSkyVo,17145
67
- gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py,sha256=qMKYshVftSGCRAjHC87E6n9-6kAiffFFCgHOUbqlyC0,3625
68
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py,sha256=8mQgUTMOLpoeHK_X28s5jaW8adltKZ40nn_0uxmIXz8,8515
69
- gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py,sha256=WwPOzlZgsZWmJ-rvhFg21iOJ6gajgKFc2vCIHh6weBg,6103
67
+ gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py,sha256=sDux5CFNC8v2YJ3oDMV0vXsQrvSG6gov1f4hftc4nyo,3874
68
+ gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py,sha256=HbjkcWRI916_pvKVwTruNUbRWrHRlT__200wocLSOMY,8860
69
+ gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py,sha256=ph1gC-84SyBnBuM7s0A9jTJBJmBlRzq0lk4FYG70228,6215
70
70
  gsrap/parsedb/__init__.py,sha256=1k2K1gz4lIdXAwHEdJ0OhdkPu83woGv0Z4TpT1kGrTk,97
71
- gsrap/parsedb/annotation.py,sha256=Y02_zXJj_tS1GyBdfuLBy9YJjMgx3mjX6tqr1KhQ-9Q,4810
72
- gsrap/parsedb/completeness.py,sha256=Op7VwmmwHmt1nhcl_0ISAejtLz-F9IkmnTrcJvO0BGc,12829
71
+ gsrap/parsedb/annotation.py,sha256=r0sYkSqA6b8mZOGD1tgk7I__ZFkH8r4aqxVA6EUspEY,5205
72
+ gsrap/parsedb/completeness.py,sha256=IziYv6xShUnKVxnHUwJ_I2TfiKplSXoiHQoUakI1xFI,13141
73
73
  gsrap/parsedb/cycles.py,sha256=HJ58LcHQseQ1eploysfXd5Y8Rip8n62qhje4pmL22VM,4761
74
74
  gsrap/parsedb/introduce.py,sha256=UuwGWGB2saG9VDMoboumeRBWhHOO68bs5_1r2RSkyVo,17145
75
- gsrap/parsedb/manual.py,sha256=qMKYshVftSGCRAjHC87E6n9-6kAiffFFCgHOUbqlyC0,3625
76
- gsrap/parsedb/parsedb.py,sha256=8mQgUTMOLpoeHK_X28s5jaW8adltKZ40nn_0uxmIXz8,8515
77
- gsrap/parsedb/repeating.py,sha256=WwPOzlZgsZWmJ-rvhFg21iOJ6gajgKFc2vCIHh6weBg,6103
75
+ gsrap/parsedb/manual.py,sha256=8UCvfMK7HSXd_-JlnIWgMmBnsFGojTGE63iOJE5J_6E,3884
76
+ gsrap/parsedb/parsedb.py,sha256=HbjkcWRI916_pvKVwTruNUbRWrHRlT__200wocLSOMY,8860
77
+ gsrap/parsedb/repeating.py,sha256=ph1gC-84SyBnBuM7s0A9jTJBJmBlRzq0lk4FYG70228,6215
78
78
  gsrap/runsims/.ipynb_checkpoints/__init__-checkpoint.py,sha256=6E6E1gWgH0V7ls4Omx4mxxC85gMJ_27YqhjugJzlZtY,97
79
79
  gsrap/runsims/.ipynb_checkpoints/biosynth-checkpoint.py,sha256=fUlHUo4CfB4rGX9Dth87B1p5E5sz7i6spR7ZoqDDGaI,2836
80
80
  gsrap/runsims/.ipynb_checkpoints/cnps-checkpoint.py,sha256=A0U8QPqW_uyrtHs99F286aEDEC6eukHXeMWrmnd0efA,5636
@@ -93,8 +93,8 @@ gsrap/runsims/precursors.py,sha256=1RNt_Rxs0L1lolDmYh4_CiZgiwHfU5B_AcomJO6vJ28,2
93
93
  gsrap/runsims/runsims.py,sha256=2FC5Gs8oSYyZTjHF3A7aXB_O6myVfcn3bCxQfLJlZTk,2842
94
94
  gsrap/runsims/simplegrowth.py,sha256=tCQHTMUqum1YwlBKRTNaQoag2co_yQlCaKmISOARAlE,2353
95
95
  gsrap/runsims/singleomission.py,sha256=jMuKAi0pINP8Jlrm-yI-tX7D110VzttR3YfTSnDRe4I,2847
96
- gsrap-0.8.3.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
97
- gsrap-0.8.3.dist-info/METADATA,sha256=JADff6H-Y_SWY5PtR9qzEhmabesB6A5dLa-V0GTKqgc,898
98
- gsrap-0.8.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
99
- gsrap-0.8.3.dist-info/entry_points.txt,sha256=S9MY0DjfnbKGlZbp5bV7W6dNFy3APoEV84u9x6MV1eI,36
100
- gsrap-0.8.3.dist-info/RECORD,,
96
+ gsrap-0.9.0.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
97
+ gsrap-0.9.0.dist-info/METADATA,sha256=Zd_nRZYHhrKBle7mzTBWnQQ1KdMWlHPuKqH9-YU5IHA,898
98
+ gsrap-0.9.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
99
+ gsrap-0.9.0.dist-info/entry_points.txt,sha256=S9MY0DjfnbKGlZbp5bV7W6dNFy3APoEV84u9x6MV1eI,36
100
+ gsrap-0.9.0.dist-info/RECORD,,
File without changes