gsrap 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,9 @@ import requests
5
5
  import importlib.metadata
6
6
  from datetime import datetime
7
7
  from packaging import version
8
+ import atexit
9
+ import os
10
+
8
11
 
9
12
 
10
13
  import cobra
@@ -69,13 +72,17 @@ def main():
69
72
  parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use only with --progress).")
70
73
  parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use only with --progress).")
71
74
  parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
75
+ parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
72
76
  parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
73
77
  parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
78
+ parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
74
79
  parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
75
80
  parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
76
81
  parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
77
82
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
78
83
  parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
84
+ parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
85
+
79
86
 
80
87
 
81
88
  # add arguments for the 'mkmodel' command
@@ -87,10 +94,11 @@ def main():
87
94
  mkmodel_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the eggnog-mapper annotation table(s).")
88
95
  mkmodel_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
89
96
  mkmodel_parser.add_argument("-u", "--universe", metavar='', type=str, default='-', help="Path to the universe model (SBML format).")
90
- mkmodel_parser.add_argument("-i", "--force_inclusion", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
91
- mkmodel_parser.add_argument("-f", "--gap_fill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
92
- mkmodel_parser.add_argument("-x", "--exclude_orphans", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
93
- #mkmodel_parser.add_argument("-r", "--force_removal", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
97
+ mkmodel_parser.add_argument("-i", "--include", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
98
+ mkmodel_parser.add_argument("-f", "--gapfill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
99
+ mkmodel_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the model on the provided medium. By default, the first medium in --gapfill is used. Use 'none' to avoid initialization.")
100
+ mkmodel_parser.add_argument("-x", "--excludeorp", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
101
+ #mkmodel_parser.add_argument("-r", "--remove", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
94
102
  mkmodel_parser.add_argument("-l", "--biolog", metavar='', type=str, default='-', help="Strain ID associated to binary Biolog(R) PM1, PM2A, PM3B and PM4A plates; if not provided, Biolog(R)-based model curation will be skipped (use with --cnps and --gap_fill).")
95
103
  mkmodel_parser.add_argument("-s", "--cnps", metavar='', type=str, default='glc__D,nh4,pi,so4', help="Starting C, N, P and S source metabolites (comma-separated IDs).")
96
104
  mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
@@ -161,6 +169,15 @@ def main():
161
169
 
162
170
 
163
171
 
172
+ # The following chunk suppresses the warning
173
+ # "sys:1: DeprecationWarning: builtin type swigvarlink has no __module__ attribute"
174
+ # raised at Gsrap shutdown by calling memote.suite.api.test_model() in commons/memoteutils.py
175
+ def _suppress_swigvarlink_warning():
176
+ sys.stderr = open(os.devnull, 'w') # tested also with sys.stdout: same effect.
177
+ atexit.register(_suppress_swigvarlink_warning)
178
+
179
+
180
+
164
181
  # run the program:
165
182
  set_usual_formatter(logger.handlers[0])
166
183
  current_date_time = datetime.now()
gsrap/__init__.py CHANGED
@@ -5,6 +5,9 @@ import requests
5
5
  import importlib.metadata
6
6
  from datetime import datetime
7
7
  from packaging import version
8
+ import atexit
9
+ import os
10
+
8
11
 
9
12
 
10
13
  import cobra
@@ -69,13 +72,17 @@ def main():
69
72
  parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use only with --progress).")
70
73
  parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use only with --progress).")
71
74
  parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
75
+ parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
72
76
  parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
73
77
  parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
78
+ parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
74
79
  parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
75
80
  parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
76
81
  parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
77
82
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
78
83
  parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
84
+ parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
85
+
79
86
 
80
87
 
81
88
  # add arguments for the 'mkmodel' command
@@ -87,10 +94,11 @@ def main():
87
94
  mkmodel_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the eggnog-mapper annotation table(s).")
88
95
  mkmodel_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
89
96
  mkmodel_parser.add_argument("-u", "--universe", metavar='', type=str, default='-', help="Path to the universe model (SBML format).")
90
- mkmodel_parser.add_argument("-i", "--force_inclusion", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
91
- mkmodel_parser.add_argument("-f", "--gap_fill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
92
- mkmodel_parser.add_argument("-x", "--exclude_orphans", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
93
- #mkmodel_parser.add_argument("-r", "--force_removal", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
97
+ mkmodel_parser.add_argument("-i", "--include", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
98
+ mkmodel_parser.add_argument("-f", "--gapfill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
99
+ mkmodel_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the model on the provided medium. By default, the first medium in --gapfill is used. Use 'none' to avoid initialization.")
100
+ mkmodel_parser.add_argument("-x", "--excludeorp", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
101
+ #mkmodel_parser.add_argument("-r", "--remove", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
94
102
  mkmodel_parser.add_argument("-l", "--biolog", metavar='', type=str, default='-', help="Strain ID associated to binary Biolog(R) PM1, PM2A, PM3B and PM4A plates; if not provided, Biolog(R)-based model curation will be skipped (use with --cnps and --gap_fill).")
95
103
  mkmodel_parser.add_argument("-s", "--cnps", metavar='', type=str, default='glc__D,nh4,pi,so4', help="Starting C, N, P and S source metabolites (comma-separated IDs).")
96
104
  mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
@@ -161,6 +169,15 @@ def main():
161
169
 
162
170
 
163
171
 
172
+ # The following chunk suppresses the warning
173
+ # "sys:1: DeprecationWarning: builtin type swigvarlink has no __module__ attribute"
174
+ # raised at Gsrap shutdown by calling memote.suite.api.test_model() in commons/memoteutils.py
175
+ def _suppress_swigvarlink_warning():
176
+ sys.stderr = open(os.devnull, 'w') # tested also with sys.stdout: same effect.
177
+ atexit.register(_suppress_swigvarlink_warning)
178
+
179
+
180
+
164
181
  # run the program:
165
182
  set_usual_formatter(logger.handlers[0])
166
183
  current_date_time = datetime.now()
@@ -8,3 +8,4 @@ from .sbmlutils import *
8
8
  from .escherutils import *
9
9
  from .logutils import *
10
10
  from .keggutils import *
11
+ from .memoteutils import *
@@ -243,7 +243,38 @@ def format_expansion(logger, eggnog):
243
243
 
244
244
 
245
245
 
246
def check_taxon(logger, taxon, idcollection_dict):
    """Validate the value given to --taxon against the 'ko_to_taxa' asset.

    Args:
        logger: logger used to report validation errors.
        taxon: string expected in the form '{level}:{name}'.
        idcollection_dict: dict of assets. It must contain 'ko_to_taxa', which
            is indexed here as ko_to_taxa[ko][level] and iterated to collect
            taxon names.

    Returns:
        0 if the taxon is acceptable, 1 otherwise (an error is logged).
    """

    # verify presence of needed assets
    if 'ko_to_taxa' not in idcollection_dict:
        logger.error("Asset 'ko_to_taxa' not found in 'gsrap.maps'. Please update 'gsrap.maps' with 'gsrap getmaps'.")
        return 1

    # extract level and name: exactly one ':' separator is required.
    # ValueError covers a wrong number of fields, AttributeError a non-string.
    try:
        level, name = taxon.split(':')
    except (ValueError, AttributeError):
        logger.error(f"Provided --taxon is not well formatted: '{taxon}'.")
        return 1

    # compute available levels and check
    avail_levels = {'kingdom', 'phylum'}
    if level not in avail_levels:
        logger.error(f"Provided level is not acceptable: '{level}' (see --taxon). Acceptable levels are {avail_levels}.")
        return 1

    # compute available taxa at input level
    ko_to_taxa = idcollection_dict['ko_to_taxa']
    avail_taxa_at_level = {
        taxon_name
        for taxa_by_level in ko_to_taxa.values()
        for taxon_name in taxa_by_level[level]
    }
    if name not in avail_taxa_at_level:
        logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
        return 1

    return 0
@@ -1,3 +1,6 @@
1
+ import importlib.metadata
2
+
3
+
1
4
  import pandas as pnd
2
5
 
3
6
 
@@ -5,7 +8,51 @@ from .figures import figure_df_C_F1
5
8
 
6
9
 
7
10
 
8
- def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
11
+
12
def get_summary_sheet(model, memote_results_dict):
    """Build the 4-column table written to the 'Summary' Excel sheet.

    Args:
        model: object exposing `id`, `compartments`, `metabolites`,
            `reactions` and `genes`. Metabolite IDs are split on their last
            '_' to separate the base ID from the compartment suffix.
        memote_results_dict: dict of MEMOTE scores with the keys read below,
            or None to omit the MEMOTE rows.

    Returns:
        A pandas DataFrame with string columns 'c1'..'c4': the Gsrap rows,
        followed by the MEMOTE rows when results are provided.
    """

    def row(c1='', c2='', c3=''):
        # every row shares the same 4-column layout; 'c4' is always empty
        return {'c1': c1, 'c2': c2, 'c3': c3, 'c4': ''}

    # Gsrap version: fall back to a placeholder when the package metadata
    # is not available (e.g. running from a non-installed source tree).
    try:
        gsrap_version = f"v{importlib.metadata.version('gsrap')}"
    except importlib.metadata.PackageNotFoundError:
        gsrap_version = 'unknown'

    unique_mids = {m.id.rsplit('_', 1)[0] for m in model.metabolites}

    # classify reactions: 'Biomass' and single-metabolite reactions are
    # artificial; the others are split by the number of distinct compartment
    # suffixes among their metabolites.
    n_non_transport = 0
    n_transport = 0
    n_artificial = 0
    for r in model.reactions:
        if r.id == 'Biomass' or len(r.metabolites) == 1:
            n_artificial += 1
            continue
        n_compartments = len({m.id.rsplit('_', 1)[-1] for m in r.metabolites})
        if n_compartments == 1:
            n_non_transport += 1
        elif n_compartments > 1:
            n_transport += 1

    gsrap_rows = [
        # Gsrap
        row('Gsrap version', gsrap_version),
        row('Model ID', f"{model.id}"),
        row('Compartments', f"{len(model.compartments)}"),
        row('Metabolites', f"{len(model.metabolites)}"),
        row('', 'Unique', f"{len(unique_mids)}"),
        row('Reactions', f"{len(model.reactions)}"),
        row('', 'Non-transport', f"{n_non_transport}"),
        row('', 'Transport', f"{n_transport}"),
        row('', 'Artificial', f"{n_artificial}"),
        row('Genes', f"{len(model.genes)}"),
        # empty line
        row(),
    ]
    frames = [pnd.DataFrame.from_records(gsrap_rows)]

    if memote_results_dict is not None:
        consistency = memote_results_dict['consistency']
        memote_rows = [
            # MEMOTE
            row('MEMOTE version', f"v{memote_results_dict['version']}"),
            row('MEMOTE Total Score', f"{memote_results_dict['total']}%"),
            row('MEMOTE consistency', f"{consistency['sub_total']}%"),
            row('', 'stoichiometric consistency', f"{consistency['test_stoichiometric_consistency']}%"),
            row('', 'mass balance', f"{consistency['test_reaction_mass_balance']}%"),
            row('', 'charge balance', f"{consistency['test_reaction_charge_balance']}%"),
            # this row must report the disconnected-metabolites test, not charge balance
            row('', 'disconnected metabolites', f"{consistency['test_find_disconnected']}%"),
            row('', 'unbounded flux in default conditions', f"{consistency['test_find_reactions_unbounded_flux_default_condition']}%"),
            row('MEMOTE annotation Metabolites', f"{memote_results_dict['annotation_M']['sub_total']}%"),
            row('MEMOTE annotation Reactions', f"{memote_results_dict['annotation_R']['sub_total']}%"),
            row('MEMOTE annotation Genes', f"{memote_results_dict['annotation_G']['sub_total']}%"),
            row('MEMOTE annotation SBO', f"{memote_results_dict['annotation_SBO']['sub_total']}%"),
        ]
        frames.append(pnd.DataFrame.from_records(memote_rows))

    # the original row indexes are kept (the sheet is written with index=False)
    return pnd.concat(frames)
52
+
53
+
54
+
55
+ def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B, df_P, df_S, df_C=None):
9
56
 
10
57
 
11
58
  # generate figures
@@ -101,7 +148,7 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
101
148
  else: df_T.append(row_dict)
102
149
 
103
150
  for g in model.genes:
104
- row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
151
+ row_dict = {'gid': g.id, 'name': g.name, 'involved_in': '; '.join([r.id for r in g.reactions])}
105
152
 
106
153
  for db in g.annotation.keys():
107
154
  annots = g.annotation[db]
@@ -124,12 +171,13 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
124
171
  df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
125
172
  df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
126
173
  df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
127
- df_G_first_cols = ['gid', 'involved_in']
174
+ df_G_first_cols = ['gid', 'name', 'involved_in']
128
175
  df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
129
176
 
130
177
 
131
178
 
132
179
  with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
180
+ get_summary_sheet(model, memote_results_dict).to_excel(writer, sheet_name='Summary', index=False, header=False)
133
181
  df_M.to_excel(writer, sheet_name='Metabolites', index=False)
134
182
  df_R.to_excel(writer, sheet_name='Reactions', index=False)
135
183
  df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -1,6 +1,10 @@
1
1
  import gempipe
2
2
 
3
3
 
4
+ from .fluxbal import fba_no_warnings
5
+
6
+
7
+
4
8
 
5
9
  def apply_medium_given_column(logger, model, medium, column, is_reference=False):
6
10
 
@@ -80,4 +84,36 @@ def apply_medium_given_column(logger, model, medium, column, is_reference=False)
80
84
  model.reactions.get_by_id(f'EX_{substrate}_e').lower_bound = value -error
81
85
  model.reactions.get_by_id(f'EX_{substrate}_e').upper_bound = value +error
82
86
 
87
+ return 0
88
+
89
+
90
+
91
def initialize_model(logger, model, dbexp, initialize, media):
    """Apply a starting medium to the model and log the resulting FBA outcome.

    Args:
        logger: logger used for progress and warning messages.
        model: model passed to apply_medium_given_column() and
            fba_no_warnings().
        dbexp: mapping whose 'media' entry exposes `.columns` and is indexed
            by medium ID.
        initialize: medium ID to use; '-' means "first medium in `media`",
            'none'/'None' disables initialization.
        media: comma-separated medium IDs, or '-' when not provided.

    Returns:
        1 if applying the medium reported a failure, 0 in every other case
        (including when the initialization is skipped).
    """

    # explicit opt-out
    if initialize in ('None', 'none'):
        logger.info("Initialization will be skipped.")
        return 0

    # select the medium: an explicit request wins over the --media list
    if initialize != '-':
        medium = initialize
    elif media == '-':
        logger.info("No media provided: initialization will be skipped.")
        return 0
    else:
        medium = media.split(',')[0]  # taking the first medium

    media_table = dbexp['media']
    if medium not in media_table.columns:
        logger.warning(f"Medium '{medium}' does not exists: initialization will be skipped.")
        return 0

    # apply the medium, then run a single FBA to report the starting state
    if apply_medium_given_column(logger, model, medium, media_table[medium]) == 1:
        return 1
    _, obj_value, status = fba_no_warnings(model)
    logger.info(f"Initialized on medium '{medium}': {obj_value} ({status})")

    return 0
@@ -0,0 +1,132 @@
1
+ import os
2
+ import contextlib
3
+ import importlib.metadata
4
+
5
+
6
+
7
+ import memote
8
+
9
+
10
+
11
+
12
def get_memote_results_dict(logger, model):
    """Run a selection of MEMOTE tests on the model and summarize the scores.

    Each MEMOTE metric is converted to a percentage as (1 - metric) * 100.
    Metrics are combined into a weighted mean per section ('sub_total'), and
    the sections into a weighted overall 'total'.

    Args:
        logger: logger used for progress messages.
        model: model handed to memote.suite.api.test_model().

    Returns:
        A dict with 'version', 'total' and one entry per section
        ('consistency', 'annotation_M', 'annotation_R', 'annotation_G',
        'annotation_SBO'). Each section maps metric names to percentages
        (or to a dict of percentages when the metric has subcategories),
        plus 'sub_total'. Returns None when MEMOTE raises ValueError.
    """

    memote_version = importlib.metadata.version("memote")
    logger.info(f"Running selected modules of MEMOTE v{memote_version}...")

    # launch memote (only relevant modules), silencing its console output
    with open(os.devnull, 'w') as devnull:
        with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
            try:
                memote_report = memote.suite.api.test_model(model, exclusive=[
                    'test_annotation',
                    'test_sbo',
                    'test_stoichiometric_consistency',
                    'test_reaction_mass_balance',
                    'test_reaction_charge_balance',
                    'test_find_disconnected',
                    'test_find_reactions_unbounded_flux_default_condition'], results=True)
            except ValueError:
                memote_report = None

    # without a report there is nothing to parse: subscripting None below
    # would raise TypeError, so signal "no results" to the caller instead.
    if memote_report is None:
        logger.warning("MEMOTE could not be run on this model: its scores will not be reported.")
        return None

    # parse memote's results
    results_dict = {'version': memote_version}
    test_results = dict(memote_report[1])['tests']

    # (metric name, weight of the metric inside its section)
    sections = {
        'consistency': [
            ('test_stoichiometric_consistency', 3),
            ('test_reaction_mass_balance', 1),
            ('test_reaction_charge_balance', 1),
            ('test_find_disconnected', 1),
            ('test_find_reactions_unbounded_flux_default_condition', 1),
        ],
        'annotation_M': [
            ('test_metabolite_annotation_presence', 1),
            ('test_metabolite_annotation_overview', 1),
            ('test_metabolite_annotation_wrong_ids', 1),
            ('test_metabolite_id_namespace_consistency', 1),
        ],
        'annotation_R': [
            ('test_reaction_annotation_presence', 1),
            ('test_reaction_annotation_overview', 1),
            ('test_reaction_annotation_wrong_ids', 1),
            ('test_reaction_id_namespace_consistency', 1),
        ],
        'annotation_G': [
            ('test_gene_product_annotation_presence', 1),
            ('test_gene_product_annotation_overview', 1),
            ('test_gene_product_annotation_wrong_ids', 1),
        ],
        'annotation_SBO': [
            ('test_metabolite_sbo_presence', 1),
            ('test_metabolite_specific_sbo_presence', 1),
            ('test_reaction_sbo_presence', 1),
            ('test_metabolic_reaction_specific_sbo_presence', 1),
            ('test_transport_reaction_specific_sbo_presence', 1),
            ('test_exchange_specific_sbo_presence', 1),
            ('test_demand_specific_sbo_presence', 1),
            ('test_sink_specific_sbo_presence', 1),
            ('test_gene_sbo_presence', 1),
            ('test_gene_specific_sbo_presence', 1),
            ('test_biomass_specific_sbo_presence', 1),
        ],
    }
    # weight of each section in the total score
    section_multipliers = {
        'consistency': 3,
        'annotation_M': 1,
        'annotation_R': 1,
        'annotation_G': 1,
        'annotation_SBO': 2,
    }

    numerator_total = 0
    denominator_total = 0
    for section, metrics in sections.items():
        numerator = 0
        denominator = 0
        results_dict[section] = {}

        # iterate metrics of this section:
        for metric, metric_multiplier in metrics:
            metric_raw = test_results[metric]['metric']

            # no subcategories here:
            if isinstance(metric_raw, float):
                metric_percentage = (1 - metric_raw) * 100
                numerator += metric_percentage * metric_multiplier
                denominator += metric_multiplier
                results_dict[section][metric] = round(metric_percentage, 1)

            # there are subcategories (like in the case of M/R/G/SBO annots):
            # the metric weight is shared equally among them.
            else:
                results_dict[section][metric] = {}
                multiplier_corrected = metric_multiplier / len(metric_raw) if metric_raw else 0
                for key, value in metric_raw.items():
                    metric_percentage = (1 - value) * 100
                    numerator += metric_percentage * multiplier_corrected
                    denominator += multiplier_corrected
                    results_dict[section][metric][key] = round(metric_percentage, 1)

        # compute the subtotal:
        sub_total = numerator / denominator
        results_dict[section]['sub_total'] = int(round(sub_total, 0))

        # accumulate the section into the total:
        denominator_total += section_multipliers[section] * denominator
        numerator_total += section_multipliers[section] * numerator

    # compute the total:
    total = numerator_total / denominator_total
    results_dict['total'] = int(round(total, 0))

    logger.info(f"Done! MEMOTE Total Score: {results_dict['total']}%.")

    return results_dict
gsrap/commons/__init__.py CHANGED
@@ -8,3 +8,4 @@ from .sbmlutils import *
8
8
  from .escherutils import *
9
9
  from .logutils import *
10
10
  from .keggutils import *
11
+ from .memoteutils import *
@@ -243,7 +243,38 @@ def format_expansion(logger, eggnog):
243
243
 
244
244
 
245
245
 
246
def check_taxon(logger, taxon, idcollection_dict):
    """Validate the value given to --taxon against the 'ko_to_taxa' asset.

    Args:
        logger: logger used to report validation errors.
        taxon: string expected in the form '{level}:{name}'.
        idcollection_dict: dict of assets. It must contain 'ko_to_taxa', which
            is indexed here as ko_to_taxa[ko][level] and iterated to collect
            taxon names.

    Returns:
        0 if the taxon is acceptable, 1 otherwise (an error is logged).
    """

    # verify presence of needed assets
    if 'ko_to_taxa' not in idcollection_dict:
        logger.error("Asset 'ko_to_taxa' not found in 'gsrap.maps'. Please update 'gsrap.maps' with 'gsrap getmaps'.")
        return 1

    # extract level and name: exactly one ':' separator is required.
    # ValueError covers a wrong number of fields, AttributeError a non-string.
    try:
        level, name = taxon.split(':')
    except (ValueError, AttributeError):
        logger.error(f"Provided --taxon is not well formatted: '{taxon}'.")
        return 1

    # compute available levels and check
    avail_levels = {'kingdom', 'phylum'}
    if level not in avail_levels:
        logger.error(f"Provided level is not acceptable: '{level}' (see --taxon). Acceptable levels are {avail_levels}.")
        return 1

    # compute available taxa at input level
    ko_to_taxa = idcollection_dict['ko_to_taxa']
    avail_taxa_at_level = {
        taxon_name
        for taxa_by_level in ko_to_taxa.values()
        for taxon_name in taxa_by_level[level]
    }
    if name not in avail_taxa_at_level:
        logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
        return 1

    return 0
gsrap/commons/excelhub.py CHANGED
@@ -1,3 +1,6 @@
1
+ import importlib.metadata
2
+
3
+
1
4
  import pandas as pnd
2
5
 
3
6
 
@@ -5,7 +8,51 @@ from .figures import figure_df_C_F1
5
8
 
6
9
 
7
10
 
8
- def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
11
+
12
def get_summary_sheet(model, memote_results_dict):
    """Build the 4-column table written to the 'Summary' Excel sheet.

    Args:
        model: object exposing `id`, `compartments`, `metabolites`,
            `reactions` and `genes`. Metabolite IDs are split on their last
            '_' to separate the base ID from the compartment suffix.
        memote_results_dict: dict of MEMOTE scores with the keys read below,
            or None to omit the MEMOTE rows.

    Returns:
        A pandas DataFrame with string columns 'c1'..'c4': the Gsrap rows,
        followed by the MEMOTE rows when results are provided.
    """

    def row(c1='', c2='', c3=''):
        # every row shares the same 4-column layout; 'c4' is always empty
        return {'c1': c1, 'c2': c2, 'c3': c3, 'c4': ''}

    # Gsrap version: fall back to a placeholder when the package metadata
    # is not available (e.g. running from a non-installed source tree).
    try:
        gsrap_version = f"v{importlib.metadata.version('gsrap')}"
    except importlib.metadata.PackageNotFoundError:
        gsrap_version = 'unknown'

    unique_mids = {m.id.rsplit('_', 1)[0] for m in model.metabolites}

    # classify reactions: 'Biomass' and single-metabolite reactions are
    # artificial; the others are split by the number of distinct compartment
    # suffixes among their metabolites.
    n_non_transport = 0
    n_transport = 0
    n_artificial = 0
    for r in model.reactions:
        if r.id == 'Biomass' or len(r.metabolites) == 1:
            n_artificial += 1
            continue
        n_compartments = len({m.id.rsplit('_', 1)[-1] for m in r.metabolites})
        if n_compartments == 1:
            n_non_transport += 1
        elif n_compartments > 1:
            n_transport += 1

    gsrap_rows = [
        # Gsrap
        row('Gsrap version', gsrap_version),
        row('Model ID', f"{model.id}"),
        row('Compartments', f"{len(model.compartments)}"),
        row('Metabolites', f"{len(model.metabolites)}"),
        row('', 'Unique', f"{len(unique_mids)}"),
        row('Reactions', f"{len(model.reactions)}"),
        row('', 'Non-transport', f"{n_non_transport}"),
        row('', 'Transport', f"{n_transport}"),
        row('', 'Artificial', f"{n_artificial}"),
        row('Genes', f"{len(model.genes)}"),
        # empty line
        row(),
    ]
    frames = [pnd.DataFrame.from_records(gsrap_rows)]

    if memote_results_dict is not None:
        consistency = memote_results_dict['consistency']
        memote_rows = [
            # MEMOTE
            row('MEMOTE version', f"v{memote_results_dict['version']}"),
            row('MEMOTE Total Score', f"{memote_results_dict['total']}%"),
            row('MEMOTE consistency', f"{consistency['sub_total']}%"),
            row('', 'stoichiometric consistency', f"{consistency['test_stoichiometric_consistency']}%"),
            row('', 'mass balance', f"{consistency['test_reaction_mass_balance']}%"),
            row('', 'charge balance', f"{consistency['test_reaction_charge_balance']}%"),
            # this row must report the disconnected-metabolites test, not charge balance
            row('', 'disconnected metabolites', f"{consistency['test_find_disconnected']}%"),
            row('', 'unbounded flux in default conditions', f"{consistency['test_find_reactions_unbounded_flux_default_condition']}%"),
            row('MEMOTE annotation Metabolites', f"{memote_results_dict['annotation_M']['sub_total']}%"),
            row('MEMOTE annotation Reactions', f"{memote_results_dict['annotation_R']['sub_total']}%"),
            row('MEMOTE annotation Genes', f"{memote_results_dict['annotation_G']['sub_total']}%"),
            row('MEMOTE annotation SBO', f"{memote_results_dict['annotation_SBO']['sub_total']}%"),
        ]
        frames.append(pnd.DataFrame.from_records(memote_rows))

    # the original row indexes are kept (the sheet is written with index=False)
    return pnd.concat(frames)
52
+
53
+
54
+
55
+ def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B, df_P, df_S, df_C=None):
9
56
 
10
57
 
11
58
  # generate figures
@@ -101,7 +148,7 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
101
148
  else: df_T.append(row_dict)
102
149
 
103
150
  for g in model.genes:
104
- row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
151
+ row_dict = {'gid': g.id, 'name': g.name, 'involved_in': '; '.join([r.id for r in g.reactions])}
105
152
 
106
153
  for db in g.annotation.keys():
107
154
  annots = g.annotation[db]
@@ -124,12 +171,13 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
124
171
  df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
125
172
  df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
126
173
  df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
127
- df_G_first_cols = ['gid', 'involved_in']
174
+ df_G_first_cols = ['gid', 'name', 'involved_in']
128
175
  df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
129
176
 
130
177
 
131
178
 
132
179
  with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
180
+ get_summary_sheet(model, memote_results_dict).to_excel(writer, sheet_name='Summary', index=False, header=False)
133
181
  df_M.to_excel(writer, sheet_name='Metabolites', index=False)
134
182
  df_R.to_excel(writer, sheet_name='Reactions', index=False)
135
183
  df_T.to_excel(writer, sheet_name='Transporters', index=False)
gsrap/commons/medium.py CHANGED
@@ -1,6 +1,10 @@
1
1
  import gempipe
2
2
 
3
3
 
4
+ from .fluxbal import fba_no_warnings
5
+
6
+
7
+
4
8
 
5
9
  def apply_medium_given_column(logger, model, medium, column, is_reference=False):
6
10
 
@@ -80,4 +84,36 @@ def apply_medium_given_column(logger, model, medium, column, is_reference=False)
80
84
  model.reactions.get_by_id(f'EX_{substrate}_e').lower_bound = value -error
81
85
  model.reactions.get_by_id(f'EX_{substrate}_e').upper_bound = value +error
82
86
 
87
+ return 0
88
+
89
+
90
+
91
def initialize_model(logger, model, dbexp, initialize, media):
    """Apply a starting medium to the model and log the resulting FBA outcome.

    Args:
        logger: logger used for progress and warning messages.
        model: model passed to apply_medium_given_column() and
            fba_no_warnings().
        dbexp: mapping whose 'media' entry exposes `.columns` and is indexed
            by medium ID.
        initialize: medium ID to use; '-' means "first medium in `media`",
            'none'/'None' disables initialization.
        media: comma-separated medium IDs, or '-' when not provided.

    Returns:
        1 if applying the medium reported a failure, 0 in every other case
        (including when the initialization is skipped).
    """

    # explicit opt-out
    if initialize in ('None', 'none'):
        logger.info("Initialization will be skipped.")
        return 0

    # select the medium: an explicit request wins over the --media list
    if initialize != '-':
        medium = initialize
    elif media == '-':
        logger.info("No media provided: initialization will be skipped.")
        return 0
    else:
        medium = media.split(',')[0]  # taking the first medium

    media_table = dbexp['media']
    if medium not in media_table.columns:
        logger.warning(f"Medium '{medium}' does not exists: initialization will be skipped.")
        return 0

    # apply the medium, then run a single FBA to report the starting state
    if apply_medium_given_column(logger, model, medium, media_table[medium]) == 1:
        return 1
    _, obj_value, status = fba_no_warnings(model)
    logger.info(f"Initialized on medium '{medium}': {obj_value} ({status})")

    return 0