PyPI - gsrap - Versions diffs - 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl - Mend

gsrap 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

gsrap/.ipynb_checkpoints/__init__-checkpoint.py +21 -4
gsrap/__init__.py +21 -4
gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py +1 -0
gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +34 -3
gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +51 -3
gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py +36 -0
gsrap/commons/.ipynb_checkpoints/memoteutils-checkpoint.py +132 -0
gsrap/commons/__init__.py +1 -0
gsrap/commons/downloads.py +34 -3
gsrap/commons/excelhub.py +51 -3
gsrap/commons/medium.py +36 -0
gsrap/commons/memoteutils.py +132 -0
gsrap/getmaps/.ipynb_checkpoints/getmaps-checkpoint.py +14 -5
gsrap/getmaps/.ipynb_checkpoints/kdown-checkpoint.py +75 -4
gsrap/getmaps/getmaps.py +14 -5
gsrap/getmaps/kdown.py +75 -4
gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +22 -8
gsrap/mkmodel/mkmodel.py +22 -8
gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +32 -5
gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +66 -37
gsrap/parsedb/completeness.py +32 -5
gsrap/parsedb/parsedb.py +66 -37
{gsrap-0.8.1.dist-info → gsrap-0.8.3.dist-info}/METADATA +1 -1
{gsrap-0.8.1.dist-info → gsrap-0.8.3.dist-info}/RECORD +27 -25
{gsrap-0.8.1.dist-info → gsrap-0.8.3.dist-info}/LICENSE.txt +0 -0
{gsrap-0.8.1.dist-info → gsrap-0.8.3.dist-info}/WHEEL +0 -0
{gsrap-0.8.1.dist-info → gsrap-0.8.3.dist-info}/entry_points.txt +0 -0

gsrap/.ipynb_checkpoints/__init__-checkpoint.py CHANGED Viewed

@@ -5,6 +5,9 @@ import requests
 import importlib.metadata
 from datetime import datetime
 from packaging import version
+import atexit
+import os
 import cobra
@@ -69,13 +72,17 @@ def main():
     parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use only with --progress).")
     parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use only with --progress).")
     parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
+    parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
+    parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
     parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
     parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
+    parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
     # add arguments for the 'mkmodel' command
@@ -87,10 +94,11 @@ def main():
     mkmodel_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the eggnog-mapper annotation table(s).")
     mkmodel_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
     mkmodel_parser.add_argument("-u", "--universe", metavar='', type=str, default='-', help="Path to the universe model (SBML format).")
-    mkmodel_parser.add_argument("-i", "--force_inclusion", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
-    mkmodel_parser.add_argument("-f", "--gap_fill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
-    mkmodel_parser.add_argument("-x", "--exclude_orphans", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
-    #mkmodel_parser.add_argument("-r", "--force_removal", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
+    mkmodel_parser.add_argument("-i", "--include", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
+    mkmodel_parser.add_argument("-f", "--gapfill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
+    mkmodel_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the model on the provided medium. By default, the first medium in --gapfill is used. Use 'none' to avoid initialization.")
+    mkmodel_parser.add_argument("-x", "--excludeorp", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
+    #mkmodel_parser.add_argument("-r", "--remove", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
     mkmodel_parser.add_argument("-l", "--biolog", metavar='', type=str, default='-', help="Strain ID associated to binary Biolog(R) PM1, PM2A, PM3B and PM4A plates; if not provided, Biolog(R)-based model curation will be skipped (use with --cnps and --gap_fill).")
     mkmodel_parser.add_argument("-s", "--cnps", metavar='', type=str, default='glc__D,nh4,pi,so4', help="Starting C, N, P and S source metabolites (comma-separated IDs).")
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
@@ -161,6 +169,15 @@ def main():
+    # The following chunk suppresses the warning
+    # "sys:1: DeprecationWarning: builtin type swigvarlink has no __module__ attribute"
+    # raised at Gsrap shutdown by calling memote.suite.api.test_model() in commons/memoteutils.py
+    # NOTE(review): the handler rebinds sys.stderr to a handle on os.devnull, so anything
+    # written to sys.stderr after the handler has run is discarded as well, not only this
+    # warning; the devnull handle is never explicitly closed.
+    def _suppress_swigvarlink_warning():
+        sys.stderr = open(os.devnull, 'w')  # tested also with sys.stdout: same effect.
+    # run the handler at interpreter exit
+    atexit.register(_suppress_swigvarlink_warning)
     # run the program:
     set_usual_formatter(logger.handlers[0])
     current_date_time = datetime.now()

gsrap/__init__.py CHANGED Viewed

@@ -5,6 +5,9 @@ import requests
 import importlib.metadata
 from datetime import datetime
 from packaging import version
+import atexit
+import os
 import cobra
@@ -69,13 +72,17 @@ def main():
     parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use only with --progress).")
     parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use only with --progress).")
     parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
+    parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
+    parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
     parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
     parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
+    parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
     # add arguments for the 'mkmodel' command
@@ -87,10 +94,11 @@ def main():
     mkmodel_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the eggnog-mapper annotation table(s).")
     mkmodel_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
     mkmodel_parser.add_argument("-u", "--universe", metavar='', type=str, default='-', help="Path to the universe model (SBML format).")
-    mkmodel_parser.add_argument("-i", "--force_inclusion", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
-    mkmodel_parser.add_argument("-f", "--gap_fill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
-    mkmodel_parser.add_argument("-x", "--exclude_orphans", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
-    #mkmodel_parser.add_argument("-r", "--force_removal", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
+    mkmodel_parser.add_argument("-i", "--include", metavar='', type=str, default='-', help="Force the inclusion of the specified reactions (comma-separated IDs).")
+    mkmodel_parser.add_argument("-f", "--gapfill", metavar='', type=str, default='-', help="Media to use during gap-filling (comma-separated IDs); if not provided, gap-filling will be skipped.")
+    mkmodel_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the model on the provided medium. By default, the first medium in --gapfill is used. Use 'none' to avoid initialization.")
+    mkmodel_parser.add_argument("-x", "--excludeorp", action='store_true', help="Exclude orphan reactions from the gap-filling repository.")
+    #mkmodel_parser.add_argument("-r", "--remove", metavar='', type=str, default='-', help="Force the removal of the specified reactions (comma-separated IDs) (it applies after gap-filling, before Biolog(R)-based curation).")
     mkmodel_parser.add_argument("-l", "--biolog", metavar='', type=str, default='-', help="Strain ID associated to binary Biolog(R) PM1, PM2A, PM3B and PM4A plates; if not provided, Biolog(R)-based model curation will be skipped (use with --cnps and --gap_fill).")
     mkmodel_parser.add_argument("-s", "--cnps", metavar='', type=str, default='glc__D,nh4,pi,so4', help="Starting C, N, P and S source metabolites (comma-separated IDs).")
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
@@ -161,6 +169,15 @@ def main():
+    # The following chunk suppresses the warning
+    # "sys:1: DeprecationWarning: builtin type swigvarlink has no __module__ attribute"
+    # raised at Gsrap shutdown by calling memote.suite.api.test_model() in commons/memoteutils.py
+    # NOTE(review): the handler rebinds sys.stderr to a handle on os.devnull, so anything
+    # written to sys.stderr after the handler has run is discarded as well, not only this
+    # warning; the devnull handle is never explicitly closed.
+    def _suppress_swigvarlink_warning():
+        sys.stderr = open(os.devnull, 'w')  # tested also with sys.stdout: same effect.
+    # run the handler at interpreter exit
+    atexit.register(_suppress_swigvarlink_warning)
     # run the program:
     set_usual_formatter(logger.handlers[0])
     current_date_time = datetime.now()

gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py CHANGED Viewed

@@ -8,3 +8,4 @@ from .sbmlutils import *
 from .escherutils import *
 from .logutils import *
 from .keggutils import *
+from .memoteutils import *

gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py CHANGED Viewed

@@ -243,7 +243,38 @@ def format_expansion(logger, eggnog):
+def check_taxon(logger, taxon, idcollection_dict):
+    """Validate the --taxon argument against the 'ko_to_taxa' asset.
+    `taxon` must look like '{level}:{name}', where {level} is 'kingdom' or 'phylum' and
+    {name} is one of the taxon names listed at that level in idcollection_dict['ko_to_taxa'].
+    Every failure is reported through logger.error().
+    Returns 0 if the taxon is acceptable, 1 otherwise.
+    """
+    # verify presence of needed assets
+    if 'ko_to_taxa' not in idcollection_dict:
+        logger.error("Asset 'ko_to_taxa' not found in 'gsrap.maps'. Please update 'gsrap.maps' with 'gsrap getmaps'.")
+        return 1
+    # extract level and name: exactly one ':' is expected.
+    # ValueError: wrong number of ':'-separated fields; AttributeError: `taxon` is not a string.
+    try:
+        level, name = taxon.split(':')
+    except (ValueError, AttributeError):
+        logger.error(f"Provided --taxon is not well formatted: '{taxon}'.")
+        return 1
+    # compute available levels and check
+    avail_levels = {'kingdom', 'phylum'}
+    if level not in avail_levels:
+        logger.error(f"Provided level is not acceptable: '{level}' (see --taxon). Acceptable levels are {avail_levels}.")
+        return 1
+    # compute available taxa at input level (union over all the KOs)
+    avail_taxa_at_level = set()
+    for taxa_by_level in idcollection_dict['ko_to_taxa'].values():
+        avail_taxa_at_level.update(taxa_by_level[level])
+    if name not in avail_taxa_at_level:
+        logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
+        return 1
+    return 0

gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py CHANGED Viewed

@@ -1,3 +1,6 @@
+import importlib.metadata
 import pandas as pnd
@@ -5,7 +8,51 @@ from .figures import figure_df_C_F1
-def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+def get_summary_sheet(model, memote_results_dict):
+    """Build the content of the headerless 'Summary' sheet as a 4-column DataFrame (c1..c4).
+    The first rows describe the model: Gsrap version, model ID and the number of compartments,
+    metabolites, reactions and genes. When `memote_results_dict` is not None, the MEMOTE
+    scores it contains are appended below an empty separator row.
+    All the cells are strings; column c4 is always empty.
+    """
+    def _row(c1='', c2='', c3=''):
+        return {'c1': c1, 'c2': c2, 'c3': c3, 'c4': ''}
+    # metabolite IDs with the part after the last '_' removed
+    # (presumably the compartment suffix; verify against the ID convention)
+    n_unique_metabolites = len({m.id.rsplit('_', 1)[0] for m in model.metabolites})
+    # classify every reaction in a single pass
+    n_non_transport = n_transport = n_artificial = 0
+    for r in model.reactions:
+        if r.id == 'Biomass' or len(r.metabolites) == 1:
+            n_artificial += 1
+            continue
+        n_suffixes = len({m.id.rsplit('_', 1)[-1] for m in r.metabolites})
+        if n_suffixes == 1:
+            n_non_transport += 1
+        elif n_suffixes > 1:
+            n_transport += 1
+        # reactions without metabolites are counted in none of the three groups
+    df_gsrap = pnd.DataFrame.from_records([
+        # Gsrap
+        _row('Gsrap version', f"v{importlib.metadata.metadata('gsrap')['Version']}"),
+        _row('Model ID', f"{model.id}"),
+        _row('Compartments', f"{len(model.compartments)}"),
+        _row('Metabolites', f"{len(model.metabolites)}"),
+        _row('', 'Unique', f"{n_unique_metabolites}"),
+        _row('Reactions', f"{len(model.reactions)}"),
+        _row('', 'Non-transport', f"{n_non_transport}"),
+        _row('', 'Transport', f"{n_transport}"),
+        _row('', 'Artificial', f"{n_artificial}"),
+        _row('Genes', f"{len(model.genes)}"),
+        # empty line
+        _row(),
+    ])
+    if memote_results_dict is None:
+        # no MEMOTE results available: the sheet only holds the model description
+        return df_gsrap
+    consistency = memote_results_dict['consistency']
+    df_memote = pnd.DataFrame.from_records([
+        # MEMOTE
+        _row('MEMOTE version', f"v{memote_results_dict['version']}"),
+        _row('MEMOTE Total Score', f"{memote_results_dict['total']}%"),
+        _row('MEMOTE consistency', f"{consistency['sub_total']}%"),
+        _row('', 'stoichiometric consistency', f"{consistency['test_stoichiometric_consistency']}%"),
+        _row('', 'mass balance', f"{consistency['test_reaction_mass_balance']}%"),
+        _row('', 'charge balance', f"{consistency['test_reaction_charge_balance']}%"),
+        # this row must read its own metric, not the charge-balance one
+        _row('', 'disconnected metabolites', f"{consistency['test_find_disconnected']}%"),
+        _row('', 'unbounded flux in default conditions', f"{consistency['test_find_reactions_unbounded_flux_default_condition']}%"),
+        _row('MEMOTE annotation Metabolites', f"{memote_results_dict['annotation_M']['sub_total']}%"),
+        _row('MEMOTE annotation Reactions', f"{memote_results_dict['annotation_R']['sub_total']}%"),
+        _row('MEMOTE annotation Genes', f"{memote_results_dict['annotation_G']['sub_total']}%"),
+        _row('MEMOTE annotation SBO', f"{memote_results_dict['annotation_SBO']['sub_total']}%"),
+    ])
+    return pnd.concat([df_gsrap, df_memote])
+def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B, df_P, df_S, df_C=None):
     # generate figures
@@ -101,7 +148,7 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
             else: df_T.append(row_dict)
     for g in model.genes:
-        row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
+        row_dict = {'gid': g.id, 'name': g.name, 'involved_in': '; '.join([r.id for r in g.reactions])}
         for db in g.annotation.keys():
             annots = g.annotation[db]
@@ -124,12 +171,13 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
     df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
     df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
     df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
-    df_G_first_cols = ['gid', 'involved_in']
+    df_G_first_cols = ['gid', 'name', 'involved_in']
     df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
     with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
+        get_summary_sheet(model, memote_results_dict).to_excel(writer, sheet_name='Summary', index=False, header=False)
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)

gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import gempipe
+from .fluxbal import fba_no_warnings
 def apply_medium_given_column(logger, model, medium, column, is_reference=False):
@@ -80,4 +84,36 @@ def apply_medium_given_column(logger, model, medium, column, is_reference=False)
             model.reactions.get_by_id(f'EX_{substrate}_e').lower_bound = value -error
             model.reactions.get_by_id(f'EX_{substrate}_e').upper_bound = value +error
+    return 0
+def initialize_model(logger, model, dbexp, initialize, media):
+    """Apply an initial medium to `model` and log the objective value obtained on it.
+    `initialize` selects the medium: 'none' (or 'None') skips the initialization; '-' falls
+    back to the first ID of the comma-separated `media` string (skipping if `media` is '-'
+    too); any other value is taken as the medium ID. The medium ID must be a column of
+    dbexp['media'], otherwise a warning is logged and the model is left untouched.
+    Returns 1 if apply_medium_given_column() returns 1, and 0 in every other case.
+    """
+    if initialize in ['None', 'none']:
+        logger.info("Initialization will be skipped.")
+        return 0
+    if initialize == '-':
+        if media == '-':
+            logger.info("No media provided: initialization will be skipped.")
+            return 0
+        medium = media.split(',')[0]  # taking the first medium
+    else:
+        medium = initialize
+    # tolerate spaces around the ID (e.g. --media ' M9, M9an')
+    if isinstance(medium, str):
+        medium = medium.strip()
+    if medium not in dbexp['media'].columns:
+        logger.warning(f"Medium '{medium}' does not exist: initialization will be skipped.")
+        return 0
+    response = apply_medium_given_column(logger, model, medium, dbexp['media'][medium])
+    if response == 1:
+        return 1
+    # only the objective value and the solver status are reported
+    _, obj_value, status = fba_no_warnings(model)
+    logger.info(f"Initialized on medium '{medium}': {obj_value} ({status})")
     return 0

gsrap/commons/.ipynb_checkpoints/memoteutils-checkpoint.py ADDED Viewed

@@ -0,0 +1,132 @@
+import os
+import contextlib
+import importlib.metadata
+import memote
+def get_memote_results_dict(logger, model):
+    """Run a selected subset of MEMOTE tests on `model` and condense them into percentage scores.
+    Each raw MEMOTE metric m is turned into the score (1 - m) * 100. The scores are combined
+    into one weighted average per section ('sub_total') and into an overall weighted average
+    ('total'), using the weights hard-coded below.
+    Returns a dict with the keys 'version', 'total' and one key per section ('consistency',
+    'annotation_M', 'annotation_R', 'annotation_G', 'annotation_SBO'), each section holding
+    its per-metric scores plus 'sub_total'. Returns None if memote.suite.api.test_model()
+    raises ValueError.
+    """
+    memote_version = importlib.metadata.version('memote')
+    logger.info(f"Running selected modules of MEMOTE v{memote_version}...")
+    # launch memote (only relevant modules), discarding whatever it prints
+    with open(os.devnull, 'w') as devnull:
+        with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
+            try:
+                memote_report = memote.suite.api.test_model(model, exclusive=[
+                    'test_annotation',
+                    'test_sbo',
+                    'test_stoichiometric_consistency',
+                    'test_reaction_mass_balance',
+                    'test_reaction_charge_balance',
+                    'test_find_disconnected',
+                    'test_find_reactions_unbounded_flux_default_condition'], results=True)
+            except ValueError:
+                memote_report = None
+    if memote_report is None:
+        # without a report there is nothing to parse: None tells the caller
+        # that no MEMOTE results are available
+        logger.warning("MEMOTE raised a ValueError: MEMOTE scores will not be available.")
+        return None
+    # parse memote's results
+    results_dict = {}
+    results_dict['version'] = memote_version
+    test_results = dict(memote_report[1])['tests']
+    # (metric name, weight of the metric inside its section)
+    sections = {
+        'consistency': [
+            ('test_stoichiometric_consistency', 3),
+            ('test_reaction_mass_balance', 1),
+            ('test_reaction_charge_balance', 1),
+            ('test_find_disconnected', 1),
+            ('test_find_reactions_unbounded_flux_default_condition', 1)
+        ],
+        'annotation_M': [
+            ('test_metabolite_annotation_presence', 1),
+            ('test_metabolite_annotation_overview', 1),
+            ('test_metabolite_annotation_wrong_ids', 1),
+            ('test_metabolite_id_namespace_consistency', 1),
+        ],
+        'annotation_R': [
+            ('test_reaction_annotation_presence', 1),
+            ('test_reaction_annotation_overview', 1),
+            ('test_reaction_annotation_wrong_ids', 1),
+            ('test_reaction_id_namespace_consistency', 1),
+        ],
+        'annotation_G': [
+            ('test_gene_product_annotation_presence', 1),
+            ('test_gene_product_annotation_overview', 1),
+            ('test_gene_product_annotation_wrong_ids', 1),
+        ],
+        'annotation_SBO': [
+            ('test_metabolite_sbo_presence', 1),
+            ('test_metabolite_specific_sbo_presence', 1),
+            ('test_reaction_sbo_presence', 1),
+            ('test_metabolic_reaction_specific_sbo_presence', 1),
+            ('test_transport_reaction_specific_sbo_presence', 1),
+            ('test_exchange_specific_sbo_presence', 1),
+            ('test_demand_specific_sbo_presence', 1),
+            ('test_sink_specific_sbo_presence', 1),
+            ('test_gene_sbo_presence', 1),
+            ('test_gene_specific_sbo_presence', 1),
+            ('test_biomass_specific_sbo_presence', 1),
+        ],
+    }
+    # weight of each section in the total score
+    section_multipliers = {
+        'consistency': 3,
+        'annotation_M': 1,
+        'annotation_R': 1,
+        'annotation_G': 1,
+        'annotation_SBO': 2,
+    }
+    numerator_total = 0
+    denominator_total = 0
+    for section, metrics in sections.items():
+        numerator = 0
+        denominator = 0
+        results_dict[section] = {}
+        # iterate metrics of this section:
+        for metric, metric_multiplier in metrics:
+            metric_raw = test_results[metric]['metric']
+            # no subcategories here:
+            if isinstance(metric_raw, (int, float)):
+                metric_percentage = (1 - metric_raw) * 100
+                numerator = numerator + (metric_percentage * metric_multiplier)
+                denominator = denominator + metric_multiplier
+                results_dict[section][metric] = round(metric_percentage, 1)
+            # there are subcategories (like in the case of M/R/G/SBO annots)
+            else:
+                results_dict[section][metric] = {}
+                # the weight of the metric is split evenly among its subcategories
+                n_subcategories = len(metric_raw)
+                for key, value in metric_raw.items():
+                    multiplier_corrected = metric_multiplier / n_subcategories
+                    metric_percentage = (1 - value) * 100
+                    numerator = numerator + (metric_percentage * multiplier_corrected)
+                    denominator = denominator + multiplier_corrected
+                    results_dict[section][metric][key] = round(metric_percentage, 1)
+        # compute the subtotal:
+        sub_total = numerator / denominator
+        results_dict[section]['sub_total'] = int(round(sub_total, 0))
+        # compute the total:
+        denominator_total = denominator_total + section_multipliers[section] * denominator
+        numerator_total = numerator_total + section_multipliers[section] * numerator
+    total = numerator_total / denominator_total
+    results_dict['total'] = int(round(total, 0))
+    logger.info(f"Done! MEMOTE Total Score: {results_dict['total']}%.")
+    return results_dict

gsrap/commons/__init__.py CHANGED Viewed

@@ -8,3 +8,4 @@ from .sbmlutils import *
 from .escherutils import *
 from .logutils import *
 from .keggutils import *
+from .memoteutils import *

gsrap/commons/downloads.py CHANGED Viewed

@@ -243,7 +243,38 @@ def format_expansion(logger, eggnog):
+def check_taxon(logger, taxon, idcollection_dict):
+    """Validate the --taxon argument against the 'ko_to_taxa' asset.
+    `taxon` must look like '{level}:{name}', where {level} is 'kingdom' or 'phylum' and
+    {name} is one of the taxon names listed at that level in idcollection_dict['ko_to_taxa'].
+    Every failure is reported through logger.error().
+    Returns 0 if the taxon is acceptable, 1 otherwise.
+    """
+    # verify presence of needed assets
+    if 'ko_to_taxa' not in idcollection_dict:
+        logger.error("Asset 'ko_to_taxa' not found in 'gsrap.maps'. Please update 'gsrap.maps' with 'gsrap getmaps'.")
+        return 1
+    # extract level and name: exactly one ':' is expected.
+    # ValueError: wrong number of ':'-separated fields; AttributeError: `taxon` is not a string.
+    try:
+        level, name = taxon.split(':')
+    except (ValueError, AttributeError):
+        logger.error(f"Provided --taxon is not well formatted: '{taxon}'.")
+        return 1
+    # compute available levels and check
+    avail_levels = {'kingdom', 'phylum'}
+    if level not in avail_levels:
+        logger.error(f"Provided level is not acceptable: '{level}' (see --taxon). Acceptable levels are {avail_levels}.")
+        return 1
+    # compute available taxa at input level (union over all the KOs)
+    avail_taxa_at_level = set()
+    for taxa_by_level in idcollection_dict['ko_to_taxa'].values():
+        avail_taxa_at_level.update(taxa_by_level[level])
+    if name not in avail_taxa_at_level:
+        logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
+        return 1
+    return 0

gsrap/commons/excelhub.py CHANGED Viewed

@@ -1,3 +1,6 @@
+import importlib.metadata
 import pandas as pnd
@@ -5,7 +8,51 @@ from .figures import figure_df_C_F1
-def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+def get_summary_sheet(model, memote_results_dict):
+    """Build the content of the headerless 'Summary' sheet as a 4-column DataFrame (c1..c4).
+    The first rows describe the model: Gsrap version, model ID and the number of compartments,
+    metabolites, reactions and genes. When `memote_results_dict` is not None, the MEMOTE
+    scores it contains are appended below an empty separator row.
+    All the cells are strings; column c4 is always empty.
+    """
+    def _row(c1='', c2='', c3=''):
+        return {'c1': c1, 'c2': c2, 'c3': c3, 'c4': ''}
+    # metabolite IDs with the part after the last '_' removed
+    # (presumably the compartment suffix; verify against the ID convention)
+    n_unique_metabolites = len({m.id.rsplit('_', 1)[0] for m in model.metabolites})
+    # classify every reaction in a single pass
+    n_non_transport = n_transport = n_artificial = 0
+    for r in model.reactions:
+        if r.id == 'Biomass' or len(r.metabolites) == 1:
+            n_artificial += 1
+            continue
+        n_suffixes = len({m.id.rsplit('_', 1)[-1] for m in r.metabolites})
+        if n_suffixes == 1:
+            n_non_transport += 1
+        elif n_suffixes > 1:
+            n_transport += 1
+        # reactions without metabolites are counted in none of the three groups
+    df_gsrap = pnd.DataFrame.from_records([
+        # Gsrap
+        _row('Gsrap version', f"v{importlib.metadata.metadata('gsrap')['Version']}"),
+        _row('Model ID', f"{model.id}"),
+        _row('Compartments', f"{len(model.compartments)}"),
+        _row('Metabolites', f"{len(model.metabolites)}"),
+        _row('', 'Unique', f"{n_unique_metabolites}"),
+        _row('Reactions', f"{len(model.reactions)}"),
+        _row('', 'Non-transport', f"{n_non_transport}"),
+        _row('', 'Transport', f"{n_transport}"),
+        _row('', 'Artificial', f"{n_artificial}"),
+        _row('Genes', f"{len(model.genes)}"),
+        # empty line
+        _row(),
+    ])
+    if memote_results_dict is None:
+        # no MEMOTE results available: the sheet only holds the model description
+        return df_gsrap
+    consistency = memote_results_dict['consistency']
+    df_memote = pnd.DataFrame.from_records([
+        # MEMOTE
+        _row('MEMOTE version', f"v{memote_results_dict['version']}"),
+        _row('MEMOTE Total Score', f"{memote_results_dict['total']}%"),
+        _row('MEMOTE consistency', f"{consistency['sub_total']}%"),
+        _row('', 'stoichiometric consistency', f"{consistency['test_stoichiometric_consistency']}%"),
+        _row('', 'mass balance', f"{consistency['test_reaction_mass_balance']}%"),
+        _row('', 'charge balance', f"{consistency['test_reaction_charge_balance']}%"),
+        # this row must read its own metric, not the charge-balance one
+        _row('', 'disconnected metabolites', f"{consistency['test_find_disconnected']}%"),
+        _row('', 'unbounded flux in default conditions', f"{consistency['test_find_reactions_unbounded_flux_default_condition']}%"),
+        _row('MEMOTE annotation Metabolites', f"{memote_results_dict['annotation_M']['sub_total']}%"),
+        _row('MEMOTE annotation Reactions', f"{memote_results_dict['annotation_R']['sub_total']}%"),
+        _row('MEMOTE annotation Genes', f"{memote_results_dict['annotation_G']['sub_total']}%"),
+        _row('MEMOTE annotation SBO', f"{memote_results_dict['annotation_SBO']['sub_total']}%"),
+    ])
+    return pnd.concat([df_gsrap, df_memote])
+def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B, df_P, df_S, df_C=None):
     # generate figures
@@ -101,7 +148,7 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
             else: df_T.append(row_dict)
     for g in model.genes:
-        row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
+        row_dict = {'gid': g.id, 'name': g.name, 'involved_in': '; '.join([r.id for r in g.reactions])}
         for db in g.annotation.keys():
             annots = g.annotation[db]
@@ -124,12 +171,13 @@ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None
     df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
     df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
     df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
-    df_G_first_cols = ['gid', 'involved_in']
+    df_G_first_cols = ['gid', 'name', 'involved_in']
     df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
     with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
+        get_summary_sheet(model, memote_results_dict).to_excel(writer, sheet_name='Summary', index=False, header=False)
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)

gsrap/commons/medium.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import gempipe
+from .fluxbal import fba_no_warnings
 def apply_medium_given_column(logger, model, medium, column, is_reference=False):
@@ -80,4 +84,36 @@ def apply_medium_given_column(logger, model, medium, column, is_reference=False)
             model.reactions.get_by_id(f'EX_{substrate}_e').lower_bound = value -error
             model.reactions.get_by_id(f'EX_{substrate}_e').upper_bound = value +error
+    return 0
+def initialize_model(logger, model, dbexp, initialize, media):
+    """Apply an initial medium to `model` and log the objective value obtained on it.
+    `initialize` selects the medium: 'none' (or 'None') skips the initialization; '-' falls
+    back to the first ID of the comma-separated `media` string (skipping if `media` is '-'
+    too); any other value is taken as the medium ID. The medium ID must be a column of
+    dbexp['media'], otherwise a warning is logged and the model is left untouched.
+    Returns 1 if apply_medium_given_column() returns 1, and 0 in every other case.
+    """
+    if initialize in ['None', 'none']:
+        logger.info("Initialization will be skipped.")
+        return 0
+    if initialize == '-':
+        if media == '-':
+            logger.info("No media provided: initialization will be skipped.")
+            return 0
+        medium = media.split(',')[0]  # taking the first medium
+    else:
+        medium = initialize
+    # tolerate spaces around the ID (e.g. --media ' M9, M9an')
+    if isinstance(medium, str):
+        medium = medium.strip()
+    if medium not in dbexp['media'].columns:
+        logger.warning(f"Medium '{medium}' does not exist: initialization will be skipped.")
+        return 0
+    response = apply_medium_given_column(logger, model, medium, dbexp['media'][medium])
+    if response == 1:
+        return 1
+    # only the objective value and the solver status are reported
+    _, obj_value, status = fba_no_warnings(model)
+    logger.info(f"Initialized on medium '{medium}': {obj_value} ({status})")
     return 0

gsrap 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

gsrap 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl