gsrap-0.7.0-py3-none-any.whl → gsrap-0.7.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py +34 -5
- gsrap/__init__.py +34 -5
- gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +4 -0
- gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +27 -3
- gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +105 -0
- gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py +1 -1
- gsrap/commons/biomass.py +4 -0
- gsrap/commons/coeffs.py +1 -1
- gsrap/commons/excelhub.py +27 -3
- gsrap/commons/figures.py +105 -0
- gsrap/commons/fluxbal.py +1 -1
- gsrap/mkmodel/.ipynb_checkpoints/gapfillutils-checkpoint.py +3 -0
- gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +11 -4
- gsrap/mkmodel/gapfillutils.py +3 -0
- gsrap/mkmodel/mkmodel.py +11 -4
- gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +3 -0
- gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +101 -65
- gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +16 -1
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +4 -5
- gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +7 -0
- gsrap/parsedb/annotation.py +3 -0
- gsrap/parsedb/completeness.py +101 -65
- gsrap/parsedb/introduce.py +16 -1
- gsrap/parsedb/parsedb.py +4 -5
- gsrap/parsedb/repeating.py +7 -0
- gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +6 -7
- gsrap/runsims/simplegrowth.py +6 -7
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/METADATA +3 -1
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/RECORD +33 -31
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/LICENSE.txt +0 -0
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/WHEEL +0 -0
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/entry_points.txt +0 -0
gsrap/.ipynb_checkpoints/__init__-checkpoint.py
CHANGED

@@ -1,8 +1,10 @@
 import argparse
 import sys
 import traceback
+import requests
 import importlib.metadata
 from datetime import datetime
+from packaging import version
 
 
 import cobra
@@ -29,8 +31,9 @@ solver_name = solver_name.replace("_interface", '')
 def main():
 
 
-    # define the header of main- and sub-commands.
-
+    # define the header of main- and sub-commands.
+    current_version = importlib.metadata.metadata("gsrap")["Version"]
+    header = f'gsrap v{current_version},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
 
 
     # create the command line arguments:
@@ -69,9 +72,9 @@ def main():
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'mkmodel' command
@@ -91,6 +94,7 @@ def main():
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
     mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
+    mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'runsims' command
@@ -107,6 +111,7 @@ def main():
     runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
     runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
     runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
+    runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specied.
@@ -117,12 +122,36 @@ def main():
     # set up the logger:
     logger = get_logger('gsrap', args.verbose)
 
-
+
 
     # show a welcome message:
     set_header_trailer_formatter(logger.handlers[0])
     logger.info(header + '\n')
-
+
+
+
+    # check if newer version is available
+    try:
+        response = requests.get(f"https://pypi.org/pypi/gsrap/json", timeout=3) # sends an HTTP GET request to the given URL
+        response.raise_for_status() # check the HTTP status code (e.g. 200, 404, 500): if not in the 2xx success range, raise requests.exceptions.HTTPError
+        data = response.json()
+        newest_version = data["info"]["version"]
+    except Exception as error: # eg requests.exceptions.Timeout, requests.exceptions.HTTPError
+        logger.info(f'Can\'t retrieve the number of the newest version. Please contact the developer reporting the following error: "{error}".')
+        logger.info('') # still no formatting here
+        # do not exit, continue with the program
+    if version.parse(current_version) < version.parse(newest_version):
+        warning_message = f"███ Last version is v{newest_version} and you have v{current_version}: please update gsrap! ███"
+        border = ''.join(['█' for i in range(len(warning_message))])
+        logger.info(border)
+        logger.info(warning_message)
+        logger.info(border)
+        logger.info('') # still no formatting here
+
+
+
+    # print the full command line:
+    command_line = ''
     for arg, value in vars(args).items():
         if arg == 'subcommand': command_line = command_line + f"gsrap {value} "
         else: command_line = command_line + f"--{arg} {value} "
gsrap/__init__.py
CHANGED
@@ -1,8 +1,10 @@
 import argparse
 import sys
 import traceback
+import requests
 import importlib.metadata
 from datetime import datetime
+from packaging import version
 
 
 import cobra
@@ -29,8 +31,9 @@ solver_name = solver_name.replace("_interface", '')
 def main():
 
 
-    # define the header of main- and sub-commands.
-
+    # define the header of main- and sub-commands.
+    current_version = importlib.metadata.metadata("gsrap")["Version"]
+    header = f'gsrap v{current_version},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
 
 
     # create the command line arguments:
@@ -69,9 +72,9 @@ def main():
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'mkmodel' command
@@ -91,6 +94,7 @@ def main():
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
     mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
+    mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'runsims' command
@@ -107,6 +111,7 @@ def main():
     runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
     runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
     runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
+    runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specied.
@@ -117,12 +122,36 @@ def main():
     # set up the logger:
     logger = get_logger('gsrap', args.verbose)
 
-
+
 
     # show a welcome message:
    set_header_trailer_formatter(logger.handlers[0])
     logger.info(header + '\n')
-
+
+
+
+    # check if newer version is available
+    try:
+        response = requests.get(f"https://pypi.org/pypi/gsrap/json", timeout=3) # sends an HTTP GET request to the given URL
+        response.raise_for_status() # check the HTTP status code (e.g. 200, 404, 500): if not in the 2xx success range, raise requests.exceptions.HTTPError
+        data = response.json()
+        newest_version = data["info"]["version"]
+    except Exception as error: # eg requests.exceptions.Timeout, requests.exceptions.HTTPError
+        logger.info(f'Can\'t retrieve the number of the newest version. Please contact the developer reporting the following error: "{error}".')
+        logger.info('') # still no formatting here
+        # do not exit, continue with the program
+    if version.parse(current_version) < version.parse(newest_version):
+        warning_message = f"███ Last version is v{newest_version} and you have v{current_version}: please update gsrap! ███"
+        border = ''.join(['█' for i in range(len(warning_message))])
+        logger.info(border)
+        logger.info(warning_message)
+        logger.info(border)
+        logger.info('') # still no formatting here
+
+
+
+    # print the full command line:
+    command_line = ''
     for arg, value in vars(args).items():
         if arg == 'subcommand': command_line = command_line + f"gsrap {value} "
         else: command_line = command_line + f"--{arg} {value} "
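Note: the update check added above boils down to the following standalone sketch (same PyPI JSON endpoint and packaging-based comparison as in the diff; here the comparison is kept inside the try block and plain print() stands in for the gsrap logger):

import requests
from packaging import version

current_version = "0.7.2"  # gsrap reads this via importlib.metadata

try:
    # ask PyPI for the metadata of the latest published gsrap release
    response = requests.get("https://pypi.org/pypi/gsrap/json", timeout=3)
    response.raise_for_status()
    newest_version = response.json()["info"]["version"]
    if version.parse(current_version) < version.parse(newest_version):
        print(f"gsrap v{newest_version} is available (you have v{current_version})")
except Exception as error:
    # network failure or non-2xx response: report and continue, as the CLI does
    print(f"Could not check PyPI for a newer version: {error}")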
gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py
CHANGED

@@ -190,6 +190,10 @@ def introduce_universal_biomass(logger, dbexp, universe):
     r.build_reaction_from_string(rstring)
 
 
+    # add SBO annotation
+    r.annotation['sbo'] = ['SBO:0000629'] # biomass reaction
+
+
     # set as objective:
     universe.objective = 'Biomass'
 
gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py
CHANGED

@@ -267,7 +267,7 @@ def compute_exp_LIPIDS_coeffs(logger, model, MWF, LIPIDS_PL, LIPIDS_FA):
     r.bounds = (0, 1000)
     r.gene_reaction_rule = 'spontaneous'
     r.update_genes_from_gpr()
-
+
 
     # determine 'L' formula and charge (charge should be -1 like every fatty acid)
     L_dict = dict() # for 1 mol
gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py
CHANGED

@@ -1,8 +1,20 @@
 import pandas as pnd
 
 
+from .figures import figure_df_C_F1
 
-
+
+
+def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+
+
+    # generate figures
+    if nofigs == False:
+
+        if df_C is not None:
+            df_C_F1 = figure_df_C_F1(df_C)
+
+
 
     df_M = []
     df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_S.insert(0, 'mid', '') # new columns as first
     df_S['mid'] = df_S.index
     df_S = df_S.reset_index(drop=True)
+
+    # format df_C: universal reaction coverage
+    if df_C is not None:
+        df_C.insert(0, 'kr', '') # new columns as first
+        df_C['kr'] = df_C.index
+        df_C = df_C.reset_index(drop=True)
 
 
     for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_R = pnd.DataFrame.from_records(df_R)
     df_T = pnd.DataFrame.from_records(df_T)
     df_A = pnd.DataFrame.from_records(df_A)
-    with pnd.ExcelWriter(filepath) as writer:
+    with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
         if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
         if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
-
+        if df_C is not None:
+            df_C.to_excel(writer, sheet_name='Coverage', index=False)
+            if nofigs == False:
+                worksheet = writer.sheets['Coverage']
+                worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
+
 
     sheets_dict = {
         'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         'Biomass': df_B,
         'Biolog': df_P,
         'Biosynth': df_S,
+        'Coverage': df_C,
     }
     return sheets_dict
 
gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py
ADDED

@@ -0,0 +1,105 @@
+from io import BytesIO
+
+import numpy as np
+import pandas as pnd
+
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
+
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+
+
+
+def figure_df_C_F1(df_coverage):
+
+    bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
+    strains = bin_matrix.columns
+    bin_matrix = bin_matrix.T # features in column
+
+    # pdist() / linkage() will loose the accession information. So here we save a dict:
+    index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
+
+    # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
+    distances = pdist(bin_matrix, 'jaccard')
+    linkage_matrix = linkage(distances, method='ward')
+
+
+    # PART 0: create the frame
+    fig, axs = plt.subplots(
+        nrows=2, ncols=2,
+        figsize=(15, 10),
+        gridspec_kw={ # suplots width proportions.
+            'width_ratios': [0.5, 1.0],
+            'height_ratios': [0.015, 0.985]
+        }
+    )
+
+    # PART 1: dendrogram
+    dn = dendrogram(
+        linkage_matrix, ax=axs[1,0],
+        orientation='left',
+        color_threshold=0, above_threshold_color='black',
+    )
+
+
+    ### PART 2: heatmap
+    ord_leaves = leaves_list(linkage_matrix)
+    ord_leaves = np.flip(ord_leaves) # because leaves are returned in the inverse sense.
+    ord_leaves = [index_to_strain[i] for i in ord_leaves] # convert index as number to index as accession
+    bin_matrix = bin_matrix.loc[ord_leaves, :] # reordered dataframe.
+    axs[1,1].matshow(
+        bin_matrix,
+        cmap='viridis',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 3: coverage bar
+    axs[0,1].matshow(
+        df_coverage[['modeled']].T,
+        cmap='cool_r',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 4: legends
+    legend_feat = [
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
+    ]
+    legend_cov = [
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
+    ]
+    l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
+    l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
+    axs[1,0].add_artist(l1) # keep both legends visible
+
+
+    ### PART 5: aesthetics
+    plt.subplots_adjust(wspace=0, hspace=0) # adjust the space between subplots:
+    axs[0,0].axis('off') # remove frame and axis
+    axs[1,0].axis('off') # remove frame and axis
+
+    axs[0,1].yaxis.set_visible(False) # remove ticks, tick labels, axis label
+
+    axs[1,1].xaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_xticklabels([]) # remove tick labels
+    axs[1,1].xaxis.set_label_position("bottom")
+    axs[1,1].set_xlabel("KEGG reactions")
+
+    axs[1,1].yaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_yticklabels([]) # remove tick labels
+    axs[1,1].yaxis.set_label_position("right")
+    axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13) # labelpad is in points (1 point = 1/72 inch)
+
+
+    ### PART 6: save fig
+    buf = BytesIO()
+    fig.savefig(buf, dpi=300, bbox_inches='tight') # labelpad is in inches (1 point = 1/72 inch)
+    plt.close(fig)
+    buf.seek(0) # rewind the buffer to the beginning
+
+
+    return buf
gsrap/commons/biomass.py
CHANGED
@@ -190,6 +190,10 @@ def introduce_universal_biomass(logger, dbexp, universe):
     r.build_reaction_from_string(rstring)
 
 
+    # add SBO annotation
+    r.annotation['sbo'] = ['SBO:0000629'] # biomass reaction
+
+
     # set as objective:
     universe.objective = 'Biomass'
 
gsrap/commons/coeffs.py
CHANGED
@@ -267,7 +267,7 @@ def compute_exp_LIPIDS_coeffs(logger, model, MWF, LIPIDS_PL, LIPIDS_FA):
     r.bounds = (0, 1000)
     r.gene_reaction_rule = 'spontaneous'
     r.update_genes_from_gpr()
-
+
 
     # determine 'L' formula and charge (charge should be -1 like every fatty acid)
     L_dict = dict() # for 1 mol
gsrap/commons/excelhub.py
CHANGED
@@ -1,8 +1,20 @@
 import pandas as pnd
 
 
+from .figures import figure_df_C_F1
 
-
+
+
+def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+
+
+    # generate figures
+    if nofigs == False:
+
+        if df_C is not None:
+            df_C_F1 = figure_df_C_F1(df_C)
+
+
 
     df_M = []
     df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_S.insert(0, 'mid', '') # new columns as first
     df_S['mid'] = df_S.index
     df_S = df_S.reset_index(drop=True)
+
+    # format df_C: universal reaction coverage
+    if df_C is not None:
+        df_C.insert(0, 'kr', '') # new columns as first
+        df_C['kr'] = df_C.index
+        df_C = df_C.reset_index(drop=True)
 
 
     for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_R = pnd.DataFrame.from_records(df_R)
     df_T = pnd.DataFrame.from_records(df_T)
     df_A = pnd.DataFrame.from_records(df_A)
-    with pnd.ExcelWriter(filepath) as writer:
+    with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
         if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
         if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
-
+        if df_C is not None:
+            df_C.to_excel(writer, sheet_name='Coverage', index=False)
+            if nofigs == False:
+                worksheet = writer.sheets['Coverage']
+                worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
+
 
     sheets_dict = {
         'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         'Biomass': df_B,
         'Biolog': df_P,
         'Biosynth': df_S,
+        'Coverage': df_C,
     }
     return sheets_dict
 
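Note: the switch to engine='xlsxwriter' above is what lets the coverage figure be embedded straight from memory, since worksheet.insert_image() with the 'image_data' option is an xlsxwriter feature. A self-contained sketch of the same pattern, with toy data and an illustrative output name:

from io import BytesIO

import pandas as pnd
import matplotlib.pyplot as plt

df = pnd.DataFrame({'kr': ['R00001', 'R00002', 'R00190'], 'modeled': [1, 0, 1]})

# render any matplotlib figure into an in-memory PNG buffer
fig, ax = plt.subplots()
ax.bar(df['kr'], df['modeled'])
buf = BytesIO()
fig.savefig(buf, format='png', dpi=150)
plt.close(fig)
buf.seek(0)

with pnd.ExcelWriter('coverage_demo.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Coverage', index=False)
    # anchor the PNG at cell A1 of the same sheet; the name is only a label for the in-memory image
    writer.sheets['Coverage'].insert_image('A1', 'figure.png', {'image_data': buf})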
gsrap/commons/figures.py
ADDED
@@ -0,0 +1,105 @@
+from io import BytesIO
+
+import numpy as np
+import pandas as pnd
+
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
+
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+
+
+
+def figure_df_C_F1(df_coverage):
+
+    bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
+    strains = bin_matrix.columns
+    bin_matrix = bin_matrix.T # features in column
+
+    # pdist() / linkage() will loose the accession information. So here we save a dict:
+    index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
+
+    # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
+    distances = pdist(bin_matrix, 'jaccard')
+    linkage_matrix = linkage(distances, method='ward')
+
+
+    # PART 0: create the frame
+    fig, axs = plt.subplots(
+        nrows=2, ncols=2,
+        figsize=(15, 10),
+        gridspec_kw={ # suplots width proportions.
+            'width_ratios': [0.5, 1.0],
+            'height_ratios': [0.015, 0.985]
+        }
+    )
+
+    # PART 1: dendrogram
+    dn = dendrogram(
+        linkage_matrix, ax=axs[1,0],
+        orientation='left',
+        color_threshold=0, above_threshold_color='black',
+    )
+
+
+    ### PART 2: heatmap
+    ord_leaves = leaves_list(linkage_matrix)
+    ord_leaves = np.flip(ord_leaves) # because leaves are returned in the inverse sense.
+    ord_leaves = [index_to_strain[i] for i in ord_leaves] # convert index as number to index as accession
+    bin_matrix = bin_matrix.loc[ord_leaves, :] # reordered dataframe.
+    axs[1,1].matshow(
+        bin_matrix,
+        cmap='viridis',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 3: coverage bar
+    axs[0,1].matshow(
+        df_coverage[['modeled']].T,
+        cmap='cool_r',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 4: legends
+    legend_feat = [
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
+    ]
+    legend_cov = [
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
+    ]
+    l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
+    l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
+    axs[1,0].add_artist(l1) # keep both legends visible
+
+
+    ### PART 5: aesthetics
+    plt.subplots_adjust(wspace=0, hspace=0) # adjust the space between subplots:
+    axs[0,0].axis('off') # remove frame and axis
+    axs[1,0].axis('off') # remove frame and axis
+
+    axs[0,1].yaxis.set_visible(False) # remove ticks, tick labels, axis label
+
+    axs[1,1].xaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_xticklabels([]) # remove tick labels
+    axs[1,1].xaxis.set_label_position("bottom")
+    axs[1,1].set_xlabel("KEGG reactions")
+
+    axs[1,1].yaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_yticklabels([]) # remove tick labels
+    axs[1,1].yaxis.set_label_position("right")
+    axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13) # labelpad is in points (1 point = 1/72 inch)
+
+
+    ### PART 6: save fig
+    buf = BytesIO()
+    fig.savefig(buf, dpi=300, bbox_inches='tight') # labelpad is in inches (1 point = 1/72 inch)
+    plt.close(fig)
+    buf.seek(0) # rewind the buffer to the beginning
+
+
+    return buf
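Note: figure_df_C_F1() above expects a presence/absence table with one column per strain plus 'map_ids' and 'modeled' columns (both excluded from the clustering), and returns the rendered figure as an in-memory PNG buffer. A toy invocation, with made-up reaction and strain identifiers, could look like this:

import pandas as pnd

from gsrap.commons.figures import figure_df_C_F1

# three KEGG reactions (rows) scored across three strains (columns)
df_coverage = pnd.DataFrame({
    'map_ids':  ['map00010', 'map00020', 'map00030'],
    'modeled':  [1, 1, 0],
    'strain_A': [1, 0, 1],
    'strain_B': [1, 1, 0],
    'strain_C': [0, 1, 1],
}, index=['R00001', 'R00200', 'R01056'])

buf = figure_df_C_F1(df_coverage)
with open('coverage_heatmap.png', 'wb') as handle:
    handle.write(buf.read())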
gsrap/commons/fluxbal.py
CHANGED
@@ -64,6 +64,7 @@ def create_model_incore(params):
     # remove universal orphans
     model = remove_universal_orphans(logger, model)
 
+
 
     ###### PRUNING
     logger.info("Reading provided eggnog-mapper annotation...")
@@ -77,6 +78,7 @@ def create_model_incore(params):
     translate_remaining_kos(logger, model, eggnog_ko_to_gids)
     restore_gene_annotations(logger, model, universe, eggonog_gid_to_kos)
 
+
 
     ###### GAPFILLING
     # force inclusion of reactions:
@@ -103,30 +105,35 @@ def create_model_incore(params):
     if type(df_P)==int: return 1
 
 
-    ###### POLISHING 2
-    # remove disconnected metabolites
-    model = remove_disconnected(logger, model)
 
+    ###### POLISHING 2
     # remove unsed sinks and demands
     model = remove_sinks_demands(logger, model)
+
+    # remove disconnected metabolites
+    model = remove_disconnected(logger, model)
 
+
 
     # # # # # DERIVATION ENDS HERE # # # # #
     log_metrics(logger, model)
     log_unbalances(logger, model)
 
 
+
     ###### CHECKS
     # check blocked metabolites / dead-ends
     df_S = biosynthesis_on_media(logger, model, dbexp, args.gap_fill, args.biosynth)
     if type(df_S)==int: return 1
 
 
+
     ###### POLISHING 3
     # reset growth environment befor saving the model
     gempipe.reset_growth_env(model)
 
 
+
     # output the model:
     logger.info("Writing strain-specific model...")
     cobra.io.save_json_model(model, f'{args.outdir}/{model.id}.json') # JSON
@@ -134,7 +141,7 @@ def create_model_incore(params):
     cobra.io.write_sbml_model(model, f'{args.outdir}/{model.id}.xml') # SBML # groups are saved only to SBML
     logger.info(f"'{args.outdir}/{model.id}.xml' created!")
     force_id_on_sbml(f'{args.outdir}/{model.id}.xml', model.id) # force introduction of the 'id=""' field
-    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', None, df_B, df_P, df_S)
+    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', args.nofigs, None, df_B, df_P, df_S)
     logger.info(f"'{args.outdir}/{model.id}.mkmodel.xlsx' created!")
 
 