PyPI - gsrap - Versions diffs - 0.7.1__tar.gz → 0.7.2__tar.gz - Mend

gsrap 0.7.1__tar.gz → 0.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

{gsrap-0.7.1 → gsrap-0.7.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: gsrap
-Version: 0.7.1
+Version: 0.7.2
 Summary:
 License: GNU General Public License v3.0
 Author: Gioele Lazzari
@@ -17,9 +17,11 @@ Requires-Dist: cobra (>=0.29)
 Requires-Dist: colorlog (>=6.9.0)
 Requires-Dist: gdown (>=5.2.0)
 Requires-Dist: gempipe (>=1.38.1)
+Requires-Dist: matplotlib (>=3.9.0)
 Requires-Dist: memote (>=0.17.0)
 Requires-Dist: openpyxl (>=3.1.0)
 Requires-Dist: pandas (>=2.0.0)
+Requires-Dist: xlsxwriter (>=3.1.0)
 Description-Content-Type: text/markdown
 Source code for `gsrap`.

{gsrap-0.7.1 → gsrap-0.7.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "gsrap"
-version = "0.7.1"
+version = "0.7.2"
 description = ""
 authors = ["Gioele Lazzari"]
 license = "GNU General Public License v3.0"
@@ -16,6 +16,9 @@ gempipe = ">=1.38.1"
 gdown = ">=5.2.0"
 colorlog = ">=6.9.0"
 memote = ">=0.17.0"
+matplotlib = ">=3.9.0"
+xlsxwriter = ">=3.1.0"
 [build-system]
 requires = ["poetry-core>=1.0.0"]

{gsrap-0.7.1 → gsrap-0.7.2}/src/gsrap/.ipynb_checkpoints/__init__-checkpoint.py RENAMED Viewed

@@ -72,9 +72,9 @@ def main():
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
     # add arguments for the 'mkmodel' command
@@ -94,6 +94,7 @@ def main():
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
     mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
+    mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
     # add arguments for the 'runsims' command
@@ -110,6 +111,7 @@ def main():
     runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
     runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
     runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
+    runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
     # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specified.

{gsrap-0.7.1 → gsrap-0.7.2}/src/gsrap/__init__.py RENAMED Viewed

@@ -72,9 +72,9 @@ def main():
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
     # add arguments for the 'mkmodel' command
@@ -94,6 +94,7 @@ def main():
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
     mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
+    mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
     # add arguments for the 'runsims' command
@@ -110,6 +111,7 @@ def main():
     runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
     runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
     runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
+    runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
     # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specified.

{gsrap-0.7.1 → gsrap-0.7.2}/src/gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py RENAMED Viewed

@@ -1,8 +1,20 @@
 import pandas as pnd
+from .figures import figure_df_C_F1
-def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
+def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+    # generate figures
+    if nofigs == False:
+        if df_C is not None:
+            df_C_F1 = figure_df_C_F1(df_C)
     df_M = []
     df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         df_S.insert(0, 'mid', '')  # new columns as first
         df_S['mid'] = df_S.index
         df_S = df_S.reset_index(drop=True)
+    # format df_C: universal reaction coverage
+    if df_C is not None:
+        df_C.insert(0, 'kr', '')  # new columns as first
+        df_C['kr'] = df_C.index
+        df_C = df_C.reset_index(drop=True)
     for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_R = pnd.DataFrame.from_records(df_R)
     df_T = pnd.DataFrame.from_records(df_T)
     df_A = pnd.DataFrame.from_records(df_A)
-    with pnd.ExcelWriter(filepath) as writer:
+    with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
         if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
         if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
+        if df_C is not None:
+            df_C.to_excel(writer, sheet_name='Coverage', index=False)
+            if nofigs == False:
+                worksheet = writer.sheets['Coverage']
+                worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
     sheets_dict = {
         'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         'Biomass': df_B,
         'Biolog': df_P,
         'Biosynth': df_S,
+        'Coverage': df_C,
     }
     return sheets_dict

gsrap-0.7.2/src/gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py ADDED Viewed

@@ -0,0 +1,105 @@
+from io import BytesIO
+import numpy as np
+import pandas as pnd
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+def figure_df_C_F1(df_coverage):
+    bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
+    strains = bin_matrix.columns
+    bin_matrix = bin_matrix.T  # features in column
+    # pdist() / linkage() will lose the accession information. So here we save a dict:
+    index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
+    # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
+    distances = pdist(bin_matrix, 'jaccard')
+    linkage_matrix = linkage(distances, method='ward')
+    # PART 0: create the frame
+    fig, axs = plt.subplots(
+        nrows=2, ncols=2,
+        figsize=(15, 10),
+        gridspec_kw={  # subplots width and height proportions.
+            'width_ratios': [0.5, 1.0],
+            'height_ratios': [0.015, 0.985]
+        }
+    )
+    # PART 1: dendrogram
+    dn = dendrogram(
+        linkage_matrix, ax=axs[1,0],
+        orientation='left',
+        color_threshold=0, above_threshold_color='black',
+    )
+    ### PART 2: heatmap
+    ord_leaves = leaves_list(linkage_matrix)
+    ord_leaves = np.flip(ord_leaves)  # because leaves are returned in the inverse sense.
+    ord_leaves = [index_to_strain[i] for i in ord_leaves]  # convert index as number to index as accession
+    bin_matrix = bin_matrix.loc[ord_leaves, :]  # reordered dataframe.
+    axs[1,1].matshow(
+        bin_matrix,
+        cmap='viridis',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+    ### PART 3: coverage bar
+    axs[0,1].matshow(
+        df_coverage[['modeled']].T,
+        cmap='cool_r',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+    ### PART 4: legends
+    legend_feat = [
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
+    ]
+    legend_cov = [
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
+    ]
+    l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
+    l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
+    axs[1,0].add_artist(l1)  # keep both legends visible
+    ### PART 5: aesthetics
+    plt.subplots_adjust(wspace=0, hspace=0)  # adjust the space between subplots:
+    axs[0,0].axis('off')  # remove frame and axis
+    axs[1,0].axis('off')  # remove frame and axis
+    axs[0,1].yaxis.set_visible(False)  # remove ticks, tick labels, axis label
+    axs[1,1].xaxis.set_ticks([])       # remove ticks
+    axs[1,1].set_xticklabels([])       # remove tick labels
+    axs[1,1].xaxis.set_label_position("bottom")
+    axs[1,1].set_xlabel("KEGG reactions")
+    axs[1,1].yaxis.set_ticks([])       # remove ticks
+    axs[1,1].set_yticklabels([])       # remove tick labels
+    axs[1,1].yaxis.set_label_position("right")
+    axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13)  # labelpad is in points (1 point = 1/72 inch)
+    ### PART 6: save fig
+    buf = BytesIO()
+    fig.savefig(buf, dpi=300, bbox_inches='tight')  # write the figure into the in-memory buffer
+    plt.close(fig)
+    buf.seek(0)  # rewind the buffer to the beginning
+    return buf

{gsrap-0.7.1 → gsrap-0.7.2}/src/gsrap/commons/excelhub.py RENAMED Viewed

@@ -1,8 +1,20 @@
 import pandas as pnd
+from .figures import figure_df_C_F1
-def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
+def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+    # generate figures
+    if nofigs == False:
+        if df_C is not None:
+            df_C_F1 = figure_df_C_F1(df_C)
     df_M = []
     df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         df_S.insert(0, 'mid', '')  # new columns as first
         df_S['mid'] = df_S.index
         df_S = df_S.reset_index(drop=True)
+    # format df_C: universal reaction coverage
+    if df_C is not None:
+        df_C.insert(0, 'kr', '')  # new columns as first
+        df_C['kr'] = df_C.index
+        df_C = df_C.reset_index(drop=True)
     for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_R = pnd.DataFrame.from_records(df_R)
     df_T = pnd.DataFrame.from_records(df_T)
     df_A = pnd.DataFrame.from_records(df_A)
-    with pnd.ExcelWriter(filepath) as writer:
+    with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
         if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
         if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
+        if df_C is not None:
+            df_C.to_excel(writer, sheet_name='Coverage', index=False)
+            if nofigs == False:
+                worksheet = writer.sheets['Coverage']
+                worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
     sheets_dict = {
         'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         'Biomass': df_B,
         'Biolog': df_P,
         'Biosynth': df_S,
+        'Coverage': df_C,
     }
     return sheets_dict

gsrap-0.7.2/src/gsrap/commons/figures.py ADDED Viewed

@@ -0,0 +1,105 @@
+from io import BytesIO
+import numpy as np
+import pandas as pnd
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+def figure_df_C_F1(df_coverage):
+    bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
+    strains = bin_matrix.columns
+    bin_matrix = bin_matrix.T  # features in column
+    # pdist() / linkage() will lose the accession information. So here we save a dict:
+    index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
+    # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
+    distances = pdist(bin_matrix, 'jaccard')
+    linkage_matrix = linkage(distances, method='ward')
+    # PART 0: create the frame
+    fig, axs = plt.subplots(
+        nrows=2, ncols=2,
+        figsize=(15, 10),
+        gridspec_kw={  # subplots width and height proportions.
+            'width_ratios': [0.5, 1.0],
+            'height_ratios': [0.015, 0.985]
+        }
+    )
+    # PART 1: dendrogram
+    dn = dendrogram(
+        linkage_matrix, ax=axs[1,0],
+        orientation='left',
+        color_threshold=0, above_threshold_color='black',
+    )
+    ### PART 2: heatmap
+    ord_leaves = leaves_list(linkage_matrix)
+    ord_leaves = np.flip(ord_leaves)  # because leaves are returned in the inverse sense.
+    ord_leaves = [index_to_strain[i] for i in ord_leaves]  # convert index as number to index as accession
+    bin_matrix = bin_matrix.loc[ord_leaves, :]  # reordered dataframe.
+    axs[1,1].matshow(
+        bin_matrix,
+        cmap='viridis',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+    ### PART 3: coverage bar
+    axs[0,1].matshow(
+        df_coverage[['modeled']].T,
+        cmap='cool_r',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+    ### PART 4: legends
+    legend_feat = [
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
+    ]
+    legend_cov = [
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
+    ]
+    l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
+    l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
+    axs[1,0].add_artist(l1)  # keep both legends visible
+    ### PART 5: aesthetics
+    plt.subplots_adjust(wspace=0, hspace=0)  # adjust the space between subplots:
+    axs[0,0].axis('off')  # remove frame and axis
+    axs[1,0].axis('off')  # remove frame and axis
+    axs[0,1].yaxis.set_visible(False)  # remove ticks, tick labels, axis label
+    axs[1,1].xaxis.set_ticks([])       # remove ticks
+    axs[1,1].set_xticklabels([])       # remove tick labels
+    axs[1,1].xaxis.set_label_position("bottom")
+    axs[1,1].set_xlabel("KEGG reactions")
+    axs[1,1].yaxis.set_ticks([])       # remove ticks
+    axs[1,1].set_yticklabels([])       # remove tick labels
+    axs[1,1].yaxis.set_label_position("right")
+    axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13)  # labelpad is in points (1 point = 1/72 inch)
+    ### PART 6: save fig
+    buf = BytesIO()
+    fig.savefig(buf, dpi=300, bbox_inches='tight')  # write the figure into the in-memory buffer
+    plt.close(fig)
+    buf.seek(0)  # rewind the buffer to the beginning
+    return buf

{gsrap-0.7.1 → gsrap-0.7.2}/src/gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py RENAMED Viewed

@@ -141,7 +141,7 @@ def create_model_incore(params):
     cobra.io.write_sbml_model(model, f'{args.outdir}/{model.id}.xml')        # SBML   # groups are saved only to SBML
     logger.info(f"'{args.outdir}/{model.id}.xml' created!")
     force_id_on_sbml(f'{args.outdir}/{model.id}.xml', model.id)   # force introduction of the 'id=""' field
-    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', None, df_B, df_P, df_S)
+    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', args.nofigs, None, df_B, df_P, df_S)
     logger.info(f"'{args.outdir}/{model.id}.mkmodel.xlsx' created!")

{gsrap-0.7.1 → gsrap-0.7.2}/src/gsrap/mkmodel/mkmodel.py RENAMED Viewed

@@ -141,7 +141,7 @@ def create_model_incore(params):
     cobra.io.write_sbml_model(model, f'{args.outdir}/{model.id}.xml')        # SBML   # groups are saved only to SBML
     logger.info(f"'{args.outdir}/{model.id}.xml' created!")
     force_id_on_sbml(f'{args.outdir}/{model.id}.xml', model.id)   # force introduction of the 'id=""' field
-    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', None, df_B, df_P, df_S)
+    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', args.nofigs, None, df_B, df_P, df_S)
     logger.info(f"'{args.outdir}/{model.id}.mkmodel.xlsx' created!")

gsrap 0.7.1__tar.gz → 0.7.2__tar.gz

gsrap 0.7.1__tar.gz → 0.7.2__tar.gz