gsrap-0.7.0-py3-none-any.whl → gsrap-0.7.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py +34 -5
- gsrap/__init__.py +34 -5
- gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +4 -0
- gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +27 -3
- gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +105 -0
- gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py +1 -1
- gsrap/commons/biomass.py +4 -0
- gsrap/commons/coeffs.py +1 -1
- gsrap/commons/excelhub.py +27 -3
- gsrap/commons/figures.py +105 -0
- gsrap/commons/fluxbal.py +1 -1
- gsrap/mkmodel/.ipynb_checkpoints/gapfillutils-checkpoint.py +3 -0
- gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +11 -4
- gsrap/mkmodel/gapfillutils.py +3 -0
- gsrap/mkmodel/mkmodel.py +11 -4
- gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +3 -0
- gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +101 -65
- gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +16 -1
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +4 -5
- gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +7 -0
- gsrap/parsedb/annotation.py +3 -0
- gsrap/parsedb/completeness.py +101 -65
- gsrap/parsedb/introduce.py +16 -1
- gsrap/parsedb/parsedb.py +4 -5
- gsrap/parsedb/repeating.py +7 -0
- gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +6 -7
- gsrap/runsims/simplegrowth.py +6 -7
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/METADATA +3 -1
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/RECORD +33 -31
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/LICENSE.txt +0 -0
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/WHEEL +0 -0
- {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/entry_points.txt +0 -0
gsrap/.ipynb_checkpoints/__init__-checkpoint.py
CHANGED

@@ -1,8 +1,10 @@
 import argparse
 import sys
 import traceback
+import requests
 import importlib.metadata
 from datetime import datetime
+from packaging import version
 
 
 import cobra
@@ -29,8 +31,9 @@ solver_name = solver_name.replace("_interface", '')
 def main():
 
 
-    # define the header of main- and sub-commands.
-
+    # define the header of main- and sub-commands.
+    current_version = importlib.metadata.metadata("gsrap")["Version"]
+    header = f'gsrap v{current_version},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
 
 
     # create the command line arguments:
@@ -69,9 +72,9 @@ def main():
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'mkmodel' command
@@ -91,6 +94,7 @@ def main():
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
     mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
+    mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'runsims' command
@@ -107,6 +111,7 @@ def main():
     runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
     runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
     runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
+    runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specied.
@@ -117,12 +122,36 @@ def main():
     # set up the logger:
     logger = get_logger('gsrap', args.verbose)
 
-
+
 
     # show a welcome message:
     set_header_trailer_formatter(logger.handlers[0])
     logger.info(header + '\n')
-
+
+
+
+    # check if newer version is available
+    try:
+        response = requests.get(f"https://pypi.org/pypi/gsrap/json", timeout=3) # sends an HTTP GET request to the given URL
+        response.raise_for_status() # check the HTTP status code (e.g. 200, 404, 500): if not in the 2xx success range, raise requests.exceptions.HTTPError
+        data = response.json()
+        newest_version = data["info"]["version"]
+    except Exception as error: # eg requests.exceptions.Timeout, requests.exceptions.HTTPError
+        logger.info(f'Can\'t retrieve the number of the newest version. Please contact the developer reporting the following error: "{error}".')
+        logger.info('') # still no formatting here
+        # do not exit, continue with the program
+    if version.parse(current_version) < version.parse(newest_version):
+        warning_message = f"███ Last version is v{newest_version} and you have v{current_version}: please update gsrap! ███"
+        border = ''.join(['█' for i in range(len(warning_message))])
+        logger.info(border)
+        logger.info(warning_message)
+        logger.info(border)
+        logger.info('') # still no formatting here
+
+
+
+    # print the full command line:
+    command_line = ''
     for arg, value in vars(args).items():
         if arg == 'subcommand': command_line = command_line + f"gsrap {value} "
         else: command_line = command_line + f"--{arg} {value} "
gsrap/__init__.py
CHANGED
@@ -1,8 +1,10 @@
 import argparse
 import sys
 import traceback
+import requests
 import importlib.metadata
 from datetime import datetime
+from packaging import version
 
 
 import cobra
@@ -29,8 +31,9 @@ solver_name = solver_name.replace("_interface", '')
 def main():
 
 
-    # define the header of main- and sub-commands.
-
+    # define the header of main- and sub-commands.
+    current_version = importlib.metadata.metadata("gsrap")["Version"]
+    header = f'gsrap v{current_version},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
 
 
     # create the command line arguments:
@@ -69,9 +72,9 @@ def main():
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
     parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
     parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'mkmodel' command
@@ -91,6 +94,7 @@ def main():
     mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
     mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
     mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
+    mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # add arguments for the 'runsims' command
@@ -107,6 +111,7 @@ def main():
     runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
     runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
     runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
+    runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
 
 
     # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specied.
@@ -117,12 +122,36 @@ def main():
     # set up the logger:
     logger = get_logger('gsrap', args.verbose)
 
-
+
 
     # show a welcome message:
    set_header_trailer_formatter(logger.handlers[0])
     logger.info(header + '\n')
-
+
+
+
+    # check if newer version is available
+    try:
+        response = requests.get(f"https://pypi.org/pypi/gsrap/json", timeout=3) # sends an HTTP GET request to the given URL
+        response.raise_for_status() # check the HTTP status code (e.g. 200, 404, 500): if not in the 2xx success range, raise requests.exceptions.HTTPError
+        data = response.json()
+        newest_version = data["info"]["version"]
+    except Exception as error: # eg requests.exceptions.Timeout, requests.exceptions.HTTPError
+        logger.info(f'Can\'t retrieve the number of the newest version. Please contact the developer reporting the following error: "{error}".')
+        logger.info('') # still no formatting here
+        # do not exit, continue with the program
+    if version.parse(current_version) < version.parse(newest_version):
+        warning_message = f"███ Last version is v{newest_version} and you have v{current_version}: please update gsrap! ███"
+        border = ''.join(['█' for i in range(len(warning_message))])
+        logger.info(border)
+        logger.info(warning_message)
+        logger.info(border)
+        logger.info('') # still no formatting here
+
+
+
+    # print the full command line:
+    command_line = ''
     for arg, value in vars(args).items():
         if arg == 'subcommand': command_line = command_line + f"gsrap {value} "
         else: command_line = command_line + f"--{arg} {value} "
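Note: the update check added above boils down to the following standalone sketch (same PyPI JSON endpoint and packaging-based comparison as in the diff; here the comparison is kept inside the try block and plain print() stands in for the gsrap logger):

import requests
from packaging import version

current_version = "0.7.2"  # gsrap reads this via importlib.metadata

try:
    # ask PyPI for the metadata of the latest published gsrap release
    response = requests.get("https://pypi.org/pypi/gsrap/json", timeout=3)
    response.raise_for_status()
    newest_version = response.json()["info"]["version"]
    if version.parse(current_version) < version.parse(newest_version):
        print(f"gsrap v{newest_version} is available (you have v{current_version})")
except Exception as error:
    # network failure or non-2xx response: report and continue, as the CLI does
    print(f"Could not check PyPI for a newer version: {error}")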
gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py
CHANGED

@@ -190,6 +190,10 @@ def introduce_universal_biomass(logger, dbexp, universe):
     r.build_reaction_from_string(rstring)
 
 
+    # add SBO annotation
+    r.annotation['sbo'] = ['SBO:0000629'] # biomass reaction
+
+
     # set as objective:
     universe.objective = 'Biomass'
 
gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py
CHANGED

@@ -267,7 +267,7 @@ def compute_exp_LIPIDS_coeffs(logger, model, MWF, LIPIDS_PL, LIPIDS_FA):
     r.bounds = (0, 1000)
     r.gene_reaction_rule = 'spontaneous'
     r.update_genes_from_gpr()
-
+
 
     # determine 'L' formula and charge (charge should be -1 like every fatty acid)
     L_dict = dict() # for 1 mol
gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py
CHANGED

@@ -1,8 +1,20 @@
 import pandas as pnd
 
 
+from .figures import figure_df_C_F1
 
-
+
+
+def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+
+
+    # generate figures
+    if nofigs == False:
+
+        if df_C is not None:
+            df_C_F1 = figure_df_C_F1(df_C)
+
+
 
     df_M = []
     df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_S.insert(0, 'mid', '') # new columns as first
     df_S['mid'] = df_S.index
     df_S = df_S.reset_index(drop=True)
+
+    # format df_C: universal reaction coverage
+    if df_C is not None:
+        df_C.insert(0, 'kr', '') # new columns as first
+        df_C['kr'] = df_C.index
+        df_C = df_C.reset_index(drop=True)
 
 
     for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_R = pnd.DataFrame.from_records(df_R)
     df_T = pnd.DataFrame.from_records(df_T)
     df_A = pnd.DataFrame.from_records(df_A)
-    with pnd.ExcelWriter(filepath) as writer:
+    with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
         if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
         if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
-
+        if df_C is not None:
+            df_C.to_excel(writer, sheet_name='Coverage', index=False)
+            if nofigs == False:
+                worksheet = writer.sheets['Coverage']
+                worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
+
 
     sheets_dict = {
         'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         'Biomass': df_B,
         'Biolog': df_P,
         'Biosynth': df_S,
+        'Coverage': df_C,
     }
     return sheets_dict
 
gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py
ADDED

@@ -0,0 +1,105 @@
+from io import BytesIO
+
+import numpy as np
+import pandas as pnd
+
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
+
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+
+
+
+def figure_df_C_F1(df_coverage):
+
+    bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
+    strains = bin_matrix.columns
+    bin_matrix = bin_matrix.T # features in column
+
+    # pdist() / linkage() will loose the accession information. So here we save a dict:
+    index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
+
+    # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
+    distances = pdist(bin_matrix, 'jaccard')
+    linkage_matrix = linkage(distances, method='ward')
+
+
+    # PART 0: create the frame
+    fig, axs = plt.subplots(
+        nrows=2, ncols=2,
+        figsize=(15, 10),
+        gridspec_kw={ # suplots width proportions.
+            'width_ratios': [0.5, 1.0],
+            'height_ratios': [0.015, 0.985]
+        }
+    )
+
+    # PART 1: dendrogram
+    dn = dendrogram(
+        linkage_matrix, ax=axs[1,0],
+        orientation='left',
+        color_threshold=0, above_threshold_color='black',
+    )
+
+
+    ### PART 2: heatmap
+    ord_leaves = leaves_list(linkage_matrix)
+    ord_leaves = np.flip(ord_leaves) # because leaves are returned in the inverse sense.
+    ord_leaves = [index_to_strain[i] for i in ord_leaves] # convert index as number to index as accession
+    bin_matrix = bin_matrix.loc[ord_leaves, :] # reordered dataframe.
+    axs[1,1].matshow(
+        bin_matrix,
+        cmap='viridis',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 3: coverage bar
+    axs[0,1].matshow(
+        df_coverage[['modeled']].T,
+        cmap='cool_r',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 4: legends
+    legend_feat = [
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
+    ]
+    legend_cov = [
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
+    ]
+    l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
+    l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
+    axs[1,0].add_artist(l1) # keep both legends visible
+
+
+    ### PART 5: aesthetics
+    plt.subplots_adjust(wspace=0, hspace=0) # adjust the space between subplots:
+    axs[0,0].axis('off') # remove frame and axis
+    axs[1,0].axis('off') # remove frame and axis
+
+    axs[0,1].yaxis.set_visible(False) # remove ticks, tick labels, axis label
+
+    axs[1,1].xaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_xticklabels([]) # remove tick labels
+    axs[1,1].xaxis.set_label_position("bottom")
+    axs[1,1].set_xlabel("KEGG reactions")
+
+    axs[1,1].yaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_yticklabels([]) # remove tick labels
+    axs[1,1].yaxis.set_label_position("right")
+    axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13) # labelpad is in points (1 point = 1/72 inch)
+
+
+    ### PART 6: save fig
+    buf = BytesIO()
+    fig.savefig(buf, dpi=300, bbox_inches='tight') # labelpad is in inches (1 point = 1/72 inch)
+    plt.close(fig)
+    buf.seek(0) # rewind the buffer to the beginning
+
+
+    return buf
gsrap/commons/biomass.py
CHANGED
@@ -190,6 +190,10 @@ def introduce_universal_biomass(logger, dbexp, universe):
     r.build_reaction_from_string(rstring)
 
 
+    # add SBO annotation
+    r.annotation['sbo'] = ['SBO:0000629'] # biomass reaction
+
+
     # set as objective:
     universe.objective = 'Biomass'
 
gsrap/commons/coeffs.py
CHANGED
@@ -267,7 +267,7 @@ def compute_exp_LIPIDS_coeffs(logger, model, MWF, LIPIDS_PL, LIPIDS_FA):
     r.bounds = (0, 1000)
     r.gene_reaction_rule = 'spontaneous'
     r.update_genes_from_gpr()
-
+
 
     # determine 'L' formula and charge (charge should be -1 like every fatty acid)
     L_dict = dict() # for 1 mol
gsrap/commons/excelhub.py
CHANGED
@@ -1,8 +1,20 @@
 import pandas as pnd
 
 
+from .figures import figure_df_C_F1
 
-
+
+
+def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
+
+
+    # generate figures
+    if nofigs == False:
+
+        if df_C is not None:
+            df_C_F1 = figure_df_C_F1(df_C)
+
+
 
     df_M = []
     df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_S.insert(0, 'mid', '') # new columns as first
     df_S['mid'] = df_S.index
     df_S = df_S.reset_index(drop=True)
+
+    # format df_C: universal reaction coverage
+    if df_C is not None:
+        df_C.insert(0, 'kr', '') # new columns as first
+        df_C['kr'] = df_C.index
+        df_C = df_C.reset_index(drop=True)
 
 
     for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
     df_R = pnd.DataFrame.from_records(df_R)
     df_T = pnd.DataFrame.from_records(df_T)
     df_A = pnd.DataFrame.from_records(df_A)
-    with pnd.ExcelWriter(filepath) as writer:
+    with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
         df_M.to_excel(writer, sheet_name='Metabolites', index=False)
         df_R.to_excel(writer, sheet_name='Reactions', index=False)
         df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
         if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
         if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
-
+        if df_C is not None:
+            df_C.to_excel(writer, sheet_name='Coverage', index=False)
+            if nofigs == False:
+                worksheet = writer.sheets['Coverage']
+                worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
+
 
     sheets_dict = {
         'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
         'Biomass': df_B,
         'Biolog': df_P,
         'Biosynth': df_S,
+        'Coverage': df_C,
     }
     return sheets_dict
 
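Note: the switch to engine='xlsxwriter' above is what lets the coverage figure be embedded straight from memory, since worksheet.insert_image() with the 'image_data' option is an xlsxwriter feature. A self-contained sketch of the same pattern, with toy data and an illustrative output name:

from io import BytesIO

import pandas as pnd
import matplotlib.pyplot as plt

df = pnd.DataFrame({'kr': ['R00001', 'R00002', 'R00190'], 'modeled': [1, 0, 1]})

# render any matplotlib figure into an in-memory PNG buffer
fig, ax = plt.subplots()
ax.bar(df['kr'], df['modeled'])
buf = BytesIO()
fig.savefig(buf, format='png', dpi=150)
plt.close(fig)
buf.seek(0)

with pnd.ExcelWriter('coverage_demo.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name='Coverage', index=False)
    # anchor the PNG at cell A1 of the same sheet; the name is only a label for the in-memory image
    writer.sheets['Coverage'].insert_image('A1', 'figure.png', {'image_data': buf})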
gsrap/commons/figures.py
ADDED
@@ -0,0 +1,105 @@
+from io import BytesIO
+
+import numpy as np
+import pandas as pnd
+
+from scipy.spatial.distance import pdist
+from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
+
+import matplotlib.pyplot as plt
+from matplotlib.patches import Patch
+
+
+
+def figure_df_C_F1(df_coverage):
+
+    bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
+    strains = bin_matrix.columns
+    bin_matrix = bin_matrix.T # features in column
+
+    # pdist() / linkage() will loose the accession information. So here we save a dict:
+    index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
+
+    # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
+    distances = pdist(bin_matrix, 'jaccard')
+    linkage_matrix = linkage(distances, method='ward')
+
+
+    # PART 0: create the frame
+    fig, axs = plt.subplots(
+        nrows=2, ncols=2,
+        figsize=(15, 10),
+        gridspec_kw={ # suplots width proportions.
+            'width_ratios': [0.5, 1.0],
+            'height_ratios': [0.015, 0.985]
+        }
+    )
+
+    # PART 1: dendrogram
+    dn = dendrogram(
+        linkage_matrix, ax=axs[1,0],
+        orientation='left',
+        color_threshold=0, above_threshold_color='black',
+    )
+
+
+    ### PART 2: heatmap
+    ord_leaves = leaves_list(linkage_matrix)
+    ord_leaves = np.flip(ord_leaves) # because leaves are returned in the inverse sense.
+    ord_leaves = [index_to_strain[i] for i in ord_leaves] # convert index as number to index as accession
+    bin_matrix = bin_matrix.loc[ord_leaves, :] # reordered dataframe.
+    axs[1,1].matshow(
+        bin_matrix,
+        cmap='viridis',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 3: coverage bar
+    axs[0,1].matshow(
+        df_coverage[['modeled']].T,
+        cmap='cool_r',
+        aspect='auto', # non-squared pixels to fit the axis
+    )
+
+
+    ### PART 4: legends
+    legend_feat = [
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
+        Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
+    ]
+    legend_cov = [
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
+        Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
+    ]
+    l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
+    l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
+    axs[1,0].add_artist(l1) # keep both legends visible
+
+
+    ### PART 5: aesthetics
+    plt.subplots_adjust(wspace=0, hspace=0) # adjust the space between subplots:
+    axs[0,0].axis('off') # remove frame and axis
+    axs[1,0].axis('off') # remove frame and axis
+
+    axs[0,1].yaxis.set_visible(False) # remove ticks, tick labels, axis label
+
+    axs[1,1].xaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_xticklabels([]) # remove tick labels
+    axs[1,1].xaxis.set_label_position("bottom")
+    axs[1,1].set_xlabel("KEGG reactions")
+
+    axs[1,1].yaxis.set_ticks([]) # remove ticks
+    axs[1,1].set_yticklabels([]) # remove tick labels
+    axs[1,1].yaxis.set_label_position("right")
+    axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13) # labelpad is in points (1 point = 1/72 inch)
+
+
+    ### PART 6: save fig
+    buf = BytesIO()
+    fig.savefig(buf, dpi=300, bbox_inches='tight') # labelpad is in inches (1 point = 1/72 inch)
+    plt.close(fig)
+    buf.seek(0) # rewind the buffer to the beginning
+
+
+    return buf
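Note: figure_df_C_F1() above expects a presence/absence table with one column per strain plus 'map_ids' and 'modeled' columns (both excluded from the clustering), and returns the rendered figure as an in-memory PNG buffer. A toy invocation, with made-up reaction and strain identifiers, could look like this:

import pandas as pnd

from gsrap.commons.figures import figure_df_C_F1

# three KEGG reactions (rows) scored across three strains (columns)
df_coverage = pnd.DataFrame({
    'map_ids':  ['map00010', 'map00020', 'map00030'],
    'modeled':  [1, 1, 0],
    'strain_A': [1, 0, 1],
    'strain_B': [1, 1, 0],
    'strain_C': [0, 1, 1],
}, index=['R00001', 'R00200', 'R01056'])

buf = figure_df_C_F1(df_coverage)
with open('coverage_heatmap.png', 'wb') as handle:
    handle.write(buf.read())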
gsrap/commons/fluxbal.py
CHANGED
@@ -64,6 +64,7 @@ def create_model_incore(params):
     # remove universal orphans
     model = remove_universal_orphans(logger, model)
 
+
 
     ###### PRUNING
     logger.info("Reading provided eggnog-mapper annotation...")
@@ -77,6 +78,7 @@ def create_model_incore(params):
     translate_remaining_kos(logger, model, eggnog_ko_to_gids)
     restore_gene_annotations(logger, model, universe, eggonog_gid_to_kos)
 
+
 
     ###### GAPFILLING
     # force inclusion of reactions:
@@ -103,30 +105,35 @@ def create_model_incore(params):
     if type(df_P)==int: return 1
 
 
-    ###### POLISHING 2
-    # remove disconnected metabolites
-    model = remove_disconnected(logger, model)
 
+    ###### POLISHING 2
     # remove unsed sinks and demands
     model = remove_sinks_demands(logger, model)
+
+    # remove disconnected metabolites
+    model = remove_disconnected(logger, model)
 
+
 
     # # # # # DERIVATION ENDS HERE # # # # #
     log_metrics(logger, model)
     log_unbalances(logger, model)
 
 
+
     ###### CHECKS
     # check blocked metabolites / dead-ends
     df_S = biosynthesis_on_media(logger, model, dbexp, args.gap_fill, args.biosynth)
     if type(df_S)==int: return 1
 
 
+
     ###### POLISHING 3
     # reset growth environment befor saving the model
     gempipe.reset_growth_env(model)
 
 
+
     # output the model:
     logger.info("Writing strain-specific model...")
     cobra.io.save_json_model(model, f'{args.outdir}/{model.id}.json') # JSON
@@ -134,7 +141,7 @@ def create_model_incore(params):
     cobra.io.write_sbml_model(model, f'{args.outdir}/{model.id}.xml') # SBML # groups are saved only to SBML
     logger.info(f"'{args.outdir}/{model.id}.xml' created!")
     force_id_on_sbml(f'{args.outdir}/{model.id}.xml', model.id) # force introduction of the 'id=""' field
-    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', None, df_B, df_P, df_S)
+    sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', args.nofigs, None, df_B, df_P, df_S)
     logger.info(f"'{args.outdir}/{model.id}.mkmodel.xlsx' created!")
 
 