gsrap 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. gsrap/.ipynb_checkpoints/__init__-checkpoint.py +34 -5
  2. gsrap/__init__.py +34 -5
  3. gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +4 -0
  4. gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +1 -1
  5. gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +27 -3
  6. gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +105 -0
  7. gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py +1 -1
  8. gsrap/commons/biomass.py +4 -0
  9. gsrap/commons/coeffs.py +1 -1
  10. gsrap/commons/excelhub.py +27 -3
  11. gsrap/commons/figures.py +105 -0
  12. gsrap/commons/fluxbal.py +1 -1
  13. gsrap/mkmodel/.ipynb_checkpoints/gapfillutils-checkpoint.py +3 -0
  14. gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +11 -4
  15. gsrap/mkmodel/gapfillutils.py +3 -0
  16. gsrap/mkmodel/mkmodel.py +11 -4
  17. gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +3 -0
  18. gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +101 -65
  19. gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +16 -1
  20. gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +4 -5
  21. gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +7 -0
  22. gsrap/parsedb/annotation.py +3 -0
  23. gsrap/parsedb/completeness.py +101 -65
  24. gsrap/parsedb/introduce.py +16 -1
  25. gsrap/parsedb/parsedb.py +4 -5
  26. gsrap/parsedb/repeating.py +7 -0
  27. gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +6 -7
  28. gsrap/runsims/simplegrowth.py +6 -7
  29. {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/METADATA +3 -1
  30. {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/RECORD +33 -31
  31. {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/LICENSE.txt +0 -0
  32. {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/WHEEL +0 -0
  33. {gsrap-0.7.0.dist-info → gsrap-0.7.2.dist-info}/entry_points.txt +0 -0
@@ -1,8 +1,10 @@
1
1
  import argparse
2
2
  import sys
3
3
  import traceback
4
+ import requests
4
5
  import importlib.metadata
5
6
  from datetime import datetime
7
+ from packaging import version
6
8
 
7
9
 
8
10
  import cobra
@@ -29,8 +31,9 @@ solver_name = solver_name.replace("_interface", '')
29
31
  def main():
30
32
 
31
33
 
32
- # define the header of main- and sub-commands.
33
- header = f'gsrap v{importlib.metadata.metadata("gsrap")["Version"]},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
34
+ # define the header of main- and sub-commands.
35
+ current_version = importlib.metadata.metadata("gsrap")["Version"]
36
+ header = f'gsrap v{current_version},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
34
37
 
35
38
 
36
39
  # create the command line arguments:
@@ -69,9 +72,9 @@ def main():
69
72
  parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
70
73
  parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
71
74
  parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
72
- #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
73
75
  parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
74
76
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
77
+ parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
75
78
 
76
79
 
77
80
  # add arguments for the 'mkmodel' command
@@ -91,6 +94,7 @@ def main():
91
94
  mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
92
95
  mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
93
96
  mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
97
+ mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
94
98
 
95
99
 
96
100
  # add arguments for the 'runsims' command
@@ -107,6 +111,7 @@ def main():
107
111
  runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
108
112
  runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
109
113
  runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
114
+ runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
110
115
 
111
116
 
112
117
  # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specied.
@@ -117,12 +122,36 @@ def main():
117
122
  # set up the logger:
118
123
  logger = get_logger('gsrap', args.verbose)
119
124
 
120
-
125
+
121
126
 
122
127
  # show a welcome message:
123
128
  set_header_trailer_formatter(logger.handlers[0])
124
129
  logger.info(header + '\n')
125
- command_line = '' # print the full command line:
130
+
131
+
132
+
133
+ # check if newer version is available
134
+ try:
135
+ response = requests.get(f"https://pypi.org/pypi/gsrap/json", timeout=3) # sends an HTTP GET request to the given URL
136
+ response.raise_for_status() # check the HTTP status code (e.g. 200, 404, 500): if not in the 2xx success range, raise requests.exceptions.HTTPError
137
+ data = response.json()
138
+ newest_version = data["info"]["version"]
139
+ except Exception as error: # eg requests.exceptions.Timeout, requests.exceptions.HTTPError
140
+ logger.info(f'Can\'t retrieve the number of the newest version. Please contact the developer reporting the following error: "{error}".')
141
+ logger.info('') # still no formatting here
142
+ # do not exit, continue with the program
143
+ if version.parse(current_version) < version.parse(newest_version):
144
+ warning_message = f"███ Last version is v{newest_version} and you have v{current_version}: please update gsrap! ███"
145
+ border = ''.join(['█' for i in range(len(warning_message))])
146
+ logger.info(border)
147
+ logger.info(warning_message)
148
+ logger.info(border)
149
+ logger.info('') # still no formatting here
150
+
151
+
152
+
153
+ # print the full command line:
154
+ command_line = ''
126
155
  for arg, value in vars(args).items():
127
156
  if arg == 'subcommand': command_line = command_line + f"gsrap {value} "
128
157
  else: command_line = command_line + f"--{arg} {value} "
gsrap/__init__.py CHANGED
@@ -1,8 +1,10 @@
1
1
  import argparse
2
2
  import sys
3
3
  import traceback
4
+ import requests
4
5
  import importlib.metadata
5
6
  from datetime import datetime
7
+ from packaging import version
6
8
 
7
9
 
8
10
  import cobra
@@ -29,8 +31,9 @@ solver_name = solver_name.replace("_interface", '')
29
31
  def main():
30
32
 
31
33
 
32
- # define the header of main- and sub-commands.
33
- header = f'gsrap v{importlib.metadata.metadata("gsrap")["Version"]},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
34
+ # define the header of main- and sub-commands.
35
+ current_version = importlib.metadata.metadata("gsrap")["Version"]
36
+ header = f'gsrap v{current_version},\ndeveloped by Gioele Lazzari (gioele.lazzari@univr.it).'
34
37
 
35
38
 
36
39
  # create the command line arguments:
@@ -69,9 +72,9 @@ def main():
69
72
  parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
70
73
  parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
71
74
  parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
72
- #parsedb_parser.add_argument("-z", "--zeroes", action='store_true', help="Show maps/modules with 0%% coverage, in addition to partials (use only with --progress).")
73
75
  parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
74
76
  parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
77
+ parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
75
78
 
76
79
 
77
80
  # add arguments for the 'mkmodel' command
@@ -91,6 +94,7 @@ def main():
91
94
  mkmodel_parser.add_argument("--conditional", metavar='', type=float, default=0.5, help="Expected minimum fraction of reactions in a biosynthetic pathway for an actually present conditional biomass precursor.")
92
95
  mkmodel_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
93
96
  mkmodel_parser.add_argument("-b", "--biomass", metavar='', type=str, default='-', help="Strain ID associated to experimental biomass data.")
97
+ mkmodel_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
94
98
 
95
99
 
96
100
  # add arguments for the 'runsims' command
@@ -107,6 +111,7 @@ def main():
107
111
  runsims_parser.add_argument("--omission", action='store_true', help="Perform single omission experiments to study auxotrophies.")
108
112
  runsims_parser.add_argument("--essential", action='store_true', help="Predict essential genes (single-gene knock-out simulations).")
109
113
  runsims_parser.add_argument("--factors", action='store_true', help="Predict putative growth factors.")
114
+ runsims_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
110
115
 
111
116
 
112
117
  # check the inputted subcommand, automatic sys.exit(1) if a bad subprogram was specied.
@@ -117,12 +122,36 @@ def main():
117
122
  # set up the logger:
118
123
  logger = get_logger('gsrap', args.verbose)
119
124
 
120
-
125
+
121
126
 
122
127
  # show a welcome message:
123
128
  set_header_trailer_formatter(logger.handlers[0])
124
129
  logger.info(header + '\n')
125
- command_line = '' # print the full command line:
130
+
131
+
132
+
133
+ # check if newer version is available
134
+ try:
135
+ response = requests.get(f"https://pypi.org/pypi/gsrap/json", timeout=3) # sends an HTTP GET request to the given URL
136
+ response.raise_for_status() # check the HTTP status code (e.g. 200, 404, 500): if not in the 2xx success range, raise requests.exceptions.HTTPError
137
+ data = response.json()
138
+ newest_version = data["info"]["version"]
139
+ except Exception as error: # eg requests.exceptions.Timeout, requests.exceptions.HTTPError
140
+ logger.info(f'Can\'t retrieve the number of the newest version. Please contact the developer reporting the following error: "{error}".')
141
+ logger.info('') # still no formatting here
142
+ # do not exit, continue with the program
143
+ if version.parse(current_version) < version.parse(newest_version):
144
+ warning_message = f"███ Last version is v{newest_version} and you have v{current_version}: please update gsrap! ███"
145
+ border = ''.join(['█' for i in range(len(warning_message))])
146
+ logger.info(border)
147
+ logger.info(warning_message)
148
+ logger.info(border)
149
+ logger.info('') # still no formatting here
150
+
151
+
152
+
153
+ # print the full command line:
154
+ command_line = ''
126
155
  for arg, value in vars(args).items():
127
156
  if arg == 'subcommand': command_line = command_line + f"gsrap {value} "
128
157
  else: command_line = command_line + f"--{arg} {value} "
@@ -190,6 +190,10 @@ def introduce_universal_biomass(logger, dbexp, universe):
190
190
  r.build_reaction_from_string(rstring)
191
191
 
192
192
 
193
+ # add SBO annotation
194
+ r.annotation['sbo'] = ['SBO:0000629'] # biomass reaction
195
+
196
+
193
197
  # set as objective:
194
198
  universe.objective = 'Biomass'
195
199
 
@@ -267,7 +267,7 @@ def compute_exp_LIPIDS_coeffs(logger, model, MWF, LIPIDS_PL, LIPIDS_FA):
267
267
  r.bounds = (0, 1000)
268
268
  r.gene_reaction_rule = 'spontaneous'
269
269
  r.update_genes_from_gpr()
270
-
270
+
271
271
 
272
272
  # determine 'L' formula and charge (charge should be -1 like every fatty acid)
273
273
  L_dict = dict() # for 1 mol
@@ -1,8 +1,20 @@
1
1
  import pandas as pnd
2
2
 
3
3
 
4
+ from .figures import figure_df_C_F1
4
5
 
5
- def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
6
+
7
+
8
+ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
9
+
10
+
11
+ # generate figures
12
+ if nofigs == False:
13
+
14
+ if df_C is not None:
15
+ df_C_F1 = figure_df_C_F1(df_C)
16
+
17
+
6
18
 
7
19
  df_M = []
8
20
  df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
33
45
  df_S.insert(0, 'mid', '') # new columns as first
34
46
  df_S['mid'] = df_S.index
35
47
  df_S = df_S.reset_index(drop=True)
48
+
49
+ # format df_C: universal reaction coverage
50
+ if df_C is not None:
51
+ df_C.insert(0, 'kr', '') # new columns as first
52
+ df_C['kr'] = df_C.index
53
+ df_C = df_C.reset_index(drop=True)
36
54
 
37
55
 
38
56
  for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
81
99
  df_R = pnd.DataFrame.from_records(df_R)
82
100
  df_T = pnd.DataFrame.from_records(df_T)
83
101
  df_A = pnd.DataFrame.from_records(df_A)
84
- with pnd.ExcelWriter(filepath) as writer:
102
+ with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
85
103
  df_M.to_excel(writer, sheet_name='Metabolites', index=False)
86
104
  df_R.to_excel(writer, sheet_name='Reactions', index=False)
87
105
  df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
90
108
  if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
91
109
  if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
92
110
  if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
93
-
111
+ if df_C is not None:
112
+ df_C.to_excel(writer, sheet_name='Coverage', index=False)
113
+ if nofigs == False:
114
+ worksheet = writer.sheets['Coverage']
115
+ worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
116
+
94
117
 
95
118
  sheets_dict = {
96
119
  'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
102
125
  'Biomass': df_B,
103
126
  'Biolog': df_P,
104
127
  'Biosynth': df_S,
128
+ 'Coverage': df_C,
105
129
  }
106
130
  return sheets_dict
107
131
 
@@ -0,0 +1,105 @@
1
+ from io import BytesIO
2
+
3
+ import numpy as np
4
+ import pandas as pnd
5
+
6
+ from scipy.spatial.distance import pdist
7
+ from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
8
+
9
+ import matplotlib.pyplot as plt
10
+ from matplotlib.patches import Patch
11
+
12
+
13
+
14
+ def figure_df_C_F1(df_coverage):
15
+
16
+ bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
17
+ strains = bin_matrix.columns
18
+ bin_matrix = bin_matrix.T # features in column
19
+
20
+ # pdist() / linkage() will loose the accession information. So here we save a dict:
21
+ index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
22
+
23
+ # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
24
+ distances = pdist(bin_matrix, 'jaccard')
25
+ linkage_matrix = linkage(distances, method='ward')
26
+
27
+
28
+ # PART 0: create the frame
29
+ fig, axs = plt.subplots(
30
+ nrows=2, ncols=2,
31
+ figsize=(15, 10),
32
+ gridspec_kw={ # suplots width proportions.
33
+ 'width_ratios': [0.5, 1.0],
34
+ 'height_ratios': [0.015, 0.985]
35
+ }
36
+ )
37
+
38
+ # PART 1: dendrogram
39
+ dn = dendrogram(
40
+ linkage_matrix, ax=axs[1,0],
41
+ orientation='left',
42
+ color_threshold=0, above_threshold_color='black',
43
+ )
44
+
45
+
46
+ ### PART 2: heatmap
47
+ ord_leaves = leaves_list(linkage_matrix)
48
+ ord_leaves = np.flip(ord_leaves) # because leaves are returned in the inverse sense.
49
+ ord_leaves = [index_to_strain[i] for i in ord_leaves] # convert index as number to index as accession
50
+ bin_matrix = bin_matrix.loc[ord_leaves, :] # reordered dataframe.
51
+ axs[1,1].matshow(
52
+ bin_matrix,
53
+ cmap='viridis',
54
+ aspect='auto', # non-squared pixels to fit the axis
55
+ )
56
+
57
+
58
+ ### PART 3: coverage bar
59
+ axs[0,1].matshow(
60
+ df_coverage[['modeled']].T,
61
+ cmap='cool_r',
62
+ aspect='auto', # non-squared pixels to fit the axis
63
+ )
64
+
65
+
66
+ ### PART 4: legends
67
+ legend_feat = [
68
+ Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
69
+ Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
70
+ ]
71
+ legend_cov = [
72
+ Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
73
+ Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
74
+ ]
75
+ l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
76
+ l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
77
+ axs[1,0].add_artist(l1) # keep both legends visible
78
+
79
+
80
+ ### PART 5: aesthetics
81
+ plt.subplots_adjust(wspace=0, hspace=0) # adjust the space between subplots:
82
+ axs[0,0].axis('off') # remove frame and axis
83
+ axs[1,0].axis('off') # remove frame and axis
84
+
85
+ axs[0,1].yaxis.set_visible(False) # remove ticks, tick labels, axis label
86
+
87
+ axs[1,1].xaxis.set_ticks([]) # remove ticks
88
+ axs[1,1].set_xticklabels([]) # remove tick labels
89
+ axs[1,1].xaxis.set_label_position("bottom")
90
+ axs[1,1].set_xlabel("KEGG reactions")
91
+
92
+ axs[1,1].yaxis.set_ticks([]) # remove ticks
93
+ axs[1,1].set_yticklabels([]) # remove tick labels
94
+ axs[1,1].yaxis.set_label_position("right")
95
+ axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13) # labelpad is in points (1 point = 1/72 inch)
96
+
97
+
98
+ ### PART 6: save fig
99
+ buf = BytesIO()
100
+ fig.savefig(buf, dpi=300, bbox_inches='tight') # labelpad is in inches (1 point = 1/72 inch)
101
+ plt.close(fig)
102
+ buf.seek(0) # rewind the buffer to the beginning
103
+
104
+
105
+ return buf
@@ -44,7 +44,7 @@ def verify_growth(model, boolean=True):
44
44
  if status =='infeasible':
45
45
  return 'infeasible'
46
46
  elif obj_value < get_optthr():
47
- return 0
47
+ return 0.0
48
48
  else:
49
49
  return round(obj_value, 3)
50
50
 
gsrap/commons/biomass.py CHANGED
@@ -190,6 +190,10 @@ def introduce_universal_biomass(logger, dbexp, universe):
190
190
  r.build_reaction_from_string(rstring)
191
191
 
192
192
 
193
+ # add SBO annotation
194
+ r.annotation['sbo'] = ['SBO:0000629'] # biomass reaction
195
+
196
+
193
197
  # set as objective:
194
198
  universe.objective = 'Biomass'
195
199
 
gsrap/commons/coeffs.py CHANGED
@@ -267,7 +267,7 @@ def compute_exp_LIPIDS_coeffs(logger, model, MWF, LIPIDS_PL, LIPIDS_FA):
267
267
  r.bounds = (0, 1000)
268
268
  r.gene_reaction_rule = 'spontaneous'
269
269
  r.update_genes_from_gpr()
270
-
270
+
271
271
 
272
272
  # determine 'L' formula and charge (charge should be -1 like every fatty acid)
273
273
  L_dict = dict() # for 1 mol
gsrap/commons/excelhub.py CHANGED
@@ -1,8 +1,20 @@
1
1
  import pandas as pnd
2
2
 
3
3
 
4
+ from .figures import figure_df_C_F1
4
5
 
5
- def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
6
+
7
+
8
+ def write_excel_model(model, filepath, nofigs, df_E, df_B, df_P, df_S, df_C=None):
9
+
10
+
11
+ # generate figures
12
+ if nofigs == False:
13
+
14
+ if df_C is not None:
15
+ df_C_F1 = figure_df_C_F1(df_C)
16
+
17
+
6
18
 
7
19
  df_M = []
8
20
  df_R = []
@@ -33,6 +45,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
33
45
  df_S.insert(0, 'mid', '') # new columns as first
34
46
  df_S['mid'] = df_S.index
35
47
  df_S = df_S.reset_index(drop=True)
48
+
49
+ # format df_C: universal reaction coverage
50
+ if df_C is not None:
51
+ df_C.insert(0, 'kr', '') # new columns as first
52
+ df_C['kr'] = df_C.index
53
+ df_C = df_C.reset_index(drop=True)
36
54
 
37
55
 
38
56
  for m in model.metabolites:
@@ -81,7 +99,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
81
99
  df_R = pnd.DataFrame.from_records(df_R)
82
100
  df_T = pnd.DataFrame.from_records(df_T)
83
101
  df_A = pnd.DataFrame.from_records(df_A)
84
- with pnd.ExcelWriter(filepath) as writer:
102
+ with pnd.ExcelWriter(filepath, engine='xlsxwriter') as writer:
85
103
  df_M.to_excel(writer, sheet_name='Metabolites', index=False)
86
104
  df_R.to_excel(writer, sheet_name='Reactions', index=False)
87
105
  df_T.to_excel(writer, sheet_name='Transporters', index=False)
@@ -90,7 +108,12 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
90
108
  if df_B is not None: df_B.to_excel(writer, sheet_name='Biomass', index=False)
91
109
  if df_P is not None and len(df_P)!=0: df_P.to_excel(writer, sheet_name='Biolog®', index=False)
92
110
  if df_S is not None and len(df_S.columns)>2: df_S.to_excel(writer, sheet_name='Biosynth', index=False)
93
-
111
+ if df_C is not None:
112
+ df_C.to_excel(writer, sheet_name='Coverage', index=False)
113
+ if nofigs == False:
114
+ worksheet = writer.sheets['Coverage']
115
+ worksheet.insert_image('A1', 'df_C_F1.png', {'image_data': df_C_F1})
116
+
94
117
 
95
118
  sheets_dict = {
96
119
  'model_id': model.id,
@@ -102,6 +125,7 @@ def write_excel_model(model, filepath, df_E, df_B, df_P, df_S):
102
125
  'Biomass': df_B,
103
126
  'Biolog': df_P,
104
127
  'Biosynth': df_S,
128
+ 'Coverage': df_C,
105
129
  }
106
130
  return sheets_dict
107
131
 
@@ -0,0 +1,105 @@
1
+ from io import BytesIO
2
+
3
+ import numpy as np
4
+ import pandas as pnd
5
+
6
+ from scipy.spatial.distance import pdist
7
+ from scipy.cluster.hierarchy import linkage, cut_tree, dendrogram, leaves_list
8
+
9
+ import matplotlib.pyplot as plt
10
+ from matplotlib.patches import Patch
11
+
12
+
13
+
14
+ def figure_df_C_F1(df_coverage):
15
+
16
+ bin_matrix = df_coverage[[i for i in df_coverage.columns if i not in ['map_ids', 'modeled']]]
17
+ strains = bin_matrix.columns
18
+ bin_matrix = bin_matrix.T # features in column
19
+
20
+ # pdist() / linkage() will loose the accession information. So here we save a dict:
21
+ index_to_strain = {i: strain for i, strain in enumerate(bin_matrix.index)}
22
+
23
+ # Calculate the linkage matrix using Ward clustering and Jaccard dissimilarity
24
+ distances = pdist(bin_matrix, 'jaccard')
25
+ linkage_matrix = linkage(distances, method='ward')
26
+
27
+
28
+ # PART 0: create the frame
29
+ fig, axs = plt.subplots(
30
+ nrows=2, ncols=2,
31
+ figsize=(15, 10),
32
+ gridspec_kw={ # suplots width proportions.
33
+ 'width_ratios': [0.5, 1.0],
34
+ 'height_ratios': [0.015, 0.985]
35
+ }
36
+ )
37
+
38
+ # PART 1: dendrogram
39
+ dn = dendrogram(
40
+ linkage_matrix, ax=axs[1,0],
41
+ orientation='left',
42
+ color_threshold=0, above_threshold_color='black',
43
+ )
44
+
45
+
46
+ ### PART 2: heatmap
47
+ ord_leaves = leaves_list(linkage_matrix)
48
+ ord_leaves = np.flip(ord_leaves) # because leaves are returned in the inverse sense.
49
+ ord_leaves = [index_to_strain[i] for i in ord_leaves] # convert index as number to index as accession
50
+ bin_matrix = bin_matrix.loc[ord_leaves, :] # reordered dataframe.
51
+ axs[1,1].matshow(
52
+ bin_matrix,
53
+ cmap='viridis',
54
+ aspect='auto', # non-squared pixels to fit the axis
55
+ )
56
+
57
+
58
+ ### PART 3: coverage bar
59
+ axs[0,1].matshow(
60
+ df_coverage[['modeled']].T,
61
+ cmap='cool_r',
62
+ aspect='auto', # non-squared pixels to fit the axis
63
+ )
64
+
65
+
66
+ ### PART 4: legends
67
+ legend_feat = [
68
+ Patch(facecolor=plt.colormaps.get_cmap('viridis')(0.0), edgecolor='black', label='Absent'),
69
+ Patch(facecolor=plt.colormaps.get_cmap('viridis')(1.0), edgecolor='black', label='Probably present'),
70
+ ]
71
+ legend_cov = [
72
+ Patch(facecolor=plt.colormaps.get_cmap('cool_r')(0.0), edgecolor='black', label='Not modeled'),
73
+ Patch(facecolor=plt.colormaps.get_cmap('cool_r')(1.0), edgecolor='black', label='Modeled'),
74
+ ]
75
+ l1 = axs[1,0].legend(handles=legend_cov, title='Universe coverage', loc='upper left')
76
+ l2 = axs[1,0].legend(handles=legend_feat, title='KEGG reaction in strain', loc='lower left')
77
+ axs[1,0].add_artist(l1) # keep both legends visible
78
+
79
+
80
+ ### PART 5: aesthetics
81
+ plt.subplots_adjust(wspace=0, hspace=0) # adjust the space between subplots:
82
+ axs[0,0].axis('off') # remove frame and axis
83
+ axs[1,0].axis('off') # remove frame and axis
84
+
85
+ axs[0,1].yaxis.set_visible(False) # remove ticks, tick labels, axis label
86
+
87
+ axs[1,1].xaxis.set_ticks([]) # remove ticks
88
+ axs[1,1].set_xticklabels([]) # remove tick labels
89
+ axs[1,1].xaxis.set_label_position("bottom")
90
+ axs[1,1].set_xlabel("KEGG reactions")
91
+
92
+ axs[1,1].yaxis.set_ticks([]) # remove ticks
93
+ axs[1,1].set_yticklabels([]) # remove tick labels
94
+ axs[1,1].yaxis.set_label_position("right")
95
+ axs[1,1].set_ylabel(f"{len(strains)} strains", rotation=270, labelpad=13) # labelpad is in points (1 point = 1/72 inch)
96
+
97
+
98
+ ### PART 6: save fig
99
+ buf = BytesIO()
100
+ fig.savefig(buf, dpi=300, bbox_inches='tight') # labelpad is in inches (1 point = 1/72 inch)
101
+ plt.close(fig)
102
+ buf.seek(0) # rewind the buffer to the beginning
103
+
104
+
105
+ return buf
gsrap/commons/fluxbal.py CHANGED
@@ -44,7 +44,7 @@ def verify_growth(model, boolean=True):
44
44
  if status =='infeasible':
45
45
  return 'infeasible'
46
46
  elif obj_value < get_optthr():
47
- return 0
47
+ return 0.0
48
48
  else:
49
49
  return round(obj_value, 3)
50
50
 
@@ -33,6 +33,9 @@ def import_from_universe(model, universe, rid, bounds=None, gpr=None):
33
33
  else:
34
34
  r.gene_reaction_rule = ''
35
35
  r.update_genes_from_gpr()
36
+
37
+ # set annotations
38
+ r.annotation = ru.annotation
36
39
 
37
40
 
38
41
 
@@ -64,6 +64,7 @@ def create_model_incore(params):
64
64
  # remove universal orphans
65
65
  model = remove_universal_orphans(logger, model)
66
66
 
67
+
67
68
 
68
69
  ###### PRUNING
69
70
  logger.info("Reading provided eggnog-mapper annotation...")
@@ -77,6 +78,7 @@ def create_model_incore(params):
77
78
  translate_remaining_kos(logger, model, eggnog_ko_to_gids)
78
79
  restore_gene_annotations(logger, model, universe, eggonog_gid_to_kos)
79
80
 
81
+
80
82
 
81
83
  ###### GAPFILLING
82
84
  # force inclusion of reactions:
@@ -103,30 +105,35 @@ def create_model_incore(params):
103
105
  if type(df_P)==int: return 1
104
106
 
105
107
 
106
- ###### POLISHING 2
107
- # remove disconnected metabolites
108
- model = remove_disconnected(logger, model)
109
108
 
109
+ ###### POLISHING 2
110
110
  # remove unsed sinks and demands
111
111
  model = remove_sinks_demands(logger, model)
112
+
113
+ # remove disconnected metabolites
114
+ model = remove_disconnected(logger, model)
112
115
 
116
+
113
117
 
114
118
  # # # # # DERIVATION ENDS HERE # # # # #
115
119
  log_metrics(logger, model)
116
120
  log_unbalances(logger, model)
117
121
 
118
122
 
123
+
119
124
  ###### CHECKS
120
125
  # check blocked metabolites / dead-ends
121
126
  df_S = biosynthesis_on_media(logger, model, dbexp, args.gap_fill, args.biosynth)
122
127
  if type(df_S)==int: return 1
123
128
 
124
129
 
130
+
125
131
  ###### POLISHING 3
126
132
  # reset growth environment befor saving the model
127
133
  gempipe.reset_growth_env(model)
128
134
 
129
135
 
136
+
130
137
  # output the model:
131
138
  logger.info("Writing strain-specific model...")
132
139
  cobra.io.save_json_model(model, f'{args.outdir}/{model.id}.json') # JSON
@@ -134,7 +141,7 @@ def create_model_incore(params):
134
141
  cobra.io.write_sbml_model(model, f'{args.outdir}/{model.id}.xml') # SBML # groups are saved only to SBML
135
142
  logger.info(f"'{args.outdir}/{model.id}.xml' created!")
136
143
  force_id_on_sbml(f'{args.outdir}/{model.id}.xml', model.id) # force introduction of the 'id=""' field
137
- sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', None, df_B, df_P, df_S)
144
+ sheets_dict = write_excel_model(model, f'{args.outdir}/{model.id}.mkmodel.xlsx', args.nofigs, None, df_B, df_P, df_S)
138
145
  logger.info(f"'{args.outdir}/{model.id}.mkmodel.xlsx' created!")
139
146
 
140
147
 
@@ -33,6 +33,9 @@ def import_from_universe(model, universe, rid, bounds=None, gpr=None):
33
33
  else:
34
34
  r.gene_reaction_rule = ''
35
35
  r.update_genes_from_gpr()
36
+
37
+ # set annotations
38
+ r.annotation = ru.annotation
36
39
 
37
40
 
38
41