gsrap 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py +11 -13
- gsrap/__init__.py +11 -13
- gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +3 -3
- gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +147 -1
- gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +10 -10
- gsrap/commons/biomass.py +1 -1
- gsrap/commons/coeffs.py +1 -1
- gsrap/commons/downloads.py +3 -3
- gsrap/commons/escherutils.py +147 -1
- gsrap/commons/medium.py +1 -1
- gsrap/commons/metrics.py +10 -10
- gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +16 -6
- gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +35 -35
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +19 -10
- gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +12 -13
- gsrap/parsedb/annotation.py +16 -6
- gsrap/parsedb/introduce.py +35 -35
- gsrap/parsedb/parsedb.py +19 -10
- gsrap/parsedb/repeating.py +12 -13
- {gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/METADATA +2 -2
- {gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/RECORD +27 -27
- {gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/WHEEL +0 -0
- {gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/entry_points.txt +0 -0
- {gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -9,21 +9,18 @@ import atexit
|
|
|
9
9
|
import os
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
import cobra
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
from .commons import get_logger
|
|
17
16
|
from .commons import set_usual_formatter
|
|
18
17
|
from .commons import set_header_trailer_formatter
|
|
19
|
-
|
|
20
18
|
from .getmaps import getmaps_command
|
|
21
19
|
from .parsedb import parsedb_command
|
|
22
20
|
from .mkmodel import mkmodel_command
|
|
23
21
|
from .runsims import runsims_command
|
|
24
22
|
|
|
25
23
|
|
|
26
|
-
|
|
27
24
|
cobra_config = cobra.Configuration()
|
|
28
25
|
solver_name = str(cobra_config.solver.log).split(' ')[1]
|
|
29
26
|
solver_name = solver_name.replace("optlang.", '')
|
|
@@ -69,20 +66,21 @@ def main():
|
|
|
69
66
|
parsedb_parser.add_argument("-o", "--outdir", metavar='', type=str, default='./', help="Main output directory (will be created if not existing).")
|
|
70
67
|
parsedb_parser.add_argument("-i", "--inmaps", metavar='', type=str, default='./gsrap.maps', help="Input file 'gsrap.maps' previously produced using the 'getmaps' subcommand.")
|
|
71
68
|
parsedb_parser.add_argument("-p", "--progress", action='store_true', help="Show progress for each map.")
|
|
72
|
-
parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use
|
|
73
|
-
parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use
|
|
69
|
+
parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use with --progress).")
|
|
70
|
+
parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module obtaining codes of missing reactions (use with --progress).")
|
|
74
71
|
parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
|
|
75
|
-
parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium.
|
|
72
|
+
parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. If not provided, the first medium in --media is used. Provide 'None' to avoid initialization.")
|
|
76
73
|
parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
|
|
77
74
|
parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
|
|
78
|
-
parsedb_parser.add_argument("-
|
|
79
|
-
parsedb_parser.add_argument("-
|
|
80
|
-
parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="
|
|
81
|
-
parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="
|
|
82
|
-
parsedb_parser.add_argument("--
|
|
83
|
-
parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
|
|
84
|
-
parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="
|
|
75
|
+
parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to optional eggnog-mapper annotation table(s). If provided, --progress will be based on such table(s) instead of the entire KEGG.")
|
|
76
|
+
parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="Optional high-level taxon of interest accounted during the parsing. If provided, --progress will be based on such taxon instead of the entire KEGG (takes precedence over --eggnog). The syntax '{level}:{name}' must be honored, where {level} is 'kingdom' or 'phylum'.")
|
|
77
|
+
parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="Optional single KEGG Organism code accounted during parsing. If provided, --progress will be based on such organism instead of the entire KEGG (takes precedence over --taxon and --eggnog).")
|
|
78
|
+
parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Used to skip rows of M-R-T sheets during parsing. The syntax '{pure_mid}-{rid1}-{rid2}' must be honored. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
|
|
79
|
+
parsedb_parser.add_argument("--onlycurator", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified curator ID only. Contents affected by --goodbefore are parsed anyway.")
|
|
80
|
+
parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures in th Excel output.")
|
|
81
|
+
parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Parse the database without performing extra activities (saves time).")
|
|
85
82
|
parsedb_parser.add_argument("-d", "--keepdisconn", action='store_true', help="Do not remove disconnected metabolites.")
|
|
83
|
+
parsedb_parser.add_argument("--escherzip", metavar='', type=str, default='-', help="Optional path to the zipped 'escher' folder downloaded from Google Drive.")
|
|
86
84
|
|
|
87
85
|
|
|
88
86
|
|
gsrap/__init__.py
CHANGED
|
@@ -9,21 +9,18 @@ import atexit
|
|
|
9
9
|
import os
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
import cobra
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
from .commons import get_logger
|
|
17
16
|
from .commons import set_usual_formatter
|
|
18
17
|
from .commons import set_header_trailer_formatter
|
|
19
|
-
|
|
20
18
|
from .getmaps import getmaps_command
|
|
21
19
|
from .parsedb import parsedb_command
|
|
22
20
|
from .mkmodel import mkmodel_command
|
|
23
21
|
from .runsims import runsims_command
|
|
24
22
|
|
|
25
23
|
|
|
26
|
-
|
|
27
24
|
cobra_config = cobra.Configuration()
|
|
28
25
|
solver_name = str(cobra_config.solver.log).split(' ')[1]
|
|
29
26
|
solver_name = solver_name.replace("optlang.", '')
|
|
@@ -69,20 +66,21 @@ def main():
|
|
|
69
66
|
parsedb_parser.add_argument("-o", "--outdir", metavar='', type=str, default='./', help="Main output directory (will be created if not existing).")
|
|
70
67
|
parsedb_parser.add_argument("-i", "--inmaps", metavar='', type=str, default='./gsrap.maps', help="Input file 'gsrap.maps' previously produced using the 'getmaps' subcommand.")
|
|
71
68
|
parsedb_parser.add_argument("-p", "--progress", action='store_true', help="Show progress for each map.")
|
|
72
|
-
parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use
|
|
73
|
-
parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use
|
|
69
|
+
parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use with --progress).")
|
|
70
|
+
parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module obtaining codes of missing reactions (use with --progress).")
|
|
74
71
|
parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
|
|
75
|
-
parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium.
|
|
72
|
+
parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. If not provided, the first medium in --media is used. Provide 'None' to avoid initialization.")
|
|
76
73
|
parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
|
|
77
74
|
parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
|
|
78
|
-
parsedb_parser.add_argument("-
|
|
79
|
-
parsedb_parser.add_argument("-
|
|
80
|
-
parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="
|
|
81
|
-
parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="
|
|
82
|
-
parsedb_parser.add_argument("--
|
|
83
|
-
parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
|
|
84
|
-
parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="
|
|
75
|
+
parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to optional eggnog-mapper annotation table(s). If provided, --progress will be based on such table(s) instead of the entire KEGG.")
|
|
76
|
+
parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="Optional high-level taxon of interest accounted during the parsing. If provided, --progress will be based on such taxon instead of the entire KEGG (takes precedence over --eggnog). The syntax '{level}:{name}' must be honored, where {level} is 'kingdom' or 'phylum'.")
|
|
77
|
+
parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="Optional single KEGG Organism code accounted during parsing. If provided, --progress will be based on such organism instead of the entire KEGG (takes precedence over --taxon and --eggnog).")
|
|
78
|
+
parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Used to skip rows of M-R-T sheets during parsing. The syntax '{pure_mid}-{rid1}-{rid2}' must be honored. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
|
|
79
|
+
parsedb_parser.add_argument("--onlycurator", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified curator ID only. Contents affected by --goodbefore are parsed anyway.")
|
|
80
|
+
parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures in th Excel output.")
|
|
81
|
+
parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Parse the database without performing extra activities (saves time).")
|
|
85
82
|
parsedb_parser.add_argument("-d", "--keepdisconn", action='store_true', help="Do not remove disconnected metabolites.")
|
|
83
|
+
parsedb_parser.add_argument("--escherzip", metavar='', type=str, default='-', help="Optional path to the zipped 'escher' folder downloaded from Google Drive.")
|
|
86
84
|
|
|
87
85
|
|
|
88
86
|
|
|
@@ -28,7 +28,7 @@ def get_biomass_dict(logger, universe, dbexp):
|
|
|
28
28
|
fraction_to_precursors = dict()
|
|
29
29
|
for fraction in ['DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
|
|
30
30
|
fraction_db = dbexp[fraction]
|
|
31
|
-
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', '
|
|
31
|
+
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
|
|
32
32
|
precursors = [f"{i}_c" for i in fraction_db.index.dropna()]
|
|
33
33
|
for i in precursors:
|
|
34
34
|
if i not in universal_mids:
|
|
@@ -26,7 +26,7 @@ def check_exp_biomass_data(logger, dbexp, biomass):
|
|
|
26
26
|
ftd = dict() # fraction_to_decimals
|
|
27
27
|
for sheet in ['MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
|
|
28
28
|
fraction_db = dbexp[sheet][biomass].dropna() # they should be str
|
|
29
|
-
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', '
|
|
29
|
+
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
|
|
30
30
|
ftd[sheet] = {i: Decimal(val) for i, val in fraction_db.items() if Decimal(val) != Decimal('0.0')} # convert to dict
|
|
31
31
|
if sum(ftd[sheet].values()) != Decimal('1.0'): # check if the sum gives 1 (g/gDW or mol/mol depending on 'sheet')
|
|
32
32
|
logger.error(f"Biomass data provided in sheet '{sheet}' for ID '{biomass}' does not sum up to 1.0. Missing mass is {Decimal('1.0')-sum(ftd[sheet].values())}.")
|
|
@@ -41,7 +41,7 @@ def get_dbuni(logger):
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
sheet_id = "1dXJBIFjCghrdvQtxEOYlVNWAQU4mK-nqLWyDQeUZqek"
|
|
44
|
-
#sheet_id = "
|
|
44
|
+
#sheet_id = "15fIBewG1B1jIbg1_9pMnyPJL7LWCLMD8s_vlf3ZUno0" # alternative
|
|
45
45
|
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
|
|
46
46
|
response = requests.get(url) # download the requested file
|
|
47
47
|
if response.status_code == 200:
|
|
@@ -100,7 +100,7 @@ def get_dbexp(logger):
|
|
|
100
100
|
|
|
101
101
|
# check table presence
|
|
102
102
|
sheet_names = exceldb.sheet_names
|
|
103
|
-
for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', '
|
|
103
|
+
for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', 'curators']:
|
|
104
104
|
if i not in sheet_names:
|
|
105
105
|
logger.error(f"Sheet '{i}' is missing!")
|
|
106
106
|
return 1
|
|
@@ -119,7 +119,7 @@ def get_dbexp(logger):
|
|
|
119
119
|
dbexp['PROTS'] = exceldb.parse('PROTS')
|
|
120
120
|
dbexp['LIPIDS_PL'] = exceldb.parse('LIPIDS_PL')
|
|
121
121
|
dbexp['LIPIDS_FA'] = exceldb.parse('LIPIDS_FA')
|
|
122
|
-
dbexp['
|
|
122
|
+
dbexp['curators'] = exceldb.parse('curators')
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
# format tables (media):
|
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
import warnings
|
|
2
2
|
import logging
|
|
3
3
|
import threading
|
|
4
|
+
import zipfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import tempfile
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import shutil
|
|
4
10
|
|
|
5
11
|
import cobra
|
|
12
|
+
from escher import Builder
|
|
6
13
|
|
|
7
14
|
from .downloads import SimpleLoadingWheel
|
|
8
15
|
|
|
@@ -109,4 +116,143 @@ def count_undrawn_rids_focus(logger, universe, lastmap, focus, outdir):
|
|
|
109
116
|
logger.warning(f"'{outdir}/focus_{focus}.json' created!")
|
|
110
117
|
else:
|
|
111
118
|
logger.info(f"Current '{lastmap['filename']}' is 0 reactions behind. Thank you ♥")
|
|
112
|
-
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def parse_zipped_escher(logger, universe, escherzip, outdir):
    """Parse the zipped 'escher' folder downloaded from Google Drive.

    The archive is expected to contain one top-level folder holding one
    sub-folder per map; each sub-folder holds versioned files named
    '{map_id}-v{N}.json'. For every map, the highest version is read to
    collect the drawn genes, reactions and metabolites, and a standalone
    Escher HTML app is saved under '{outdir}/escherapps/'. Finally, every
    gene, reaction and metabolite of 'universe' receives a 'drawn_in_maps'
    annotation (a sorted list of map IDs, empty if not drawn anywhere).

    Args:
        logger: logger used for info/error messages.
        universe: model exposing 'genes', 'reactions' and 'metabolites',
            each item having 'id' and 'annotation'.
        escherzip: path to the zipped folder.
        outdir: main output directory.

    Returns:
        The annotated 'universe', or 1 if an error was logged.
    """

    logger.info("Processing collection of hand-drawn Escher maps...")

    # prepare an empty folder for the generated Escher apps
    apps_dir = f'{outdir}/escherapps'
    shutil.rmtree(apps_dir, ignore_errors=True)
    os.makedirs(apps_dir, exist_ok=True)

    drawngid_to_maps = dict()
    drawnrid_to_maps = dict()
    drawnpuremid_to_maps = dict()
    with tempfile.TemporaryDirectory() as tmp_dir:

        # check path existence:
        zip_path = Path(escherzip)  # convert to Path for convenience
        if not zip_path.exists():
            logger.error(f"Zipped escher does not exist at provided path '{zip_path}'.")
            return 1

        # extract in an auto-deleting temporary directory
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(tmp_dir)

        # locate the top-level folder of the ZIP and its per-map sub-folders:
        child_dirs = [p for p in Path(tmp_dir).iterdir() if p.is_dir()]
        if not child_dirs:
            # avoid an IndexError on archives lacking the expected top-level folder
            logger.error(f"No top-level folder found inside '{zip_path}'.")
            return 1
        escher_dir = child_dirs[0]
        inner_child_dirs = [p for p in escher_dir.iterdir() if p.is_dir()]

        # iterate the per-map folders:
        for map_dir in inner_child_dirs:
            map_id = map_dir.name
            map_files = [f for f in map_dir.iterdir() if f.is_file()]
            # part of each filename preceding the '-v{N}.json' suffix
            map_files_name = {f.name.split('-v', 1)[0] for f in map_files}

            # check presence of maps:
            if len(map_files_name) == 0:
                # still no maps in this folder
                continue

            # check name consistency:
            if len(map_files_name) != 1:
                logger.error(f"Name inconsistency in folder '{map_id}'!")
                return 1

            # check consistency with the folder
            if next(iter(map_files_name)) != map_id:
                logger.error(f"Versions in '{map_id}' refer to a different map!")
                # the expected '{map_id}-v{N}.json' cannot exist in this case
                return 1

            # get latest version
            map_versions = [int(f.name.split('-v', 1)[-1].replace('.json', '')) for f in map_files]
            latest_version = max(map_versions)

            # read json (last version) to fill the dicts
            latest_filepath = str(map_dir / f"{map_id}-v{latest_version}.json")
            with open(latest_filepath, 'r') as file:
                json_data = json.load(file)

            # get elements drawn on this map (read from the second item of the JSON document):
            map_drawngids = set()
            map_drawnrids = set()
            map_drawnpuremids = set()
            for reaction in json_data[1]['reactions'].values():
                map_drawnrids.add(reaction['bigg_id'])
                for gene in reaction['genes']:
                    map_drawngids.add(gene['bigg_id'])
            for node in json_data[1]['nodes'].values():
                if node['node_type'] == 'metabolite':
                    # drop the trailing '_{suffix}' from the metabolite ID
                    map_drawnpuremids.add(node['bigg_id'].rsplit('_', 1)[0])

            # populate dicts
            for drawngid in map_drawngids:
                drawngid_to_maps.setdefault(drawngid, set()).add(map_id)
            for drawnrid in map_drawnrids:
                drawnrid_to_maps.setdefault(drawnrid, set()).add(map_id)
            for drawnpuremid in map_drawnpuremids:
                drawnpuremid_to_maps.setdefault(drawnpuremid, set()).add(map_id)

            # read json (last version) to create the escher app
            builder = Builder(
                map_json=latest_filepath,
                model_json=None,
            )
            builder.never_ask_before_quit = True
            builder.scroll_behavior = 'zoom'

            builder.menu = 'all'
            builder.enable_editing = False

            builder.enable_keys = False  # switch not working
            builder.enable_search = True  # switch not working
            builder.full_screen_button = True  # switch not working

            builder.highlight_missing = True
            builder.show_gene_reaction_rules = True
            builder.enable_tooltips = 'label'

            builder.save_html(f"{apps_dir}/{map_id}.html")

    # apply annotation to universe (sorted for a reproducible output)
    for g in universe.genes:
        g.annotation['drawn_in_maps'] = sorted(drawngid_to_maps.get(g.id, ()))
    for r in universe.reactions:
        r.annotation['drawn_in_maps'] = sorted(drawnrid_to_maps.get(r.id, ()))
    for m in universe.metabolites:
        puremid = m.id.rsplit('_', 1)[0]
        m.annotation['drawn_in_maps'] = sorted(drawnpuremid_to_maps.get(puremid, ()))

    return universe
|
|
@@ -58,8 +58,8 @@ def show_contributions(logger, db, goodbefore):
|
|
|
58
58
|
return 0
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
# create a counter for each
|
|
62
|
-
cnt = {
|
|
61
|
+
# create a counter for each curator
|
|
62
|
+
cnt = {curator: 0 for curator in db['curators']['username']}
|
|
63
63
|
cnt_tot = 0
|
|
64
64
|
|
|
65
65
|
|
|
@@ -67,9 +67,9 @@ def show_contributions(logger, db, goodbefore):
|
|
|
67
67
|
if type(row['curator']) != str:
|
|
68
68
|
logger.error(f"Missing curator in tab 'R', rid '{row['rid']}'.")
|
|
69
69
|
return 1
|
|
70
|
-
for
|
|
71
|
-
|
|
72
|
-
cnt[
|
|
70
|
+
for curator in row['curator'].split(';'):
|
|
71
|
+
curator = curator.rstrip().strip()
|
|
72
|
+
cnt[curator] += 1
|
|
73
73
|
cnt_tot += 1
|
|
74
74
|
|
|
75
75
|
|
|
@@ -77,18 +77,18 @@ def show_contributions(logger, db, goodbefore):
|
|
|
77
77
|
if type(row['curator']) != str:
|
|
78
78
|
logger.error(f"Missing curator in tab 'T', rid '{row['rid']}'.")
|
|
79
79
|
return 1
|
|
80
|
-
for
|
|
81
|
-
|
|
82
|
-
cnt[
|
|
80
|
+
for curator in row['curator'].split(';'):
|
|
81
|
+
curator = curator.rstrip().strip()
|
|
82
|
+
cnt[curator] += 1
|
|
83
83
|
cnt_tot += 1
|
|
84
84
|
|
|
85
85
|
|
|
86
86
|
# compute percentages:
|
|
87
|
-
pct = {
|
|
87
|
+
pct = {curator: cnt[curator]/cnt_tot*100 for curator in cnt.keys()}
|
|
88
88
|
# sort in descending order:
|
|
89
89
|
pct = dict(sorted(pct.items(), key=lambda item: item[1], reverse=True))
|
|
90
90
|
# convert to string:
|
|
91
|
-
pct = {
|
|
91
|
+
pct = {curator: f'{round(pct[curator],2)}%' for curator in pct.keys()}
|
|
92
92
|
logger.debug(f"Contributions: {pct}.")
|
|
93
93
|
|
|
94
94
|
|
gsrap/commons/biomass.py
CHANGED
|
@@ -28,7 +28,7 @@ def get_biomass_dict(logger, universe, dbexp):
|
|
|
28
28
|
fraction_to_precursors = dict()
|
|
29
29
|
for fraction in ['DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
|
|
30
30
|
fraction_db = dbexp[fraction]
|
|
31
|
-
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', '
|
|
31
|
+
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
|
|
32
32
|
precursors = [f"{i}_c" for i in fraction_db.index.dropna()]
|
|
33
33
|
for i in precursors:
|
|
34
34
|
if i not in universal_mids:
|
gsrap/commons/coeffs.py
CHANGED
|
@@ -26,7 +26,7 @@ def check_exp_biomass_data(logger, dbexp, biomass):
|
|
|
26
26
|
ftd = dict() # fraction_to_decimals
|
|
27
27
|
for sheet in ['MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
|
|
28
28
|
fraction_db = dbexp[sheet][biomass].dropna() # they should be str
|
|
29
|
-
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', '
|
|
29
|
+
fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
|
|
30
30
|
ftd[sheet] = {i: Decimal(val) for i, val in fraction_db.items() if Decimal(val) != Decimal('0.0')} # convert to dict
|
|
31
31
|
if sum(ftd[sheet].values()) != Decimal('1.0'): # check if the sum gives 1 (g/gDW or mol/mol depending on 'sheet')
|
|
32
32
|
logger.error(f"Biomass data provided in sheet '{sheet}' for ID '{biomass}' does not sum up to 1.0. Missing mass is {Decimal('1.0')-sum(ftd[sheet].values())}.")
|
gsrap/commons/downloads.py
CHANGED
|
@@ -41,7 +41,7 @@ def get_dbuni(logger):
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
sheet_id = "1dXJBIFjCghrdvQtxEOYlVNWAQU4mK-nqLWyDQeUZqek"
|
|
44
|
-
#sheet_id = "
|
|
44
|
+
#sheet_id = "15fIBewG1B1jIbg1_9pMnyPJL7LWCLMD8s_vlf3ZUno0" # alternative
|
|
45
45
|
url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
|
|
46
46
|
response = requests.get(url) # download the requested file
|
|
47
47
|
if response.status_code == 200:
|
|
@@ -100,7 +100,7 @@ def get_dbexp(logger):
|
|
|
100
100
|
|
|
101
101
|
# check table presence
|
|
102
102
|
sheet_names = exceldb.sheet_names
|
|
103
|
-
for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', '
|
|
103
|
+
for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', 'curators']:
|
|
104
104
|
if i not in sheet_names:
|
|
105
105
|
logger.error(f"Sheet '{i}' is missing!")
|
|
106
106
|
return 1
|
|
@@ -119,7 +119,7 @@ def get_dbexp(logger):
|
|
|
119
119
|
dbexp['PROTS'] = exceldb.parse('PROTS')
|
|
120
120
|
dbexp['LIPIDS_PL'] = exceldb.parse('LIPIDS_PL')
|
|
121
121
|
dbexp['LIPIDS_FA'] = exceldb.parse('LIPIDS_FA')
|
|
122
|
-
dbexp['
|
|
122
|
+
dbexp['curators'] = exceldb.parse('curators')
|
|
123
123
|
|
|
124
124
|
|
|
125
125
|
# format tables (media):
|
gsrap/commons/escherutils.py
CHANGED
|
@@ -1,8 +1,15 @@
|
|
|
1
1
|
import warnings
|
|
2
2
|
import logging
|
|
3
3
|
import threading
|
|
4
|
+
import zipfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import tempfile
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import shutil
|
|
4
10
|
|
|
5
11
|
import cobra
|
|
12
|
+
from escher import Builder
|
|
6
13
|
|
|
7
14
|
from .downloads import SimpleLoadingWheel
|
|
8
15
|
|
|
@@ -109,4 +116,143 @@ def count_undrawn_rids_focus(logger, universe, lastmap, focus, outdir):
|
|
|
109
116
|
logger.warning(f"'{outdir}/focus_{focus}.json' created!")
|
|
110
117
|
else:
|
|
111
118
|
logger.info(f"Current '{lastmap['filename']}' is 0 reactions behind. Thank you ♥")
|
|
112
|
-
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def parse_zipped_escher(logger, universe, escherzip, outdir):
    """Parse the zipped 'escher' folder downloaded from Google Drive.

    The archive is expected to contain one top-level folder holding one
    sub-folder per map; each sub-folder holds versioned files named
    '{map_id}-v{N}.json'. For every map, the highest version is read to
    collect the drawn genes, reactions and metabolites, and a standalone
    Escher HTML app is saved under '{outdir}/escherapps/'. Finally, every
    gene, reaction and metabolite of 'universe' receives a 'drawn_in_maps'
    annotation (a sorted list of map IDs, empty if not drawn anywhere).

    Args:
        logger: logger used for info/error messages.
        universe: model exposing 'genes', 'reactions' and 'metabolites',
            each item having 'id' and 'annotation'.
        escherzip: path to the zipped folder.
        outdir: main output directory.

    Returns:
        The annotated 'universe', or 1 if an error was logged.
    """

    logger.info("Processing collection of hand-drawn Escher maps...")

    # prepare an empty folder for the generated Escher apps
    apps_dir = f'{outdir}/escherapps'
    shutil.rmtree(apps_dir, ignore_errors=True)
    os.makedirs(apps_dir, exist_ok=True)

    drawngid_to_maps = dict()
    drawnrid_to_maps = dict()
    drawnpuremid_to_maps = dict()
    with tempfile.TemporaryDirectory() as tmp_dir:

        # check path existence:
        zip_path = Path(escherzip)  # convert to Path for convenience
        if not zip_path.exists():
            logger.error(f"Zipped escher does not exist at provided path '{zip_path}'.")
            return 1

        # extract in an auto-deleting temporary directory
        with zipfile.ZipFile(zip_path, "r") as z:
            z.extractall(tmp_dir)

        # locate the top-level folder of the ZIP and its per-map sub-folders:
        child_dirs = [p for p in Path(tmp_dir).iterdir() if p.is_dir()]
        if not child_dirs:
            # avoid an IndexError on archives lacking the expected top-level folder
            logger.error(f"No top-level folder found inside '{zip_path}'.")
            return 1
        escher_dir = child_dirs[0]
        inner_child_dirs = [p for p in escher_dir.iterdir() if p.is_dir()]

        # iterate the per-map folders:
        for map_dir in inner_child_dirs:
            map_id = map_dir.name
            map_files = [f for f in map_dir.iterdir() if f.is_file()]
            # part of each filename preceding the '-v{N}.json' suffix
            map_files_name = {f.name.split('-v', 1)[0] for f in map_files}

            # check presence of maps:
            if len(map_files_name) == 0:
                # still no maps in this folder
                continue

            # check name consistency:
            if len(map_files_name) != 1:
                logger.error(f"Name inconsistency in folder '{map_id}'!")
                return 1

            # check consistency with the folder
            if next(iter(map_files_name)) != map_id:
                logger.error(f"Versions in '{map_id}' refer to a different map!")
                # the expected '{map_id}-v{N}.json' cannot exist in this case
                return 1

            # get latest version
            map_versions = [int(f.name.split('-v', 1)[-1].replace('.json', '')) for f in map_files]
            latest_version = max(map_versions)

            # read json (last version) to fill the dicts
            latest_filepath = str(map_dir / f"{map_id}-v{latest_version}.json")
            with open(latest_filepath, 'r') as file:
                json_data = json.load(file)

            # get elements drawn on this map (read from the second item of the JSON document):
            map_drawngids = set()
            map_drawnrids = set()
            map_drawnpuremids = set()
            for reaction in json_data[1]['reactions'].values():
                map_drawnrids.add(reaction['bigg_id'])
                for gene in reaction['genes']:
                    map_drawngids.add(gene['bigg_id'])
            for node in json_data[1]['nodes'].values():
                if node['node_type'] == 'metabolite':
                    # drop the trailing '_{suffix}' from the metabolite ID
                    map_drawnpuremids.add(node['bigg_id'].rsplit('_', 1)[0])

            # populate dicts
            for drawngid in map_drawngids:
                drawngid_to_maps.setdefault(drawngid, set()).add(map_id)
            for drawnrid in map_drawnrids:
                drawnrid_to_maps.setdefault(drawnrid, set()).add(map_id)
            for drawnpuremid in map_drawnpuremids:
                drawnpuremid_to_maps.setdefault(drawnpuremid, set()).add(map_id)

            # read json (last version) to create the escher app
            builder = Builder(
                map_json=latest_filepath,
                model_json=None,
            )
            builder.never_ask_before_quit = True
            builder.scroll_behavior = 'zoom'

            builder.menu = 'all'
            builder.enable_editing = False

            builder.enable_keys = False  # switch not working
            builder.enable_search = True  # switch not working
            builder.full_screen_button = True  # switch not working

            builder.highlight_missing = True
            builder.show_gene_reaction_rules = True
            builder.enable_tooltips = 'label'

            builder.save_html(f"{apps_dir}/{map_id}.html")

    # apply annotation to universe (sorted for a reproducible output)
    for g in universe.genes:
        g.annotation['drawn_in_maps'] = sorted(drawngid_to_maps.get(g.id, ()))
    for r in universe.reactions:
        r.annotation['drawn_in_maps'] = sorted(drawnrid_to_maps.get(r.id, ()))
    for m in universe.metabolites:
        puremid = m.id.rsplit('_', 1)[0]
        m.annotation['drawn_in_maps'] = sorted(drawnpuremid_to_maps.get(puremid, ()))

    return universe
|
gsrap/commons/medium.py
CHANGED