gsrap 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsrap/.ipynb_checkpoints/__init__-checkpoint.py +2 -0
- gsrap/__init__.py +2 -0
- gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py +1 -0
- gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +1 -1
- gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +70 -37
- gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +15 -1
- gsrap/commons/.ipynb_checkpoints/keggutils-checkpoint.py +145 -0
- gsrap/commons/__init__.py +1 -0
- gsrap/commons/downloads.py +1 -1
- gsrap/commons/escherutils.py +1 -1
- gsrap/commons/excelhub.py +70 -37
- gsrap/commons/figures.py +15 -1
- gsrap/commons/keggutils.py +145 -0
- gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +63 -19
- gsrap/mkmodel/.ipynb_checkpoints/pruner-checkpoint.py +72 -7
- gsrap/mkmodel/mkmodel.py +63 -19
- gsrap/mkmodel/pruner.py +72 -7
- gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +33 -6
- gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +8 -0
- gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +10 -2
- gsrap/parsedb/completeness.py +33 -6
- gsrap/parsedb/introduce.py +8 -0
- gsrap/parsedb/parsedb.py +10 -2
- {gsrap-0.7.2.dist-info → gsrap-0.8.0.dist-info}/METADATA +1 -1
- {gsrap-0.7.2.dist-info → gsrap-0.8.0.dist-info}/RECORD +29 -27
- {gsrap-0.7.2.dist-info → gsrap-0.8.0.dist-info}/LICENSE.txt +0 -0
- {gsrap-0.7.2.dist-info → gsrap-0.8.0.dist-info}/WHEEL +0 -0
- {gsrap-0.7.2.dist-info → gsrap-0.8.0.dist-info}/entry_points.txt +0 -0
gsrap/mkmodel/pruner.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import warnings
|
|
3
3
|
import logging
|
|
4
|
+
import pickle
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
import pandas as pnd
|
|
@@ -43,22 +44,57 @@ def load_input_eggnog(logger, eggnog):
|
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
# load eggnog annotations
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
df_eggnog = pnd.read_csv(eggnog, sep='\t', comment='#', header=None)
|
|
48
|
+
df_eggnog.columns = 'query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs'.split('\t')
|
|
49
|
+
df_eggnog = df_eggnog.set_index('query', drop=True, verify_integrity=True)
|
|
49
50
|
|
|
50
51
|
|
|
51
|
-
return
|
|
52
|
+
return df_eggnog
|
|
52
53
|
|
|
53
54
|
|
|
54
55
|
|
|
55
|
-
def
|
|
56
|
+
def load_keggorg_like_eggnog(logger, keggorg, outdir):
    """Reshape the pickled KEGG organism table into an eggNOG-like dataframe.

    The table is read from ``<outdir>/<keggorg>.keggorg`` and must provide the
    columns 'gid', 'Pfam' and 'ko'. For every gene ID, a list found in 'Pfam'
    is joined with commas into 'PFAMs' and a string found in 'ko' is copied
    into 'KEGG_ko'; any other value (e.g. a missing value) becomes '-'.
    Every remaining eggNOG column is filled with the '-' placeholder.

    Args:
        logger: not used by this function.
        keggorg: stem of the pickle file to read (presumably a KEGG organism
            code -- TODO confirm against the caller).
        outdir: directory containing the ``.keggorg`` pickle.

    Returns:
        A dataframe indexed by 'query' whose columns follow the eggNOG
        annotation header order.

    Raises:
        ValueError: if gene IDs are duplicated (``verify_integrity=True``).
    """

    # load raw data, downloaded from kegg:
    # NOTE(review): pickle.load can execute arbitrary code; this assumes the
    # file was written by this tool itself -- confirm it is never user-supplied.
    with open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb') as handler:
        df_keggorg = pickle.load(handler)
    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)

    # create an eggnog-like dataframe (list of dicts, one per gene):
    records = []
    for gid, pfams, ko in zip(df_keggorg.index, df_keggorg['Pfam'], df_keggorg['ko']):
        records.append({
            'query': gid,
            'PFAMs': ','.join(pfams) if isinstance(pfams, list) else '-',
            'KEGG_ko': ko if isinstance(ko, str) else '-',
        })
    df_eggnog_like = pnd.DataFrame.from_records(records)

    # append missing columns and sort.
    # Split on any whitespace, so the header is parsed correctly whether its
    # names are separated by tabs or by spaces (names contain no whitespace).
    eggnog_columns = 'query seed_ortholog evalue score eggNOG_OGs max_annot_lvl COG_category Description Preferred_name GOs EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass BRITE KEGG_TC CAZy BiGG_Reaction PFAMs'.split()
    df_eggnog_like = df_eggnog_like.reindex(columns=eggnog_columns, fill_value='-')

    # set the index like in eggnog
    df_eggnog_like = df_eggnog_like.set_index('query', drop=True, verify_integrity=True)
    return df_eggnog_like
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def parse_eggnog(df_eggnog):
|
|
56
92
|
|
|
57
93
|
|
|
58
94
|
# PART 1. get KO codes available
|
|
59
95
|
gid_to_kos = {}
|
|
60
96
|
ko_to_gids = {}
|
|
61
|
-
for gid, kos in
|
|
97
|
+
for gid, kos in df_eggnog['KEGG_ko'].items():
|
|
62
98
|
if kos == '-':
|
|
63
99
|
continue
|
|
64
100
|
|
|
@@ -229,8 +265,37 @@ def restore_gene_annotations(logger, model, universe, eggonog_gid_to_kos):
|
|
|
229
265
|
# collect names
|
|
230
266
|
names.append(uni_g.name)
|
|
231
267
|
g.name = '; '.join(names)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def append_keggorg_gene_annots(logger, model, keggorg, outdir):
    """Copy KEGG-derived cross-references onto the genes of a model.

    The gene table is read from ``<outdir>/<keggorg>.keggorg``. Every gene of
    ``model.genes`` whose ``id`` is found in the table's 'gid' column gets a
    'kegg.genes' annotation (``'<keggorg>:<gene id>'``) plus, for each of the
    optional columns present in the table, the list stored for that gene
    (or an empty list when the stored value is not a list).

    Args:
        logger: logger used for the informational message.
        model: object exposing ``genes``; each gene exposes ``id`` and a
            dict-like ``annotation`` (mutated in place).
        keggorg: stem of the pickle file, also used as annotation prefix.
        outdir: directory containing the ``.keggorg`` pickle.

    Raises:
        ValueError: if gene IDs are duplicated (``verify_integrity=True``).
    """

    # load raw data, downloaded from kegg:
    logger.info("Adding gene annotations retrieved from KEGG...")
    # NOTE(review): pickle.load can execute arbitrary code; this assumes the
    # file was written by this tool itself -- confirm it is never user-supplied.
    with open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb') as handler:
        df_keggorg = pickle.load(handler)
    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)

    # KEGG can provide some useful (ie, used in Memote) gene annotations.
    # Map: column of the KEGG table -> annotation key written on the gene.
    column_to_key = {
        'NCBI-GeneID': 'ncbigene',
        'NCBI-ProteinID': 'ncbiprotein',
        'ASAP': 'asap',
        'UniProt': 'uniprot',
    }
    # The set of available columns is the same for every gene: resolve it once.
    available = [(col, key) for col, key in column_to_key.items() if col in df_keggorg.columns]

    for g in model.genes:
        if g.id not in df_keggorg.index:
            continue  # gene unknown to KEGG: leave its annotations untouched

        g.annotation['kegg.genes'] = [keggorg + ':' + g.id]

        for col, key in available:
            value = df_keggorg.loc[g.id, col]
            # non-list cells (e.g. missing values) are stored as an empty list
            g.annotation[key] = value if isinstance(value, list) else []
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
|
|
234
299
|
|
|
235
300
|
|
|
236
301
|
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
import pickle
|
|
3
|
+
import os
|
|
2
4
|
|
|
3
5
|
|
|
4
6
|
import pandas as pnd
|
|
@@ -35,14 +37,39 @@ def parse_eggnog(model, eggnog, idcollection_dict):
|
|
|
35
37
|
return krs_org
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
|
|
41
|
+
def parse_keggorg(keggorg, outdir, idcollection_dict):
    """Return the KEGG reaction IDs supported by at least one KO of the organism.

    The gene table is read from ``<outdir>/<keggorg>.keggorg``; the non-missing
    values of its 'ko' column form the organism's KO set. A reaction is kept
    when any of the KOs listed for it in ``idcollection_dict['kr_to_kos']``
    belongs to that set.

    Args:
        keggorg: stem of the pickle file to read.
        outdir: directory containing the ``.keggorg`` pickle.
        idcollection_dict: dict providing 'kr_to_kos', a mapping from a
            reaction ID to an iterable of KO IDs.

    Returns:
        A set of reaction IDs (keys of 'kr_to_kos').

    Raises:
        ValueError: if gene IDs are duplicated (``verify_integrity=True``).
    """

    # NOTE(review): pickle.load can execute arbitrary code; this assumes the
    # file was written by this tool itself -- confirm it is never user-supplied.
    with open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb') as handler:
        df_keggorg = pickle.load(handler)
    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)

    # PART 1. get KO codes available (genes without a KO hold a missing value)
    kos_org = set(df_keggorg['ko'].dropna())

    # PART 2. get reactions in the organism (even if the GPR is not complete)
    kr_to_kos = idcollection_dict['kr_to_kos']
    krs_org = {
        kr for kr, kos in kr_to_kos.items()
        if any(ko in kos_org for ko in kos)
    }

    return krs_org
|
|
60
|
+
|
|
61
|
+
|
|
38
62
|
|
|
39
|
-
def check_completeness(logger, model, progress, module, focus, eggnog, idcollection_dict, summary_dict):
|
|
63
|
+
def check_completeness(logger, model, progress, module, focus, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
|
|
40
64
|
# check KEGG annotations in the universe model to get '%' of completeness per pathway/module.
|
|
41
65
|
|
|
42
66
|
|
|
43
67
|
# get the reference set of kr codes (all kegg or organism specific):
|
|
44
68
|
kr_uni = set()
|
|
45
|
-
if
|
|
69
|
+
if keggorg != '-': # keggorg has precedence
|
|
70
|
+
kr_uni = parse_keggorg(keggorg, outdir, idcollection_dict)
|
|
71
|
+
kr_uni_label = f"organism code '{keggorg}'"
|
|
72
|
+
elif eggnog != '-':
|
|
46
73
|
for eggfile in eggnog:
|
|
47
74
|
eggset = parse_eggnog(model, eggfile, idcollection_dict)
|
|
48
75
|
kr_uni = kr_uni.union(eggset)
|
|
@@ -60,7 +87,7 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
|
|
|
60
87
|
kr_ids_modeled.add(kr_id)
|
|
61
88
|
kr_uni_missing = kr_uni - kr_ids_modeled
|
|
62
89
|
kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
|
|
63
|
-
logger.info(f"Coverage for
|
|
90
|
+
logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
|
|
64
91
|
|
|
65
92
|
|
|
66
93
|
# define the map?????, containing krs not included in maps
|
|
@@ -177,15 +204,15 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
|
|
|
177
204
|
for eggfile in eggnog:
|
|
178
205
|
strain = Path(eggfile).stem
|
|
179
206
|
eggset = parse_eggnog(model, eggfile, idcollection_dict)
|
|
180
|
-
col = df_coverage.index.to_series().isin(eggset).astype(int)
|
|
207
|
+
col = df_coverage.index.to_series().isin(eggset).astype(int) # integer: 0 or 1
|
|
181
208
|
df_strains.append(col.rename(strain))
|
|
182
209
|
df_strains = pnd.concat(df_strains, axis=1)
|
|
183
210
|
# sort rows: upper rows are present in more strains
|
|
184
|
-
df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index]
|
|
211
|
+
#df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index] # commented: now in charge of figures.py
|
|
185
212
|
df_coverage = df_coverage.loc[df_strains.index]
|
|
186
213
|
df_coverage = pnd.concat([df_coverage, df_strains], axis=1)
|
|
187
214
|
# split in 2: modeled above, non-modeled below:
|
|
188
|
-
df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]])
|
|
215
|
+
#df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]]) # commented: now in charge of figures.py
|
|
189
216
|
else: # not interesting in a super-long table without strains in column
|
|
190
217
|
df_coverage = None
|
|
191
218
|
|
|
@@ -143,6 +143,14 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
143
143
|
m.annotation[ankey] = list(m.annotation[ankey])
|
|
144
144
|
|
|
145
145
|
|
|
146
|
+
# replace inchikey with manually-curated
|
|
147
|
+
if m.annotation['inchikey'] != [] and m.annotation['inchikey'] != [row['inchikey']]:
|
|
148
|
+
logger.debug(f"Metabolite '{pure_mid}': manual-curated inchikey ({[row['inchikey']]}) is diferent from the one derived from MNX ({m.annotation['inchikey']}).")
|
|
149
|
+
m.annotation['inchikey'] = [row['inchikey']] # force the manual-curated version
|
|
150
|
+
if m.annotation['inchikey'] == ['XXXXXXXXXXXXXX-XXXXXXXXXX-X']:
|
|
151
|
+
m.annotation['inchikey'] = []
|
|
152
|
+
|
|
153
|
+
|
|
146
154
|
# add SBO annotation
|
|
147
155
|
m.annotation['sbo'] = ['SBO:0000247'] # generic metabolite
|
|
148
156
|
|
|
@@ -17,6 +17,7 @@ from ..commons import show_contributions
|
|
|
17
17
|
from ..commons import adjust_biomass_precursors
|
|
18
18
|
from ..commons import count_undrawn_rids
|
|
19
19
|
from ..commons import format_expansion
|
|
20
|
+
from ..commons import download_keggorg
|
|
20
21
|
|
|
21
22
|
from .introduce import introduce_metabolites
|
|
22
23
|
from .introduce import introduce_reactions
|
|
@@ -72,7 +73,14 @@ def main(args, logger):
|
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
# format the --eggnog param
|
|
75
|
-
args.eggnog = format_expansion(logger, args.eggnog)
|
|
76
|
+
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
77
|
+
|
|
78
|
+
# get the kegg organism if requested
|
|
79
|
+
if args.keggorg != '-':
|
|
80
|
+
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
81
|
+
if response == 1: return 1
|
|
82
|
+
|
|
83
|
+
|
|
76
84
|
|
|
77
85
|
|
|
78
86
|
# check and extract the required 'gsrap.maps' file
|
|
@@ -153,7 +161,7 @@ def main(args, logger):
|
|
|
153
161
|
|
|
154
162
|
###### CHECKS 1
|
|
155
163
|
# check universe completness
|
|
156
|
-
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, idcollection_dict, summary_dict)
|
|
164
|
+
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
157
165
|
if type(df_C)==int: return 1
|
|
158
166
|
|
|
159
167
|
|
gsrap/parsedb/completeness.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
import pickle
|
|
3
|
+
import os
|
|
2
4
|
|
|
3
5
|
|
|
4
6
|
import pandas as pnd
|
|
@@ -35,14 +37,39 @@ def parse_eggnog(model, eggnog, idcollection_dict):
|
|
|
35
37
|
return krs_org
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
|
|
41
|
+
def parse_keggorg(keggorg, outdir, idcollection_dict):
    """Return the KEGG reaction IDs supported by at least one KO of the organism.

    The gene table is read from ``<outdir>/<keggorg>.keggorg``; the non-missing
    values of its 'ko' column form the organism's KO set. A reaction is kept
    when any of the KOs listed for it in ``idcollection_dict['kr_to_kos']``
    belongs to that set.

    Args:
        keggorg: stem of the pickle file to read.
        outdir: directory containing the ``.keggorg`` pickle.
        idcollection_dict: dict providing 'kr_to_kos', a mapping from a
            reaction ID to an iterable of KO IDs.

    Returns:
        A set of reaction IDs (keys of 'kr_to_kos').

    Raises:
        ValueError: if gene IDs are duplicated (``verify_integrity=True``).
    """

    # NOTE(review): pickle.load can execute arbitrary code; this assumes the
    # file was written by this tool itself -- confirm it is never user-supplied.
    with open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb') as handler:
        df_keggorg = pickle.load(handler)
    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)

    # PART 1. get KO codes available (genes without a KO hold a missing value)
    kos_org = set(df_keggorg['ko'].dropna())

    # PART 2. get reactions in the organism (even if the GPR is not complete)
    kr_to_kos = idcollection_dict['kr_to_kos']
    krs_org = {
        kr for kr, kos in kr_to_kos.items()
        if any(ko in kos_org for ko in kos)
    }

    return krs_org
|
|
60
|
+
|
|
61
|
+
|
|
38
62
|
|
|
39
|
-
def check_completeness(logger, model, progress, module, focus, eggnog, idcollection_dict, summary_dict):
|
|
63
|
+
def check_completeness(logger, model, progress, module, focus, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
|
|
40
64
|
# check KEGG annotations in the universe model to get '%' of completeness per pathway/module.
|
|
41
65
|
|
|
42
66
|
|
|
43
67
|
# get the reference set of kr codes (all kegg or organism specific):
|
|
44
68
|
kr_uni = set()
|
|
45
|
-
if
|
|
69
|
+
if keggorg != '-': # keggorg has precedence
|
|
70
|
+
kr_uni = parse_keggorg(keggorg, outdir, idcollection_dict)
|
|
71
|
+
kr_uni_label = f"organism code '{keggorg}'"
|
|
72
|
+
elif eggnog != '-':
|
|
46
73
|
for eggfile in eggnog:
|
|
47
74
|
eggset = parse_eggnog(model, eggfile, idcollection_dict)
|
|
48
75
|
kr_uni = kr_uni.union(eggset)
|
|
@@ -60,7 +87,7 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
|
|
|
60
87
|
kr_ids_modeled.add(kr_id)
|
|
61
88
|
kr_uni_missing = kr_uni - kr_ids_modeled
|
|
62
89
|
kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
|
|
63
|
-
logger.info(f"Coverage for
|
|
90
|
+
logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
|
|
64
91
|
|
|
65
92
|
|
|
66
93
|
# define the map?????, containing krs not included in maps
|
|
@@ -177,15 +204,15 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
|
|
|
177
204
|
for eggfile in eggnog:
|
|
178
205
|
strain = Path(eggfile).stem
|
|
179
206
|
eggset = parse_eggnog(model, eggfile, idcollection_dict)
|
|
180
|
-
col = df_coverage.index.to_series().isin(eggset).astype(int)
|
|
207
|
+
col = df_coverage.index.to_series().isin(eggset).astype(int) # integer: 0 or 1
|
|
181
208
|
df_strains.append(col.rename(strain))
|
|
182
209
|
df_strains = pnd.concat(df_strains, axis=1)
|
|
183
210
|
# sort rows: upper rows are present in more strains
|
|
184
|
-
df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index]
|
|
211
|
+
#df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index] # commented: now in charge of figures.py
|
|
185
212
|
df_coverage = df_coverage.loc[df_strains.index]
|
|
186
213
|
df_coverage = pnd.concat([df_coverage, df_strains], axis=1)
|
|
187
214
|
# split in 2: modeled above, non-modeled below:
|
|
188
|
-
df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]])
|
|
215
|
+
#df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]]) # commented: now in charge of figures.py
|
|
189
216
|
else: # not interesting in a super-long table without strains in column
|
|
190
217
|
df_coverage = None
|
|
191
218
|
|
gsrap/parsedb/introduce.py
CHANGED
|
@@ -143,6 +143,14 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
|
|
|
143
143
|
m.annotation[ankey] = list(m.annotation[ankey])
|
|
144
144
|
|
|
145
145
|
|
|
146
|
+
# replace inchikey with manually-curated
|
|
147
|
+
if m.annotation['inchikey'] != [] and m.annotation['inchikey'] != [row['inchikey']]:
|
|
148
|
+
logger.debug(f"Metabolite '{pure_mid}': manual-curated inchikey ({[row['inchikey']]}) is diferent from the one derived from MNX ({m.annotation['inchikey']}).")
|
|
149
|
+
m.annotation['inchikey'] = [row['inchikey']] # force the manual-curated version
|
|
150
|
+
if m.annotation['inchikey'] == ['XXXXXXXXXXXXXX-XXXXXXXXXX-X']:
|
|
151
|
+
m.annotation['inchikey'] = []
|
|
152
|
+
|
|
153
|
+
|
|
146
154
|
# add SBO annotation
|
|
147
155
|
m.annotation['sbo'] = ['SBO:0000247'] # generic metabolite
|
|
148
156
|
|
gsrap/parsedb/parsedb.py
CHANGED
|
@@ -17,6 +17,7 @@ from ..commons import show_contributions
|
|
|
17
17
|
from ..commons import adjust_biomass_precursors
|
|
18
18
|
from ..commons import count_undrawn_rids
|
|
19
19
|
from ..commons import format_expansion
|
|
20
|
+
from ..commons import download_keggorg
|
|
20
21
|
|
|
21
22
|
from .introduce import introduce_metabolites
|
|
22
23
|
from .introduce import introduce_reactions
|
|
@@ -72,7 +73,14 @@ def main(args, logger):
|
|
|
72
73
|
|
|
73
74
|
|
|
74
75
|
# format the --eggnog param
|
|
75
|
-
args.eggnog = format_expansion(logger, args.eggnog)
|
|
76
|
+
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
77
|
+
|
|
78
|
+
# get the kegg organism if requested
|
|
79
|
+
if args.keggorg != '-':
|
|
80
|
+
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
81
|
+
if response == 1: return 1
|
|
82
|
+
|
|
83
|
+
|
|
76
84
|
|
|
77
85
|
|
|
78
86
|
# check and extract the required 'gsrap.maps' file
|
|
@@ -153,7 +161,7 @@ def main(args, logger):
|
|
|
153
161
|
|
|
154
162
|
###### CHECKS 1
|
|
155
163
|
# check universe completness
|
|
156
|
-
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, idcollection_dict, summary_dict)
|
|
164
|
+
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
157
165
|
if type(df_C)==int: return 1
|
|
158
166
|
|
|
159
167
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
gsrap/.ipynb_checkpoints/__init__-checkpoint.py,sha256=
|
|
2
|
-
gsrap/__init__.py,sha256=
|
|
1
|
+
gsrap/.ipynb_checkpoints/__init__-checkpoint.py,sha256=JlR5ICkhkJwQrMHJOFOZg1R-0aYQAZu9h51SlC67Mtw,14064
|
|
2
|
+
gsrap/__init__.py,sha256=JlR5ICkhkJwQrMHJOFOZg1R-0aYQAZu9h51SlC67Mtw,14064
|
|
3
3
|
gsrap/assets/.ipynb_checkpoints/PM1-checkpoint.csv,sha256=0qjaMVG_t9aFxbHbxON6ecmEUnWPwN9nhmxc61QFeCU,8761
|
|
4
4
|
gsrap/assets/.ipynb_checkpoints/PM2A-checkpoint.csv,sha256=rjYTdwe8lpRS552BYiUP3J71juG2ywVdR5Sux6fjZTY,8816
|
|
5
5
|
gsrap/assets/.ipynb_checkpoints/PM3B-checkpoint.csv,sha256=42IGX_2O5bRYSiHoMuVKT-T-bzVj0cSRZBvGOrbnQMA,8130
|
|
@@ -11,26 +11,28 @@ gsrap/assets/PM4A.csv,sha256=f_5__0Ap_T0KYje5h9veW29I2qB4yU0h7Hr7WpaHjSc,9081
|
|
|
11
11
|
gsrap/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
gsrap/assets/kegg_compound_to_others.pickle,sha256=pz1897cfQ7PLsYZiBVcoMQPzvRzT-nHUdgphBe0g5ZQ,8233860
|
|
13
13
|
gsrap/assets/kegg_reaction_to_others.pickle,sha256=AGW8CGN5hKeXZoYn3JRF4Xu832WyNrTlMcLw7luttlc,1703146
|
|
14
|
-
gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py,sha256=
|
|
14
|
+
gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py,sha256=QuHINLSWNb0XKagHRWXzU5UVxw3ECTncvR7llRKblso,241
|
|
15
15
|
gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py,sha256=4u7WBaUgo42tBoXDU1D0VUjICatb44e0jfswZrBeHYs,17987
|
|
16
16
|
gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py,sha256=qI3_GuqHkeA2KbK9pYdkqJaFwYemAVZJGLRR4QtHt6w,19182
|
|
17
|
-
gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py,sha256=
|
|
18
|
-
gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py,sha256=
|
|
19
|
-
gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py,sha256=
|
|
20
|
-
gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py,sha256=
|
|
17
|
+
gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py,sha256=e-7ffMD4R07MWEgXyGcwjhScbWnG7A3L100YWbpNMk0,8461
|
|
18
|
+
gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py,sha256=_y0TgM0-Im0RT8W8z5rr4vlnGK55iRFds6DlDsjGD-8,1151
|
|
19
|
+
gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py,sha256=zFTR6H7I3XeAt9_ORL0dfrwMRvshU3JPRmGif7fSY7w,7971
|
|
20
|
+
gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py,sha256=IRHSQXrCi4SQoISEfNB0rDhvUzbjcgsPi9zUSefsRto,4316
|
|
21
21
|
gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py,sha256=jgC3-vI9Tbjvqohh2mJwFra4rl_pbUzHWrSa_QAxVO4,1262
|
|
22
|
+
gsrap/commons/.ipynb_checkpoints/keggutils-checkpoint.py,sha256=M2nhHRiNH_xObHSxOIdt7ix59MrPdl9q3HNICC8X36M,4514
|
|
22
23
|
gsrap/commons/.ipynb_checkpoints/logutils-checkpoint.py,sha256=VsnrkIsUftS3MOOwAd0n0peQ7a2X5ZEx930eCtzmW7g,1317
|
|
23
24
|
gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py,sha256=VYKN8X1PNERP6uQDbznZXfgflLEvnw4j1T8AIAdrE7s,2902
|
|
24
25
|
gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py,sha256=gvqF2c0e31m5qQWQ11JF4-eMqxtuONy_7lUiC7uaXX4,3291
|
|
25
26
|
gsrap/commons/.ipynb_checkpoints/sbmlutils-checkpoint.py,sha256=gkY02qbGXrbYStn2F8J0KM0fmqati2Nbvi128EF7coI,365
|
|
26
|
-
gsrap/commons/__init__.py,sha256=
|
|
27
|
+
gsrap/commons/__init__.py,sha256=QuHINLSWNb0XKagHRWXzU5UVxw3ECTncvR7llRKblso,241
|
|
27
28
|
gsrap/commons/biomass.py,sha256=4u7WBaUgo42tBoXDU1D0VUjICatb44e0jfswZrBeHYs,17987
|
|
28
29
|
gsrap/commons/coeffs.py,sha256=qI3_GuqHkeA2KbK9pYdkqJaFwYemAVZJGLRR4QtHt6w,19182
|
|
29
|
-
gsrap/commons/downloads.py,sha256=
|
|
30
|
-
gsrap/commons/escherutils.py,sha256=
|
|
31
|
-
gsrap/commons/excelhub.py,sha256=
|
|
32
|
-
gsrap/commons/figures.py,sha256=
|
|
30
|
+
gsrap/commons/downloads.py,sha256=e-7ffMD4R07MWEgXyGcwjhScbWnG7A3L100YWbpNMk0,8461
|
|
31
|
+
gsrap/commons/escherutils.py,sha256=_y0TgM0-Im0RT8W8z5rr4vlnGK55iRFds6DlDsjGD-8,1151
|
|
32
|
+
gsrap/commons/excelhub.py,sha256=zFTR6H7I3XeAt9_ORL0dfrwMRvshU3JPRmGif7fSY7w,7971
|
|
33
|
+
gsrap/commons/figures.py,sha256=IRHSQXrCi4SQoISEfNB0rDhvUzbjcgsPi9zUSefsRto,4316
|
|
33
34
|
gsrap/commons/fluxbal.py,sha256=jgC3-vI9Tbjvqohh2mJwFra4rl_pbUzHWrSa_QAxVO4,1262
|
|
35
|
+
gsrap/commons/keggutils.py,sha256=M2nhHRiNH_xObHSxOIdt7ix59MrPdl9q3HNICC8X36M,4514
|
|
34
36
|
gsrap/commons/logutils.py,sha256=VsnrkIsUftS3MOOwAd0n0peQ7a2X5ZEx930eCtzmW7g,1317
|
|
35
37
|
gsrap/commons/medium.py,sha256=VYKN8X1PNERP6uQDbznZXfgflLEvnw4j1T8AIAdrE7s,2902
|
|
36
38
|
gsrap/commons/metrics.py,sha256=gvqF2c0e31m5qQWQ11JF4-eMqxtuONy_7lUiC7uaXX4,3291
|
|
@@ -45,29 +47,29 @@ gsrap/mkmodel/.ipynb_checkpoints/__init__-checkpoint.py,sha256=PNze-26HMOwfdJ92K
|
|
|
45
47
|
gsrap/mkmodel/.ipynb_checkpoints/biologcuration-checkpoint.py,sha256=Nn7z-js-mzzeO23kVM2L7sJ5PNle7AkCUeBcEAYjlFU,15378
|
|
46
48
|
gsrap/mkmodel/.ipynb_checkpoints/gapfill-checkpoint.py,sha256=BPZw4sszlBhAYfHnV0pA7EpG0b2ePwS6kUfFt0Ww-ss,5159
|
|
47
49
|
gsrap/mkmodel/.ipynb_checkpoints/gapfillutils-checkpoint.py,sha256=S6nFUZ1Bbdf13nVJhGK2S5C_V3hd5zwTg2o5nzejngg,3123
|
|
48
|
-
gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py,sha256=
|
|
50
|
+
gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py,sha256=0ekGXmNULzVbkl6QW_Z8xDrHlevgtVDWEURJJR2uQRM,10323
|
|
49
51
|
gsrap/mkmodel/.ipynb_checkpoints/polishing-checkpoint.py,sha256=R1UdFPxN8N27Iu0jsYW2N_1BkWEbBHaMYW6NkCYZK_k,3256
|
|
50
|
-
gsrap/mkmodel/.ipynb_checkpoints/pruner-checkpoint.py,sha256=
|
|
52
|
+
gsrap/mkmodel/.ipynb_checkpoints/pruner-checkpoint.py,sha256=FAZid-0H6j66wR2dVKRAaMaDREVt1edflmZXbX7blXg,9836
|
|
51
53
|
gsrap/mkmodel/__init__.py,sha256=PNze-26HMOwfdJ92KiXpr--VV1ftVfo3CAxBZgeokp8,92
|
|
52
54
|
gsrap/mkmodel/biologcuration.py,sha256=Nn7z-js-mzzeO23kVM2L7sJ5PNle7AkCUeBcEAYjlFU,15378
|
|
53
55
|
gsrap/mkmodel/gapfill.py,sha256=BPZw4sszlBhAYfHnV0pA7EpG0b2ePwS6kUfFt0Ww-ss,5159
|
|
54
56
|
gsrap/mkmodel/gapfillutils.py,sha256=S6nFUZ1Bbdf13nVJhGK2S5C_V3hd5zwTg2o5nzejngg,3123
|
|
55
|
-
gsrap/mkmodel/mkmodel.py,sha256=
|
|
57
|
+
gsrap/mkmodel/mkmodel.py,sha256=0ekGXmNULzVbkl6QW_Z8xDrHlevgtVDWEURJJR2uQRM,10323
|
|
56
58
|
gsrap/mkmodel/polishing.py,sha256=R1UdFPxN8N27Iu0jsYW2N_1BkWEbBHaMYW6NkCYZK_k,3256
|
|
57
|
-
gsrap/mkmodel/pruner.py,sha256=
|
|
59
|
+
gsrap/mkmodel/pruner.py,sha256=FAZid-0H6j66wR2dVKRAaMaDREVt1edflmZXbX7blXg,9836
|
|
58
60
|
gsrap/parsedb/.ipynb_checkpoints/__init__-checkpoint.py,sha256=1k2K1gz4lIdXAwHEdJ0OhdkPu83woGv0Z4TpT1kGrTk,97
|
|
59
61
|
gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py,sha256=Y02_zXJj_tS1GyBdfuLBy9YJjMgx3mjX6tqr1KhQ-9Q,4810
|
|
60
|
-
gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py,sha256=
|
|
61
|
-
gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py,sha256=
|
|
62
|
+
gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py,sha256=yhFiEslK1qmMCk_GWZ7UZtX02FUqLU39UafG5886WsY,12016
|
|
63
|
+
gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py,sha256=TpW-Hp_rq6AGUQ-IVFwU8Vhij6poKWz8EF-NhdsAOsI,17414
|
|
62
64
|
gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py,sha256=F16wU8vLyM6V4F611ABuMJtwSAskL5KEgCJ7EQm_F9Y,2177
|
|
63
|
-
gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py,sha256=
|
|
65
|
+
gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py,sha256=OPc8PrTVD2szrmvZISlyhP1Q51AlaoQ_EghAJs4jfFU,7465
|
|
64
66
|
gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py,sha256=9PgsSw-H84eN_dFUwK5FLgbqvydsdic4-VjCrZqkfnY,5703
|
|
65
67
|
gsrap/parsedb/__init__.py,sha256=1k2K1gz4lIdXAwHEdJ0OhdkPu83woGv0Z4TpT1kGrTk,97
|
|
66
68
|
gsrap/parsedb/annotation.py,sha256=Y02_zXJj_tS1GyBdfuLBy9YJjMgx3mjX6tqr1KhQ-9Q,4810
|
|
67
|
-
gsrap/parsedb/completeness.py,sha256=
|
|
68
|
-
gsrap/parsedb/introduce.py,sha256=
|
|
69
|
+
gsrap/parsedb/completeness.py,sha256=yhFiEslK1qmMCk_GWZ7UZtX02FUqLU39UafG5886WsY,12016
|
|
70
|
+
gsrap/parsedb/introduce.py,sha256=TpW-Hp_rq6AGUQ-IVFwU8Vhij6poKWz8EF-NhdsAOsI,17414
|
|
69
71
|
gsrap/parsedb/manual.py,sha256=F16wU8vLyM6V4F611ABuMJtwSAskL5KEgCJ7EQm_F9Y,2177
|
|
70
|
-
gsrap/parsedb/parsedb.py,sha256=
|
|
72
|
+
gsrap/parsedb/parsedb.py,sha256=OPc8PrTVD2szrmvZISlyhP1Q51AlaoQ_EghAJs4jfFU,7465
|
|
71
73
|
gsrap/parsedb/repeating.py,sha256=9PgsSw-H84eN_dFUwK5FLgbqvydsdic4-VjCrZqkfnY,5703
|
|
72
74
|
gsrap/runsims/.ipynb_checkpoints/__init__-checkpoint.py,sha256=6E6E1gWgH0V7ls4Omx4mxxC85gMJ_27YqhjugJzlZtY,97
|
|
73
75
|
gsrap/runsims/.ipynb_checkpoints/biosynth-checkpoint.py,sha256=fUlHUo4CfB4rGX9Dth87B1p5E5sz7i6spR7ZoqDDGaI,2836
|
|
@@ -87,8 +89,8 @@ gsrap/runsims/precursors.py,sha256=1RNt_Rxs0L1lolDmYh4_CiZgiwHfU5B_AcomJO6vJ28,2
|
|
|
87
89
|
gsrap/runsims/runsims.py,sha256=2FC5Gs8oSYyZTjHF3A7aXB_O6myVfcn3bCxQfLJlZTk,2842
|
|
88
90
|
gsrap/runsims/simplegrowth.py,sha256=tCQHTMUqum1YwlBKRTNaQoag2co_yQlCaKmISOARAlE,2353
|
|
89
91
|
gsrap/runsims/singleomission.py,sha256=jMuKAi0pINP8Jlrm-yI-tX7D110VzttR3YfTSnDRe4I,2847
|
|
90
|
-
gsrap-0.
|
|
91
|
-
gsrap-0.
|
|
92
|
-
gsrap-0.
|
|
93
|
-
gsrap-0.
|
|
94
|
-
gsrap-0.
|
|
92
|
+
gsrap-0.8.0.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
93
|
+
gsrap-0.8.0.dist-info/METADATA,sha256=T5COI9B29df6TdmUbZvFrQsficCTN3SVzJGHJ5yw4Us,898
|
|
94
|
+
gsrap-0.8.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
95
|
+
gsrap-0.8.0.dist-info/entry_points.txt,sha256=S9MY0DjfnbKGlZbp5bV7W6dNFy3APoEV84u9x6MV1eI,36
|
|
96
|
+
gsrap-0.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|