gsrap 0.8.2__tar.gz → 0.8.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gsrap-0.8.2 → gsrap-0.8.3}/PKG-INFO +1 -1
- {gsrap-0.8.2 → gsrap-0.8.3}/pyproject.toml +1 -1
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/.ipynb_checkpoints/__init__-checkpoint.py +1 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/__init__.py +1 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +34 -3
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +2 -2
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/downloads.py +34 -3
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/excelhub.py +2 -2
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/getmaps/.ipynb_checkpoints/getmaps-checkpoint.py +14 -5
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/getmaps/.ipynb_checkpoints/kdown-checkpoint.py +75 -4
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/getmaps/getmaps.py +14 -5
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/getmaps/kdown.py +75 -4
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +32 -5
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +23 -12
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/completeness.py +32 -5
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/parsedb.py +23 -12
- {gsrap-0.8.2 → gsrap-0.8.3}/LICENSE.txt +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/README.md +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/.ipynb_checkpoints/PM1-checkpoint.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/.ipynb_checkpoints/PM2A-checkpoint.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/.ipynb_checkpoints/PM3B-checkpoint.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/.ipynb_checkpoints/PM4A-checkpoint.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/PM1.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/PM2A.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/PM3B.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/PM4A.csv +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/__init__.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/kegg_compound_to_others.pickle +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/assets/kegg_reaction_to_others.pickle +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/fluxbal-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/keggutils-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/logutils-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/memoteutils-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/.ipynb_checkpoints/sbmlutils-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/__init__.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/biomass.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/coeffs.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/escherutils.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/figures.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/fluxbal.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/keggutils.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/logutils.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/medium.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/memoteutils.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/metrics.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/commons/sbmlutils.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/getmaps/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/getmaps/__init__.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/biologcuration-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/gapfill-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/gapfillutils-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/polishing-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/pruner-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/__init__.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/biologcuration.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/gapfill.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/gapfillutils.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/mkmodel.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/polishing.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/pruner.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/cycles-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/__init__.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/annotation.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/cycles.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/introduce.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/manual.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/parsedb/repeating.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/__init__-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/biosynth-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/cnps-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/essentialgenes-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/growthfactors-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/precursors-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/runsims-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/simplegrowth-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/singleomission-checkpoint.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/__init__.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/biosynth.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/cnps.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/essentialgenes.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/growthfactors.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/precursors.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/runsims.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/simplegrowth.py +0 -0
- {gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/singleomission.py +0 -0
|
@@ -75,6 +75,7 @@ def main():
|
|
|
75
75
|
parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
|
|
76
76
|
parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
|
|
77
77
|
parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
|
|
78
|
+
parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
|
|
78
79
|
parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
|
|
79
80
|
parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
|
|
80
81
|
parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
|
|
@@ -75,6 +75,7 @@ def main():
|
|
|
75
75
|
parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
|
|
76
76
|
parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
|
|
77
77
|
parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
|
|
78
|
+
parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
|
|
78
79
|
parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
|
|
79
80
|
parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
|
|
80
81
|
parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
|
|
@@ -243,7 +243,38 @@ def format_expansion(logger, eggnog):
|
|
|
243
243
|
|
|
244
244
|
|
|
245
245
|
|
|
246
|
+
def check_taxon(logger, taxon, idcollection_dict):
    """Validate a ``'{level}:{name}'`` taxon string against the loaded maps.

    Args:
        logger: Object exposing ``error(msg)``; receives the reason of any
            validation failure.
        taxon: String expected in the form ``'{level}:{name}'``, where
            ``{level}`` is ``'kingdom'`` or ``'phylum'``.
        idcollection_dict: Mapping that must contain a ``'ko_to_taxa'`` entry,
            read here as ``{ko: {level: iterable of taxon names}}``.

    Returns:
        int: 0 if ``taxon`` is acceptable, 1 otherwise (an error is logged).
    """

    # verify presence of needed assets
    if 'ko_to_taxa' not in idcollection_dict:
        logger.error("Asset 'ko_to_taxa' not found in 'gsrap.maps'. Please update 'gsrap.maps' with 'gsrap getmaps'.")
        return 1

    # extract level and name
    try:
        level, name = taxon.split(':')
    except (ValueError, AttributeError, TypeError):
        # ValueError: not exactly one ':' separator.
        # AttributeError / TypeError: 'taxon' is not a str.
        logger.error(f"Provided --taxon is not well formatted: '{taxon}'.")
        return 1

    # compute available levels and check
    avail_levels = {'kingdom', 'phylum'}
    if level not in avail_levels:
        logger.error(f"Provided level is not acceptable: '{level}' (see --taxon). Acceptable levels are {avail_levels}.")
        return 1

    # compute available taxa at input level (union over all KOs)
    avail_taxa_at_level = set()
    for taxa_by_level in idcollection_dict['ko_to_taxa'].values():
        avail_taxa_at_level.update(taxa_by_level[level])
    if name not in avail_taxa_at_level:
        logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
        return 1

    return 0
|
|
@@ -148,7 +148,7 @@ def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B,
|
|
|
148
148
|
else: df_T.append(row_dict)
|
|
149
149
|
|
|
150
150
|
for g in model.genes:
|
|
151
|
-
row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
|
|
151
|
+
row_dict = {'gid': g.id, 'name': g.name, 'involved_in': '; '.join([r.id for r in g.reactions])}
|
|
152
152
|
|
|
153
153
|
for db in g.annotation.keys():
|
|
154
154
|
annots = g.annotation[db]
|
|
@@ -171,7 +171,7 @@ def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B,
|
|
|
171
171
|
df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
|
|
172
172
|
df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
|
|
173
173
|
df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
|
|
174
|
-
df_G_first_cols = ['gid', 'involved_in']
|
|
174
|
+
df_G_first_cols = ['gid', 'name', 'involved_in']
|
|
175
175
|
df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
|
|
176
176
|
|
|
177
177
|
|
|
@@ -243,7 +243,38 @@ def format_expansion(logger, eggnog):
|
|
|
243
243
|
|
|
244
244
|
|
|
245
245
|
|
|
246
|
+
def check_taxon(logger, taxon, idcollection_dict):
    """Validate a ``'{level}:{name}'`` taxon string against the loaded maps.

    Args:
        logger: Object exposing ``error(msg)``; receives the reason of any
            validation failure.
        taxon: String expected in the form ``'{level}:{name}'``, where
            ``{level}`` is ``'kingdom'`` or ``'phylum'``.
        idcollection_dict: Mapping that must contain a ``'ko_to_taxa'`` entry,
            read here as ``{ko: {level: iterable of taxon names}}``.

    Returns:
        int: 0 if ``taxon`` is acceptable, 1 otherwise (an error is logged).
    """

    # verify presence of needed assets
    if 'ko_to_taxa' not in idcollection_dict:
        logger.error("Asset 'ko_to_taxa' not found in 'gsrap.maps'. Please update 'gsrap.maps' with 'gsrap getmaps'.")
        return 1

    # extract level and name
    try:
        level, name = taxon.split(':')
    except (ValueError, AttributeError, TypeError):
        # ValueError: not exactly one ':' separator.
        # AttributeError / TypeError: 'taxon' is not a str.
        logger.error(f"Provided --taxon is not well formatted: '{taxon}'.")
        return 1

    # compute available levels and check
    avail_levels = {'kingdom', 'phylum'}
    if level not in avail_levels:
        logger.error(f"Provided level is not acceptable: '{level}' (see --taxon). Acceptable levels are {avail_levels}.")
        return 1

    # compute available taxa at input level (union over all KOs)
    avail_taxa_at_level = set()
    for taxa_by_level in idcollection_dict['ko_to_taxa'].values():
        avail_taxa_at_level.update(taxa_by_level[level])
    if name not in avail_taxa_at_level:
        logger.error(f"Provided taxon name is not acceptable: '{name}' (see --taxon). Acceptable taxon names for level '{level}' are {avail_taxa_at_level}.")
        return 1

    return 0
|
|
@@ -148,7 +148,7 @@ def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B,
|
|
|
148
148
|
else: df_T.append(row_dict)
|
|
149
149
|
|
|
150
150
|
for g in model.genes:
|
|
151
|
-
row_dict = {'gid': g.id, 'involved_in': '; '.join([r.id for r in g.reactions])}
|
|
151
|
+
row_dict = {'gid': g.id, 'name': g.name, 'involved_in': '; '.join([r.id for r in g.reactions])}
|
|
152
152
|
|
|
153
153
|
for db in g.annotation.keys():
|
|
154
154
|
annots = g.annotation[db]
|
|
@@ -171,7 +171,7 @@ def write_excel_model(model, filepath, nofigs, memote_results_dict, df_E, df_B,
|
|
|
171
171
|
df_R = df_R[df_R_first_cols + sorted([c for c in df_R.columns if c not in df_R_first_cols])]
|
|
172
172
|
df_T = df_T[df_R_first_cols + sorted([c for c in df_T.columns if c not in df_R_first_cols])]
|
|
173
173
|
df_A = df_A[df_R_first_cols + sorted([c for c in df_A.columns if c not in df_R_first_cols])]
|
|
174
|
-
df_G_first_cols = ['gid', 'involved_in']
|
|
174
|
+
df_G_first_cols = ['gid', 'name', 'involved_in']
|
|
175
175
|
df_G = df_G[df_G_first_cols + sorted([c for c in df_G.columns if c not in df_G_first_cols])]
|
|
176
176
|
|
|
177
177
|
|
|
@@ -4,6 +4,7 @@ import pickle
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
from .kdown import download_raw_txtfiles
|
|
7
|
+
from .kdown import create_dict_keggorg
|
|
7
8
|
from .kdown import create_dict_ko
|
|
8
9
|
from .kdown import create_dict_c
|
|
9
10
|
from .kdown import create_dict_r
|
|
@@ -20,13 +21,19 @@ def do_kdown(logger, outdir, usecache, keeptmp):
|
|
|
20
21
|
logger.info(f"Respectfully retrieving metabolic information from KEGG. Raw data are being saved into '{outdir}/kdown/'. Be patient, could take a couple of days...")
|
|
21
22
|
os.makedirs(f'{outdir}/kdown/', exist_ok=True)
|
|
22
23
|
|
|
24
|
+
|
|
23
25
|
response = download_raw_txtfiles(logger, outdir, usecache)
|
|
24
26
|
if type(response) == int: return 1
|
|
25
27
|
else: RELEASE_kegg = response
|
|
26
28
|
|
|
29
|
+
|
|
27
30
|
|
|
28
31
|
logger.info("Parsing downloaded KEGG information...")
|
|
29
|
-
|
|
32
|
+
|
|
33
|
+
response = create_dict_keggorg(logger, outdir)
|
|
34
|
+
if type(response) == int: return 1
|
|
35
|
+
else: dict_keggorg = response
|
|
36
|
+
|
|
30
37
|
response = create_dict_ko(logger, outdir)
|
|
31
38
|
if type(response) == int: return 1
|
|
32
39
|
else: dict_ko = response
|
|
@@ -49,7 +56,7 @@ def do_kdown(logger, outdir, usecache, keeptmp):
|
|
|
49
56
|
|
|
50
57
|
|
|
51
58
|
# create 'idcollection_dict' and 'summary_dict' dictionaries
|
|
52
|
-
idcollection_dict = create_idcollection_dict(dict_ko, dict_c, dict_r, dict_map, dict_md)
|
|
59
|
+
idcollection_dict = create_idcollection_dict(dict_keggorg, dict_ko, dict_c, dict_r, dict_map, dict_md)
|
|
53
60
|
summary_dict = create_summary_dict(dict_c, dict_r, dict_map, dict_md)
|
|
54
61
|
|
|
55
62
|
|
|
@@ -57,7 +64,6 @@ def do_kdown(logger, outdir, usecache, keeptmp):
|
|
|
57
64
|
|
|
58
65
|
|
|
59
66
|
|
|
60
|
-
|
|
61
67
|
def main(args, logger):
|
|
62
68
|
|
|
63
69
|
|
|
@@ -67,7 +73,7 @@ def main(args, logger):
|
|
|
67
73
|
os.makedirs(f'{args.outdir}/', exist_ok=True)
|
|
68
74
|
|
|
69
75
|
|
|
70
|
-
# KEGG
|
|
76
|
+
# KEGG download
|
|
71
77
|
response = do_kdown(logger, args.outdir, args.usecache, args.keeptmp)
|
|
72
78
|
if type(response) == int: return 1
|
|
73
79
|
else: RELEASE_kegg, idcollection_dict, summary_dict = response[0], response[1], response[2]
|
|
@@ -76,7 +82,9 @@ def main(args, logger):
|
|
|
76
82
|
# create 'gsrap.maps':
|
|
77
83
|
with open(f'{args.outdir}/gsrap.maps', 'wb') as wb_handler:
|
|
78
84
|
pickle.dump({
|
|
79
|
-
'RELEASE_kegg': RELEASE_kegg,
|
|
85
|
+
'RELEASE_kegg': RELEASE_kegg,
|
|
86
|
+
'idcollection_dict': idcollection_dict,
|
|
87
|
+
'summary_dict': summary_dict,
|
|
80
88
|
}, wb_handler)
|
|
81
89
|
logger.info(f"'{args.outdir}/gsrap.maps' created!")
|
|
82
90
|
|
|
@@ -87,4 +95,5 @@ def main(args, logger):
|
|
|
87
95
|
logger.info(f"Temporary raw files deleted!")
|
|
88
96
|
|
|
89
97
|
|
|
98
|
+
|
|
90
99
|
return 0
|
|
@@ -34,6 +34,7 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
34
34
|
'orthology',
|
|
35
35
|
'module',
|
|
36
36
|
'pathway',
|
|
37
|
+
'organism',
|
|
37
38
|
]
|
|
38
39
|
for db in databases:
|
|
39
40
|
time.sleep(0.5)
|
|
@@ -45,8 +46,9 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
45
46
|
|
|
46
47
|
# mix the items to download to be respectful/compliant
|
|
47
48
|
items_to_download = []
|
|
48
|
-
|
|
49
49
|
for db in databases:
|
|
50
|
+
if db == 'organism':
|
|
51
|
+
continue # here we just need the list
|
|
50
52
|
with open(f"{outdir}/kdown/{db}.txt", 'r') as file:
|
|
51
53
|
res_string = file.read()
|
|
52
54
|
rows = res_string.split('\n')
|
|
@@ -54,7 +56,6 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
54
56
|
item_id = row.split('\t', 1)[0]
|
|
55
57
|
if item_id == '': continue
|
|
56
58
|
items_to_download.append({'db': db, 'id': item_id})
|
|
57
|
-
|
|
58
59
|
random.shuffle(items_to_download)
|
|
59
60
|
|
|
60
61
|
|
|
@@ -79,6 +80,51 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
79
80
|
|
|
80
81
|
|
|
81
82
|
|
|
83
|
+
def create_dict_keggorg(logger, outdir):
    """Parse ``{outdir}/kdown/organism.txt`` into a per-organism taxonomy dict.

    A well-formed line holds 4 tab-separated fields
    (tnumber, keggorg, name, classification); ``classification`` is a
    ';'-separated lineage whose 2nd and 3rd entries are stored as kingdom
    and phylum. Lines that do not match this layout are reported with a
    warning and skipped.

    Args:
        logger: Object exposing ``warning(msg)`` and ``info(msg)``, or None
            to suppress all messages.
        outdir: Directory containing ``kdown/organism.txt``.

    Returns:
        dict: ``{keggorg: {'kingdom': str, 'phylum': str}}``, in file order.

    Raises:
        ValueError: if the same organism code appears on more than one line.
    """

    # context manager so the handle is closed even if reading fails
    with open(f'{outdir}/kdown/organism.txt', 'r') as handle:
        organisms_raw = handle.read()

    # built directly as a dict: only kingdom and phylum are kept,
    # the organism name is dropped to save disk space.
    dict_keggorg = {}
    for line in organisms_raw.strip().split("\n"):
        fields = line.split("\t")
        if len(fields) != 4:
            # never verified during tests!
            if logger is not None:
                logger.warning(f'Strange number of fields found in this line of "organism.txt": """{line}""".')
            continue

        _tnumber, keggorg, _name, classification = fields
        levels = classification.split(";")
        if len(levels) < 3:
            # lineage too short to provide both kingdom and phylum
            if logger is not None:
                logger.warning(f'Fewer than 3 taxonomic levels found in this line of "organism.txt": """{line}""".')
            continue

        if keggorg in dict_keggorg:
            # organism codes are expected to be unique identifiers
            raise ValueError(f"Duplicated organism code in 'organism.txt': '{keggorg}'.")

        dict_keggorg[keggorg] = {
            'kingdom': levels[1],
            'phylum': levels[2],
        }

    if logger is not None:
        logger.info(f'Number of unique items (org): {len(dict_keggorg)}.')
    return dict_keggorg
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
82
128
|
def create_dict_ko(logger, outdir):
|
|
83
129
|
|
|
84
130
|
dict_ko = {} # main output
|
|
@@ -98,6 +144,7 @@ def create_dict_ko(logger, outdir):
|
|
|
98
144
|
'ecs': set(),
|
|
99
145
|
'cogs': set(),
|
|
100
146
|
'gos': set(),
|
|
147
|
+
'keggorgs': set(),
|
|
101
148
|
}
|
|
102
149
|
else:
|
|
103
150
|
logger.error(f"{ko_id} already included!")
|
|
@@ -175,7 +222,13 @@ def create_dict_ko(logger, outdir):
|
|
|
175
222
|
gos = content[len('GO: '):].strip().split(' ')
|
|
176
223
|
for go in gos:
|
|
177
224
|
dict_ko[ko_id]['gos'].add(go)
|
|
178
|
-
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# parse the organism-specific genes
|
|
228
|
+
if curr_header == 'GENES ':
|
|
229
|
+
keggorg = content.split(': ',1)[0]
|
|
230
|
+
dict_ko[ko_id]['keggorgs'].add(keggorg.lower()) # organism.txt has IDs in lowercase
|
|
231
|
+
|
|
179
232
|
|
|
180
233
|
# parse the reactions
|
|
181
234
|
if curr_header == 'REACTION ':
|
|
@@ -547,7 +600,7 @@ def create_dict_md(logger, outdir):
|
|
|
547
600
|
|
|
548
601
|
|
|
549
602
|
|
|
550
|
-
def create_idcollection_dict(dict_ko, dict_c, dict_r, dict_map, dict_md):
|
|
603
|
+
def create_idcollection_dict(dict_keggorg, dict_ko, dict_c, dict_r, dict_map, dict_md):
|
|
551
604
|
|
|
552
605
|
idcollection_dict = {}
|
|
553
606
|
|
|
@@ -620,6 +673,24 @@ def create_idcollection_dict(dict_ko, dict_c, dict_r, dict_map, dict_md):
|
|
|
620
673
|
for go in dict_ko[ko_id]['gos']:
|
|
621
674
|
idcollection_dict['ko_to_gos'][ko_id].add(go)
|
|
622
675
|
|
|
676
|
+
|
|
677
|
+
# creation of 'ko_to_keggorgs' skipped as it takes too much disk space. Replaced with 'ko_to_taxa'.
|
|
678
|
+
idcollection_dict['ko_to_taxa'] = {}
|
|
679
|
+
missing_keggorgs = set()
|
|
680
|
+
for ko_id in dict_ko.keys():
|
|
681
|
+
idcollection_dict['ko_to_taxa'][ko_id] = {'kingdom': set(), 'phylum': set()}
|
|
682
|
+
for keggorg in dict_ko[ko_id]['keggorgs']:
|
|
683
|
+
try:
|
|
684
|
+
kingdom = dict_keggorg[keggorg]['kingdom']
|
|
685
|
+
phylum = dict_keggorg[keggorg]['phylum']
|
|
686
|
+
except:
|
|
687
|
+
if keggorg not in missing_keggorgs:
|
|
688
|
+
missing_keggorgs.add(keggorg)
|
|
689
|
+
#print(f"Organism '{keggorg}' appears in 'orthology/' but not in 'organism.txt'.")
|
|
690
|
+
continue
|
|
691
|
+
idcollection_dict['ko_to_taxa'][ko_id]['kingdom'].add(kingdom)
|
|
692
|
+
idcollection_dict['ko_to_taxa'][ko_id]['phylum'].add(phylum)
|
|
693
|
+
|
|
623
694
|
|
|
624
695
|
idcollection_dict['map_to_name'] = {}
|
|
625
696
|
for map_id in dict_map.keys():
|
|
@@ -4,6 +4,7 @@ import pickle
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
from .kdown import download_raw_txtfiles
|
|
7
|
+
from .kdown import create_dict_keggorg
|
|
7
8
|
from .kdown import create_dict_ko
|
|
8
9
|
from .kdown import create_dict_c
|
|
9
10
|
from .kdown import create_dict_r
|
|
@@ -20,13 +21,19 @@ def do_kdown(logger, outdir, usecache, keeptmp):
|
|
|
20
21
|
logger.info(f"Respectfully retrieving metabolic information from KEGG. Raw data are being saved into '{outdir}/kdown/'. Be patient, could take a couple of days...")
|
|
21
22
|
os.makedirs(f'{outdir}/kdown/', exist_ok=True)
|
|
22
23
|
|
|
24
|
+
|
|
23
25
|
response = download_raw_txtfiles(logger, outdir, usecache)
|
|
24
26
|
if type(response) == int: return 1
|
|
25
27
|
else: RELEASE_kegg = response
|
|
26
28
|
|
|
29
|
+
|
|
27
30
|
|
|
28
31
|
logger.info("Parsing downloaded KEGG information...")
|
|
29
|
-
|
|
32
|
+
|
|
33
|
+
response = create_dict_keggorg(logger, outdir)
|
|
34
|
+
if type(response) == int: return 1
|
|
35
|
+
else: dict_keggorg = response
|
|
36
|
+
|
|
30
37
|
response = create_dict_ko(logger, outdir)
|
|
31
38
|
if type(response) == int: return 1
|
|
32
39
|
else: dict_ko = response
|
|
@@ -49,7 +56,7 @@ def do_kdown(logger, outdir, usecache, keeptmp):
|
|
|
49
56
|
|
|
50
57
|
|
|
51
58
|
# create 'idcollection_dict' and 'summary_dict' dictionaries
|
|
52
|
-
idcollection_dict = create_idcollection_dict(dict_ko, dict_c, dict_r, dict_map, dict_md)
|
|
59
|
+
idcollection_dict = create_idcollection_dict(dict_keggorg, dict_ko, dict_c, dict_r, dict_map, dict_md)
|
|
53
60
|
summary_dict = create_summary_dict(dict_c, dict_r, dict_map, dict_md)
|
|
54
61
|
|
|
55
62
|
|
|
@@ -57,7 +64,6 @@ def do_kdown(logger, outdir, usecache, keeptmp):
|
|
|
57
64
|
|
|
58
65
|
|
|
59
66
|
|
|
60
|
-
|
|
61
67
|
def main(args, logger):
|
|
62
68
|
|
|
63
69
|
|
|
@@ -67,7 +73,7 @@ def main(args, logger):
|
|
|
67
73
|
os.makedirs(f'{args.outdir}/', exist_ok=True)
|
|
68
74
|
|
|
69
75
|
|
|
70
|
-
# KEGG
|
|
76
|
+
# KEGG download
|
|
71
77
|
response = do_kdown(logger, args.outdir, args.usecache, args.keeptmp)
|
|
72
78
|
if type(response) == int: return 1
|
|
73
79
|
else: RELEASE_kegg, idcollection_dict, summary_dict = response[0], response[1], response[2]
|
|
@@ -76,7 +82,9 @@ def main(args, logger):
|
|
|
76
82
|
# create 'gsrap.maps':
|
|
77
83
|
with open(f'{args.outdir}/gsrap.maps', 'wb') as wb_handler:
|
|
78
84
|
pickle.dump({
|
|
79
|
-
'RELEASE_kegg': RELEASE_kegg,
|
|
85
|
+
'RELEASE_kegg': RELEASE_kegg,
|
|
86
|
+
'idcollection_dict': idcollection_dict,
|
|
87
|
+
'summary_dict': summary_dict,
|
|
80
88
|
}, wb_handler)
|
|
81
89
|
logger.info(f"'{args.outdir}/gsrap.maps' created!")
|
|
82
90
|
|
|
@@ -87,4 +95,5 @@ def main(args, logger):
|
|
|
87
95
|
logger.info(f"Temporary raw files deleted!")
|
|
88
96
|
|
|
89
97
|
|
|
98
|
+
|
|
90
99
|
return 0
|
|
@@ -34,6 +34,7 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
34
34
|
'orthology',
|
|
35
35
|
'module',
|
|
36
36
|
'pathway',
|
|
37
|
+
'organism',
|
|
37
38
|
]
|
|
38
39
|
for db in databases:
|
|
39
40
|
time.sleep(0.5)
|
|
@@ -45,8 +46,9 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
45
46
|
|
|
46
47
|
# mix the items to download to be respectful/compliant
|
|
47
48
|
items_to_download = []
|
|
48
|
-
|
|
49
49
|
for db in databases:
|
|
50
|
+
if db == 'organism':
|
|
51
|
+
continue # here we just need the list
|
|
50
52
|
with open(f"{outdir}/kdown/{db}.txt", 'r') as file:
|
|
51
53
|
res_string = file.read()
|
|
52
54
|
rows = res_string.split('\n')
|
|
@@ -54,7 +56,6 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
54
56
|
item_id = row.split('\t', 1)[0]
|
|
55
57
|
if item_id == '': continue
|
|
56
58
|
items_to_download.append({'db': db, 'id': item_id})
|
|
57
|
-
|
|
58
59
|
random.shuffle(items_to_download)
|
|
59
60
|
|
|
60
61
|
|
|
@@ -79,6 +80,51 @@ def download_raw_txtfiles(logger, outdir, usecache):
|
|
|
79
80
|
|
|
80
81
|
|
|
81
82
|
|
|
83
|
+
def create_dict_keggorg(logger, outdir):
    """Parse ``{outdir}/kdown/organism.txt`` into a per-organism taxonomy dict.

    A well-formed line holds 4 tab-separated fields
    (tnumber, keggorg, name, classification); ``classification`` is a
    ';'-separated lineage whose 2nd and 3rd entries are stored as kingdom
    and phylum. Lines that do not match this layout are reported with a
    warning and skipped.

    Args:
        logger: Object exposing ``warning(msg)`` and ``info(msg)``, or None
            to suppress all messages.
        outdir: Directory containing ``kdown/organism.txt``.

    Returns:
        dict: ``{keggorg: {'kingdom': str, 'phylum': str}}``, in file order.

    Raises:
        ValueError: if the same organism code appears on more than one line.
    """

    # context manager so the handle is closed even if reading fails
    with open(f'{outdir}/kdown/organism.txt', 'r') as handle:
        organisms_raw = handle.read()

    # built directly as a dict: only kingdom and phylum are kept,
    # the organism name is dropped to save disk space.
    dict_keggorg = {}
    for line in organisms_raw.strip().split("\n"):
        fields = line.split("\t")
        if len(fields) != 4:
            # never verified during tests!
            if logger is not None:
                logger.warning(f'Strange number of fields found in this line of "organism.txt": """{line}""".')
            continue

        _tnumber, keggorg, _name, classification = fields
        levels = classification.split(";")
        if len(levels) < 3:
            # lineage too short to provide both kingdom and phylum
            if logger is not None:
                logger.warning(f'Fewer than 3 taxonomic levels found in this line of "organism.txt": """{line}""".')
            continue

        if keggorg in dict_keggorg:
            # organism codes are expected to be unique identifiers
            raise ValueError(f"Duplicated organism code in 'organism.txt': '{keggorg}'.")

        dict_keggorg[keggorg] = {
            'kingdom': levels[1],
            'phylum': levels[2],
        }

    if logger is not None:
        logger.info(f'Number of unique items (org): {len(dict_keggorg)}.')
    return dict_keggorg
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
|
|
82
128
|
def create_dict_ko(logger, outdir):
|
|
83
129
|
|
|
84
130
|
dict_ko = {} # main output
|
|
@@ -98,6 +144,7 @@ def create_dict_ko(logger, outdir):
|
|
|
98
144
|
'ecs': set(),
|
|
99
145
|
'cogs': set(),
|
|
100
146
|
'gos': set(),
|
|
147
|
+
'keggorgs': set(),
|
|
101
148
|
}
|
|
102
149
|
else:
|
|
103
150
|
logger.error(f"{ko_id} already included!")
|
|
@@ -175,7 +222,13 @@ def create_dict_ko(logger, outdir):
|
|
|
175
222
|
gos = content[len('GO: '):].strip().split(' ')
|
|
176
223
|
for go in gos:
|
|
177
224
|
dict_ko[ko_id]['gos'].add(go)
|
|
178
|
-
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# parse the organism-specific genes
|
|
228
|
+
if curr_header == 'GENES ':
|
|
229
|
+
keggorg = content.split(': ',1)[0]
|
|
230
|
+
dict_ko[ko_id]['keggorgs'].add(keggorg.lower()) # organism.txt has IDs in lowercase
|
|
231
|
+
|
|
179
232
|
|
|
180
233
|
# parse the reactions
|
|
181
234
|
if curr_header == 'REACTION ':
|
|
@@ -547,7 +600,7 @@ def create_dict_md(logger, outdir):
|
|
|
547
600
|
|
|
548
601
|
|
|
549
602
|
|
|
550
|
-
def create_idcollection_dict(dict_ko, dict_c, dict_r, dict_map, dict_md):
|
|
603
|
+
def create_idcollection_dict(dict_keggorg, dict_ko, dict_c, dict_r, dict_map, dict_md):
|
|
551
604
|
|
|
552
605
|
idcollection_dict = {}
|
|
553
606
|
|
|
@@ -620,6 +673,24 @@ def create_idcollection_dict(dict_ko, dict_c, dict_r, dict_map, dict_md):
|
|
|
620
673
|
for go in dict_ko[ko_id]['gos']:
|
|
621
674
|
idcollection_dict['ko_to_gos'][ko_id].add(go)
|
|
622
675
|
|
|
676
|
+
|
|
677
|
+
# creation of 'ko_to_keggorgs' skipped as it takes too much disk space. Replaced with 'ko_to_taxa'.
|
|
678
|
+
idcollection_dict['ko_to_taxa'] = {}
|
|
679
|
+
missing_keggorgs = set()
|
|
680
|
+
for ko_id in dict_ko.keys():
|
|
681
|
+
idcollection_dict['ko_to_taxa'][ko_id] = {'kingdom': set(), 'phylum': set()}
|
|
682
|
+
for keggorg in dict_ko[ko_id]['keggorgs']:
|
|
683
|
+
try:
|
|
684
|
+
kingdom = dict_keggorg[keggorg]['kingdom']
|
|
685
|
+
phylum = dict_keggorg[keggorg]['phylum']
|
|
686
|
+
except:
|
|
687
|
+
if keggorg not in missing_keggorgs:
|
|
688
|
+
missing_keggorgs.add(keggorg)
|
|
689
|
+
#print(f"Organism '{keggorg}' appears in 'orthology/' but not in 'organism.txt'.")
|
|
690
|
+
continue
|
|
691
|
+
idcollection_dict['ko_to_taxa'][ko_id]['kingdom'].add(kingdom)
|
|
692
|
+
idcollection_dict['ko_to_taxa'][ko_id]['phylum'].add(phylum)
|
|
693
|
+
|
|
623
694
|
|
|
624
695
|
idcollection_dict['map_to_name'] = {}
|
|
625
696
|
for map_id in dict_map.keys():
|
|
@@ -27,9 +27,8 @@ def parse_eggnog(model, eggnog, idcollection_dict):
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
# PART 2. get reactions in the organism (even the GPR is not complete)
|
|
30
|
-
kr_to_kos = idcollection_dict['kr_to_kos']
|
|
31
30
|
krs_org = set()
|
|
32
|
-
for kr, kos in kr_to_kos.items():
|
|
31
|
+
for kr, kos in idcollection_dict['kr_to_kos'].items():
|
|
33
32
|
if any([ko in kos_org for ko in kos]):
|
|
34
33
|
krs_org.add(kr)
|
|
35
34
|
|
|
@@ -49,9 +48,34 @@ def parse_keggorg(keggorg, outdir, idcollection_dict):
|
|
|
49
48
|
|
|
50
49
|
|
|
51
50
|
# PART 2. get reactions in the organism (even the GPR is not complete)
|
|
52
|
-
kr_to_kos = idcollection_dict['kr_to_kos']
|
|
53
51
|
krs_org = set()
|
|
54
|
-
for kr, kos in kr_to_kos.items():
|
|
52
|
+
for kr, kos in idcollection_dict['kr_to_kos'].items():
|
|
53
|
+
if any([ko in kos_org for ko in kos]):
|
|
54
|
+
krs_org.add(kr)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
return krs_org
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_taxon(taxon, idcollection_dict):
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# formatting of --taxon was already verified at startup.
|
|
65
|
+
# also the presence of 'ko_to_taxa' in idcollection_dict was verified at startup.
|
|
66
|
+
level, name = taxon.split(':')
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# PART 1. get KO codes available
|
|
70
|
+
kos_org = set()
|
|
71
|
+
for ko in idcollection_dict['ko_to_taxa'].keys():
|
|
72
|
+
if name in idcollection_dict['ko_to_taxa'][ko][level]:
|
|
73
|
+
kos_org.add(ko)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# PART 2. get reactions in the organism (even if the GPR is not complete)
|
|
77
|
+
krs_org = set()
|
|
78
|
+
for kr, kos in idcollection_dict['kr_to_kos'].items():
|
|
55
79
|
if any([ko in kos_org for ko in kos]):
|
|
56
80
|
krs_org.add(kr)
|
|
57
81
|
|
|
@@ -60,7 +84,7 @@ def parse_keggorg(keggorg, outdir, idcollection_dict):
|
|
|
60
84
|
|
|
61
85
|
|
|
62
86
|
|
|
63
|
-
def check_completeness(logger, model, progress, module, focus, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
|
|
87
|
+
def check_completeness(logger, model, progress, module, focus, taxon, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
|
|
64
88
|
# check KEGG annotations in the universe model to get '%' of completeness per pathway/module.
|
|
65
89
|
|
|
66
90
|
|
|
@@ -69,6 +93,9 @@ def check_completeness(logger, model, progress, module, focus, eggnog, keggorg,
|
|
|
69
93
|
if keggorg != '-': # keggorg has precedence
|
|
70
94
|
kr_uni = parse_keggorg(keggorg, outdir, idcollection_dict)
|
|
71
95
|
kr_uni_label = f"organism code '{keggorg}'"
|
|
96
|
+
elif taxon != '-':
|
|
97
|
+
kr_uni = parse_taxon(taxon, idcollection_dict)
|
|
98
|
+
kr_uni_label = f"taxon '{taxon}'"
|
|
72
99
|
elif eggnog != '-':
|
|
73
100
|
for eggfile in eggnog:
|
|
74
101
|
eggset = parse_eggnog(model, eggfile, idcollection_dict)
|
|
@@ -17,6 +17,7 @@ from ..commons import show_contributions
|
|
|
17
17
|
from ..commons import adjust_biomass_precursors
|
|
18
18
|
from ..commons import count_undrawn_rids
|
|
19
19
|
from ..commons import format_expansion
|
|
20
|
+
from ..commons import check_taxon
|
|
20
21
|
from ..commons import download_keggorg
|
|
21
22
|
from ..commons import initialize_model
|
|
22
23
|
from ..commons import get_memote_results_dict
|
|
@@ -46,6 +47,7 @@ from .cycles import verify_egc_all
|
|
|
46
47
|
def main(args, logger):
|
|
47
48
|
|
|
48
49
|
|
|
50
|
+
###### FORMAT ARGS NOT REQUIRING RESOURCES
|
|
49
51
|
# adjust out folder path
|
|
50
52
|
while args.outdir.endswith('/'):
|
|
51
53
|
args.outdir = args.outdir[:-1]
|
|
@@ -77,17 +79,8 @@ def main(args, logger):
|
|
|
77
79
|
if args.onlyauthor == '-': args.onlyauthor = None
|
|
78
80
|
|
|
79
81
|
|
|
80
|
-
# format the --eggnog param
|
|
81
|
-
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
82
|
-
|
|
83
|
-
# get the kegg organism if requested
|
|
84
|
-
if args.keggorg != '-':
|
|
85
|
-
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
86
|
-
if response == 1: return 1
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
82
|
|
|
83
|
+
###### LOAD LOCAL RESOURCES
|
|
91
84
|
# check and extract the required 'gsrap.maps' file
|
|
92
85
|
if os.path.exists(f'{args.inmaps}') == False:
|
|
93
86
|
logger.error(f"File 'gsrap.maps' not found at {args.inmaps}.")
|
|
@@ -108,9 +101,27 @@ def main(args, logger):
|
|
|
108
101
|
kegg_compound_to_others = pickle.load(handle)
|
|
109
102
|
with resources.path("gsrap.assets", f"kegg_reaction_to_others.pickle") as asset_path:
|
|
110
103
|
with open(asset_path, 'rb') as handle:
|
|
111
|
-
kegg_reaction_to_others = pickle.load(handle)
|
|
104
|
+
kegg_reaction_to_others = pickle.load(handle)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
###### FORMAT/CHECK FOCUSING ARGS
|
|
109
|
+
# format the --eggnog param
|
|
110
|
+
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
111
|
+
|
|
112
|
+
# check the --taxon param
|
|
113
|
+
if args.taxon != '-':
|
|
114
|
+
response = check_taxon(logger, args.taxon, idcollection_dict)
|
|
115
|
+
if response == 1: return 1
|
|
116
|
+
|
|
117
|
+
# get the kegg organism if requested
|
|
118
|
+
if args.keggorg != '-':
|
|
119
|
+
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
120
|
+
if response == 1: return 1
|
|
121
|
+
|
|
112
122
|
|
|
113
123
|
|
|
124
|
+
# DOWNLOAD ONLINE RESOURCES
|
|
114
125
|
# get dbuni and dbexp:
|
|
115
126
|
logger.info("Downloading gsrap database...")
|
|
116
127
|
response = get_databases(logger)
|
|
@@ -166,7 +177,7 @@ def main(args, logger):
|
|
|
166
177
|
|
|
167
178
|
###### CHECKS 1
|
|
168
179
|
# check universe completeness
|
|
169
|
-
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
180
|
+
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.taxon, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
170
181
|
if type(df_C)==int: return 1
|
|
171
182
|
|
|
172
183
|
|
|
@@ -27,9 +27,8 @@ def parse_eggnog(model, eggnog, idcollection_dict):
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
# PART 2. get reactions in the organism (even the GPR is not complete)
|
|
30
|
-
kr_to_kos = idcollection_dict['kr_to_kos']
|
|
31
30
|
krs_org = set()
|
|
32
|
-
for kr, kos in kr_to_kos.items():
|
|
31
|
+
for kr, kos in idcollection_dict['kr_to_kos'].items():
|
|
33
32
|
if any([ko in kos_org for ko in kos]):
|
|
34
33
|
krs_org.add(kr)
|
|
35
34
|
|
|
@@ -49,9 +48,34 @@ def parse_keggorg(keggorg, outdir, idcollection_dict):
|
|
|
49
48
|
|
|
50
49
|
|
|
51
50
|
# PART 2. get reactions in the organism (even the GPR is not complete)
|
|
52
|
-
kr_to_kos = idcollection_dict['kr_to_kos']
|
|
53
51
|
krs_org = set()
|
|
54
|
-
for kr, kos in kr_to_kos.items():
|
|
52
|
+
for kr, kos in idcollection_dict['kr_to_kos'].items():
|
|
53
|
+
if any([ko in kos_org for ko in kos]):
|
|
54
|
+
krs_org.add(kr)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
return krs_org
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_taxon(taxon, idcollection_dict):
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# formatting of --taxon was already verified at startup.
|
|
65
|
+
# also the presence of 'ko_to_taxa' in idcollection_dict was verified at startup.
|
|
66
|
+
level, name = taxon.split(':')
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# PART 1. get KO codes available
|
|
70
|
+
kos_org = set()
|
|
71
|
+
for ko in idcollection_dict['ko_to_taxa'].keys():
|
|
72
|
+
if name in idcollection_dict['ko_to_taxa'][ko][level]:
|
|
73
|
+
kos_org.add(ko)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# PART 2. get reactions in the organism (even if the GPR is not complete)
|
|
77
|
+
krs_org = set()
|
|
78
|
+
for kr, kos in idcollection_dict['kr_to_kos'].items():
|
|
55
79
|
if any([ko in kos_org for ko in kos]):
|
|
56
80
|
krs_org.add(kr)
|
|
57
81
|
|
|
@@ -60,7 +84,7 @@ def parse_keggorg(keggorg, outdir, idcollection_dict):
|
|
|
60
84
|
|
|
61
85
|
|
|
62
86
|
|
|
63
|
-
def check_completeness(logger, model, progress, module, focus, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
|
|
87
|
+
def check_completeness(logger, model, progress, module, focus, taxon, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
|
|
64
88
|
# check KEGG annotations in the universe model to get '%' of completeness per pathway/module.
|
|
65
89
|
|
|
66
90
|
|
|
@@ -69,6 +93,9 @@ def check_completeness(logger, model, progress, module, focus, eggnog, keggorg,
|
|
|
69
93
|
if keggorg != '-': # keggorg has precedence
|
|
70
94
|
kr_uni = parse_keggorg(keggorg, outdir, idcollection_dict)
|
|
71
95
|
kr_uni_label = f"organism code '{keggorg}'"
|
|
96
|
+
elif taxon != '-':
|
|
97
|
+
kr_uni = parse_taxon(taxon, idcollection_dict)
|
|
98
|
+
kr_uni_label = f"taxon '{taxon}'"
|
|
72
99
|
elif eggnog != '-':
|
|
73
100
|
for eggfile in eggnog:
|
|
74
101
|
eggset = parse_eggnog(model, eggfile, idcollection_dict)
|
|
@@ -17,6 +17,7 @@ from ..commons import show_contributions
|
|
|
17
17
|
from ..commons import adjust_biomass_precursors
|
|
18
18
|
from ..commons import count_undrawn_rids
|
|
19
19
|
from ..commons import format_expansion
|
|
20
|
+
from ..commons import check_taxon
|
|
20
21
|
from ..commons import download_keggorg
|
|
21
22
|
from ..commons import initialize_model
|
|
22
23
|
from ..commons import get_memote_results_dict
|
|
@@ -46,6 +47,7 @@ from .cycles import verify_egc_all
|
|
|
46
47
|
def main(args, logger):
|
|
47
48
|
|
|
48
49
|
|
|
50
|
+
###### FORMAT ARGS NOT REQUIRING RESOURCES
|
|
49
51
|
# adjust out folder path
|
|
50
52
|
while args.outdir.endswith('/'):
|
|
51
53
|
args.outdir = args.outdir[:-1]
|
|
@@ -77,17 +79,8 @@ def main(args, logger):
|
|
|
77
79
|
if args.onlyauthor == '-': args.onlyauthor = None
|
|
78
80
|
|
|
79
81
|
|
|
80
|
-
# format the --eggnog param
|
|
81
|
-
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
82
|
-
|
|
83
|
-
# get the kegg organism if requested
|
|
84
|
-
if args.keggorg != '-':
|
|
85
|
-
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
86
|
-
if response == 1: return 1
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
82
|
|
|
83
|
+
###### LOAD LOCAL RESOURCES
|
|
91
84
|
# check and extract the required 'gsrap.maps' file
|
|
92
85
|
if os.path.exists(f'{args.inmaps}') == False:
|
|
93
86
|
logger.error(f"File 'gsrap.maps' not found at {args.inmaps}.")
|
|
@@ -108,9 +101,27 @@ def main(args, logger):
|
|
|
108
101
|
kegg_compound_to_others = pickle.load(handle)
|
|
109
102
|
with resources.path("gsrap.assets", f"kegg_reaction_to_others.pickle") as asset_path:
|
|
110
103
|
with open(asset_path, 'rb') as handle:
|
|
111
|
-
kegg_reaction_to_others = pickle.load(handle)
|
|
104
|
+
kegg_reaction_to_others = pickle.load(handle)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
###### FORMAT/CHECK FOCUSING ARGS
|
|
109
|
+
# format the --eggnog param
|
|
110
|
+
args.eggnog = format_expansion(logger, args.eggnog) # now 'args.eggnog' could still be '-'
|
|
111
|
+
|
|
112
|
+
# check the --taxon param
|
|
113
|
+
if args.taxon != '-':
|
|
114
|
+
response = check_taxon(logger, args.taxon, idcollection_dict)
|
|
115
|
+
if response == 1: return 1
|
|
116
|
+
|
|
117
|
+
# get the kegg organism if requested
|
|
118
|
+
if args.keggorg != '-':
|
|
119
|
+
response = download_keggorg(logger, args.keggorg, args.outdir)
|
|
120
|
+
if response == 1: return 1
|
|
121
|
+
|
|
112
122
|
|
|
113
123
|
|
|
124
|
+
# DOWNLOAD ONLINE RESOURCES
|
|
114
125
|
# get dbuni and dbexp:
|
|
115
126
|
logger.info("Downloading gsrap database...")
|
|
116
127
|
response = get_databases(logger)
|
|
@@ -166,7 +177,7 @@ def main(args, logger):
|
|
|
166
177
|
|
|
167
178
|
###### CHECKS 1
|
|
168
179
|
# check universe completeness
|
|
169
|
-
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
180
|
+
df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.taxon, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
|
|
170
181
|
if type(df_C)==int: return 1
|
|
171
182
|
|
|
172
183
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/mkmodel/.ipynb_checkpoints/biologcuration-checkpoint.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/essentialgenes-checkpoint.py
RENAMED
|
File without changes
|
{gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/growthfactors-checkpoint.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{gsrap-0.8.2 → gsrap-0.8.3}/src/gsrap/runsims/.ipynb_checkpoints/singleomission-checkpoint.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|