PyPI - gsrap - Versions diffs - 0.7.2__py3-none-any.whl → 0.8.1__py3-none-any.whl - Mend

gsrap 0.7.2__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

gsrap/.ipynb_checkpoints/__init__-checkpoint.py +2 -0
gsrap/__init__.py +2 -0
gsrap/commons/.ipynb_checkpoints/__init__-checkpoint.py +1 -0
gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +1 -1
gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +1 -1
gsrap/commons/.ipynb_checkpoints/excelhub-checkpoint.py +70 -37
gsrap/commons/.ipynb_checkpoints/figures-checkpoint.py +15 -1
gsrap/commons/.ipynb_checkpoints/keggutils-checkpoint.py +145 -0
gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py +3 -4
gsrap/commons/__init__.py +1 -0
gsrap/commons/downloads.py +1 -1
gsrap/commons/escherutils.py +1 -1
gsrap/commons/excelhub.py +70 -37
gsrap/commons/figures.py +15 -1
gsrap/commons/keggutils.py +145 -0
gsrap/commons/medium.py +3 -4
gsrap/mkmodel/.ipynb_checkpoints/mkmodel-checkpoint.py +69 -19
gsrap/mkmodel/.ipynb_checkpoints/pruner-checkpoint.py +72 -7
gsrap/mkmodel/mkmodel.py +69 -19
gsrap/mkmodel/pruner.py +72 -7
gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py +33 -6
gsrap/parsedb/.ipynb_checkpoints/cycles-checkpoint.py +128 -0
gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +9 -9
gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py +27 -0
gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +15 -2
gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +9 -0
gsrap/parsedb/completeness.py +33 -6
gsrap/parsedb/cycles.py +128 -0
gsrap/parsedb/introduce.py +9 -9
gsrap/parsedb/manual.py +27 -0
gsrap/parsedb/parsedb.py +15 -2
gsrap/parsedb/repeating.py +9 -0
{gsrap-0.7.2.dist-info → gsrap-0.8.1.dist-info}/METADATA +1 -1
{gsrap-0.7.2.dist-info → gsrap-0.8.1.dist-info}/RECORD +37 -33
{gsrap-0.7.2.dist-info → gsrap-0.8.1.dist-info}/LICENSE.txt +0 -0
{gsrap-0.7.2.dist-info → gsrap-0.8.1.dist-info}/WHEEL +0 -0
{gsrap-0.7.2.dist-info → gsrap-0.8.1.dist-info}/entry_points.txt +0 -0

gsrap/mkmodel/pruner.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import warnings
 import logging
+import pickle
 import pandas as pnd
@@ -43,22 +44,57 @@ def load_input_eggnog(logger, eggnog):
     # load eggnog annotations
-    eggnog = pnd.read_csv(eggnog, sep='\t', comment='#', header=None)
-    eggnog.columns = 'query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs'.split('\t')
-    eggnog = eggnog.set_index('query', drop=True, verify_integrity=True)
+    df_eggnog = pnd.read_csv(eggnog, sep='\t', comment='#', header=None)
+    df_eggnog.columns = 'query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs'.split('\t')
+    df_eggnog = df_eggnog.set_index('query', drop=True, verify_integrity=True)
-    return eggnog
+    return df_eggnog
-def parse_eggnog(eggnog):
+def load_keggorg_like_eggnog(logger, keggorg, outdir):
+    # load raw data, downloaded form kegg:
+    df_keggorg = pickle.load(open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb'))
+    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)
+    # create an eggnog-like dataframe:
+    df_eggnog_like = []   # list of dict future df
+    for gid in df_keggorg.index:
+        row_dict = {}
+        row_dict['query'] = gid
+        row_dict['PFAMs'] = ','.join(df_keggorg.loc[gid, 'Pfam']) if type(df_keggorg.loc[gid, 'Pfam'])==list else '-'
+        row_dict['KEGG_ko'] = df_keggorg.loc[gid, 'ko'] if type(df_keggorg.loc[gid, 'ko'])==str else '-'
+        df_eggnog_like.append(row_dict)
+    df_eggnog_like = pnd.DataFrame.from_records(df_eggnog_like)
+    # appen missing coluns and sort
+    eggnog_columns = 'query	seed_ortholog	evalue	score	eggNOG_OGs	max_annot_lvl	COG_category	Description	Preferred_name	GOs	EC	KEGG_ko	KEGG_Pathway	KEGG_Module	KEGG_Reaction	KEGG_rclass	BRITE	KEGG_TC	CAZy	BiGG_Reaction	PFAMs'.split('\t')
+    for c in eggnog_columns:
+        if c not in df_eggnog_like.columns:
+            df_eggnog_like[c] = '-'
+    df_eggnog_like = df_eggnog_like[eggnog_columns]
+    # set the index like in eggnog
+    df_eggnog_like = df_eggnog_like.set_index('query', drop=True, verify_integrity=True)
+    return df_eggnog_like
+def parse_eggnog(df_eggnog):
     # PART 1. get KO codes available
     gid_to_kos = {}
     ko_to_gids = {}
-    for gid, kos in eggnog['KEGG_ko'].items():
+    for gid, kos in df_eggnog['KEGG_ko'].items():
         if kos == '-':
             continue
@@ -229,8 +265,37 @@ def restore_gene_annotations(logger, model, universe, eggonog_gid_to_kos):
             # collect names
             names.append(uni_g.name)
         g.name = '; '.join(names)
+def append_keggorg_gene_annots(logger, model, keggorg, outdir):
+    # load raw data, downloaded form kegg:
+    logger.info("Adding gene annotations retrieved from KEGG...")
+    df_keggorg = pickle.load(open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb'))
+    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)
+    # KEGG can provide some useful (ie, used in Memote) gene annotations:
+    for g in model.genes:
+        if g.id in df_keggorg.index:
+            g.annotation['kegg.genes'] = [keggorg + ':' + g.id]
+            if 'NCBI-GeneID' in df_keggorg.columns:
+                g.annotation['ncbigene'] = df_keggorg.loc[g.id, 'NCBI-GeneID'] if type(df_keggorg.loc[g.id, 'NCBI-GeneID'])==list else []
+            if 'NCBI-ProteinID' in df_keggorg.columns:
+                g.annotation['ncbiprotein'] = df_keggorg.loc[g.id, 'NCBI-ProteinID'] if type(df_keggorg.loc[g.id, 'NCBI-ProteinID'])==list else []
+            if 'ASAP' in df_keggorg.columns:
+                g.annotation['asap'] = df_keggorg.loc[g.id, 'ASAP'] if type(df_keggorg.loc[g.id, 'ASAP'])==list else []
+            if 'UniProt' in df_keggorg.columns:
+                g.annotation['uniprot'] = df_keggorg.loc[g.id, 'UniProt'] if type(df_keggorg.loc[g.id, 'UniProt'])==list else []

gsrap/parsedb/.ipynb_checkpoints/completeness-checkpoint.py CHANGED Viewed

@@ -1,4 +1,6 @@
 from pathlib import Path
+import pickle
+import os
 import pandas as pnd
@@ -35,14 +37,39 @@ def parse_eggnog(model, eggnog, idcollection_dict):
     return krs_org
+def parse_keggorg(keggorg, outdir, idcollection_dict):
+    df_keggorg = pickle.load(open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb'))
+    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)
+    # PART 1. get KO codes available
+    kos_org = set([i for i in df_keggorg['ko'] if pnd.isna(i)==False])
+    # PART 2. get reactions in the organism (even the GPR is not complete)
+    kr_to_kos = idcollection_dict['kr_to_kos']
+    krs_org = set()
+    for kr, kos in kr_to_kos.items():
+        if any([ko in kos_org for ko in kos]):
+            krs_org.add(kr)
+    return krs_org
-def check_completeness(logger, model, progress, module, focus, eggnog, idcollection_dict, summary_dict):
+def check_completeness(logger, model, progress, module, focus, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
     # check KEGG annotations in the universe model to get '%' of completeness per pathway/module.
     # get the reference set of kr codes (all kegg or organism specific):
     kr_uni = set()
-    if eggnog != '-':
+    if keggorg != '-':  # keggorg has precedence
+        kr_uni = parse_keggorg(keggorg, outdir, idcollection_dict)
+        kr_uni_label = f"organism code '{keggorg}'"
+    elif eggnog != '-':
         for eggfile in eggnog:
             eggset = parse_eggnog(model, eggfile, idcollection_dict)
             kr_uni = kr_uni.union(eggset)
@@ -60,7 +87,7 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
                 kr_ids_modeled.add(kr_id)
     kr_uni_missing = kr_uni - kr_ids_modeled
     kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
-    logger.info(f"Coverage for '{kr_uni_label}': {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
+    logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
     # define the map?????, containing krs not included in maps
@@ -177,15 +204,15 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
         for eggfile in eggnog:
             strain = Path(eggfile).stem
             eggset = parse_eggnog(model, eggfile, idcollection_dict)
-            col = df_coverage.index.to_series().isin(eggset).astype(int)
+            col = df_coverage.index.to_series().isin(eggset).astype(int)  # integer: 0 or 1
             df_strains.append(col.rename(strain))
         df_strains = pnd.concat(df_strains, axis=1)
         # sort rows: upper rows are present in more strains
-        df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index]
+        #df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index]   # commented: now in charge of figures.py
         df_coverage = df_coverage.loc[df_strains.index]
         df_coverage = pnd.concat([df_coverage, df_strains], axis=1)
         # split in 2: modeled above, non-modeled below:
-        df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]])
+        #df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]])   # commented: now in charge of figures.py
     else:  # not interesting in a super-long table without strains in column
         df_coverage = None

gsrap/parsedb/.ipynb_checkpoints/cycles-checkpoint.py ADDED Viewed

@@ -0,0 +1,128 @@
+import warnings
+import os
+import logging
+import cobra
+import gempipe
+from ..commons import fba_no_warnings
+from ..commons import get_optthr
+def verify_egc(logger, model, mid, outdir):
+    # changes as not permament:
+    found_egc = False
+    with model:
+        # close (0; 0) all the exchange reactions:
+        gempipe.close_boundaries(model)
+        # create a dissipation reaction:
+        dissip = cobra.Reaction(f'__dissip__{mid}')
+        model.add_reactions([dissip])
+        dissip = model.reactions.get_by_id(f'__dissip__{mid}')
+        # define the dissipation reaction:
+        modeled_mids = [m.id for m in model.metabolites]
+        if   mid == 'atp':
+            dissip_string = 'atp_c + h2o_c --> adp_c + pi_c + h_c'
+        elif mid == 'ctp':
+            dissip_string = 'ctp_c + h2o_c --> cdp_c + pi_c + h_c'
+        elif mid == 'gtp':
+            dissip_string = 'gtp_c + h2o_c --> gdp_c + pi_c + h_c'
+        elif mid == 'utp':
+            dissip_string = 'utp_c + h2o_c --> udp_c + pi_c + h_c'
+        elif mid == 'itp':
+            dissip_string = 'itp_c + h2o_c --> idp_c + pi_c + h_c'
+        elif mid == 'nadh':
+            dissip_string = 'nadh_c --> nad_c + h_c'
+        elif mid == 'nadph':
+            dissip_string = 'nadph_c --> nadp_c + h_c'
+        elif mid == 'fadh2':
+            dissip_string = 'fadh2_c --> fad_c + 2.0 h_c'
+        elif mid == 'accoa':
+            dissip_string = 'accoa_c + h2o_c --> ac_c + coa_c + h_c'
+        elif mid == 'glu__L':
+            dissip_string = 'glu__L_c + h2o_c --> akg_c + nh4_c + 2.0 h_c'
+        elif mid == 'q8h2':
+            dissip_string = 'q8h2_c --> q8_c + 2.0 h_c'
+        dissip.build_reaction_from_string(dissip_string)
+        # set the objective and optimize:
+        model.objective = f'__dissip__{mid}'
+        res, obj_value, status = fba_no_warnings(model)
+        # apply the threshold:
+        obj_value = res.objective_value
+        status = res.status
+        if status == 'optimal' and obj_value >= get_optthr():
+            found_egc = True
+            # get suspect !=0 fluxes
+            fluxes = res.fluxes
+            # get interesting fluxes (get_optthr() tries to take into account the approximation in glpk and cplex solvers)
+            fluxes_interesting = fluxes[(fluxes > get_optthr()) | (fluxes < -get_optthr())]
+            # create a model for escher, remove Rs not beloning to the cycle
+            model_copy = model.copy()
+            all_rids = [r.id for r in model_copy.reactions]
+            to_delete = set(all_rids) - set(fluxes_interesting.index)
+            # trick to avoid the WARNING "cobra/core/group.py:147: UserWarning: need to pass in a list"
+            # triggered when trying to remove reactions that are included in groups.
+            with warnings.catch_warnings():  # temporarily suppress warnings for this block
+                warnings.simplefilter("ignore")  # ignore all warnings
+                cobra_logger = logging.getLogger("cobra.util.solver")
+                old_level = cobra_logger.level
+                cobra_logger.setLevel(logging.ERROR)
+                # triggering code
+                model_copy.remove_reactions(to_delete)  # should work also with IDs
+                # restore original behaviour:
+                cobra_logger.setLevel(old_level)
+            # save JSON to direct import in Escher:
+            outfile = os.path.join(outdir, f'EGC_{mid}.json')
+            cobra.io.save_json_model(model_copy, outfile)
+            # log some messages
+            rid_labels = []
+            for rid, flux in fluxes_interesting.to_dict().items():
+                rid_label = "'" + rid + "'"
+                # mark reversible reactions composing the cycle:
+                r = model.reactions.get_by_id(rid)
+                if r.lower_bound < 0 and r.upper_bound > 0:
+                    rid_label = rid_label + '(<=>)'
+                rid_labels.append(rid_label)
+            logger.warning(f"Found erroneous EGC (N={len(model_copy.reactions)}) for '{mid}' (f={obj_value}): [{', '.join(rid_labels)}]. EGC saved to '{outfile}' to be inspected with Escher-FBA.")
+    return found_egc
+def verify_egc_all(logger, model, outdir='./', mids_to_check=['atp','ctp','gtp','utp','itp','nadh','nadph','fadh2','accoa','glu__L','q8h2']):
+    all_results = []
+    for mid in mids_to_check:
+        all_results.append(verify_egc(logger, model, mid, outdir))
+    if any(all_results)==False:
+        logger.info("Found 0 erroneous energy-generating cycles (EGCs).")

gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py CHANGED Viewed

@@ -143,6 +143,14 @@ def introduce_metabolites(logger, db, model, idcollection_dict, kegg_compound_to
             m.annotation[ankey] = list(m.annotation[ankey])
+        # replace inchikey with manually-curated
+        if m.annotation['inchikey'] != [] and m.annotation['inchikey'] != [row['inchikey']]:
+            logger.debug(f"Metabolite '{pure_mid}': manual-curated inchikey ({[row['inchikey']]}) is diferent from the one derived from MNX ({m.annotation['inchikey']}).")
+        m.annotation['inchikey'] = [row['inchikey']]  # force the manual-curated version
+        if m.annotation['inchikey'] == ['XXXXXXXXXXXXXX-XXXXXXXXXX-X']:
+            m.annotation['inchikey'] = []
         # add SBO annotation
         m.annotation['sbo'] = ['SBO:0000247']  # generic metabolite
@@ -279,15 +287,7 @@ def introduce_transporters(logger, db, model, idcollection_dict, kegg_reaction_t
         r = model.reactions.get_by_id(f'EX_{mid_e}')
         r.name = f"Exchange for {model.metabolites.get_by_id(mid_e).name}"
         r.build_reaction_from_string(f'{mid_e} --> ')
-        if mid_e in [
-            # basics:
-            'glc__D_e', 'nh4_e', 'pi_e', 'so4_e', 'h2o_e', 'h_e', 'o2_e', 'co2_e',
-            # metals:
-            'cu2_e', 'mobd_e', 'fe2_e', 'cobalt2_e',
-        ]:
-            r.bounds = (-1000, 1000)
-        else:
-            r.bounds = (0, 1000)
+        r.bounds = (0, 1000)
         # add SBO annotation
         r.annotation['sbo'] = ['SBO:0000627']  # exchange reaction

gsrap/parsedb/.ipynb_checkpoints/manual-checkpoint.py CHANGED Viewed

@@ -19,6 +19,33 @@ def get_rids_with_mancheck_gpr():
     return rids_mancheck_gpr
+def get_rids_with_mancheck_balancing():
+    rids_mancheck_bal = [  # same reactions involving ATP can be reversible
+        # SECTION "reversible both in KEGG and MetaCyc"
+        'PGK', 'SUCOAS', 'ADK1', 'GK1', 'NNATr', 'CYTK1', 'ACKr',
+        'DGK1', 'PPAKr', 'ATPSr', 'NDPK10',
+        ### SECTION "reversible in KEGG but not in MetaCyc" ###
+        'CYTK2',  # clearly reversible in KEGG but not in MetaCyc (RXN-7913)
+        'DADK',  # clearly reversible in KEGG but not in MetaCyc (DEOXYADENYLATE-KINASE-RXN)
+        'UMPK',  # clearly reversible in KEGG but not in MetaCyc (RXN-12002)
+        'NDPK1',  # clearly reversible in KEGG but not in MetaCyc (GDPKIN-RXN)
+        'NDPK2',  # clearly reversible in KEGG but not in MetaCyc (UDPKIN-RXN)
+        'NDPK3',  # clearly reversible in KEGG but not in MetaCyc (CDPKIN-RXN)
+        'NDPK4',  # clearly reversible in KEGG but not in MetaCyc (DTDPKIN-RXN)
+        'NDPK5',  # clearly reversible in KEGG but not in MetaCyc (DGDPKIN-RXN)
+        'NDPK6',  # clearly reversible in KEGG but not in MetaCyc (DUDPKIN-RXN)
+        'NDPK7',  # clearly reversible in KEGG but not in MetaCyc (DCDPKIN-RXN)
+        'NDPK8',  # clearly reversible in KEGG but not in MetaCyc (DADPKIN-RXN)
+        'NDPK9',  # clearly reversible in KEGG but not in MetaCyc (RXN-14120)
+        ### SECTION "missing reversibility info" ###
+        'LPHERA',
+    ]
+    return rids_mancheck_bal
 def get_manual_sinks():

gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py CHANGED Viewed

@@ -17,6 +17,7 @@ from ..commons import show_contributions
 from ..commons import adjust_biomass_precursors
 from ..commons import count_undrawn_rids
 from ..commons import format_expansion
+from ..commons import download_keggorg
 from .introduce import introduce_metabolites
 from .introduce import introduce_reactions
@@ -34,6 +35,8 @@ from ..runsims.biosynth import biosynthesis_on_media
 from ..mkmodel.polishing import remove_disconnected
+from .cycles import verify_egc_all
@@ -72,7 +75,14 @@ def main(args, logger):
     # format the --eggnog param
-    args.eggnog = format_expansion(logger, args.eggnog)
+    args.eggnog = format_expansion(logger, args.eggnog)  # now 'args.eggnog' could still be '-'
+    # get the kegg organism if requested
+    if args.keggorg != '-':
+        response = download_keggorg(logger, args.keggorg, args.outdir)
+        if response == 1: return 1
     # check and extract the required 'gsrap.maps' file
@@ -153,7 +163,7 @@ def main(args, logger):
     ###### CHECKS 1
     # check universe completness
-    df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, idcollection_dict, summary_dict)
+    df_C = check_completeness(logger, universe, args.progress, args.module, args.focus, args.eggnog, args.keggorg, idcollection_dict, summary_dict, args.outdir)
     if type(df_C)==int: return 1
@@ -165,6 +175,9 @@ def main(args, logger):
     ###### CHECKS 2
+    # check erroneous EGCs
+    verify_egc_all(logger, universe, args.outdir)
     # check growth on minmal media
     df_G = grow_on_media(logger, universe, dbexp, args.media, '-', True)
     if type(df_G)==int: return 1

gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py CHANGED Viewed

@@ -4,6 +4,7 @@ import cobra
 from .manual import get_deprecated_kos
 from .manual import get_rids_with_mancheck_gpr
+from .manual import get_rids_with_mancheck_balancing
@@ -138,6 +139,14 @@ def add_reaction(logger, model, rid, row, kr_ids, kegg_reaction_to_others, addty
         return 1
+    # check if reversible and using ATP
+    if r.lower_bound < 0 and r.upper_bound > 0:
+        for m in r.metabolites:
+            if m.id.rsplit('_', 1)[0] == 'atp':
+                if rid not in get_rids_with_mancheck_balancing():
+                    logger.warning(f"Reaction '{rid}' involves ATP and is reversible: are you sure?")
     return 0

gsrap/parsedb/completeness.py CHANGED Viewed

@@ -1,4 +1,6 @@
 from pathlib import Path
+import pickle
+import os
 import pandas as pnd
@@ -35,14 +37,39 @@ def parse_eggnog(model, eggnog, idcollection_dict):
     return krs_org
+def parse_keggorg(keggorg, outdir, idcollection_dict):
+    df_keggorg = pickle.load(open(os.path.join(outdir, f'{keggorg}.keggorg'), 'rb'))
+    df_keggorg = df_keggorg.set_index('gid', drop=True, verify_integrity=True)
+    # PART 1. get KO codes available
+    kos_org = set([i for i in df_keggorg['ko'] if pnd.isna(i)==False])
+    # PART 2. get reactions in the organism (even the GPR is not complete)
+    kr_to_kos = idcollection_dict['kr_to_kos']
+    krs_org = set()
+    for kr, kos in kr_to_kos.items():
+        if any([ko in kos_org for ko in kos]):
+            krs_org.add(kr)
+    return krs_org
-def check_completeness(logger, model, progress, module, focus, eggnog, idcollection_dict, summary_dict):
+def check_completeness(logger, model, progress, module, focus, eggnog, keggorg, idcollection_dict, summary_dict, outdir):
     # check KEGG annotations in the universe model to get '%' of completeness per pathway/module.
     # get the reference set of kr codes (all kegg or organism specific):
     kr_uni = set()
-    if eggnog != '-':
+    if keggorg != '-':  # keggorg has precedence
+        kr_uni = parse_keggorg(keggorg, outdir, idcollection_dict)
+        kr_uni_label = f"organism code '{keggorg}'"
+    elif eggnog != '-':
         for eggfile in eggnog:
             eggset = parse_eggnog(model, eggfile, idcollection_dict)
             kr_uni = kr_uni.union(eggset)
@@ -60,7 +87,7 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
                 kr_ids_modeled.add(kr_id)
     kr_uni_missing = kr_uni - kr_ids_modeled
     kr_uni_coverage = len(kr_ids_modeled.intersection(kr_uni)) / len(kr_uni) * 100
-    logger.info(f"Coverage for '{kr_uni_label}': {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
+    logger.info(f"Coverage for {kr_uni_label}: {round(kr_uni_coverage, 0)}% ({len(kr_uni_missing)} missing).")
     # define the map?????, containing krs not included in maps
@@ -177,15 +204,15 @@ def check_completeness(logger, model, progress, module, focus, eggnog, idcollect
         for eggfile in eggnog:
             strain = Path(eggfile).stem
             eggset = parse_eggnog(model, eggfile, idcollection_dict)
-            col = df_coverage.index.to_series().isin(eggset).astype(int)
+            col = df_coverage.index.to_series().isin(eggset).astype(int)  # integer: 0 or 1
             df_strains.append(col.rename(strain))
         df_strains = pnd.concat(df_strains, axis=1)
         # sort rows: upper rows are present in more strains
-        df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index]
+        #df_strains = df_strains.loc[df_strains.sum(axis=1).sort_values(ascending=False).index]   # commented: now in charge of figures.py
         df_coverage = df_coverage.loc[df_strains.index]
         df_coverage = pnd.concat([df_coverage, df_strains], axis=1)
         # split in 2: modeled above, non-modeled below:
-        df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]])
+        #df_coverage = pnd.concat([df_coverage[df_coverage['modeled']==True], df_coverage[df_coverage['modeled']==False]])   # commented: now in charge of figures.py
     else:  # not interesting in a super-long table without strains in column
         df_coverage = None

gsrap/parsedb/cycles.py ADDED Viewed

@@ -0,0 +1,128 @@
+import warnings
+import os
+import logging
+import cobra
+import gempipe
+from ..commons import fba_no_warnings
+from ..commons import get_optthr
+def verify_egc(logger, model, mid, outdir):
+    # changes as not permament:
+    found_egc = False
+    with model:
+        # close (0; 0) all the exchange reactions:
+        gempipe.close_boundaries(model)
+        # create a dissipation reaction:
+        dissip = cobra.Reaction(f'__dissip__{mid}')
+        model.add_reactions([dissip])
+        dissip = model.reactions.get_by_id(f'__dissip__{mid}')
+        # define the dissipation reaction:
+        modeled_mids = [m.id for m in model.metabolites]
+        if   mid == 'atp':
+            dissip_string = 'atp_c + h2o_c --> adp_c + pi_c + h_c'
+        elif mid == 'ctp':
+            dissip_string = 'ctp_c + h2o_c --> cdp_c + pi_c + h_c'
+        elif mid == 'gtp':
+            dissip_string = 'gtp_c + h2o_c --> gdp_c + pi_c + h_c'
+        elif mid == 'utp':
+            dissip_string = 'utp_c + h2o_c --> udp_c + pi_c + h_c'
+        elif mid == 'itp':
+            dissip_string = 'itp_c + h2o_c --> idp_c + pi_c + h_c'
+        elif mid == 'nadh':
+            dissip_string = 'nadh_c --> nad_c + h_c'
+        elif mid == 'nadph':
+            dissip_string = 'nadph_c --> nadp_c + h_c'
+        elif mid == 'fadh2':
+            dissip_string = 'fadh2_c --> fad_c + 2.0 h_c'
+        elif mid == 'accoa':
+            dissip_string = 'accoa_c + h2o_c --> ac_c + coa_c + h_c'
+        elif mid == 'glu__L':
+            dissip_string = 'glu__L_c + h2o_c --> akg_c + nh4_c + 2.0 h_c'
+        elif mid == 'q8h2':
+            dissip_string = 'q8h2_c --> q8_c + 2.0 h_c'
+        dissip.build_reaction_from_string(dissip_string)
+        # set the objective and optimize:
+        model.objective = f'__dissip__{mid}'
+        res, obj_value, status = fba_no_warnings(model)
+        # apply the threshold:
+        obj_value = res.objective_value
+        status = res.status
+        if status == 'optimal' and obj_value >= get_optthr():
+            found_egc = True
+            # get suspect !=0 fluxes
+            fluxes = res.fluxes
+            # get interesting fluxes (get_optthr() tries to take into account the approximation in glpk and cplex solvers)
+            fluxes_interesting = fluxes[(fluxes > get_optthr()) | (fluxes < -get_optthr())]
+            # create a model for escher, remove Rs not beloning to the cycle
+            model_copy = model.copy()
+            all_rids = [r.id for r in model_copy.reactions]
+            to_delete = set(all_rids) - set(fluxes_interesting.index)
+            # trick to avoid the WARNING "cobra/core/group.py:147: UserWarning: need to pass in a list"
+            # triggered when trying to remove reactions that are included in groups.
+            with warnings.catch_warnings():  # temporarily suppress warnings for this block
+                warnings.simplefilter("ignore")  # ignore all warnings
+                cobra_logger = logging.getLogger("cobra.util.solver")
+                old_level = cobra_logger.level
+                cobra_logger.setLevel(logging.ERROR)
+                # triggering code
+                model_copy.remove_reactions(to_delete)  # should work also with IDs
+                # restore original behaviour:
+                cobra_logger.setLevel(old_level)
+            # save JSON to direct import in Escher:
+            outfile = os.path.join(outdir, f'EGC_{mid}.json')
+            cobra.io.save_json_model(model_copy, outfile)
+            # log some messages
+            rid_labels = []
+            for rid, flux in fluxes_interesting.to_dict().items():
+                rid_label = "'" + rid + "'"
+                # mark reversible reactions composing the cycle:
+                r = model.reactions.get_by_id(rid)
+                if r.lower_bound < 0 and r.upper_bound > 0:
+                    rid_label = rid_label + '(<=>)'
+                rid_labels.append(rid_label)
+            logger.warning(f"Found erroneous EGC (N={len(model_copy.reactions)}) for '{mid}' (f={obj_value}): [{', '.join(rid_labels)}]. EGC saved to '{outfile}' to be inspected with Escher-FBA.")
+    return found_egc
+def verify_egc_all(logger, model, outdir='./', mids_to_check=['atp','ctp','gtp','utp','itp','nadh','nadph','fadh2','accoa','glu__L','q8h2']):
+    all_results = []
+    for mid in mids_to_check:
+        all_results.append(verify_egc(logger, model, mid, outdir))
+    if any(all_results)==False:
+        logger.info("Found 0 erroneous energy-generating cycles (EGCs).")

gsrap 0.7.2__py3-none-any.whl → 0.8.1__py3-none-any.whl

gsrap 0.7.2__py3-none-any.whl → 0.8.1__py3-none-any.whl