PyPI - gsrap - Versions diffs - 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl - Mend

gsrap 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

gsrap/.ipynb_checkpoints/__init__-checkpoint.py +11 -13
gsrap/__init__.py +11 -13
gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py +1 -1
gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py +1 -1
gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py +3 -3
gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py +147 -1
gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py +1 -1
gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py +10 -10
gsrap/commons/biomass.py +1 -1
gsrap/commons/coeffs.py +1 -1
gsrap/commons/downloads.py +3 -3
gsrap/commons/escherutils.py +147 -1
gsrap/commons/medium.py +1 -1
gsrap/commons/metrics.py +10 -10
gsrap/parsedb/.ipynb_checkpoints/annotation-checkpoint.py +16 -6
gsrap/parsedb/.ipynb_checkpoints/introduce-checkpoint.py +35 -35
gsrap/parsedb/.ipynb_checkpoints/parsedb-checkpoint.py +19 -10
gsrap/parsedb/.ipynb_checkpoints/repeating-checkpoint.py +12 -13
gsrap/parsedb/annotation.py +16 -6
gsrap/parsedb/introduce.py +35 -35
gsrap/parsedb/parsedb.py +19 -10
gsrap/parsedb/repeating.py +12 -13
{gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/METADATA +2 -2
{gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/RECORD +27 -27
{gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/WHEEL +0 -0
{gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/entry_points.txt +0 -0
{gsrap-0.10.1.dist-info → gsrap-0.10.2.dist-info}/licenses/LICENSE.txt +0 -0

gsrap/.ipynb_checkpoints/__init__-checkpoint.py CHANGED Viewed

@@ -9,21 +9,18 @@ import atexit
 import os
 import cobra
 from .commons import get_logger
 from .commons import set_usual_formatter
 from .commons import set_header_trailer_formatter
 from .getmaps import getmaps_command
 from .parsedb import parsedb_command
 from .mkmodel import mkmodel_command
 from .runsims import runsims_command
 cobra_config = cobra.Configuration()
 solver_name = str(cobra_config.solver.log).split(' ')[1]
 solver_name = solver_name.replace("optlang.", '')
@@ -69,20 +66,21 @@ def main():
     parsedb_parser.add_argument("-o", "--outdir", metavar='', type=str, default='./', help="Main output directory (will be created if not existing).")
     parsedb_parser.add_argument("-i", "--inmaps", metavar='', type=str, default='./gsrap.maps', help="Input file 'gsrap.maps' previously produced using the 'getmaps' subcommand.")
     parsedb_parser.add_argument("-p", "--progress", action='store_true', help="Show progress for each map.")
-    parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use only with --progress).")
-    parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use only with --progress).")
+    parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use with --progress).")
+    parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module obtaining codes of missing reactions (use with --progress).")
     parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
-    parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
+    parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. If not provided, the first medium in --media is used. Provide 'None' to avoid initialization.")
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
-    parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
-    parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
-    parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
-    parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
-    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
-    parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
+    parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to optional eggnog-mapper annotation table(s). If provided, --progress will be based on such table(s) instead of the entire KEGG.")
+    parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="Optional high-level taxon of interest accounted during the parsing. If provided, --progress will be based on such taxon instead of the entire KEGG (takes precedence over --eggnog). The syntax '{level}:{name}' must be honored, where {level} is 'kingdom' or 'phylum'.")
+    parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="Optional single KEGG Organism code accounted during parsing. If provided, --progress will be based on such organism instead of the entire KEGG (takes precedence over --taxon and --eggnog).")
+    parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Used to skip rows of M-R-T sheets during parsing. The syntax '{pure_mid}-{rid1}-{rid2}' must be honored. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
+    parsedb_parser.add_argument("--onlycurator", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified curator ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures in th Excel output.")
+    parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Parse the database without performing extra activities (saves time).")
     parsedb_parser.add_argument("-d", "--keepdisconn", action='store_true', help="Do not remove disconnected metabolites.")
+    parsedb_parser.add_argument("--escherzip", metavar='', type=str, default='-', help="Optional path to the zipped 'escher' folder downloaded from Google Drive.")

gsrap/__init__.py CHANGED Viewed

@@ -9,21 +9,18 @@ import atexit
 import os
 import cobra
 from .commons import get_logger
 from .commons import set_usual_formatter
 from .commons import set_header_trailer_formatter
 from .getmaps import getmaps_command
 from .parsedb import parsedb_command
 from .mkmodel import mkmodel_command
 from .runsims import runsims_command
 cobra_config = cobra.Configuration()
 solver_name = str(cobra_config.solver.log).split(' ')[1]
 solver_name = solver_name.replace("optlang.", '')
@@ -69,20 +66,21 @@ def main():
     parsedb_parser.add_argument("-o", "--outdir", metavar='', type=str, default='./', help="Main output directory (will be created if not existing).")
     parsedb_parser.add_argument("-i", "--inmaps", metavar='', type=str, default='./gsrap.maps', help="Input file 'gsrap.maps' previously produced using the 'getmaps' subcommand.")
     parsedb_parser.add_argument("-p", "--progress", action='store_true', help="Show progress for each map.")
-    parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use only with --progress).")
-    parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module (use only with --progress).")
+    parsedb_parser.add_argument("--module", action='store_true', help="Show progress for each module of each map (use with --progress).")
+    parsedb_parser.add_argument("-f", "--focus", metavar='', type=str, default='-', help="Focus on a particular map/module obtaining codes of missing reactions (use with --progress).")
     parsedb_parser.add_argument("-m", "--media", metavar='', type=str, default='M9,M9an,M9photo', help="Media to use during growth simulations (comma-separated IDs).")
-    parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. By default, the first medium in --media is used. Use 'none' to avoid initialization.")
+    parsedb_parser.add_argument("-z", "--initialize", metavar='', type=str, default='-', help="Initialize the universe on the provided medium. If not provided, the first medium in --media is used. Provide 'None' to avoid initialization.")
     parsedb_parser.add_argument("--precursors", action='store_true', help="Verify biosynthesis of biomass precursors and show blocked ones.")
     parsedb_parser.add_argument("--biosynth", action='store_true', help="Check biosynthesis of all metabolites and detect dead-ends.")
-    parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="High-level taxon of interest. If provided, it must follow the syntax '{level}:{name}', where {level} is 'kingdom' or 'phylum'.")
-    parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to the optional eggnog-mapper annotation table(s).")
-    parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="A single KEGG Organism code. If provided, it takes precedence over --eggnog.")
-    parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Syntax is {pure_mid}-{rid1}-{rid2}. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
-    parsedb_parser.add_argument("--onlyauthor", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified author ID only. Contents affected by --goodbefore are parsed anyway.")
-    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures.")
-    parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Just parse the database without performing extra activities (saves time during universe expansion).")
+    parsedb_parser.add_argument("-e", "--eggnog", nargs='+', metavar='', type=str, default='-', help="Path to optional eggnog-mapper annotation table(s). If provided, --progress will be based on such table(s) instead of the entire KEGG.")
+    parsedb_parser.add_argument("-t", "--taxon", metavar='', type=str, default='-', help="Optional high-level taxon of interest accounted during the parsing. If provided, --progress will be based on such taxon instead of the entire KEGG (takes precedence over --eggnog). The syntax '{level}:{name}' must be honored, where {level} is 'kingdom' or 'phylum'.")
+    parsedb_parser.add_argument("-k", "--keggorg", metavar='', type=str, default='-', help="Optional single KEGG Organism code accounted during parsing. If provided, --progress will be based on such organism instead of the entire KEGG (takes precedence over --taxon and --eggnog).")
+    parsedb_parser.add_argument("--goodbefore", metavar='', type=str, default='-', help="Used to skip rows of M-R-T sheets during parsing. The syntax '{pure_mid}-{rid1}-{rid2}' must be honored. From top to bottom, build the universe until reaction {rid1}, transport {rid2} and metabolite {pure_mid} are reached.")
+    parsedb_parser.add_argument("--onlycurator", metavar='', type=str, default='-', help="Build the universe by parsing contents of the specified curator ID only. Contents affected by --goodbefore are parsed anyway.")
+    parsedb_parser.add_argument("--nofigs", action='store_true', help="Do not generate figures in th Excel output.")
+    parsedb_parser.add_argument("-j", "--justparse", action='store_true', help="Parse the database without performing extra activities (saves time).")
     parsedb_parser.add_argument("-d", "--keepdisconn", action='store_true', help="Do not remove disconnected metabolites.")
+    parsedb_parser.add_argument("--escherzip", metavar='', type=str, default='-', help="Optional path to the zipped 'escher' folder downloaded from Google Drive.")

gsrap/commons/.ipynb_checkpoints/biomass-checkpoint.py CHANGED Viewed

@@ -28,7 +28,7 @@ def get_biomass_dict(logger, universe, dbexp):
     fraction_to_precursors = dict()
     for fraction in ['DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
         fraction_db = dbexp[fraction]
-        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'author', 'units'])
+        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
         precursors = [f"{i}_c" for i in fraction_db.index.dropna()]
         for i in precursors:
             if i not in universal_mids:

gsrap/commons/.ipynb_checkpoints/coeffs-checkpoint.py CHANGED Viewed

@@ -26,7 +26,7 @@ def check_exp_biomass_data(logger, dbexp, biomass):
     ftd = dict()  # fraction_to_decimals
     for sheet in ['MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
         fraction_db = dbexp[sheet][biomass].dropna() # they should be str
-        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'author', 'units'])
+        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
         ftd[sheet] = {i: Decimal(val) for i, val in fraction_db.items() if Decimal(val) != Decimal('0.0')} # convert to dict
         if sum(ftd[sheet].values()) != Decimal('1.0'):  # check if the sum gives 1 (g/gDW or mol/mol depending on 'sheet')
             logger.error(f"Biomass data provided in sheet '{sheet}' for ID '{biomass}' does not sum up to 1.0. Missing mass is {Decimal('1.0')-sum(ftd[sheet].values())}.")

gsrap/commons/.ipynb_checkpoints/downloads-checkpoint.py CHANGED Viewed

@@ -41,7 +41,7 @@ def get_dbuni(logger):
     sheet_id = "1dXJBIFjCghrdvQtxEOYlVNWAQU4mK-nqLWyDQeUZqek"
-    #sheet_id = "1dCVOOnpNg7rK3iZmTDz3wybW7YrUNoClnqezT9Q5bpc" # alternative
+    #sheet_id = "15fIBewG1B1jIbg1_9pMnyPJL7LWCLMD8s_vlf3ZUno0"  # alternative
     url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
     response = requests.get(url)  # download the requested file
     if response.status_code == 200:
@@ -100,7 +100,7 @@ def get_dbexp(logger):
     # check table presence
     sheet_names = exceldb.sheet_names
-    for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', 'authors']:
+    for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', 'curators']:
         if i not in sheet_names:
             logger.error(f"Sheet '{i}' is missing!")
             return 1
@@ -119,7 +119,7 @@ def get_dbexp(logger):
     dbexp['PROTS'] = exceldb.parse('PROTS')
     dbexp['LIPIDS_PL'] = exceldb.parse('LIPIDS_PL')
     dbexp['LIPIDS_FA'] = exceldb.parse('LIPIDS_FA')
-    dbexp['authors'] = exceldb.parse('authors')
+    dbexp['curators'] = exceldb.parse('curators')
     # format tables (media):

gsrap/commons/.ipynb_checkpoints/escherutils-checkpoint.py CHANGED Viewed

@@ -1,8 +1,15 @@
 import warnings
 import logging
 import threading
+import zipfile
+from pathlib import Path
+import tempfile
+import json
+import os
+import shutil
 import cobra
+from escher import Builder
 from .downloads import SimpleLoadingWheel
@@ -109,4 +116,143 @@ def count_undrawn_rids_focus(logger, universe, lastmap, focus, outdir):
         logger.warning(f"'{outdir}/focus_{focus}.json' created!")
     else:
         logger.info(f"Current '{lastmap['filename']}' is 0 reactions behind. Thank you ♥")
+def parse_zipped_escher(logger, universe, escherzip, outdir):
+    # Parse the zipped 'escher' folder downloaded from Google Drive and annotate
+    # 'universe' with the maps in which each gene/reaction/metabolite is drawn.
+    # Expected archive layout: one top-level folder holding one sub-folder per map,
+    # each sub-folder storing versions named '{map_id}-v{N}.json'.
+    # For each map, the highest version is parsed and also exported as a
+    # standalone viewer to '{outdir}/escherapps/{map_id}.html'.
+    #   logger:    logger receiving info/error messages.
+    #   universe:  object exposing 'genes', 'reactions' and 'metabolites'; every item
+    #              gets annotation['drawn_in_maps'] (sorted list of map IDs, maybe empty).
+    #   escherzip: path to the zipped folder.
+    #   outdir:    main output directory ('{outdir}/escherapps' is recreated from scratch).
+    # Returns the annotated 'universe', or 1 after logging an error.
+    logger.info("Processing collection of hand-drawn Escher maps...")
+    # validate the input BEFORE wiping any previously generated output
+    zip_path = Path(escherzip)  # convert to Path for convenience
+    if not zip_path.exists():
+        logger.error(f"Zipped escher does not exist at provided path '{zip_path}'.")
+        return 1
+    # prepare empty apps folder
+    apps_dir = f'{outdir}/escherapps'
+    shutil.rmtree(apps_dir, ignore_errors=True)
+    os.makedirs(apps_dir, exist_ok=True)
+    drawngid_to_maps = dict()
+    drawnrid_to_maps = dict()
+    drawnpuremid_to_maps = dict()
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # extract in an auto-deleting temporary directory
+        try:
+            with zipfile.ZipFile(zip_path, "r") as z:
+                z.extractall(tmp_dir)
+        except zipfile.BadZipFile:
+            logger.error(f"Provided path '{zip_path}' is not a valid ZIP archive.")
+            return 1
+        # locate the top-level folder of the ZIP
+        child_dirs = [p for p in Path(tmp_dir).iterdir() if p.is_dir()]
+        if not child_dirs:
+            logger.error(f"No top-level folder found inside '{zip_path}'.")
+            return 1
+        escher_dir = child_dirs[0]
+        # iterate the inner child dirs (one per map):
+        for map_dir in [p for p in escher_dir.iterdir() if p.is_dir()]:
+            map_id = map_dir.name
+            map_files = [f for f in map_dir.iterdir() if f.is_file()]
+            # check presence of maps:
+            if not map_files:
+                # still no maps in this folder
+                continue
+            # check name consistency (text before the first '-v' must be shared):
+            map_files_name = {f.name.split('-v', 1)[0] for f in map_files}
+            if len(map_files_name) != 1:
+                logger.error(f"Name inconsistency in folder '{map_id}'!")
+                return 1
+            # check consistency with the folder
+            if map_files_name != {map_id}:
+                logger.error(f"Versions in '{map_id}' refer to a different map!")
+                return 1
+            # get latest version, keeping track of the file that carries it
+            try:
+                versioned_files = [(int(f.name.split('-v', 1)[-1].replace('.json', '')), f) for f in map_files]
+            except ValueError:
+                logger.error(f"Unparsable version number in folder '{map_id}'!")
+                return 1
+            latest_filepath = str(max(versioned_files, key=lambda pair: pair[0])[1])
+            # read json (last version) to fill the dicts
+            with open(latest_filepath, 'r', encoding='utf-8') as file:
+                json_data = json.load(file)
+            # get elements on Escher (second item of the JSON holds the drawing):
+            map_drawngids = set()
+            map_drawnrids = set()
+            map_drawnpuremids = set()
+            for reaction in json_data[1]['reactions'].values():
+                map_drawnrids.add(reaction['bigg_id'])
+                for gene in reaction['genes']:
+                    map_drawngids.add(gene['bigg_id'])
+            for node in json_data[1]['nodes'].values():
+                if node['node_type'] == 'metabolite':
+                    # strip the text after the last '_' (presumably the compartment)
+                    map_drawnpuremids.add(node['bigg_id'].rsplit('_', 1)[0])
+            # populate dicts
+            for drawn_ids, id_to_maps in (
+                (map_drawngids, drawngid_to_maps),
+                (map_drawnrids, drawnrid_to_maps),
+                (map_drawnpuremids, drawnpuremid_to_maps),
+            ):
+                for drawn_id in drawn_ids:
+                    id_to_maps.setdefault(drawn_id, set()).add(map_id)
+            # read json (last version) to create the escher app
+            # (must happen while the temporary directory still exists)
+            builder = Builder(
+                map_json = latest_filepath,
+                model_json = None,
+            )
+            builder.never_ask_before_quit = True
+            builder.scroll_behavior = 'zoom'
+            builder.menu = 'all'
+            builder.enable_editing = False
+            builder.enable_keys = False  # switch not working
+            builder.enable_search = True  # switch not working
+            builder.full_screen_button = True  # switch not working
+            builder.highlight_missing = True
+            builder.show_gene_reaction_rules = True
+            builder.enable_tooltips = 'label'
+            builder.save_html(f"{apps_dir}/{map_id}.html")
+    # apply annotation to universe (sorted, so the output is reproducible)
+    for g in universe.genes:
+        g.annotation['drawn_in_maps'] = sorted(drawngid_to_maps.get(g.id, ()))
+    for r in universe.reactions:
+        r.annotation['drawn_in_maps'] = sorted(drawnrid_to_maps.get(r.id, ()))
+    for m in universe.metabolites:
+        puremid = m.id.rsplit('_', 1)[0]
+        m.annotation['drawn_in_maps'] = sorted(drawnpuremid_to_maps.get(puremid, ()))
+    return universe

gsrap/commons/.ipynb_checkpoints/medium-checkpoint.py CHANGED Viewed

@@ -12,7 +12,7 @@ def apply_medium_given_column(logger, model, medium, column, is_reference=False)
     # retrieve metadata
     description = column.iloc[0]
     doi = column.iloc[1]
-    author = column.iloc[2]
+    curator = column.iloc[2]
     units = column.iloc[3]

gsrap/commons/.ipynb_checkpoints/metrics-checkpoint.py CHANGED Viewed

@@ -58,8 +58,8 @@ def show_contributions(logger, db, goodbefore):
         return 0
-    # create a counter for each author
-    cnt = {author: 0 for author in db['curators']['username']}
+    # create a counter for each curator
+    cnt = {curator: 0 for curator in db['curators']['username']}
     cnt_tot = 0
@@ -67,9 +67,9 @@ def show_contributions(logger, db, goodbefore):
         if type(row['curator']) != str:
             logger.error(f"Missing curator in tab 'R', rid '{row['rid']}'.")
             return 1
-        for author in row['curator'].split(';'):
-            author = author.rstrip().strip()
-            cnt[author] += 1
+        for curator in row['curator'].split(';'):
+            curator = curator.rstrip().strip()
+            cnt[curator] += 1
             cnt_tot += 1
@@ -77,18 +77,18 @@ def show_contributions(logger, db, goodbefore):
         if type(row['curator']) != str:
             logger.error(f"Missing curator in tab 'T', rid '{row['rid']}'.")
             return 1
-        for author in row['curator'].split(';'):
-            author = author.rstrip().strip()
-            cnt[author] += 1
+        for curator in row['curator'].split(';'):
+            curator = curator.rstrip().strip()
+            cnt[curator] += 1
             cnt_tot += 1
     # compute percentages:
-    pct = {author: cnt[author]/cnt_tot*100 for author in cnt.keys()}
+    pct = {curator: cnt[curator]/cnt_tot*100 for curator in cnt.keys()}
     # sort in descending order:
     pct = dict(sorted(pct.items(), key=lambda item: item[1], reverse=True))
     # convert to string:
-    pct = {author: f'{round(pct[author],2)}%' for author in pct.keys()}
+    pct = {curator: f'{round(pct[curator],2)}%' for curator in pct.keys()}
     logger.debug(f"Contributions: {pct}.")

gsrap/commons/biomass.py CHANGED Viewed

@@ -28,7 +28,7 @@ def get_biomass_dict(logger, universe, dbexp):
     fraction_to_precursors = dict()
     for fraction in ['DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
         fraction_db = dbexp[fraction]
-        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'author', 'units'])
+        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
         precursors = [f"{i}_c" for i in fraction_db.index.dropna()]
         for i in precursors:
             if i not in universal_mids:

gsrap/commons/coeffs.py CHANGED Viewed

@@ -26,7 +26,7 @@ def check_exp_biomass_data(logger, dbexp, biomass):
     ftd = dict()  # fraction_to_decimals
     for sheet in ['MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA']:
         fraction_db = dbexp[sheet][biomass].dropna() # they should be str
-        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'author', 'units'])
+        fraction_db = fraction_db.iloc[4:] # ignore ['description', 'doi', 'curator', 'units'])
         ftd[sheet] = {i: Decimal(val) for i, val in fraction_db.items() if Decimal(val) != Decimal('0.0')} # convert to dict
         if sum(ftd[sheet].values()) != Decimal('1.0'):  # check if the sum gives 1 (g/gDW or mol/mol depending on 'sheet')
             logger.error(f"Biomass data provided in sheet '{sheet}' for ID '{biomass}' does not sum up to 1.0. Missing mass is {Decimal('1.0')-sum(ftd[sheet].values())}.")

gsrap/commons/downloads.py CHANGED Viewed

@@ -41,7 +41,7 @@ def get_dbuni(logger):
     sheet_id = "1dXJBIFjCghrdvQtxEOYlVNWAQU4mK-nqLWyDQeUZqek"
-    #sheet_id = "1dCVOOnpNg7rK3iZmTDz3wybW7YrUNoClnqezT9Q5bpc" # alternative
+    #sheet_id = "15fIBewG1B1jIbg1_9pMnyPJL7LWCLMD8s_vlf3ZUno0"  # alternative
     url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=xlsx"
     response = requests.get(url)  # download the requested file
     if response.status_code == 200:
@@ -100,7 +100,7 @@ def get_dbexp(logger):
     # check table presence
     sheet_names = exceldb.sheet_names
-    for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', 'authors']:
+    for i in ['media', 'PM1', 'PM2A', 'PM3B', 'PM4A', 'MWF', 'DNA', 'RNA', 'PROTS', 'LIPIDS_PL', 'LIPIDS_FA', 'curators']:
         if i not in sheet_names:
             logger.error(f"Sheet '{i}' is missing!")
             return 1
@@ -119,7 +119,7 @@ def get_dbexp(logger):
     dbexp['PROTS'] = exceldb.parse('PROTS')
     dbexp['LIPIDS_PL'] = exceldb.parse('LIPIDS_PL')
     dbexp['LIPIDS_FA'] = exceldb.parse('LIPIDS_FA')
-    dbexp['authors'] = exceldb.parse('authors')
+    dbexp['curators'] = exceldb.parse('curators')
     # format tables (media):

gsrap/commons/escherutils.py CHANGED Viewed

@@ -1,8 +1,15 @@
 import warnings
 import logging
 import threading
+import zipfile
+from pathlib import Path
+import tempfile
+import json
+import os
+import shutil
 import cobra
+from escher import Builder
 from .downloads import SimpleLoadingWheel
@@ -109,4 +116,143 @@ def count_undrawn_rids_focus(logger, universe, lastmap, focus, outdir):
         logger.warning(f"'{outdir}/focus_{focus}.json' created!")
     else:
         logger.info(f"Current '{lastmap['filename']}' is 0 reactions behind. Thank you ♥")
+def parse_zipped_escher(logger, universe, escherzip, outdir):
+    # Parse the zipped 'escher' folder downloaded from Google Drive and annotate
+    # 'universe' with the maps in which each gene/reaction/metabolite is drawn.
+    # Expected archive layout: one top-level folder holding one sub-folder per map,
+    # each sub-folder storing versions named '{map_id}-v{N}.json'.
+    # For each map, the highest version is parsed and also exported as a
+    # standalone viewer to '{outdir}/escherapps/{map_id}.html'.
+    #   logger:    logger receiving info/error messages.
+    #   universe:  object exposing 'genes', 'reactions' and 'metabolites'; every item
+    #              gets annotation['drawn_in_maps'] (sorted list of map IDs, maybe empty).
+    #   escherzip: path to the zipped folder.
+    #   outdir:    main output directory ('{outdir}/escherapps' is recreated from scratch).
+    # Returns the annotated 'universe', or 1 after logging an error.
+    logger.info("Processing collection of hand-drawn Escher maps...")
+    # validate the input BEFORE wiping any previously generated output
+    zip_path = Path(escherzip)  # convert to Path for convenience
+    if not zip_path.exists():
+        logger.error(f"Zipped escher does not exist at provided path '{zip_path}'.")
+        return 1
+    # prepare empty apps folder
+    apps_dir = f'{outdir}/escherapps'
+    shutil.rmtree(apps_dir, ignore_errors=True)
+    os.makedirs(apps_dir, exist_ok=True)
+    drawngid_to_maps = dict()
+    drawnrid_to_maps = dict()
+    drawnpuremid_to_maps = dict()
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        # extract in an auto-deleting temporary directory
+        try:
+            with zipfile.ZipFile(zip_path, "r") as z:
+                z.extractall(tmp_dir)
+        except zipfile.BadZipFile:
+            logger.error(f"Provided path '{zip_path}' is not a valid ZIP archive.")
+            return 1
+        # locate the top-level folder of the ZIP
+        child_dirs = [p for p in Path(tmp_dir).iterdir() if p.is_dir()]
+        if not child_dirs:
+            logger.error(f"No top-level folder found inside '{zip_path}'.")
+            return 1
+        escher_dir = child_dirs[0]
+        # iterate the inner child dirs (one per map):
+        for map_dir in [p for p in escher_dir.iterdir() if p.is_dir()]:
+            map_id = map_dir.name
+            map_files = [f for f in map_dir.iterdir() if f.is_file()]
+            # check presence of maps:
+            if not map_files:
+                # still no maps in this folder
+                continue
+            # check name consistency (text before the first '-v' must be shared):
+            map_files_name = {f.name.split('-v', 1)[0] for f in map_files}
+            if len(map_files_name) != 1:
+                logger.error(f"Name inconsistency in folder '{map_id}'!")
+                return 1
+            # check consistency with the folder
+            if map_files_name != {map_id}:
+                logger.error(f"Versions in '{map_id}' refer to a different map!")
+                return 1
+            # get latest version, keeping track of the file that carries it
+            try:
+                versioned_files = [(int(f.name.split('-v', 1)[-1].replace('.json', '')), f) for f in map_files]
+            except ValueError:
+                logger.error(f"Unparsable version number in folder '{map_id}'!")
+                return 1
+            latest_filepath = str(max(versioned_files, key=lambda pair: pair[0])[1])
+            # read json (last version) to fill the dicts
+            with open(latest_filepath, 'r', encoding='utf-8') as file:
+                json_data = json.load(file)
+            # get elements on Escher (second item of the JSON holds the drawing):
+            map_drawngids = set()
+            map_drawnrids = set()
+            map_drawnpuremids = set()
+            for reaction in json_data[1]['reactions'].values():
+                map_drawnrids.add(reaction['bigg_id'])
+                for gene in reaction['genes']:
+                    map_drawngids.add(gene['bigg_id'])
+            for node in json_data[1]['nodes'].values():
+                if node['node_type'] == 'metabolite':
+                    # strip the text after the last '_' (presumably the compartment)
+                    map_drawnpuremids.add(node['bigg_id'].rsplit('_', 1)[0])
+            # populate dicts
+            for drawn_ids, id_to_maps in (
+                (map_drawngids, drawngid_to_maps),
+                (map_drawnrids, drawnrid_to_maps),
+                (map_drawnpuremids, drawnpuremid_to_maps),
+            ):
+                for drawn_id in drawn_ids:
+                    id_to_maps.setdefault(drawn_id, set()).add(map_id)
+            # read json (last version) to create the escher app
+            # (must happen while the temporary directory still exists)
+            builder = Builder(
+                map_json = latest_filepath,
+                model_json = None,
+            )
+            builder.never_ask_before_quit = True
+            builder.scroll_behavior = 'zoom'
+            builder.menu = 'all'
+            builder.enable_editing = False
+            builder.enable_keys = False  # switch not working
+            builder.enable_search = True  # switch not working
+            builder.full_screen_button = True  # switch not working
+            builder.highlight_missing = True
+            builder.show_gene_reaction_rules = True
+            builder.enable_tooltips = 'label'
+            builder.save_html(f"{apps_dir}/{map_id}.html")
+    # apply annotation to universe (sorted, so the output is reproducible)
+    for g in universe.genes:
+        g.annotation['drawn_in_maps'] = sorted(drawngid_to_maps.get(g.id, ()))
+    for r in universe.reactions:
+        r.annotation['drawn_in_maps'] = sorted(drawnrid_to_maps.get(r.id, ()))
+    for m in universe.metabolites:
+        puremid = m.id.rsplit('_', 1)[0]
+        m.annotation['drawn_in_maps'] = sorted(drawnpuremid_to_maps.get(puremid, ()))
+    return universe

gsrap/commons/medium.py CHANGED Viewed

@@ -12,7 +12,7 @@ def apply_medium_given_column(logger, model, medium, column, is_reference=False)
     # retrieve metadata
     description = column.iloc[0]
     doi = column.iloc[1]
-    author = column.iloc[2]
+    curator = column.iloc[2]
     units = column.iloc[3]

gsrap 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl

gsrap 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl