PyPI - rcsb.exdb - Versions diffs - 0.94__tar.gz → 0.96__tar.gz - Mend

rcsb.exdb 0.94.tar.gz → 0.96.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96) hide show

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/HISTORY.txt RENAMED Viewed

@@ -88,3 +88,5 @@
  9-Jan-2023 V0.93 Configuration changes to support tox 4
  9-Mar-2023 V0.94 Update ExDbWorkflow to make use of multiple processors for 'upd_ref_seq' operation;
                   Lower refChunkSize to 10 for requests to UniProt API
+13-Mar-2023 V0.95 Updates to PubChem workflow to use multiprocess count, disable git stash testing, remove obsolete entries from test data
+12-Apr-2023 V0.96 Add CARD ontology data to tree builder

{rcsb.exdb-0.94/rcsb.exdb.egg-info → rcsb.exdb-0.96}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: rcsb.exdb
-Version: 0.94
+Version: 0.96
 Summary: RCSB Python ExDB data extraction and loading workflows
 Home-page: https://github.com/rcsb/py-rcsb_exdb
 Author: John Westbrook

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/PubChemDataCacheProvider.py RENAMED Viewed

@@ -8,6 +8,7 @@
 #  9-May-2020 jdw separate cache behavior with separate option rebuildChemIndices=True/False
 # 16-Jul-2020 jdw separate index and reference data management.
 # 23-Jul-2021 jdw Make PubChemDataCacheProvider a subclass of StashableBase()
+# 15-Mar-2023 aae Update default numProc to 2
 #
 ##
 __docformat__ = "google en"
@@ -228,12 +229,12 @@ class PubChemDataCacheProvider(StashableBase):
         # --
         return numUpd
-    def updateMissing(self, idList, exportPath=None, numProc=1, chunkSize=5):
+    def updateMissing(self, idList, exportPath=None, numProc=2, chunkSize=5):
         """Fetch and load reference data for any missing PubChem ID codes in the input list.
         Args:
             idList (list): PubChem ID codes
-            numProc (int, optional): number of processor to use. Defaults to 1.
+            numProc (int, optional): number of processors to use. Defaults to 2.
             chunkSize (int, optional): chunk size between data store updates. Defaults to 5.
             exportPath (str, optional): store raw fetched data in this path. Defaults to None.
@@ -252,13 +253,13 @@ class PubChemDataCacheProvider(StashableBase):
         return ok, failList
-    def load(self, idList, exportPath=None, numProc=1, chunkSize=5):
+    def load(self, idList, exportPath=None, numProc=2, chunkSize=5):
         """Fetch and load reference data for the input list of PubChem compound codes.
         Args:
             idList (list): PubChem ID codes
             exportPath (str, optional): store raw fetched data in this path. Defaults to None.
-            numProc (int, optional): number of processor to use. Defaults to 1.
+            numProc (int, optional): number of processors to use. Defaults to 2.
             chunkSize (int, optional): chunk size between data store updates. Defaults to 5.

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/PubChemEtlWrapper.py RENAMED Viewed

@@ -4,7 +4,7 @@
 #
 #
 # Updates:
-#
+#  14-Mar-2023 aae Updates to use multiprocess count
 #
 ##
 __docformat__ = "google en"
@@ -80,6 +80,8 @@ class PubChemEtlWrapper(object):
         Args:
             contentType (str): target content to stash (data|index|identifiers)
+            useStash (bool):  should stash (Buildlocker) be updated? (default: True)
+            useGit (bool):  should stash (GitHub) be updated? (default: True)
         Returns:
             (bool): True for success or False otherwise
         """
@@ -117,7 +119,8 @@ class PubChemEtlWrapper(object):
             rebuildChemIndices (bool, optional): rebuild indices from source (default: False)
             fetchLimit (int, optional): maximum number of definitions to process (default: None)
             exportPath(str, optional): path to export raw PubChem search results  (default: None)
-            numProc(int):  number processors to include in multiprocessing mode (default: 12)
+            numProcChemComp (int, optional):  number of processors to include in multiprocessing mode for ChemComp indices (default: 8)
+            numProc (int, optional):  number of processors to include in multiprocessing mode for PubChem (default: 2)
             Returns:
                 (bool): True for success or False otherwise
@@ -131,7 +134,8 @@ class PubChemEtlWrapper(object):
             fetchLimit = kwargs.get("fetchLimit", None)
             exportPath = kwargs.get("exportPath", None)
             expireDays = kwargs.get("expireDays", 0)
-            numProc = kwargs.get("numProc", 12)
+            numProcChemComp = kwargs.get("numProcChemComp", 8)
+            numProc = kwargs.get("numProc", 2)
             #  -- Update/create mapping index cache  ---
             ok = self.__pcicP.updateMissing(
@@ -143,6 +147,7 @@ class PubChemEtlWrapper(object):
                 exportPath=exportPath,
                 rebuildChemIndices=rebuildChemIndices,
                 fetchLimit=fetchLimit,
+                numProcChemComp=numProcChemComp,
                 numProc=numProc,
             )
         except Exception as e:
@@ -174,7 +179,7 @@ class PubChemEtlWrapper(object):
         logger.debug("mapD (%d) extraMapD (%d) %r", len(mapD), len(extraMapD), extraMapD)
         return mapD, extraMapD
-    def updateData(self, pcidList, doExport=False):
+    def updateData(self, pcidList, doExport=False, numProc=2):
         """Update PubChem reference data for the input list of compound identifiers.
         Args:
@@ -186,14 +191,14 @@ class PubChemEtlWrapper(object):
         ok = False
         try:
             exportPath = self.__dirPath if doExport else None
-            ok, failList = self.__pcdcP.updateMissing(pcidList, exportPath=exportPath)
+            ok, failList = self.__pcdcP.updateMissing(pcidList, exportPath=exportPath, numProc=numProc)
             if failList:
                 logger.info("No data updated for %r", failList)
         except Exception as e:
             logger.exception("Failing with %s", str(e))
         return ok
-    def updateMatchedData(self, exportRaw=False):
+    def updateMatchedData(self, exportRaw=False, numProc=2):
         """Update PubChem reference data using matched compound identifiers in the current index.
         Returns:
@@ -203,7 +208,7 @@ class PubChemEtlWrapper(object):
         try:
             pcidList = self.getMatches()
             exportPath = self.__dirPath if exportRaw else None
-            ok, failList = self.__pcdcP.updateMissing(pcidList, exportPath=exportPath)
+            ok, failList = self.__pcdcP.updateMissing(pcidList, exportPath=exportPath, numProc=numProc)
             if failList:
                 logger.info("No data updated for %r", failList)
         except Exception as e:

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py RENAMED Viewed

@@ -8,6 +8,7 @@
 #  9-May-2020 jdw separate cache behavior with separate option rebuildChemIndices=True/False
 # 16-Jul-2020 jdw separate index and reference data management.
 # 23-Jul-2021 jdw Make PubChemIndexCacheProvider a subclass of StashableBase()
+#  2-Mar-2023 aae Return correct status from Single proc
 #
 ##
 __docformat__ = "google en"
@@ -291,14 +292,15 @@ class PubChemIndexCacheProvider(StashableBase):
         # --
         return numUpd
-    def updateMissing(self, expireDays=0, fetchLimit=None, updateUnmatched=True, numProc=12, **kwargs):
+    def updateMissing(self, expireDays=0, fetchLimit=None, updateUnmatched=True, numProcChemComp=8, numProc=2, **kwargs):
         """Update match index from object store
         Args:
             expireDays (int): expiration days on match data (default 0 meaning none)
             fetchLimit (int): limit to the number of entry updates performed (None)
             updateUnmatched (bool): Previously unmatched search definitions will be retried on update (default=True)
-            numProc (int): for rebuilding local chemical indices the number processors to apply (default=12)
+            numProcChemComp (int): number of processors to apply when rebuilding local ChemComp indices (default=8)
+            numProc (int): number of processors to apply when rebuilding local PubChem indices (default=2)
         Returns:
             bool: True for success or False otherwise
@@ -320,7 +322,7 @@ class PubChemIndexCacheProvider(StashableBase):
         try:
             # ---
             # Get the current indices of source chemical reference data -
-            ok, ccidxP, ccsidxP = self.__rebuildChemCompSourceIndices(numProc, **kwargs)
+            ok, ccidxP, ccsidxP = self.__rebuildChemCompSourceIndices(numProcChemComp, **kwargs)
             if not ok:
                 return matchD
             #
@@ -338,8 +340,8 @@ class PubChemIndexCacheProvider(StashableBase):
             updateIdList = updateIdList[:fetchLimit] if fetchLimit else updateIdList
             #
             if updateIdList:
-                logger.info("Update reference data cache for %d chemical identifers", len(updateIdList))
-                ok, failList = self.__updateReferenceData(updateIdList, searchIdxD, **kwargs)
+                logger.info("Update reference data cache for %d chemical identifiers", len(updateIdList))
+                ok, failList = self.__updateReferenceData(updateIdList, searchIdxD, numProc, **kwargs)
                 logger.info("Update reference data return status is %r missing count %d", ok, len(failList))
             else:
                 logger.info("No reference data updates required")
@@ -498,7 +500,7 @@ class PubChemIndexCacheProvider(StashableBase):
         objD = obEx.getObjects()
         return objD
-    def __updateReferenceData(self, idList, searchIdxD, **kwargs):
+    def __updateReferenceData(self, idList, searchIdxD, numProc=2, **kwargs):
         """Launch worker methods to update chemical reference data correspondences.
         Args:
@@ -507,7 +509,6 @@ class PubChemIndexCacheProvider(StashableBase):
         Returns:
             (bool, list): status flag, list of unmatched identifiers
         """
-        numProc = 1
         chunkSize = 50
         exportPath = kwargs.get("exportPath", None)
         logger.info("Length starting list is %d", len(idList))
@@ -522,7 +523,7 @@ class PubChemIndexCacheProvider(StashableBase):
         else:
             successList, _, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
             failList = list(set(idList) - set(successList))
-            ok = len(failList) > 0
+            ok = len(failList) == 0
             logger.info("Single-proc status %r failures %r", ok, len(failList))
         #
         return ok, failList
@@ -602,8 +603,6 @@ class PubChemIndexCacheProvider(StashableBase):
             logSizes = kwargs.get("logSizes", False)
             limitPerceptions = kwargs.get("limitPerceptions", False)
             #
-            # numProc = kwargs.get("numProc", 1)
-            # numProc = self.__numProc
             chunkSize = kwargs.get("chunkSize", 5)
             molLimit = kwargs.get("molLimit", None)
             ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc-full")

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/cli/__init__.py RENAMED Viewed

@@ -2,4 +2,4 @@ __docformat__ = "google en"
 __author__ = "John Westbrook"
 __email__ = "john.westbrook@rcsb.org"
 __license__ = "Apache 2.0"
-__version__ = "0.94"
+__version__ = "0.96"

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testGlycanUtils.py RENAMED Viewed

@@ -4,6 +4,7 @@
 # Date:    25-May-2021
 #
 # Update:
+#   9-Feb-2023 aae  Fix TOPDIR path
 ##
 """
 Tests for creating glycan accession mapping details.
@@ -22,7 +23,7 @@ from rcsb.exdb.branch.GlycanUtils import GlycanUtils
 from rcsb.utils.config.ConfigUtil import ConfigUtil
 HERE = os.path.abspath(os.path.dirname(__file__))
-TOPDIR = os.path.dirname(os.path.dirname(HERE))
+TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
 logger = logging.getLogger()

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testPubChemEtlWorkflow.py RENAMED Viewed

@@ -4,7 +4,7 @@
 # Date:    29-Jul-2020
 #
 # Updates:
-#
+#  13-Mar-2023 aae Disable git stash testing
 ##
 """
 Tests for PubChem ETL workflow methods
@@ -51,6 +51,10 @@ class PubChemEtlWorkflowTests(unittest.TestCase):
         self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
         self.__ccFileNamePrefix = "cc-abbrev"
         #
+        # This tests pushing files to the stash
+        self.__testStashServer = True
+        self.__testStashGit = False
+        #
         self.__startTime = time.time()
         logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
@@ -71,8 +75,10 @@ class PubChemEtlWorkflowTests(unittest.TestCase):
                 ccUrlTarget=self.__ccUrlTarget,
                 birdUrlTarget=self.__birdUrlTarget,
                 ccFileNamePrefix=self.__ccFileNamePrefix,
-                numProc=4,
+                numProcChemComp=4,
                 rebuildChemIndices=True,
+                useStash=self.__testStashServer,
+                useGit=self.__testStashGit
             )
             self.assertTrue(ok)
         except Exception as e:
@@ -84,7 +90,7 @@ class PubChemEtlWorkflowTests(unittest.TestCase):
         try:
             #  --
             pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
-            ok = pcewP.dump()
+            ok = pcewP.dump(useStash=self.__testStashServer, useGit=self.__testStashGit)
             self.assertTrue(ok)
         except Exception as e:
             logger.exception("Failing with %s", str(e))
@@ -106,7 +112,7 @@ class PubChemEtlWorkflowTests(unittest.TestCase):
         try:
             #  --
             pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
-            ok = pcewP.updateMatchedData()
+            ok = pcewP.updateMatchedData(useStash=self.__testStashServer, useGit=self.__testStashGit)
             self.assertTrue(ok)
         except Exception as e:
             logger.exception("Failing with %s", str(e))

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testPubChemEtlWrapper.py RENAMED Viewed

@@ -4,7 +4,8 @@
 # Date:    20-Jul-2020
 #
 # Updates:
-#
+#  13-Mar-2023 aae Updates to use multiprocess count, disable git stash testing,
+#                  Fix tests after removing obsolete entries from test data
 ##
 """
 Tests for PubChem ETL wrapper methods
@@ -56,11 +57,15 @@ class PubChemEtlWrapperTests(unittest.TestCase):
         # These are test source files for chemical component/BIRD indices
         self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
         self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
-        self.__numComponents = 30
+        self.__numComponents = 25
         self.__numSelectMatches = 23
         self.__numAltMatches = 2
         self.__numTotalMatches = 50
         #
+        # This tests pushing files to the stash
+        self.__testStashServer = True
+        self.__testStashGit = False
+        #
         self.__startTime = time.time()
         logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
@@ -83,7 +88,7 @@ class PubChemEtlWrapperTests(unittest.TestCase):
                 ccFileNamePrefix="cc-abbrev",
                 exportPath=self.__dirPath,
                 rebuildChemIndices=True,
-                numProc=4,
+                numProcChemComp=4,
             )
             self.assertTrue(ok)
             #
@@ -97,7 +102,7 @@ class PubChemEtlWrapperTests(unittest.TestCase):
             #
             ok = pcewP.dump(contentType="index")
             self.assertTrue(ok)
-            ok = pcewP.toStash(contentType="index")
+            ok = pcewP.toStash(contentType="index", useStash=self.__testStashServer, useGit=self.__testStashGit)
             self.assertTrue(ok)
         except Exception as e:
             logger.exception("Failing with %s", str(e))
@@ -125,13 +130,13 @@ class PubChemEtlWrapperTests(unittest.TestCase):
             self.assertTrue(ok)
             ok = pcewP.dump(contentType="data")
             self.assertTrue(ok)
-            ok = pcewP.toStash(contentType="data")
+            ok = pcewP.toStash(contentType="data", useStash=self.__testStashServer, useGit=self.__testStashGit)
             self.assertTrue(ok)
             ok = pcewP.updateIdentifiers()
             self.assertTrue(ok)
             ok = pcewP.dump(contentType="identifiers")
             self.assertTrue(ok)
-            ok = pcewP.toStash(contentType="identifiers")
+            ok = pcewP.toStash(contentType="identifiers", useStash=self.__testStashServer, useGit=self.__testStashGit)
             self.assertTrue(ok)
         except Exception as e:
             logger.exception("Failing with %s", str(e))

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py RENAMED Viewed

@@ -4,7 +4,7 @@
 # Date:    16-Jul-2020
 #
 # Updates:
-#
+#  13-Mar-2023 aae Fix tests after removing obsolete entries from test data
 ##
 """
 Tests for PubChem index cache maintenance operations
@@ -66,7 +66,7 @@ class PubChemIndexCacheProviderTests(unittest.TestCase):
         """Test case - search, backup, restore and select PubChem correspondences for reference chemical definitions."""
         try:
             #  -- Update/create mapping index cache ---
-            numObj = 30
+            numObj = 25
             pcicP = PubChemIndexCacheProvider(self.__cfgOb, self.__cachePath)
             pcicP.updateMissing(
                 expireDays=0,

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tree/TreeNodeListWorker.py RENAMED Viewed

@@ -6,7 +6,8 @@
 #
 # Updates:
 #  9-Sep-2019 jdw add AtcProvider() and ChemrefExtractor() for ATC tree.
-# JDW TODO TEST
+# 12-Apr-2023 dwp add CARD ontology tree
+#
 ##
 __docformat__ = "google en"
 __author__ = "John Westbrook"
@@ -23,6 +24,7 @@ from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
 from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
 from rcsb.utils.chemref.AtcProvider import AtcProvider
 from rcsb.utils.ec.EnzymeDatabaseProvider import EnzymeDatabaseProvider
+from rcsb.utils.targets.CARDTargetOntologyProvider import CARDTargetOntologyProvider
 from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
 from rcsb.utils.struct.CathClassificationProvider import CathClassificationProvider
 from rcsb.utils.struct.EcodClassificationProvider import EcodClassificationProvider
@@ -172,6 +174,14 @@ class TreeNodeListWorker(object):
                 collectionName = "tree_ec_node_list"
                 ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
                 self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
+            # ---- CARD
+            cou = CARDTargetOntologyProvider(cachePath=self.__cachePath, useCache=False)
+            nL = cou.getTreeNodeList()
+            logger.info("Starting load of EC node tree length %d", len(nL))
+            if doLoad:
+                collectionName = "tree_card_node_list"
+                ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
+                self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
             # ---- Taxonomy
             # Get the taxon coverage in the current data set -
             epe = TaxonomyExtractor(self.__cfgOb)

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/wf/PubChemEtlWorkflow.py RENAMED Viewed

@@ -5,7 +5,7 @@
 #  Workflow wrapper  --  PubChem ETL utilities
 #
 #  Updates:
-#
+#  13-Mar-2023 aae Updates to use multiprocess count, disable git stash testing
 ##
 __docformat__ = "google en"
 __author__ = "John Westbrook"
@@ -47,23 +47,48 @@ class PubChemEtlWorkflow(object):
             logger.setLevel(logging.DEBUG)
         #
-    def dump(self):
-        """Dump the current object store of PubChem correspondences and data."""
+    def dump(self, **kwargs):
+        """Dump the current object store of PubChem correspondences and data.
+        Args:
+            useStash (bool):  should stash (Buildlocker) be updated? (default: True)
+            useGit (bool):  should stash (GitHub) be updated? (default: True)
+        Returns:
+            (bool): True for success or False otherwise
+        """
         ok1 = ok2 = ok3 = ok4 = False
         try:
+            useStash = kwargs.get("useStash", True)
+            useGit = kwargs.get("useGit", True)  # Revisit stashing in GitHub as file timestamp will always cause a commit
             #  -- Update local chemical indices and  create PubChem mapping index ---
             pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
             sTime = time.time()
             logger.info("Dumping index data")
             ok1 = pcewP.dump(contentType="index")
-            ok2 = pcewP.toStash(contentType="index")
             eTime = time.time()
             logger.info("Dumping index data done in (%.4f seconds)", eTime - sTime)
+            if useGit or useStash:
+                sTime = time.time()
+                logger.info("Stashing index data")
+                ok2 = pcewP.toStash(contentType="index", useStash=useStash, useGit=useGit)
+                eTime = time.time()
+                logger.info("Stashing index data done in (%.4f seconds)", eTime - sTime)
+            else:
+                ok2 = True
             sTime = time.time()
             logger.info("Dumping reference data")
             ok3 = pcewP.dump(contentType="data")
-            ok4 = pcewP.toStash(contentType="data")
+            if useGit or useStash:
+                sTime = time.time()
+                logger.info("Stashing reference data")
+                ok4 = pcewP.toStash(contentType="data", useStash=useStash, useGit=useGit)
+                eTime = time.time()
+                logger.info("Stashing reference data done in (%.4f seconds)", eTime - sTime)
+            else:
+                ok4 = True
             eTime = time.time()
             logger.info("Dumping data done in (%.4f seconds)", eTime - sTime)
         except Exception as e:
@@ -77,7 +102,7 @@ class PubChemEtlWorkflow(object):
             #  -- Update local chemical indices and  create PubChem mapping index ---
             pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
             sTime = time.time()
-            ok1 = pcewP.toStash(contentType="index")
+            ok1 = pcewP. toStash(contentType="index")
             eTime = time.time()
             logger.info("Stashing index data done in (%.4f seconds)", eTime - sTime)
@@ -122,8 +147,11 @@ class PubChemEtlWorkflow(object):
             birdUrlTarget (str, optional): target url for bird dictionary resource file (cc format) (default: None=all public)
             ccFileNamePrefix (str, optional): index file prefix (default: full)
             rebuildChemIndices (bool, optional): rebuild indices from source (default: False)
-            exportPath(str, optional): path to export raw PubChem search results  (default: None)
-            numProc(int):  number processors to include in multiprocessing mode (default: 12)
+            exportPath (str, optional): path to export raw PubChem search results  (default: None)
+            numProcChemComp (int, optional):  number of processors to include in multiprocessing mode for ChemComp indices (default: 8)
+            numProc (int, optional):  number of processors to include in multiprocessing mode for PubChem (default: 2)
+            useStash (bool, optional):  should stash (Buildlocker) be updated? (default: True)
+            useGit (bool, optional):  should stash (GitHub) be updated? (default: True)
         Returns:
             (bool): True for success or False otherwise
@@ -135,9 +163,12 @@ class PubChemEtlWorkflow(object):
             ccUrlTarget = kwargs.get("ccUrlTarget", None)
             birdUrlTarget = kwargs.get("birdUrlTarget", None)
             ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc-full")
-            numProc = kwargs.get("numProc", 12)
+            numProcChemComp = kwargs.get("numProcChemComp", 8)
+            numProc = kwargs.get("numProc", 2)
             rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
             exportPath = kwargs.get("exportPath", None)
+            useStash = kwargs.get("useStash", True)
+            useGit = kwargs.get("useGit", True)
             #
             pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
             ok1 = pcewP.updateIndex(
@@ -146,30 +177,52 @@ class PubChemEtlWorkflow(object):
                 ccFileNamePrefix=ccFileNamePrefix,
                 exportPath=exportPath,
                 rebuildChemIndices=rebuildChemIndices,
+                numProcChemComp=numProcChemComp,
                 numProc=numProc,
             )
             ok2 = pcewP.dump(contentType="index")
-            ok3 = pcewP.toStash(contentType="index")
+            if useGit or useStash:
+                ok3 = pcewP.toStash(contentType="index", useStash=useStash, useGit=useGit)
+            else:
+                ok3 = True
         except Exception as e:
             logger.exception("Failing with %s", str(e))
         #
         return ok1 and ok2 and ok3
-    def updateMatchedData(self):
+    def updateMatchedData(self, **kwargs):
         """Update PubChem annotation data for matched correspondences.  Generate and stash
         related identifiers for corresponding components and BIRD chemical definitions.
+        Args:
+            numProc(int):  number processors to include in multiprocessing mode (default: 2)
+            useStash(bool):  should stash (Buildlocker) be updated? (default: True)
+            useGit(bool):  should stash (GitHub) be updated? (default: True)
+        Returns:
+            (bool): True for success or False otherwise
         """
         try:
             ok1 = ok2 = ok3 = ok4 = ok5 = ok6 = False
             #  --
+            numProc = kwargs.get("numProc", 2)
+            useStash = kwargs.get("useStash", True)
+            useGit = kwargs.get("useGit", True)
+            #
             pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
-            ok1 = pcewP.updateMatchedData()
+            ok1 = pcewP.updateMatchedData(numProc=numProc)
             ok2 = pcewP.dump(contentType="data")
-            ok3 = pcewP.toStash(contentType="data")
+            if useGit or useStash:
+                ok3 = pcewP.toStash(contentType="data", useStash=useStash, useGit=useGit)
+            else:
+                ok3 = True
             #
             ok4 = pcewP.updateIdentifiers()
             ok5 = pcewP.dump(contentType="identifiers")
-            ok6 = pcewP.toStash(contentType="identifiers")
+            if useGit or useStash:
+                ok6 = pcewP.toStash(contentType="identifiers", useStash=useStash, useGit=useGit)
+            else:
+                ok6 = True
             #
         except Exception as e:
             logger.exception("Failing with %s", str(e))

{rcsb.exdb-0.94 → rcsb.exdb-0.96/rcsb.exdb.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: rcsb.exdb
-Version: 0.94
+Version: 0.96
 Summary: RCSB Python ExDB data extraction and loading workflows
 Home-page: https://github.com/rcsb/py-rcsb_exdb
 Author: John Westbrook

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/LICENSE RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/MANIFEST.in RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/README.md RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/branch/BranchedEntityExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/branch/GlycanProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/branch/GlycanUtils.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/branch/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/ChemRefEtlWorker.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/ChemRefExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/ChemRefMappingProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/chemref/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/citation/CitationAdapter.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/citation/CitationExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/citation/CitationUtils.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/citation/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/cli/ExDbExec.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/entry/EntryInfoProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/entry/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/AnnotationExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/LigandNeighborMappingProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/PolymerEntityExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/TaxonomyExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/UniProtCoreEtlWorker.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/UniProtExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/seq/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/fixturePdbxLoader.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testAnnotationExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testBranchedEntityExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testChemRefLoader.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testChemRefMappingProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testCitationAdapter.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testCitationExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testCitationUtils.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testEntryInfoProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testExDbWorkflow.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testGlycanEtlWorkflow.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testGlycanProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testObjectExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testObjectTransformer.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testObjectUpdater.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testPolymerEntityExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testPubChemDataCacheProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testTaxonomyExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testTreeNodeListWorker.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tests/testUniProtExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/tree/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/utils/ObjectAdapterBase.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/utils/ObjectExtractor.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/utils/ObjectTransformer.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/utils/ObjectUpdater.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/utils/ObjectValidator.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/utils/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/wf/ExDbWorkflow.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/wf/GlycanEtlWorkflow.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb/exdb/wf/__init__.py RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb.exdb.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb.exdb.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb.exdb.egg-info/entry_points.txt RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb.exdb.egg-info/not-zip-safe RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb.exdb.egg-info/requires.txt RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/rcsb.exdb.egg-info/top_level.txt RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/requirements.txt RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/setup.cfg RENAMED Viewed

File without changes

{rcsb.exdb-0.94 → rcsb.exdb-0.96}/setup.py RENAMED Viewed

File without changes

rcsb.exdb 0.94__tar.gz → 0.96__tar.gz

rcsb.exdb 0.94tar.gz → 0.96tar.gz