rcsb.exdb 0.96__tar.gz → 0.98__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/HISTORY.txt +4 -1
- {rcsb.exdb-0.96/rcsb.exdb.egg-info → rcsb.exdb-0.98}/PKG-INFO +1 -1
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/cli/__init__.py +1 -1
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/fixturePdbxLoader.py +4 -4
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testExDbWorkflow.py +1 -1
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testTreeNodeListWorker.py +2 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tree/TreeNodeListWorker.py +37 -29
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +2 -1
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/ExDbWorkflow.py +2 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/GlycanEtlWorkflow.py +2 -1
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/PubChemEtlWorkflow.py +3 -2
- {rcsb.exdb-0.96 → rcsb.exdb-0.98/rcsb.exdb.egg-info}/PKG-INFO +1 -1
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/LICENSE +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/MANIFEST.in +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/README.md +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/GlycanProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/GlycanUtils.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationAdapter.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationUtils.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/cli/ExDbExec.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/entry/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationUtils.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tree/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectValidator.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/__init__.py +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/SOURCES.txt +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/entry_points.txt +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/not-zip-safe +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/requires.txt +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/top_level.txt +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/requirements.txt +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/setup.cfg +0 -0
- {rcsb.exdb-0.96 → rcsb.exdb-0.98}/setup.py +0 -0
|
@@ -89,4 +89,7 @@
|
|
|
89
89
|
9-Mar-2023 V0.94 Update ExDbWorkflow to make use of multiple processors for 'upd_ref_seq' operation;
|
|
90
90
|
Lower refChunkSize to 10 for requests to UniProt API
|
|
91
91
|
13-Mar-2023 V0.95 Updates to PubChem workflow to use multiprocess count, disable git stash testing, remove obsolete entries from test data
|
|
92
|
-
12-Apr-2023 V0.96 Add CARD ontology data to tree builder
|
|
92
|
+
12-Apr-2023 V0.96 Add CARD ontology data to tree builder
|
|
93
|
+
1-Jun-2023 V0.97 Don't back up resources to GitHub during cache update workflows
|
|
94
|
+
8-Aug-2023 V0.98 Reduce memory and cpu footprint for Azure test cases;
|
|
95
|
+
Load full (unfiltered) taxonomy tree node list, and stop loading GO tree
|
|
@@ -64,8 +64,8 @@ class PdbxLoaderFixture(unittest.TestCase):
|
|
|
64
64
|
self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
|
|
65
65
|
self.__cachePath = os.path.join(TOPDIR, "CACHE")
|
|
66
66
|
self.__readBackCheck = True
|
|
67
|
-
self.__numProc =
|
|
68
|
-
self.__chunkSize =
|
|
67
|
+
self.__numProc = 1
|
|
68
|
+
self.__chunkSize = 5
|
|
69
69
|
self.__fileLimit = 38
|
|
70
70
|
self.__documentStyle = "rowwise_by_name_with_cardinality"
|
|
71
71
|
#
|
|
@@ -172,7 +172,7 @@ class PdbxLoaderFixture(unittest.TestCase):
|
|
|
172
172
|
{
|
|
173
173
|
"databaseName": "pdbx_core",
|
|
174
174
|
"collectionNameList": None,
|
|
175
|
-
"loadType": "
|
|
175
|
+
"loadType": "replace",
|
|
176
176
|
"mergeContentTypes": ["vrpt"],
|
|
177
177
|
"validationLevel": "min",
|
|
178
178
|
"inputIdCodeList": self.__pdbIdList
|
|
@@ -228,7 +228,7 @@ class PdbxLoaderFixture(unittest.TestCase):
|
|
|
228
228
|
fileLimit=kwargs.get("fileLimit", self.__fileLimit),
|
|
229
229
|
verbose=self.__verbose,
|
|
230
230
|
readBackCheck=self.__readBackCheck,
|
|
231
|
-
maxStepLength=
|
|
231
|
+
maxStepLength=1000,
|
|
232
232
|
useSchemaCache=True,
|
|
233
233
|
rebuildSchemaFlag=False,
|
|
234
234
|
)
|
|
@@ -59,7 +59,7 @@ class ExDbWorkflowTests(unittest.TestCase):
|
|
|
59
59
|
"restoreUseGit": True,
|
|
60
60
|
"restoreUseStash": False,
|
|
61
61
|
}
|
|
62
|
-
self.__loadCommonD = {"readBackCheck": True, "numProc": 2, "chunkSize": 5, "refChunkSize": 5, "loadType": "full"}
|
|
62
|
+
self.__loadCommonD = {"readBackCheck": True, "numProc": 2, "chunkSize": 5, "refChunkSize": 5, "loadType": "full", "useFilteredLists": True}
|
|
63
63
|
#
|
|
64
64
|
# These are test source files for chemical component/BIRD indices
|
|
65
65
|
ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
|
|
@@ -63,6 +63,7 @@ class TreeNodeListWorkerTests(unittest.TestCase):
|
|
|
63
63
|
self.__debugFlag = False
|
|
64
64
|
self.__loadType = "full"
|
|
65
65
|
self.__useCache = True
|
|
66
|
+
self.__useFilteredLists = True
|
|
66
67
|
#
|
|
67
68
|
self.__startTime = time.time()
|
|
68
69
|
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
@@ -88,6 +89,7 @@ class TreeNodeListWorkerTests(unittest.TestCase):
|
|
|
88
89
|
verbose=self.__debugFlag,
|
|
89
90
|
readBackCheck=self.__readBackCheck,
|
|
90
91
|
useCache=self.__useCache,
|
|
92
|
+
useFilteredLists=self.__useFilteredLists,
|
|
91
93
|
)
|
|
92
94
|
#
|
|
93
95
|
ok = rhw.load(updateId, loadType=self.__loadType, doLoad=self.__doLoad)
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
# Updates:
|
|
8
8
|
# 9-Sep-2019 jdw add AtcProvider() and ChemrefExtractor() for ATC tree.
|
|
9
9
|
# 12-Apr-2023 dwp add CARD ontology tree
|
|
10
|
+
# 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
|
|
10
11
|
#
|
|
11
12
|
##
|
|
12
13
|
__docformat__ = "google en"
|
|
@@ -20,17 +21,17 @@ import os.path
|
|
|
20
21
|
from rcsb.db.mongo.DocumentLoader import DocumentLoader
|
|
21
22
|
from rcsb.db.processors.DataExchangeStatus import DataExchangeStatus
|
|
22
23
|
from rcsb.exdb.chemref.ChemRefExtractor import ChemRefExtractor
|
|
23
|
-
from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
|
|
24
|
-
from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
|
|
25
24
|
from rcsb.utils.chemref.AtcProvider import AtcProvider
|
|
26
25
|
from rcsb.utils.ec.EnzymeDatabaseProvider import EnzymeDatabaseProvider
|
|
27
26
|
from rcsb.utils.targets.CARDTargetOntologyProvider import CARDTargetOntologyProvider
|
|
28
|
-
from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
|
|
29
27
|
from rcsb.utils.struct.CathClassificationProvider import CathClassificationProvider
|
|
30
28
|
from rcsb.utils.struct.EcodClassificationProvider import EcodClassificationProvider
|
|
31
29
|
from rcsb.utils.struct.ScopClassificationProvider import ScopClassificationProvider
|
|
32
30
|
from rcsb.utils.struct.Scop2ClassificationProvider import Scop2ClassificationProvider
|
|
33
31
|
from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider
|
|
32
|
+
from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
|
|
33
|
+
# from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
|
|
34
|
+
# from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
|
|
34
35
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
@@ -38,7 +39,7 @@ logger = logging.getLogger(__name__)
|
|
|
38
39
|
class TreeNodeListWorker(object):
|
|
39
40
|
"""Prepare and load repository holdings and repository update data."""
|
|
40
41
|
|
|
41
|
-
def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, readBackCheck=False, documentLimit=None, verbose=False, useCache=False):
|
|
42
|
+
def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, readBackCheck=False, documentLimit=None, verbose=False, useCache=False, useFilteredLists=False):
|
|
42
43
|
self.__cfgOb = cfgOb
|
|
43
44
|
self.__cachePath = os.path.abspath(cachePath)
|
|
44
45
|
self.__readBackCheck = readBackCheck
|
|
@@ -50,6 +51,7 @@ class TreeNodeListWorker(object):
|
|
|
50
51
|
self.__verbose = verbose
|
|
51
52
|
self.__statusList = []
|
|
52
53
|
self.__useCache = useCache
|
|
54
|
+
self.__useFilteredLists = useFilteredLists
|
|
53
55
|
|
|
54
56
|
def __updateStatus(self, updateId, databaseName, collectionName, status, startTimestamp):
|
|
55
57
|
try:
|
|
@@ -122,19 +124,21 @@ class TreeNodeListWorker(object):
|
|
|
122
124
|
# collectionVersion = self.__cfgOb.get("COLLECTION_VERSION_STRING", sectionName=sectionName)
|
|
123
125
|
# addValues = {"_schema_version": collectionVersion}
|
|
124
126
|
addValues = None
|
|
125
|
-
#
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
127
|
+
#
|
|
128
|
+
# --- GO - TURNED OFF 08 Aug 2023 dwp (tree is now loaded in DW)
|
|
129
|
+
# goP = GeneOntologyProvider(goDirPath=os.path.join(self.__cachePath, "go"), useCache=useCache)
|
|
130
|
+
# ok = goP.testCache()
|
|
131
|
+
# anEx = AnnotationExtractor(self.__cfgOb)
|
|
132
|
+
# goIdL = anEx.getUniqueIdentifiers("GO")
|
|
133
|
+
# logger.info("Unique GO assignments %d", len(goIdL))
|
|
134
|
+
# nL = goP.exportTreeNodeList(goIdL)
|
|
135
|
+
# logger.info("GO tree node list length %d", len(nL))
|
|
136
|
+
# if doLoad:
|
|
137
|
+
# collectionName = "tree_go_node_list"
|
|
138
|
+
# ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
139
|
+
# self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
140
|
+
#
|
|
141
|
+
# ---- CATH
|
|
138
142
|
ccu = CathClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
139
143
|
nL = ccu.getTreeNodeList()
|
|
140
144
|
logger.info("Starting load SCOP node tree length %d", len(nL))
|
|
@@ -183,19 +187,23 @@ class TreeNodeListWorker(object):
|
|
|
183
187
|
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
184
188
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
185
189
|
# ---- Taxonomy
|
|
186
|
-
# Get the taxon coverage in the current data set -
|
|
187
|
-
epe = TaxonomyExtractor(self.__cfgOb)
|
|
188
|
-
tL = epe.getUniqueTaxons()
|
|
189
|
-
logger.info("Taxon coverage length %d", len(tL))
|
|
190
|
-
#
|
|
191
190
|
tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
191
|
+
if self.__useFilteredLists:
|
|
192
|
+
# Get the taxon coverage in the current data set -
|
|
193
|
+
epe = TaxonomyExtractor(self.__cfgOb)
|
|
194
|
+
tL = epe.getUniqueTaxons()
|
|
195
|
+
logger.info("Taxon coverage length %d", len(tL))
|
|
196
|
+
#
|
|
197
|
+
fD = {1}
|
|
198
|
+
for taxId in tL:
|
|
199
|
+
fD.update({k: True for k in tU.getLineage(taxId)})
|
|
200
|
+
logger.info("Taxon filter dictionary length %d", len(fD))
|
|
201
|
+
logger.debug("fD %r", sorted(fD))
|
|
202
|
+
#
|
|
203
|
+
nL = tU.exportNodeList(filterD=fD)
|
|
204
|
+
else:
|
|
205
|
+
# Get the full taxon node list without filtering
|
|
206
|
+
nL = tU.exportNodeList()
|
|
199
207
|
self.__checkTaxonNodeList(nL)
|
|
200
208
|
logger.info("Starting load of taxonomy node tree length %d", len(nL))
|
|
201
209
|
if doLoad:
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
# Workflow wrapper -- Entry-level annotations extracted from ExDB
|
|
6
6
|
#
|
|
7
7
|
# Updates:
|
|
8
|
+
# 1-Jun-2023 aae Don't back up resources to GitHub during cache update workflows
|
|
8
9
|
#
|
|
9
10
|
##
|
|
10
11
|
__docformat__ = "google en"
|
|
@@ -59,7 +60,7 @@ class EntryInfoEtlWorkflow(object):
|
|
|
59
60
|
eiP.update(self.__cfgOb, fmt="json", indent=3)
|
|
60
61
|
#
|
|
61
62
|
if backup:
|
|
62
|
-
ok = eiP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=True, useStash=True)
|
|
63
|
+
ok = eiP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=False, useStash=True)
|
|
63
64
|
logger.info("Backup entry-level annotations (%r)", ok)
|
|
64
65
|
else:
|
|
65
66
|
ok = True
|
|
@@ -91,6 +91,7 @@ class ExDbWorkflow(object):
|
|
|
91
91
|
rebuildSequenceCache = kwargs.get("rebuildSequenceCache", False)
|
|
92
92
|
useSequenceCache = not rebuildSequenceCache
|
|
93
93
|
#
|
|
94
|
+
useFilteredLists = kwargs.get("useFilteredLists", False)
|
|
94
95
|
except Exception as e:
|
|
95
96
|
logger.exception("Argument or configuration processing failing with %s", str(e))
|
|
96
97
|
return False
|
|
@@ -107,6 +108,7 @@ class ExDbWorkflow(object):
|
|
|
107
108
|
verbose=self.__debugFlag,
|
|
108
109
|
readBackCheck=readBackCheck,
|
|
109
110
|
useCache=self.__useCache,
|
|
111
|
+
useFilteredLists=useFilteredLists,
|
|
110
112
|
)
|
|
111
113
|
ok = rhw.load(dataSetId, loadType=loadType)
|
|
112
114
|
okS = self.loadStatus(rhw.getLoadStatus(), readBackCheck=readBackCheck)
|
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
# Workflow wrapper -- Glycan ETL utilities
|
|
6
6
|
#
|
|
7
7
|
# Updates:
|
|
8
|
+
# 1-Jun-2023 aae Don't back up resources to GitHub during cache update workflows
|
|
8
9
|
#
|
|
9
10
|
##
|
|
10
11
|
__docformat__ = "google en"
|
|
@@ -64,7 +65,7 @@ class GlycanEtlWorkflow(object):
|
|
|
64
65
|
logger.info("Matched glycan identifiers (%d)", len(riD))
|
|
65
66
|
#
|
|
66
67
|
if backup:
|
|
67
|
-
ok2 = gP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=True, useStash=True)
|
|
68
|
+
ok2 = gP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=False, useStash=True)
|
|
68
69
|
logger.info("Backup matched glycan identifiers (%r)", ok2)
|
|
69
70
|
else:
|
|
70
71
|
ok2 = True
|
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
#
|
|
7
7
|
# Updates:
|
|
8
8
|
# 13-Mar-2023 aae Updates to use multiprocess count, disable git stash testing
|
|
9
|
+
# 1-Jun-2023 aae Don't back up resources to GitHub during cache update workflows
|
|
9
10
|
##
|
|
10
11
|
__docformat__ = "google en"
|
|
11
12
|
__author__ = "John Westbrook"
|
|
@@ -168,7 +169,7 @@ class PubChemEtlWorkflow(object):
|
|
|
168
169
|
rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
|
|
169
170
|
exportPath = kwargs.get("exportPath", None)
|
|
170
171
|
useStash = kwargs.get("useStash", True)
|
|
171
|
-
useGit = kwargs.get("useGit", True)
|
|
172
|
+
useGit = kwargs.get("useGit", False)
|
|
172
173
|
#
|
|
173
174
|
pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
|
|
174
175
|
ok1 = pcewP.updateIndex(
|
|
@@ -207,7 +208,7 @@ class PubChemEtlWorkflow(object):
|
|
|
207
208
|
# --
|
|
208
209
|
numProc = kwargs.get("numProc", 2)
|
|
209
210
|
useStash = kwargs.get("useStash", True)
|
|
210
|
-
useGit = kwargs.get("useGit", True)
|
|
211
|
+
useGit = kwargs.get("useGit", False)
|
|
211
212
|
#
|
|
212
213
|
pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
|
|
213
214
|
ok1 = pcewP.updateMatchedData(numProc=numProc)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py
RENAMED
|
File without changes
|
{rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|