rcsb.exdb 0.97__tar.gz → 0.98__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/HISTORY.txt +3 -1
- {rcsb.exdb-0.97/rcsb.exdb.egg-info → rcsb.exdb-0.98}/PKG-INFO +1 -1
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/cli/__init__.py +1 -1
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/fixturePdbxLoader.py +4 -4
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testExDbWorkflow.py +1 -1
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testTreeNodeListWorker.py +2 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tree/TreeNodeListWorker.py +37 -29
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/wf/ExDbWorkflow.py +2 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98/rcsb.exdb.egg-info}/PKG-INFO +1 -1
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/LICENSE +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/MANIFEST.in +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/README.md +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/branch/GlycanProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/branch/GlycanUtils.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/branch/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/chemref/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationAdapter.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationUtils.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/citation/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/cli/ExDbExec.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/entry/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/seq/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationUtils.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tree/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectValidator.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/utils/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/wf/PubChemEtlWorkflow.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/wf/__init__.py +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/SOURCES.txt +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/entry_points.txt +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/not-zip-safe +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/requires.txt +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/top_level.txt +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/requirements.txt +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/setup.cfg +0 -0
- {rcsb.exdb-0.97 → rcsb.exdb-0.98}/setup.py +0 -0
|
@@ -90,4 +90,6 @@
|
|
|
90
90
|
Lower refChunkSize to 10 for requests to UniProt API
|
|
91
91
|
13-Mar-2023 V0.95 Updates to PubChem workflow to use multiprocess count, disable git stash testing, remove obsolete entries from test data
|
|
92
92
|
12-Apr-2023 V0.96 Add CARD ontology data to tree builder
|
|
93
|
-
1-Jun-2023 V0.97 Don't back up resources to GitHub during cache update workflows
|
|
93
|
+
1-Jun-2023 V0.97 Don't back up resources to GitHub during cache update workflows
|
|
94
|
+
8-Aug-2023 V0.98 Reduce memory and CPU footprint for Azure test cases;
|
|
95
|
+
Load full (unfiltered) taxonomy tree node list, and stop loading GO tree
|
|
@@ -64,8 +64,8 @@ class PdbxLoaderFixture(unittest.TestCase):
|
|
|
64
64
|
self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
|
|
65
65
|
self.__cachePath = os.path.join(TOPDIR, "CACHE")
|
|
66
66
|
self.__readBackCheck = True
|
|
67
|
-
self.__numProc =
|
|
68
|
-
self.__chunkSize =
|
|
67
|
+
self.__numProc = 1
|
|
68
|
+
self.__chunkSize = 5
|
|
69
69
|
self.__fileLimit = 38
|
|
70
70
|
self.__documentStyle = "rowwise_by_name_with_cardinality"
|
|
71
71
|
#
|
|
@@ -172,7 +172,7 @@ class PdbxLoaderFixture(unittest.TestCase):
|
|
|
172
172
|
{
|
|
173
173
|
"databaseName": "pdbx_core",
|
|
174
174
|
"collectionNameList": None,
|
|
175
|
-
"loadType": "
|
|
175
|
+
"loadType": "replace",
|
|
176
176
|
"mergeContentTypes": ["vrpt"],
|
|
177
177
|
"validationLevel": "min",
|
|
178
178
|
"inputIdCodeList": self.__pdbIdList
|
|
@@ -228,7 +228,7 @@ class PdbxLoaderFixture(unittest.TestCase):
|
|
|
228
228
|
fileLimit=kwargs.get("fileLimit", self.__fileLimit),
|
|
229
229
|
verbose=self.__verbose,
|
|
230
230
|
readBackCheck=self.__readBackCheck,
|
|
231
|
-
maxStepLength=
|
|
231
|
+
maxStepLength=1000,
|
|
232
232
|
useSchemaCache=True,
|
|
233
233
|
rebuildSchemaFlag=False,
|
|
234
234
|
)
|
|
@@ -59,7 +59,7 @@ class ExDbWorkflowTests(unittest.TestCase):
|
|
|
59
59
|
"restoreUseGit": True,
|
|
60
60
|
"restoreUseStash": False,
|
|
61
61
|
}
|
|
62
|
-
self.__loadCommonD = {"readBackCheck": True, "numProc": 2, "chunkSize": 5, "refChunkSize": 5, "loadType": "full"}
|
|
62
|
+
self.__loadCommonD = {"readBackCheck": True, "numProc": 2, "chunkSize": 5, "refChunkSize": 5, "loadType": "full", "useFilteredLists": True}
|
|
63
63
|
#
|
|
64
64
|
# These are test source files for chemical component/BIRD indices
|
|
65
65
|
ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
|
|
@@ -63,6 +63,7 @@ class TreeNodeListWorkerTests(unittest.TestCase):
|
|
|
63
63
|
self.__debugFlag = False
|
|
64
64
|
self.__loadType = "full"
|
|
65
65
|
self.__useCache = True
|
|
66
|
+
self.__useFilteredLists = True
|
|
66
67
|
#
|
|
67
68
|
self.__startTime = time.time()
|
|
68
69
|
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
@@ -88,6 +89,7 @@ class TreeNodeListWorkerTests(unittest.TestCase):
|
|
|
88
89
|
verbose=self.__debugFlag,
|
|
89
90
|
readBackCheck=self.__readBackCheck,
|
|
90
91
|
useCache=self.__useCache,
|
|
92
|
+
useFilteredLists=self.__useFilteredLists,
|
|
91
93
|
)
|
|
92
94
|
#
|
|
93
95
|
ok = rhw.load(updateId, loadType=self.__loadType, doLoad=self.__doLoad)
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
# Updates:
|
|
8
8
|
# 9-Sep-2019 jdw add AtcProvider() and ChemrefExtractor() for ATC tree.
|
|
9
9
|
# 12-Apr-2023 dwp add CARD ontology tree
|
|
10
|
+
# 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
|
|
10
11
|
#
|
|
11
12
|
##
|
|
12
13
|
__docformat__ = "google en"
|
|
@@ -20,17 +21,17 @@ import os.path
|
|
|
20
21
|
from rcsb.db.mongo.DocumentLoader import DocumentLoader
|
|
21
22
|
from rcsb.db.processors.DataExchangeStatus import DataExchangeStatus
|
|
22
23
|
from rcsb.exdb.chemref.ChemRefExtractor import ChemRefExtractor
|
|
23
|
-
from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
|
|
24
|
-
from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
|
|
25
24
|
from rcsb.utils.chemref.AtcProvider import AtcProvider
|
|
26
25
|
from rcsb.utils.ec.EnzymeDatabaseProvider import EnzymeDatabaseProvider
|
|
27
26
|
from rcsb.utils.targets.CARDTargetOntologyProvider import CARDTargetOntologyProvider
|
|
28
|
-
from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
|
|
29
27
|
from rcsb.utils.struct.CathClassificationProvider import CathClassificationProvider
|
|
30
28
|
from rcsb.utils.struct.EcodClassificationProvider import EcodClassificationProvider
|
|
31
29
|
from rcsb.utils.struct.ScopClassificationProvider import ScopClassificationProvider
|
|
32
30
|
from rcsb.utils.struct.Scop2ClassificationProvider import Scop2ClassificationProvider
|
|
33
31
|
from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider
|
|
32
|
+
from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
|
|
33
|
+
# from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
|
|
34
|
+
# from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
|
|
34
35
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
@@ -38,7 +39,7 @@ logger = logging.getLogger(__name__)
|
|
|
38
39
|
class TreeNodeListWorker(object):
|
|
39
40
|
"""Prepare and load repository holdings and repository update data."""
|
|
40
41
|
|
|
41
|
-
def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, readBackCheck=False, documentLimit=None, verbose=False, useCache=False):
|
|
42
|
+
def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, readBackCheck=False, documentLimit=None, verbose=False, useCache=False, useFilteredLists=False):
|
|
42
43
|
self.__cfgOb = cfgOb
|
|
43
44
|
self.__cachePath = os.path.abspath(cachePath)
|
|
44
45
|
self.__readBackCheck = readBackCheck
|
|
@@ -50,6 +51,7 @@ class TreeNodeListWorker(object):
|
|
|
50
51
|
self.__verbose = verbose
|
|
51
52
|
self.__statusList = []
|
|
52
53
|
self.__useCache = useCache
|
|
54
|
+
self.__useFilteredLists = useFilteredLists
|
|
53
55
|
|
|
54
56
|
def __updateStatus(self, updateId, databaseName, collectionName, status, startTimestamp):
|
|
55
57
|
try:
|
|
@@ -122,19 +124,21 @@ class TreeNodeListWorker(object):
|
|
|
122
124
|
# collectionVersion = self.__cfgOb.get("COLLECTION_VERSION_STRING", sectionName=sectionName)
|
|
123
125
|
# addValues = {"_schema_version": collectionVersion}
|
|
124
126
|
addValues = None
|
|
125
|
-
#
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
127
|
+
#
|
|
128
|
+
# --- GO - TURNED OFF 08 Aug 2023 dwp (tree is now loaded in DW)
|
|
129
|
+
# goP = GeneOntologyProvider(goDirPath=os.path.join(self.__cachePath, "go"), useCache=useCache)
|
|
130
|
+
# ok = goP.testCache()
|
|
131
|
+
# anEx = AnnotationExtractor(self.__cfgOb)
|
|
132
|
+
# goIdL = anEx.getUniqueIdentifiers("GO")
|
|
133
|
+
# logger.info("Unique GO assignments %d", len(goIdL))
|
|
134
|
+
# nL = goP.exportTreeNodeList(goIdL)
|
|
135
|
+
# logger.info("GO tree node list length %d", len(nL))
|
|
136
|
+
# if doLoad:
|
|
137
|
+
# collectionName = "tree_go_node_list"
|
|
138
|
+
# ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
139
|
+
# self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
140
|
+
#
|
|
141
|
+
# ---- CATH
|
|
138
142
|
ccu = CathClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
139
143
|
nL = ccu.getTreeNodeList()
|
|
140
144
|
logger.info("Starting load SCOP node tree length %d", len(nL))
|
|
@@ -183,19 +187,23 @@ class TreeNodeListWorker(object):
|
|
|
183
187
|
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
184
188
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
185
189
|
# ---- Taxonomy
|
|
186
|
-
# Get the taxon coverage in the current data set -
|
|
187
|
-
epe = TaxonomyExtractor(self.__cfgOb)
|
|
188
|
-
tL = epe.getUniqueTaxons()
|
|
189
|
-
logger.info("Taxon coverage length %d", len(tL))
|
|
190
|
-
#
|
|
191
190
|
tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
191
|
+
if self.__useFilteredLists:
|
|
192
|
+
# Get the taxon coverage in the current data set -
|
|
193
|
+
epe = TaxonomyExtractor(self.__cfgOb)
|
|
194
|
+
tL = epe.getUniqueTaxons()
|
|
195
|
+
logger.info("Taxon coverage length %d", len(tL))
|
|
196
|
+
#
|
|
197
|
+
fD = {1}
|
|
198
|
+
for taxId in tL:
|
|
199
|
+
fD.update({k: True for k in tU.getLineage(taxId)})
|
|
200
|
+
logger.info("Taxon filter dictionary length %d", len(fD))
|
|
201
|
+
logger.debug("fD %r", sorted(fD))
|
|
202
|
+
#
|
|
203
|
+
nL = tU.exportNodeList(filterD=fD)
|
|
204
|
+
else:
|
|
205
|
+
# Get the full taxon node list without filtering
|
|
206
|
+
nL = tU.exportNodeList()
|
|
199
207
|
self.__checkTaxonNodeList(nL)
|
|
200
208
|
logger.info("Starting load of taxonomy node tree length %d", len(nL))
|
|
201
209
|
if doLoad:
|
|
@@ -91,6 +91,7 @@ class ExDbWorkflow(object):
|
|
|
91
91
|
rebuildSequenceCache = kwargs.get("rebuildSequenceCache", False)
|
|
92
92
|
useSequenceCache = not rebuildSequenceCache
|
|
93
93
|
#
|
|
94
|
+
useFilteredLists = kwargs.get("useFilteredLists", False)
|
|
94
95
|
except Exception as e:
|
|
95
96
|
logger.exception("Argument or configuration processing failing with %s", str(e))
|
|
96
97
|
return False
|
|
@@ -107,6 +108,7 @@ class ExDbWorkflow(object):
|
|
|
107
108
|
verbose=self.__debugFlag,
|
|
108
109
|
readBackCheck=readBackCheck,
|
|
109
110
|
useCache=self.__useCache,
|
|
111
|
+
useFilteredLists=useFilteredLists,
|
|
110
112
|
)
|
|
111
113
|
ok = rhw.load(dataSetId, loadType=loadType)
|
|
112
114
|
okS = self.loadStatus(rhw.getLoadStatus(), readBackCheck=readBackCheck)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py
RENAMED
|
File without changes
|
{rcsb.exdb-0.97 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|