rcsb.exdb 1.31__py3-none-any.whl → 1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/METADATA +1 -1
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/RECORD +4 -41
- rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
- rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
- rcsb/exdb/tests/__init__.py +0 -0
- rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
- rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
- rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
- rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
- rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
- rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
- rcsb/exdb/tests/testChemRefLoader.py +0 -106
- rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
- rcsb/exdb/tests/testCitationAdapter.py +0 -97
- rcsb/exdb/tests/testCitationExtractor.py +0 -93
- rcsb/exdb/tests/testCitationUtils.py +0 -92
- rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
- rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testGlycanProvider.py +0 -98
- rcsb/exdb/tests/testGlycanUtils.py +0 -64
- rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
- rcsb/exdb/tests/testObjectExtractor.py +0 -342
- rcsb/exdb/tests/testObjectTransformer.py +0 -83
- rcsb/exdb/tests/testObjectUpdater.py +0 -120
- rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
- rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
- rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
- rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
- rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
- rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
- rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
- rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
- rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
- rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
- rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
- rcsb/exdb/tests/testUniProtExtractor.py +0 -77
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/WHEEL +0 -0
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: ReferenceSequenceAssignmentAdapterTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 25-Apr-2019
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
# 2GKI_1,3ZXR_1,2I6F_1,1NQP_1,1NQP_2,1BL4_1,1F9E_1,1SFI_2,1EBO_1,1R6Z_1,1MH3_1,1MH4_1,1MOW_3,1BWM_1 --2I6F_1
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests of reference sequence assignment adapter.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.seq.ReferenceSequenceAssignmentAdapter import ReferenceSequenceAssignmentAdapter
|
|
26
|
-
from rcsb.exdb.seq.ReferenceSequenceAssignmentProvider import ReferenceSequenceAssignmentProvider
|
|
27
|
-
from rcsb.exdb.utils.ObjectTransformer import ObjectTransformer
|
|
28
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
29
|
-
|
|
30
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
31
|
-
logger = logging.getLogger()
|
|
32
|
-
|
|
33
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
34
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class ReferenceSequenceAssignmentAdapterTests(unittest.TestCase):
    """Build and reload the reference sequence assignment cache, then run the adapter in a transform."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object from the mock-data tree and record the start time."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=self.__mockTopPath,
        )
        self.__resourceName = "MONGO_DB"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        # Pickle is the active cache format; a JSON setting ({"fmt": "json", "indent": 3})
        # was previously left commented out here.
        self.__testEntityCacheKwargs = {"fmt": "pickle"}
        self.__fetchLimit = None
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory figure and the elapsed time of the test."""
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        finishTime = time.time()
        finishStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), finishStamp, finishTime - self.__startTime)

    def testAssignmentAdapter(self):
        """Test case - create and read cache reference sequences assignments and related data.

        The provider is constructed twice (full build arguments, then a cache-only
        reload) and both must report the same reference data count.  The reloaded
        provider then backs an adapter that is applied through ObjectTransformer.
        """
        try:
            databaseName = "pdbx_core"
            collectionName = "pdbx_core_polymer_entity"
            polymerType = "Protein"
            referenceDatabaseName = "UniProt"
            #
            # -- create cache ---
            buildArgs = {
                "databaseName": databaseName,
                "collectionName": collectionName,
                "polymerType": polymerType,
                "referenceDatabaseName": referenceDatabaseName,
                "provSource": "PDB",
                "useCache": True,
                "cachePath": self.__cachePath,
                "cacheKwargs": self.__testEntityCacheKwargs,
                "fetchLimit": self.__fetchLimit,
                "siftsAbbreviated": "TEST",
            }
            provider = ReferenceSequenceAssignmentProvider(self.__cfgOb, **buildArgs)
            self.assertTrue(provider.testCache())
            builtCount = provider.getRefDataCount()
            #
            # --- Reload from cache ---
            provider = ReferenceSequenceAssignmentProvider(
                self.__cfgOb,
                referenceDatabaseName=referenceDatabaseName,
                useCache=True,
                cachePath=self.__cachePath,
                cacheKwargs=self.__testEntityCacheKwargs,
            )
            self.assertTrue(provider.testCache())
            reloadedCount = provider.getRefDataCount()
            self.assertEqual(builtCount, reloadedCount)
            #
            adapter = ReferenceSequenceAssignmentAdapter(refSeqAssignProvider=provider)
            transformer = ObjectTransformer(self.__cfgOb, objectAdapter=adapter)
            status = transformer.doTransform(
                databaseName=databaseName,
                collectionName=collectionName,
                fetchLimit=self.__fetchLimit,
                selectionQuery={"entity_poly.rcsb_entity_polymer_type": polymerType},
            )
            self.assertTrue(status)
        except Exception as err:
            # Any error, including a failed assertion above, is logged with its traceback
            # and then reported as a plain test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def referenceSequenceAssignmentAdapterSuite():
    """Return a suite containing only the testAssignmentAdapter case."""
    return unittest.TestSuite([ReferenceSequenceAssignmentAdapterTests("testAssignmentAdapter")])


if __name__ == "__main__":
    # Run the selected suite with per-test verbose output.
    mySuite = referenceSequenceAssignmentAdapterSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: ReferenceSequenceAssignmentAdapterValidateTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 25-Apr-2019
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
# 2GKI_1,3ZXR_1,2I6F_1,1NQP_1,1NQP_2,1BL4_1,1F9E_1,1SFI_2,1EBO_1,1R6Z_1,1MH3_1,1MH4_1,1MOW_3,1BWM_1 --2I6F_1
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests of reference sequence assignment adapter.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.seq.ReferenceSequenceAssignmentAdapter import ReferenceSequenceAssignmentAdapter
|
|
26
|
-
from rcsb.exdb.seq.ReferenceSequenceAssignmentProvider import ReferenceSequenceAssignmentProvider
|
|
27
|
-
from rcsb.exdb.utils.ObjectValidator import ObjectValidator
|
|
28
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
29
|
-
|
|
30
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
31
|
-
logger = logging.getLogger()
|
|
32
|
-
|
|
33
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
34
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class ReferenceSequenceAssignmentAdapterTests(unittest.TestCase):
    """Build and reload the reference sequence assignment cache, then run the adapter via ObjectValidator."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object from the mock-data tree and record the start time."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=self.__mockTopPath,
        )
        self.__resourceName = "MONGO_DB"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        # Pickle is the active cache format; a JSON setting ({"fmt": "json", "indent": 3})
        # was previously left commented out here.
        self.__testEntityCacheKwargs = {"fmt": "pickle"}
        self.__fetchLimit = None
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory figure and the elapsed time of the test."""
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        finishTime = time.time()
        finishStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), finishStamp, finishTime - self.__startTime)

    @unittest.skip("Disable test - for troubleshooting only")
    def testAssignmentAdapter(self):
        """Test case - create and read cache reference sequences assignments and related data.

        The provider is constructed twice (full build arguments, then a cache-only
        reload) and both must report the same reference data count.  The reloaded
        provider then backs an adapter that is applied through ObjectValidator.
        """
        try:
            databaseName = "pdbx_core"
            collectionName = "pdbx_core_polymer_entity"
            polymerType = "Protein"
            referenceDatabaseName = "UniProt"
            #
            # -- create cache ---
            buildArgs = {
                "databaseName": databaseName,
                "collectionName": collectionName,
                "polymerType": polymerType,
                "referenceDatabaseName": referenceDatabaseName,
                "provSource": "PDB",
                "useCache": True,
                "cachePath": self.__cachePath,
                "cacheKwargs": self.__testEntityCacheKwargs,
                "fetchLimit": self.__fetchLimit,
                "siftsAbbreviated": "TEST",
            }
            provider = ReferenceSequenceAssignmentProvider(self.__cfgOb, **buildArgs)
            self.assertTrue(provider.testCache())
            builtCount = provider.getRefDataCount()
            #
            # --- Reload from cache ---
            provider = ReferenceSequenceAssignmentProvider(
                self.__cfgOb,
                referenceDatabaseName=referenceDatabaseName,
                useCache=True,
                cachePath=self.__cachePath,
                cacheKwargs=self.__testEntityCacheKwargs,
            )
            self.assertTrue(provider.testCache())
            reloadedCount = provider.getRefDataCount()
            self.assertEqual(builtCount, reloadedCount)
            #
            adapter = ReferenceSequenceAssignmentAdapter(refSeqAssignProvider=provider)
            validator = ObjectValidator(self.__cfgOb, objectAdapter=adapter, cachePath=self.__cachePath, useCache=False)
            status = validator.doTransform(
                databaseName=databaseName,
                collectionName=collectionName,
                fetchLimit=self.__fetchLimit,
                selectionQuery={"entity_poly.rcsb_entity_polymer_type": polymerType},
            )
            self.assertTrue(status)
        except Exception as err:
            # Any error, including a failed assertion above, is logged with its traceback
            # and then reported as a plain test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def referenceSequenceAssignmentAdapterSuite():
    """Return a suite containing only the testAssignmentAdapter case."""
    return unittest.TestSuite([ReferenceSequenceAssignmentAdapterTests("testAssignmentAdapter")])


if __name__ == "__main__":
    # Run the selected suite with per-test verbose output.
    mySuite = referenceSequenceAssignmentAdapterSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: ReferenceSequenceAssignmentProviderTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 17-Oct-2019
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
#
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for reference sequence assignment update operations
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.seq.ReferenceSequenceAssignmentProvider import ReferenceSequenceAssignmentProvider
|
|
26
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
27
|
-
|
|
28
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
29
|
-
logger = logging.getLogger()
|
|
30
|
-
|
|
31
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
32
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class ReferenceSequenceAssignmentProviderTests(unittest.TestCase):
    """Build the reference sequence assignment cache without reuse, then reload it from disk."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True
        self.__traceMemory = False

    def setUp(self):
        """Create the configuration object from the mock-data tree and record the start time."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=self.__mockTopPath,
        )
        self.__resourceName = "MONGO_DB"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        # Pickle is the active cache format; a JSON setting ({"fmt": "json", "indent": 3})
        # was previously left commented out here.
        self.__testEntityCacheKwargs = {"fmt": "pickle"}
        self.__fetchLimitTest = None
        # The first provider in the test is constructed with useCache disabled.
        self.__useCache = False
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory figure and the elapsed time of the test."""
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        finishTime = time.time()
        finishStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), finishStamp, finishTime - self.__startTime)

    def testAssignmentProvider(self):
        """Test case - create and read cache reference sequences assignments and related data.

        Both the freshly built provider and the provider reloaded from the cache
        must pass testCache() and report at least 49 reference data records.

        Some profiling statistics -
         Current memory usage is 0.711864MB; Peak was 4646.476926MB (full cache no limit)
         Current memory usage is 1.080839MB; Peak was 1258.231275MB (163)
         Current memory usage is 0.874476MB; Peak was 1920.689116MB (918)
         Current memory usage is 0.937091MB; Peak was 2086.910197MB (2739)
         Current memory usage is 1.3539 MB; Peak was 2300.5170 MB (10000)
         Current memory usage is 1.3517 MB; Peak was 2714.5467 MB (20K entries)
        """
        minimumCount = 49
        try:
            # -- create cache ---
            buildArgs = {
                "databaseName": "pdbx_core",
                "collectionName": "pdbx_core_polymer_entity",
                "polymerType": "Protein",
                "referenceDatabaseName": "UniProt",
                "provSource": "PDB",
                "useCache": self.__useCache,
                "cachePath": self.__cachePath,
                "cacheKwargs": self.__testEntityCacheKwargs,
                "fetchLimit": self.__fetchLimitTest,
                "siftsAbbreviated": "TEST",
            }
            provider = ReferenceSequenceAssignmentProvider(self.__cfgOb, **buildArgs)
            self.assertTrue(provider.testCache())
            self.assertGreaterEqual(provider.getRefDataCount(), minimumCount)
            #
            # --- Reload from cache ---
            provider = ReferenceSequenceAssignmentProvider(
                self.__cfgOb,
                referenceDatabaseName="UniProt",
                useCache=True,
                cachePath=self.__cachePath,
                cacheKwargs=self.__testEntityCacheKwargs,
            )
            self.assertTrue(provider.testCache())
            self.assertGreaterEqual(provider.getRefDataCount(), minimumCount)
        except Exception as err:
            # Any error, including a failed assertion above, is logged with its traceback
            # and then reported as a plain test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def referenceSequenceAssignmentProviderSuite():
    """Return a suite containing only the testAssignmentProvider case."""
    return unittest.TestSuite([ReferenceSequenceAssignmentProviderTests("testAssignmentProvider")])


if __name__ == "__main__":
    # Run the selected suite with per-test verbose output.
    mySuite = referenceSequenceAssignmentProviderSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: ReferenceSequenceCacheProviderTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 10-Feb-2020
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
#
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for reference sequence cache maintenance operations
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.seq.ReferenceSequenceCacheProvider import ReferenceSequenceCacheProvider
|
|
26
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
30
|
-
logger = logging.getLogger()
|
|
31
|
-
|
|
32
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
33
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class ReferenceSequenceCacheProviderTests(unittest.TestCase):
    """Create the reference sequence cache and then read it back through a second provider."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True
        self.__traceMemory = False

    def setUp(self):
        """Create the configuration object from the mock-data tree and record the start time."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=self.__mockTopPath,
        )
        self.__fetchLimitTest = None
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory figure and the elapsed time of the test."""
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        finishTime = time.time()
        finishStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), finishStamp, finishTime - self.__startTime)

    def testReferenceCacheProvider(self):
        """Test case - create and read cached reference sequences.

        The provider is constructed twice with identical arguments apart from
        expireDays (0 for the update/create pass, 14 for the reload pass); each
        pass must pass testCache() and report at least 44 reference records.
        """
        try:
            databaseName = "pdbx_core"
            collectionName = "pdbx_core_polymer_entity"
            polymerType = "Protein"
            # First pass: update/create cache.  Second pass: reload from cache.
            for expireDays in (0, 14):
                provider = ReferenceSequenceCacheProvider(
                    self.__cfgOb,
                    databaseName,
                    collectionName,
                    polymerType,
                    maxChunkSize=50,
                    numProc=2,
                    expireDays=expireDays,
                )
                self.assertTrue(provider.testCache())
                self.assertGreaterEqual(provider.getRefDataCount(), 44)
        except Exception as err:
            # Any error, including a failed assertion above, is logged with its traceback
            # and then reported as a plain test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
def referenceSequenceCacheProviderSuite():
    """Return a suite containing only the testReferenceCacheProvider case.

    The registered name must match a method defined on
    ReferenceSequenceCacheProviderTests; unittest.TestCase raises ValueError
    at construction time for an unknown method name.
    """
    suiteSelect = unittest.TestSuite()
    # Previously registered as "testCacheProvider", which the test class does not define.
    suiteSelect.addTest(ReferenceSequenceCacheProviderTests("testReferenceCacheProvider"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with per-test verbose output.
    mySuite = referenceSequenceCacheProviderSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: TaxonomyExtractorTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 15-Oct-2019
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
#
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for extraction of taxonomy information.
|
|
11
|
-
"""
|
|
12
|
-
__docformat__ = "google en"
|
|
13
|
-
__author__ = "John Westbrook"
|
|
14
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
15
|
-
__license__ = "Apache 2.0"
|
|
16
|
-
|
|
17
|
-
import logging
|
|
18
|
-
import os
|
|
19
|
-
import platform
|
|
20
|
-
import resource
|
|
21
|
-
import time
|
|
22
|
-
import unittest
|
|
23
|
-
|
|
24
|
-
from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
|
|
25
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
26
|
-
|
|
27
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
28
|
-
logger = logging.getLogger()
|
|
29
|
-
|
|
30
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
31
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class TaxonomyExtractorTests(unittest.TestCase):
    """Fetch the unique taxon list from TaxonomyExtractor and log its size."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object from the mock-data tree and record the start time."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=self.__mockTopPath,
        )
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory figure and the elapsed time of the test."""
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        finishTime = time.time()
        finishStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), finishStamp, finishTime - self.__startTime)

    def testGetTaxons(self):
        """Test case - get reference sequences and update candidates

        NOTE(review): the body only calls getUniqueTaxons() and logs the number
        of entries returned; it makes no assertion about the result, so the test
        fails only when an exception is raised.
        """
        try:
            extractor = TaxonomyExtractor(self.__cfgOb)
            uniqueTaxons = extractor.getUniqueTaxons()
            logger.info("Unique taxon count %d", len(uniqueTaxons))
        except Exception as err:
            # Any error is logged with its traceback and then reported as a plain test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def extractorSuite():
    """Return a suite containing only the testGetTaxons case."""
    return unittest.TestSuite([TaxonomyExtractorTests("testGetTaxons")])


if __name__ == "__main__":
    # Run the selected suite with per-test verbose output.
    mySuite = extractorSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
|
|
@@ -1,111 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: testTreeNodeListWorker.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 23-Apr-2019
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
#
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for tree node list worker ---
|
|
11
|
-
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
__docformat__ = "google en"
|
|
15
|
-
__author__ = "John Westbrook"
|
|
16
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
17
|
-
__license__ = "Apache 2.0"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
import logging
|
|
21
|
-
import os
|
|
22
|
-
import platform
|
|
23
|
-
import resource
|
|
24
|
-
import time
|
|
25
|
-
import unittest
|
|
26
|
-
|
|
27
|
-
from rcsb.db.utils.TimeUtil import TimeUtil
|
|
28
|
-
from rcsb.exdb.tree.TreeNodeListWorker import TreeNodeListWorker
|
|
29
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
30
|
-
from rcsb.utils.io.MarshalUtil import MarshalUtil
|
|
31
|
-
|
|
32
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
33
|
-
logger = logging.getLogger()
|
|
34
|
-
|
|
35
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
36
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class TreeNodeListWorkerTests(unittest.TestCase):
    """Run TreeNodeListWorker.load() against the mock configuration."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object, set the worker options and record the start time."""
        self.__isMac = platform.system() == "Darwin"
        # The doLoad flag handed to the worker is enabled only on macOS (Darwin).
        self.__doLoad = self.__isMac
        #
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=self.__mockTopPath,
        )
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__mU = MarshalUtil()
        # Options forwarded to TreeNodeListWorker and its load() call in the test.
        self.__readBackCheck = True
        self.__numProc = 2
        self.__chunkSize = 10
        self.__documentLimit = None
        self.__debugFlag = False
        self.__loadType = "full"
        self.__useCache = True
        self.__useFilteredLists = True
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory figure and the elapsed time of the test."""
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        finishTime = time.time()
        finishStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), finishStamp, finishTime - self.__startTime)

    def testTreeLoader(self):
        """Test case - extract entity polymer info

        NOTE(review): despite the summary line, the body builds a
        TreeNodeListWorker and asserts that its load() call, keyed by the current
        week signature, returns a true value.
        """
        try:
            updateId = TimeUtil().getCurrentWeekSignature()
            workerOptions = {
                "numProc": self.__numProc,
                "chunkSize": self.__chunkSize,
                "documentLimit": self.__documentLimit,
                "verbose": self.__debugFlag,
                "readBackCheck": self.__readBackCheck,
                "useCache": self.__useCache,
                "useFilteredLists": self.__useFilteredLists,
            }
            worker = TreeNodeListWorker(self.__cfgOb, self.__cachePath, **workerOptions)
            #
            status = worker.load(updateId, loadType=self.__loadType, doLoad=self.__doLoad)
            self.assertTrue(status)
        except Exception as err:
            # Any error, including a failed assertion above, is logged with its traceback
            # and then reported as a plain test failure.
            logger.exception("Failing with %s", str(err))
            self.fail()
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def treeNodeListSuite():
    """Return a suite containing only the testTreeLoader case."""
    return unittest.TestSuite([TreeNodeListWorkerTests("testTreeLoader")])


if __name__ == "__main__":
    # Run the selected suite with per-test verbose output.
    mySuite = treeNodeListSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
|