rcsb.exdb 1.31__py3-none-any.whl → 1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/METADATA +1 -1
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/RECORD +4 -41
- rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
- rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
- rcsb/exdb/tests/__init__.py +0 -0
- rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
- rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
- rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
- rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
- rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
- rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
- rcsb/exdb/tests/testChemRefLoader.py +0 -106
- rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
- rcsb/exdb/tests/testCitationAdapter.py +0 -97
- rcsb/exdb/tests/testCitationExtractor.py +0 -93
- rcsb/exdb/tests/testCitationUtils.py +0 -92
- rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
- rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testGlycanProvider.py +0 -98
- rcsb/exdb/tests/testGlycanUtils.py +0 -64
- rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
- rcsb/exdb/tests/testObjectExtractor.py +0 -342
- rcsb/exdb/tests/testObjectTransformer.py +0 -83
- rcsb/exdb/tests/testObjectUpdater.py +0 -120
- rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
- rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
- rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
- rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
- rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
- rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
- rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
- rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
- rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
- rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
- rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
- rcsb/exdb/tests/testUniProtExtractor.py +0 -77
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/WHEEL +0 -0
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: PubChemDataCacheProviderTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 17-Jul-2020
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
#
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for reference data cache maintenance operations
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.chemref.PubChemDataCacheProvider import PubChemDataCacheProvider
|
|
26
|
-
|
|
27
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
28
|
-
|
|
29
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
30
|
-
logger = logging.getLogger()
|
|
31
|
-
|
|
32
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
33
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class PubChemDataCacheProviderTests(unittest.TestCase):
|
|
37
|
-
def __init__(self, methodName="runTest"):
|
|
38
|
-
super(PubChemDataCacheProviderTests, self).__init__(methodName)
|
|
39
|
-
self._verbose = True
|
|
40
|
-
|
|
41
|
-
def setUp(self):
|
|
42
|
-
#
|
|
43
|
-
self.__cidList = ["49866376", "66835630", "71664579", "11915", "12072107"]
|
|
44
|
-
self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
|
|
45
|
-
#
|
|
46
|
-
# Site configuration used for database resource access -
|
|
47
|
-
self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
48
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
49
|
-
configName = "site_info_configuration"
|
|
50
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
|
|
51
|
-
#
|
|
52
|
-
self.__startTime = time.time()
|
|
53
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
54
|
-
|
|
55
|
-
def tearDown(self):
|
|
56
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
57
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
58
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
59
|
-
endTime = time.time()
|
|
60
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
61
|
-
|
|
62
|
-
def testALoadAndUpdate(self):
|
|
63
|
-
"""Test case - load and reload/update data store."""
|
|
64
|
-
try:
|
|
65
|
-
# -- Update/create cache ---
|
|
66
|
-
exportPath = os.path.join(self.__cachePath, "PubChem")
|
|
67
|
-
pcdcP = PubChemDataCacheProvider(self.__cfgOb, self.__cachePath)
|
|
68
|
-
ok, failList = pcdcP.load(self.__cidList, exportPath=exportPath)
|
|
69
|
-
self.assertTrue(ok)
|
|
70
|
-
self.assertEqual(len(failList), 0)
|
|
71
|
-
logger.info("Status %r failList %r", ok, failList)
|
|
72
|
-
#
|
|
73
|
-
idL = pcdcP.getRefIdCodes()
|
|
74
|
-
logger.info("idL %r", idL)
|
|
75
|
-
self.assertGreaterEqual(len(idL), len(self.__cidList))
|
|
76
|
-
#
|
|
77
|
-
ok, failList = pcdcP.updateMissing(self.__cidList, exportPath=exportPath)
|
|
78
|
-
self.assertTrue(ok)
|
|
79
|
-
self.assertEqual(len(failList), 0)
|
|
80
|
-
except Exception as e:
|
|
81
|
-
logger.exception("Failing with %s", str(e))
|
|
82
|
-
self.fail()
|
|
83
|
-
|
|
84
|
-
def testBackupAndRestore(self):
|
|
85
|
-
"""Test case - load and dump operations."""
|
|
86
|
-
try:
|
|
87
|
-
# -- Backup/Restore cache ---
|
|
88
|
-
pcdcP = PubChemDataCacheProvider(self.__cfgOb, self.__cachePath)
|
|
89
|
-
ok, failList = pcdcP.load(self.__cidList, exportPath=None)
|
|
90
|
-
self.assertEqual(len(failList), 0)
|
|
91
|
-
self.assertTrue(ok)
|
|
92
|
-
ok = pcdcP.dump(fmt="json")
|
|
93
|
-
self.assertTrue(ok)
|
|
94
|
-
#
|
|
95
|
-
except Exception as e:
|
|
96
|
-
logger.exception("Failing with %s", str(e))
|
|
97
|
-
self.fail()
|
|
98
|
-
|
|
99
|
-
def testGetRelatedIdentifiers(self):
|
|
100
|
-
"""Test case - get PubChem xrefs."""
|
|
101
|
-
try:
|
|
102
|
-
# --- Get related identifiers ---
|
|
103
|
-
pcdcP = PubChemDataCacheProvider(self.__cfgOb, self.__cachePath)
|
|
104
|
-
rD = pcdcP.getRelatedMapping(self.__cidList)
|
|
105
|
-
logger.info("rD %r", rD)
|
|
106
|
-
self.assertGreaterEqual(len(rD), len(self.__cidList))
|
|
107
|
-
#
|
|
108
|
-
except Exception as e:
|
|
109
|
-
logger.exception("Failing with %s", str(e))
|
|
110
|
-
self.fail()
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def pubChemDataCacheProviderSuite():
|
|
114
|
-
suiteSelect = unittest.TestSuite()
|
|
115
|
-
suiteSelect.addTest(PubChemDataCacheProviderTests("testALoadAndUpdate"))
|
|
116
|
-
suiteSelect.addTest(PubChemDataCacheProviderTests("testBackupAndRestore"))
|
|
117
|
-
suiteSelect.addTest(PubChemDataCacheProviderTests("testGetRelatedIdentifiers"))
|
|
118
|
-
|
|
119
|
-
return suiteSelect
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if __name__ == "__main__":
|
|
123
|
-
mySuite = pubChemDataCacheProviderSuite()
|
|
124
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -1,134 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: PubChemEtlWorkflowTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 29-Jul-2020
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
# 13-Mar-2023 aae Disable git stash testing
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for PubChem ETL workflow methods
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.wf.PubChemEtlWorkflow import PubChemEtlWorkflow
|
|
26
|
-
|
|
27
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
28
|
-
logger = logging.getLogger()
|
|
29
|
-
|
|
30
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
31
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class PubChemEtlWorkflowTests(unittest.TestCase):
|
|
35
|
-
def __init__(self, methodName="runTest"):
|
|
36
|
-
super(PubChemEtlWorkflowTests, self).__init__(methodName)
|
|
37
|
-
self.__verbose = True
|
|
38
|
-
|
|
39
|
-
def setUp(self):
|
|
40
|
-
#
|
|
41
|
-
self.__dataPath = os.path.join(HERE, "test-data")
|
|
42
|
-
self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
|
|
43
|
-
#
|
|
44
|
-
# Site configuration used for database resource access -
|
|
45
|
-
# self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
46
|
-
self.__configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
47
|
-
self.__configName = "site_info_configuration"
|
|
48
|
-
#
|
|
49
|
-
# These are test source files for chemical component/BIRD indices
|
|
50
|
-
self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
|
|
51
|
-
self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
|
|
52
|
-
self.__ccFileNamePrefix = "cc-abbrev"
|
|
53
|
-
#
|
|
54
|
-
# This tests pushing files to the stash
|
|
55
|
-
self.__testStashServer = True
|
|
56
|
-
self.__testStashGit = False
|
|
57
|
-
#
|
|
58
|
-
self.__startTime = time.time()
|
|
59
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
60
|
-
|
|
61
|
-
def tearDown(self):
|
|
62
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
63
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
64
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
65
|
-
endTime = time.time()
|
|
66
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
67
|
-
|
|
68
|
-
def testAUpdateIndex(self):
|
|
69
|
-
"""Test case - build CCD/BIRD search indices and search for PubChem matches."""
|
|
70
|
-
try:
|
|
71
|
-
# -- Update local chemical indices and create PubChem mapping index ---
|
|
72
|
-
|
|
73
|
-
pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
|
|
74
|
-
ok = pcewP.updateMatchedIndex(
|
|
75
|
-
ccUrlTarget=self.__ccUrlTarget,
|
|
76
|
-
birdUrlTarget=self.__birdUrlTarget,
|
|
77
|
-
ccFileNamePrefix=self.__ccFileNamePrefix,
|
|
78
|
-
numProcChemComp=4,
|
|
79
|
-
rebuildChemIndices=True,
|
|
80
|
-
useStash=self.__testStashServer,
|
|
81
|
-
useGit=self.__testStashGit
|
|
82
|
-
)
|
|
83
|
-
self.assertTrue(ok)
|
|
84
|
-
except Exception as e:
|
|
85
|
-
logger.exception("Failing with %s", str(e))
|
|
86
|
-
self.fail()
|
|
87
|
-
|
|
88
|
-
def testBDump(self):
|
|
89
|
-
"""Test case - dump current stored state"""
|
|
90
|
-
try:
|
|
91
|
-
# --
|
|
92
|
-
pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
|
|
93
|
-
ok = pcewP.dump(useStash=self.__testStashServer, useGit=self.__testStashGit)
|
|
94
|
-
self.assertTrue(ok)
|
|
95
|
-
except Exception as e:
|
|
96
|
-
logger.exception("Failing with %s", str(e))
|
|
97
|
-
self.fail()
|
|
98
|
-
|
|
99
|
-
def testCRestore(self):
|
|
100
|
-
"""Test case - restore object store from the prior dump"""
|
|
101
|
-
try:
|
|
102
|
-
# --
|
|
103
|
-
pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
|
|
104
|
-
ok = pcewP.restore()
|
|
105
|
-
self.assertTrue(ok)
|
|
106
|
-
except Exception as e:
|
|
107
|
-
logger.exception("Failing with %s", str(e))
|
|
108
|
-
self.fail()
|
|
109
|
-
|
|
110
|
-
def testDUpdateData(self):
|
|
111
|
-
"""Test case - update corresponding data and generate corresponding identifiers."""
|
|
112
|
-
try:
|
|
113
|
-
# --
|
|
114
|
-
pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
|
|
115
|
-
ok = pcewP.updateMatchedData(useStash=self.__testStashServer, useGit=self.__testStashGit)
|
|
116
|
-
self.assertTrue(ok)
|
|
117
|
-
except Exception as e:
|
|
118
|
-
logger.exception("Failing with %s", str(e))
|
|
119
|
-
self.fail()
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def pubChemEtlWorkflowSuite():
|
|
123
|
-
suiteSelect = unittest.TestSuite()
|
|
124
|
-
suiteSelect.addTest(PubChemEtlWorkflowTests("testAUpdateIndex"))
|
|
125
|
-
suiteSelect.addTest(PubChemEtlWorkflowTests("testBDump"))
|
|
126
|
-
suiteSelect.addTest(PubChemEtlWorkflowTests("testCRestore"))
|
|
127
|
-
suiteSelect.addTest(PubChemEtlWorkflowTests("testDUpdateData"))
|
|
128
|
-
|
|
129
|
-
return suiteSelect
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
if __name__ == "__main__":
|
|
133
|
-
mySuite = pubChemEtlWorkflowSuite()
|
|
134
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -1,155 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: PubChemEtlWrapperTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 20-Jul-2020
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
# 13-Mar-2023 aae Updates to use multiprocess count, disable git stash testing,
|
|
8
|
-
# Fix tests after removing obsolete entries from test data
|
|
9
|
-
##
|
|
10
|
-
"""
|
|
11
|
-
Tests for PubChem ETL wrapper methods
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
__docformat__ = "google en"
|
|
15
|
-
__author__ = "John Westbrook"
|
|
16
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
17
|
-
__license__ = "Apache 2.0"
|
|
18
|
-
|
|
19
|
-
import logging
|
|
20
|
-
import os
|
|
21
|
-
import platform
|
|
22
|
-
import resource
|
|
23
|
-
import time
|
|
24
|
-
import unittest
|
|
25
|
-
|
|
26
|
-
from rcsb.exdb.chemref.PubChemEtlWrapper import PubChemEtlWrapper
|
|
27
|
-
|
|
28
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
29
|
-
from rcsb.utils.io.MarshalUtil import MarshalUtil
|
|
30
|
-
|
|
31
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
32
|
-
logger = logging.getLogger()
|
|
33
|
-
|
|
34
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
35
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class PubChemEtlWrapperTests(unittest.TestCase):
|
|
39
|
-
def __init__(self, methodName="runTest"):
|
|
40
|
-
super(PubChemEtlWrapperTests, self).__init__(methodName)
|
|
41
|
-
self.__verbose = True
|
|
42
|
-
|
|
43
|
-
def setUp(self):
|
|
44
|
-
#
|
|
45
|
-
self.__workPath = os.path.join(HERE, "test-output")
|
|
46
|
-
self.__dataPath = os.path.join(HERE, "test-data")
|
|
47
|
-
self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
|
|
48
|
-
self.__dirPath = os.path.join(self.__cachePath, "PubChem")
|
|
49
|
-
self.__mU = MarshalUtil(workPath=self.__cachePath)
|
|
50
|
-
#
|
|
51
|
-
# Site configuration used for database resource access -
|
|
52
|
-
self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
53
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
54
|
-
configName = "site_info_configuration"
|
|
55
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
|
|
56
|
-
#
|
|
57
|
-
# These are test source files for chemical component/BIRD indices
|
|
58
|
-
self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
|
|
59
|
-
self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
|
|
60
|
-
self.__numComponents = 25
|
|
61
|
-
self.__numSelectMatches = 23
|
|
62
|
-
self.__numAltMatches = 2
|
|
63
|
-
self.__numTotalMatches = 50
|
|
64
|
-
#
|
|
65
|
-
# This tests pushing files to the stash
|
|
66
|
-
self.__testStashServer = True
|
|
67
|
-
self.__testStashGit = False
|
|
68
|
-
#
|
|
69
|
-
self.__startTime = time.time()
|
|
70
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
71
|
-
|
|
72
|
-
def tearDown(self):
|
|
73
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
74
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
75
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
76
|
-
endTime = time.time()
|
|
77
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
78
|
-
|
|
79
|
-
def testAFromBootstrap(self):
|
|
80
|
-
"""Test case - build CCD/BIRD search indices and search for PubChem matches."""
|
|
81
|
-
try:
|
|
82
|
-
# -- Update local chemical indices and create PubChem mapping index ---
|
|
83
|
-
|
|
84
|
-
pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath)
|
|
85
|
-
ok = pcewP.updateIndex(
|
|
86
|
-
ccUrlTarget=self.__ccUrlTarget,
|
|
87
|
-
birdUrlTarget=self.__birdUrlTarget,
|
|
88
|
-
ccFileNamePrefix="cc-abbrev",
|
|
89
|
-
exportPath=self.__dirPath,
|
|
90
|
-
rebuildChemIndices=True,
|
|
91
|
-
numProcChemComp=4,
|
|
92
|
-
)
|
|
93
|
-
self.assertTrue(ok)
|
|
94
|
-
#
|
|
95
|
-
mL = pcewP.getMatches()
|
|
96
|
-
self.assertGreaterEqual(len(mL), self.__numTotalMatches)
|
|
97
|
-
selectMatchD, altMatchD = pcewP.getSelectedMatches()
|
|
98
|
-
#
|
|
99
|
-
logger.info("matchD (%d)", len(selectMatchD))
|
|
100
|
-
self.assertGreaterEqual(len(selectMatchD), self.__numSelectMatches)
|
|
101
|
-
self.assertGreaterEqual(len(altMatchD), self.__numAltMatches)
|
|
102
|
-
#
|
|
103
|
-
ok = pcewP.dump(contentType="index")
|
|
104
|
-
self.assertTrue(ok)
|
|
105
|
-
ok = pcewP.toStash(contentType="index", useStash=self.__testStashServer, useGit=self.__testStashGit)
|
|
106
|
-
self.assertTrue(ok)
|
|
107
|
-
except Exception as e:
|
|
108
|
-
logger.exception("Failing with %s", str(e))
|
|
109
|
-
self.fail()
|
|
110
|
-
|
|
111
|
-
def testBFromRestore(self):
|
|
112
|
-
"""Test case - operations from a restored starting point"""
|
|
113
|
-
try:
|
|
114
|
-
# --
|
|
115
|
-
pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath)
|
|
116
|
-
ok = pcewP.fromStash(contentType="index")
|
|
117
|
-
self.assertTrue(ok)
|
|
118
|
-
#
|
|
119
|
-
numObjects = pcewP.reloadDump(contentType="index")
|
|
120
|
-
logger.info("Restored %d correspondence records", numObjects)
|
|
121
|
-
self.assertGreaterEqual(numObjects, self.__numComponents)
|
|
122
|
-
mapD, extraMapD = pcewP.getSelectedMatches(exportPath=os.path.join(self.__cachePath, "mapping"))
|
|
123
|
-
self.assertGreaterEqual(len(mapD), self.__numSelectMatches)
|
|
124
|
-
logger.info("mapD (%d) extraMapD (%d) %r", len(mapD), len(extraMapD), extraMapD)
|
|
125
|
-
self.assertGreaterEqual(len(extraMapD), self.__numAltMatches)
|
|
126
|
-
cidList = pcewP.getMatches()
|
|
127
|
-
logger.info("cidList (%d)", len(cidList))
|
|
128
|
-
self.assertGreaterEqual(len(cidList), self.__numTotalMatches - 2)
|
|
129
|
-
ok = pcewP.updateMatchedData()
|
|
130
|
-
self.assertTrue(ok)
|
|
131
|
-
ok = pcewP.dump(contentType="data")
|
|
132
|
-
self.assertTrue(ok)
|
|
133
|
-
ok = pcewP.toStash(contentType="data", useStash=self.__testStashServer, useGit=self.__testStashGit)
|
|
134
|
-
self.assertTrue(ok)
|
|
135
|
-
ok = pcewP.updateIdentifiers()
|
|
136
|
-
self.assertTrue(ok)
|
|
137
|
-
ok = pcewP.dump(contentType="identifiers")
|
|
138
|
-
self.assertTrue(ok)
|
|
139
|
-
ok = pcewP.toStash(contentType="identifiers", useStash=self.__testStashServer, useGit=self.__testStashGit)
|
|
140
|
-
self.assertTrue(ok)
|
|
141
|
-
except Exception as e:
|
|
142
|
-
logger.exception("Failing with %s", str(e))
|
|
143
|
-
self.fail()
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def pubChemEtlWrapperSuite():
|
|
147
|
-
suiteSelect = unittest.TestSuite()
|
|
148
|
-
suiteSelect.addTest(PubChemEtlWrapperTests("testAFromBootstrap"))
|
|
149
|
-
suiteSelect.addTest(PubChemEtlWrapperTests("testBFromRestore"))
|
|
150
|
-
return suiteSelect
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
if __name__ == "__main__":
|
|
154
|
-
mySuite = pubChemEtlWrapperSuite()
|
|
155
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: PubChemIndexCacheProviderTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 16-Jul-2020
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
# 13-Mar-2023 aae Fix tests after removing obsolete entries from test data
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests for PubChem index cache maintenance operations
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.chemref.PubChemIndexCacheProvider import PubChemIndexCacheProvider
|
|
26
|
-
|
|
27
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
28
|
-
|
|
29
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
30
|
-
logger = logging.getLogger()
|
|
31
|
-
|
|
32
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
33
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class PubChemIndexCacheProviderTests(unittest.TestCase):
|
|
37
|
-
def __init__(self, methodName="runTest"):
|
|
38
|
-
super(PubChemIndexCacheProviderTests, self).__init__(methodName)
|
|
39
|
-
self.__verbose = True
|
|
40
|
-
|
|
41
|
-
def setUp(self):
|
|
42
|
-
#
|
|
43
|
-
self.__dataPath = os.path.join(HERE, "test-data")
|
|
44
|
-
self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
|
|
45
|
-
#
|
|
46
|
-
# Site configuration used for database resource access -
|
|
47
|
-
self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
48
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
49
|
-
self.__configName = "site_info_configuration"
|
|
50
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=self.__configName, mockTopPath=self.__mockTopPath)
|
|
51
|
-
#
|
|
52
|
-
# These are test source files for chemical component/BIRD indices
|
|
53
|
-
self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
|
|
54
|
-
self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
|
|
55
|
-
self.__startTime = time.time()
|
|
56
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
57
|
-
|
|
58
|
-
def tearDown(self):
|
|
59
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
60
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
61
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
62
|
-
endTime = time.time()
|
|
63
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
64
|
-
|
|
65
|
-
def testAPubChemIndexCacheProvider(self):
|
|
66
|
-
"""Test case - search, backup, restore and select PubChem correspondences for reference chemical definitions."""
|
|
67
|
-
try:
|
|
68
|
-
# -- Update/create mapping index cache ---
|
|
69
|
-
numObj = 25
|
|
70
|
-
pcicP = PubChemIndexCacheProvider(self.__cfgOb, self.__cachePath)
|
|
71
|
-
pcicP.updateMissing(
|
|
72
|
-
expireDays=0,
|
|
73
|
-
cachePath=self.__cachePath,
|
|
74
|
-
ccUrlTarget=self.__ccUrlTarget,
|
|
75
|
-
birdUrlTarget=self.__birdUrlTarget,
|
|
76
|
-
ccFileNamePrefix="cc-abbrev",
|
|
77
|
-
exportPath=os.path.join(self.__cachePath, "PubChem"),
|
|
78
|
-
rebuildChemIndices=False,
|
|
79
|
-
fetchLimit=None,
|
|
80
|
-
)
|
|
81
|
-
matchD = pcicP.getMatchData(expireDays=0)
|
|
82
|
-
logger.info("matchD (%d)", len(matchD))
|
|
83
|
-
self.assertGreaterEqual(len(matchD), numObj)
|
|
84
|
-
ok = pcicP.testCache()
|
|
85
|
-
self.assertTrue(ok)
|
|
86
|
-
#
|
|
87
|
-
ok = pcicP.dump()
|
|
88
|
-
self.assertTrue(ok)
|
|
89
|
-
#
|
|
90
|
-
mapD, extraMapD = pcicP.getSelectedMatches(exportPath=os.path.join(self.__cachePath, "mapping"))
|
|
91
|
-
self.assertGreaterEqual(len(mapD), 20)
|
|
92
|
-
logger.info("mapD (%d) extraMapD (%d) %r", len(mapD), len(extraMapD), extraMapD)
|
|
93
|
-
self.assertGreaterEqual(len(extraMapD), 2)
|
|
94
|
-
cidList = pcicP.getMatches()
|
|
95
|
-
logger.info("cidList (%d)", len(cidList))
|
|
96
|
-
self.assertGreaterEqual(len(cidList), 49)
|
|
97
|
-
#
|
|
98
|
-
except Exception as e:
|
|
99
|
-
logger.exception("Failing with %s", str(e))
|
|
100
|
-
self.fail()
|
|
101
|
-
|
|
102
|
-
def testBPubChemIndexCacheProvider(self):
|
|
103
|
-
"""Test case - verify the PubChem index cache"""
|
|
104
|
-
try:
|
|
105
|
-
# -- check cache
|
|
106
|
-
pcicP = PubChemIndexCacheProvider(self.__cfgOb, self.__cachePath)
|
|
107
|
-
ok = pcicP.testCache()
|
|
108
|
-
self.assertTrue(ok)
|
|
109
|
-
except Exception as e:
|
|
110
|
-
logger.exception("Failing with %s", str(e))
|
|
111
|
-
self.fail()
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def pubChemIndexCacheProviderSuite():
|
|
115
|
-
suiteSelect = unittest.TestSuite()
|
|
116
|
-
suiteSelect.addTest(PubChemIndexCacheProviderTests("testAPubChemIndexCacheProvider"))
|
|
117
|
-
suiteSelect.addTest(PubChemIndexCacheProviderTests("testBPubChemIndexCacheProviderCache"))
|
|
118
|
-
return suiteSelect
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
if __name__ == "__main__":
|
|
122
|
-
mySuite = pubChemIndexCacheProviderSuite()
|
|
123
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: ReferenceSequenceAnnotationAdapterTests.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 14-Feb-2020
|
|
5
|
-
#
|
|
6
|
-
# Updates:
|
|
7
|
-
#
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Tests of reference sequence annotation adapter.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
__docformat__ = "google en"
|
|
14
|
-
__author__ = "John Westbrook"
|
|
15
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
-
__license__ = "Apache 2.0"
|
|
17
|
-
|
|
18
|
-
import logging
|
|
19
|
-
import os
|
|
20
|
-
import platform
|
|
21
|
-
import resource
|
|
22
|
-
import time
|
|
23
|
-
import unittest
|
|
24
|
-
|
|
25
|
-
from rcsb.exdb.seq.ReferenceSequenceAnnotationAdapter import ReferenceSequenceAnnotationAdapter
|
|
26
|
-
from rcsb.exdb.seq.ReferenceSequenceAnnotationProvider import ReferenceSequenceAnnotationProvider
|
|
27
|
-
from rcsb.exdb.utils.ObjectTransformer import ObjectTransformer
|
|
28
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
29
|
-
|
|
30
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
31
|
-
logger = logging.getLogger()
|
|
32
|
-
|
|
33
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
34
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class ReferenceSequenceAnnotationAdapterTests(unittest.TestCase):
|
|
38
|
-
def __init__(self, methodName="runTest"):
|
|
39
|
-
super(ReferenceSequenceAnnotationAdapterTests, self).__init__(methodName)
|
|
40
|
-
self.__verbose = True
|
|
41
|
-
|
|
42
|
-
def setUp(self):
|
|
43
|
-
#
|
|
44
|
-
self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
45
|
-
self.__cachePath = os.path.join(TOPDIR, "CACHE")
|
|
46
|
-
self.__useCache = True
|
|
47
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
48
|
-
configName = "site_info_configuration"
|
|
49
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
|
|
50
|
-
#
|
|
51
|
-
self.__resourceName = "MONGO_DB"
|
|
52
|
-
self.__fetchLimit = None
|
|
53
|
-
self.__startTime = time.time()
|
|
54
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
55
|
-
|
|
56
|
-
def tearDown(self):
|
|
57
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
58
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
59
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
60
|
-
endTime = time.time()
|
|
61
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
62
|
-
|
|
63
|
-
# NOTE: IF YOU DISABLE THE TEST BELOW, THEN 'testReferenceCacheProvider' FAILS. CHECK WHETHER ALL 'Reference' PROVIDERS CAN BE DISABLED.
|
|
64
|
-
# @unittest.skip("Disable test - no longer using in production, and fails too frequently with 'Bad xml text' when fetching from UniProt")
|
|
65
|
-
def testAnnotationAdapter(self):
|
|
66
|
-
"""Test case - create and read cache reference sequences assignments and related data."""
|
|
67
|
-
try:
|
|
68
|
-
databaseName = "pdbx_core"
|
|
69
|
-
collectionName = "pdbx_core_polymer_entity"
|
|
70
|
-
polymerType = "Protein"
|
|
71
|
-
# -- create cache ---
|
|
72
|
-
rsaP = ReferenceSequenceAnnotationProvider(
|
|
73
|
-
self.__cfgOb, databaseName, collectionName, polymerType, fetchLimit=self.__fetchLimit, siftsAbbreviated="TEST", cachePath=self.__cachePath, useCache=True
|
|
74
|
-
)
|
|
75
|
-
ok = rsaP.testCache(minMissing=10)
|
|
76
|
-
self.assertTrue(ok)
|
|
77
|
-
numRef1 = rsaP.getRefDataCount()
|
|
78
|
-
#
|
|
79
|
-
# --- Reload from cache ---
|
|
80
|
-
rsaP = ReferenceSequenceAnnotationProvider(self.__cfgOb, databaseName, collectionName, polymerType, cachePath=self.__cachePath, useCache=True)
|
|
81
|
-
ok = rsaP.testCache(minMissing=10)
|
|
82
|
-
self.assertTrue(ok)
|
|
83
|
-
numRef2 = rsaP.getRefDataCount()
|
|
84
|
-
self.assertEqual(numRef1, numRef2)
|
|
85
|
-
#
|
|
86
|
-
rsa = ReferenceSequenceAnnotationAdapter(rsaP)
|
|
87
|
-
obTr = ObjectTransformer(self.__cfgOb, objectAdapter=rsa)
|
|
88
|
-
ok = obTr.doTransform(
|
|
89
|
-
databaseName=databaseName, collectionName=collectionName, fetchLimit=self.__fetchLimit, selectionQuery={"entity_poly.rcsb_entity_polymer_type": polymerType}
|
|
90
|
-
)
|
|
91
|
-
self.assertTrue(ok)
|
|
92
|
-
|
|
93
|
-
except Exception as e:
|
|
94
|
-
logger.exception("Failing with %s", str(e))
|
|
95
|
-
self.fail()
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def referenceSequenceAnnotationAdapterSuite():
|
|
99
|
-
suiteSelect = unittest.TestSuite()
|
|
100
|
-
suiteSelect.addTest(ReferenceSequenceAnnotationAdapterTests("testAnnotationAdapter"))
|
|
101
|
-
return suiteSelect
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if __name__ == "__main__":
|
|
105
|
-
mySuite = referenceSequenceAnnotationAdapterSuite()
|
|
106
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|