rcsb.exdb 1.31__py3-none-any.whl → 1.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/METADATA +1 -1
  2. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/RECORD +4 -41
  3. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
  4. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
  5. rcsb/exdb/tests/__init__.py +0 -0
  6. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
  7. rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
  8. rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
  9. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
  10. rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
  11. rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
  12. rcsb/exdb/tests/testChemRefLoader.py +0 -106
  13. rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
  14. rcsb/exdb/tests/testCitationAdapter.py +0 -97
  15. rcsb/exdb/tests/testCitationExtractor.py +0 -93
  16. rcsb/exdb/tests/testCitationUtils.py +0 -92
  17. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
  18. rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
  19. rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
  20. rcsb/exdb/tests/testGlycanProvider.py +0 -98
  21. rcsb/exdb/tests/testGlycanUtils.py +0 -64
  22. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
  23. rcsb/exdb/tests/testObjectExtractor.py +0 -342
  24. rcsb/exdb/tests/testObjectTransformer.py +0 -83
  25. rcsb/exdb/tests/testObjectUpdater.py +0 -120
  26. rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
  27. rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
  28. rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
  29. rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
  30. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
  31. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
  32. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
  33. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
  34. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
  35. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
  36. rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
  37. rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
  38. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
  39. rcsb/exdb/tests/testUniProtExtractor.py +0 -77
  40. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/WHEEL +0 -0
  41. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/licenses/LICENSE +0 -0
@@ -1,124 +0,0 @@
1
- ##
2
- # File: PubChemDataCacheProviderTests.py
3
- # Author: J. Westbrook
4
- # Date: 17-Jul-2020
5
- #
6
- # Updates:
7
- #
8
- ##
9
- """
10
- Tests for reference data cache maintenance operations
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
- import logging
19
- import os
20
- import platform
21
- import resource
22
- import time
23
- import unittest
24
-
25
- from rcsb.exdb.chemref.PubChemDataCacheProvider import PubChemDataCacheProvider
26
-
27
- from rcsb.utils.config.ConfigUtil import ConfigUtil
28
-
29
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
30
- logger = logging.getLogger()
31
-
32
- HERE = os.path.abspath(os.path.dirname(__file__))
33
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
34
-
35
-
36
- class PubChemDataCacheProviderTests(unittest.TestCase):
37
- def __init__(self, methodName="runTest"):
38
- super(PubChemDataCacheProviderTests, self).__init__(methodName)
39
- self._verbose = True
40
-
41
- def setUp(self):
42
- #
43
- self.__cidList = ["49866376", "66835630", "71664579", "11915", "12072107"]
44
- self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
45
- #
46
- # Site configuration used for database resource access -
47
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
48
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
49
- configName = "site_info_configuration"
50
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
51
- #
52
- self.__startTime = time.time()
53
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
54
-
55
- def tearDown(self):
56
- unitS = "MB" if platform.system() == "Darwin" else "GB"
57
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
58
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
59
- endTime = time.time()
60
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
61
-
62
- def testALoadAndUpdate(self):
63
- """Test case - load and reload/update data store."""
64
- try:
65
- # -- Update/create cache ---
66
- exportPath = os.path.join(self.__cachePath, "PubChem")
67
- pcdcP = PubChemDataCacheProvider(self.__cfgOb, self.__cachePath)
68
- ok, failList = pcdcP.load(self.__cidList, exportPath=exportPath)
69
- self.assertTrue(ok)
70
- self.assertEqual(len(failList), 0)
71
- logger.info("Status %r failList %r", ok, failList)
72
- #
73
- idL = pcdcP.getRefIdCodes()
74
- logger.info("idL %r", idL)
75
- self.assertGreaterEqual(len(idL), len(self.__cidList))
76
- #
77
- ok, failList = pcdcP.updateMissing(self.__cidList, exportPath=exportPath)
78
- self.assertTrue(ok)
79
- self.assertEqual(len(failList), 0)
80
- except Exception as e:
81
- logger.exception("Failing with %s", str(e))
82
- self.fail()
83
-
84
- def testBackupAndRestore(self):
85
- """Test case - load and dump operations."""
86
- try:
87
- # -- Backup/Restore cache ---
88
- pcdcP = PubChemDataCacheProvider(self.__cfgOb, self.__cachePath)
89
- ok, failList = pcdcP.load(self.__cidList, exportPath=None)
90
- self.assertEqual(len(failList), 0)
91
- self.assertTrue(ok)
92
- ok = pcdcP.dump(fmt="json")
93
- self.assertTrue(ok)
94
- #
95
- except Exception as e:
96
- logger.exception("Failing with %s", str(e))
97
- self.fail()
98
-
99
- def testGetRelatedIdentifiers(self):
100
- """Test case - get PubChem xrefs."""
101
- try:
102
- # --- Get related identifiers ---
103
- pcdcP = PubChemDataCacheProvider(self.__cfgOb, self.__cachePath)
104
- rD = pcdcP.getRelatedMapping(self.__cidList)
105
- logger.info("rD %r", rD)
106
- self.assertGreaterEqual(len(rD), len(self.__cidList))
107
- #
108
- except Exception as e:
109
- logger.exception("Failing with %s", str(e))
110
- self.fail()
111
-
112
-
113
- def pubChemDataCacheProviderSuite():
114
- suiteSelect = unittest.TestSuite()
115
- suiteSelect.addTest(PubChemDataCacheProviderTests("testALoadAndUpdate"))
116
- suiteSelect.addTest(PubChemDataCacheProviderTests("testBackupAndRestore"))
117
- suiteSelect.addTest(PubChemDataCacheProviderTests("testGetRelatedIdentifiers"))
118
-
119
- return suiteSelect
120
-
121
-
122
- if __name__ == "__main__":
123
- mySuite = pubChemDataCacheProviderSuite()
124
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,134 +0,0 @@
1
- ##
2
- # File: PubChemEtlWorkflowTests.py
3
- # Author: J. Westbrook
4
- # Date: 29-Jul-2020
5
- #
6
- # Updates:
7
- # 13-Mar-2023 aae Disable git stash testing
8
- ##
9
- """
10
- Tests for PubChem ETL workflow methods
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
- import logging
19
- import os
20
- import platform
21
- import resource
22
- import time
23
- import unittest
24
-
25
- from rcsb.exdb.wf.PubChemEtlWorkflow import PubChemEtlWorkflow
26
-
27
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
28
- logger = logging.getLogger()
29
-
30
- HERE = os.path.abspath(os.path.dirname(__file__))
31
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
32
-
33
-
34
- class PubChemEtlWorkflowTests(unittest.TestCase):
35
- def __init__(self, methodName="runTest"):
36
- super(PubChemEtlWorkflowTests, self).__init__(methodName)
37
- self.__verbose = True
38
-
39
- def setUp(self):
40
- #
41
- self.__dataPath = os.path.join(HERE, "test-data")
42
- self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
43
- #
44
- # Site configuration used for database resource access -
45
- # self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
46
- self.__configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
47
- self.__configName = "site_info_configuration"
48
- #
49
- # These are test source files for chemical component/BIRD indices
50
- self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
51
- self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
52
- self.__ccFileNamePrefix = "cc-abbrev"
53
- #
54
- # This tests pushing files to the stash
55
- self.__testStashServer = True
56
- self.__testStashGit = False
57
- #
58
- self.__startTime = time.time()
59
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
60
-
61
- def tearDown(self):
62
- unitS = "MB" if platform.system() == "Darwin" else "GB"
63
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
64
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
65
- endTime = time.time()
66
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
67
-
68
- def testAUpdateIndex(self):
69
- """Test case - build CCD/BIRD search indices and search for PubChem matches."""
70
- try:
71
- # -- Update local chemical indices and create PubChem mapping index ---
72
-
73
- pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
74
- ok = pcewP.updateMatchedIndex(
75
- ccUrlTarget=self.__ccUrlTarget,
76
- birdUrlTarget=self.__birdUrlTarget,
77
- ccFileNamePrefix=self.__ccFileNamePrefix,
78
- numProcChemComp=4,
79
- rebuildChemIndices=True,
80
- useStash=self.__testStashServer,
81
- useGit=self.__testStashGit
82
- )
83
- self.assertTrue(ok)
84
- except Exception as e:
85
- logger.exception("Failing with %s", str(e))
86
- self.fail()
87
-
88
- def testBDump(self):
89
- """Test case - dump current stored state"""
90
- try:
91
- # --
92
- pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
93
- ok = pcewP.dump(useStash=self.__testStashServer, useGit=self.__testStashGit)
94
- self.assertTrue(ok)
95
- except Exception as e:
96
- logger.exception("Failing with %s", str(e))
97
- self.fail()
98
-
99
- def testCRestore(self):
100
- """Test case - restore object store from the prior dump"""
101
- try:
102
- # --
103
- pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
104
- ok = pcewP.restore()
105
- self.assertTrue(ok)
106
- except Exception as e:
107
- logger.exception("Failing with %s", str(e))
108
- self.fail()
109
-
110
- def testDUpdateData(self):
111
- """Test case - update corresponding data and generate corresponding identifiers."""
112
- try:
113
- # --
114
- pcewP = PubChemEtlWorkflow(configPath=self.__configPath, configName=self.__configName, cachePath=self.__cachePath)
115
- ok = pcewP.updateMatchedData(useStash=self.__testStashServer, useGit=self.__testStashGit)
116
- self.assertTrue(ok)
117
- except Exception as e:
118
- logger.exception("Failing with %s", str(e))
119
- self.fail()
120
-
121
-
122
- def pubChemEtlWorkflowSuite():
123
- suiteSelect = unittest.TestSuite()
124
- suiteSelect.addTest(PubChemEtlWorkflowTests("testAUpdateIndex"))
125
- suiteSelect.addTest(PubChemEtlWorkflowTests("testBDump"))
126
- suiteSelect.addTest(PubChemEtlWorkflowTests("testCRestore"))
127
- suiteSelect.addTest(PubChemEtlWorkflowTests("testDUpdateData"))
128
-
129
- return suiteSelect
130
-
131
-
132
- if __name__ == "__main__":
133
- mySuite = pubChemEtlWorkflowSuite()
134
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,155 +0,0 @@
1
- ##
2
- # File: PubChemEtlWrapperTests.py
3
- # Author: J. Westbrook
4
- # Date: 20-Jul-2020
5
- #
6
- # Updates:
7
- # 13-Mar-2023 aae Updates to use multiprocess count, disable git stash testing,
8
- # Fix tests after removing obsolete entries from test data
9
- ##
10
- """
11
- Tests for PubChem ETL wrapper methods
12
- """
13
-
14
- __docformat__ = "google en"
15
- __author__ = "John Westbrook"
16
- __email__ = "jwest@rcsb.rutgers.edu"
17
- __license__ = "Apache 2.0"
18
-
19
- import logging
20
- import os
21
- import platform
22
- import resource
23
- import time
24
- import unittest
25
-
26
- from rcsb.exdb.chemref.PubChemEtlWrapper import PubChemEtlWrapper
27
-
28
- from rcsb.utils.config.ConfigUtil import ConfigUtil
29
- from rcsb.utils.io.MarshalUtil import MarshalUtil
30
-
31
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
32
- logger = logging.getLogger()
33
-
34
- HERE = os.path.abspath(os.path.dirname(__file__))
35
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
36
-
37
-
38
- class PubChemEtlWrapperTests(unittest.TestCase):
39
- def __init__(self, methodName="runTest"):
40
- super(PubChemEtlWrapperTests, self).__init__(methodName)
41
- self.__verbose = True
42
-
43
- def setUp(self):
44
- #
45
- self.__workPath = os.path.join(HERE, "test-output")
46
- self.__dataPath = os.path.join(HERE, "test-data")
47
- self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
48
- self.__dirPath = os.path.join(self.__cachePath, "PubChem")
49
- self.__mU = MarshalUtil(workPath=self.__cachePath)
50
- #
51
- # Site configuration used for database resource access -
52
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
53
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
54
- configName = "site_info_configuration"
55
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
56
- #
57
- # These are test source files for chemical component/BIRD indices
58
- self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
59
- self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
60
- self.__numComponents = 25
61
- self.__numSelectMatches = 23
62
- self.__numAltMatches = 2
63
- self.__numTotalMatches = 50
64
- #
65
- # This tests pushing files to the stash
66
- self.__testStashServer = True
67
- self.__testStashGit = False
68
- #
69
- self.__startTime = time.time()
70
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
71
-
72
- def tearDown(self):
73
- unitS = "MB" if platform.system() == "Darwin" else "GB"
74
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
75
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
76
- endTime = time.time()
77
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
78
-
79
- def testAFromBootstrap(self):
80
- """Test case - build CCD/BIRD search indices and search for PubChem matches."""
81
- try:
82
- # -- Update local chemical indices and create PubChem mapping index ---
83
-
84
- pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath)
85
- ok = pcewP.updateIndex(
86
- ccUrlTarget=self.__ccUrlTarget,
87
- birdUrlTarget=self.__birdUrlTarget,
88
- ccFileNamePrefix="cc-abbrev",
89
- exportPath=self.__dirPath,
90
- rebuildChemIndices=True,
91
- numProcChemComp=4,
92
- )
93
- self.assertTrue(ok)
94
- #
95
- mL = pcewP.getMatches()
96
- self.assertGreaterEqual(len(mL), self.__numTotalMatches)
97
- selectMatchD, altMatchD = pcewP.getSelectedMatches()
98
- #
99
- logger.info("matchD (%d)", len(selectMatchD))
100
- self.assertGreaterEqual(len(selectMatchD), self.__numSelectMatches)
101
- self.assertGreaterEqual(len(altMatchD), self.__numAltMatches)
102
- #
103
- ok = pcewP.dump(contentType="index")
104
- self.assertTrue(ok)
105
- ok = pcewP.toStash(contentType="index", useStash=self.__testStashServer, useGit=self.__testStashGit)
106
- self.assertTrue(ok)
107
- except Exception as e:
108
- logger.exception("Failing with %s", str(e))
109
- self.fail()
110
-
111
- def testBFromRestore(self):
112
- """Test case - operations from a restored starting point"""
113
- try:
114
- # --
115
- pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath)
116
- ok = pcewP.fromStash(contentType="index")
117
- self.assertTrue(ok)
118
- #
119
- numObjects = pcewP.reloadDump(contentType="index")
120
- logger.info("Restored %d correspondence records", numObjects)
121
- self.assertGreaterEqual(numObjects, self.__numComponents)
122
- mapD, extraMapD = pcewP.getSelectedMatches(exportPath=os.path.join(self.__cachePath, "mapping"))
123
- self.assertGreaterEqual(len(mapD), self.__numSelectMatches)
124
- logger.info("mapD (%d) extraMapD (%d) %r", len(mapD), len(extraMapD), extraMapD)
125
- self.assertGreaterEqual(len(extraMapD), self.__numAltMatches)
126
- cidList = pcewP.getMatches()
127
- logger.info("cidList (%d)", len(cidList))
128
- self.assertGreaterEqual(len(cidList), self.__numTotalMatches - 2)
129
- ok = pcewP.updateMatchedData()
130
- self.assertTrue(ok)
131
- ok = pcewP.dump(contentType="data")
132
- self.assertTrue(ok)
133
- ok = pcewP.toStash(contentType="data", useStash=self.__testStashServer, useGit=self.__testStashGit)
134
- self.assertTrue(ok)
135
- ok = pcewP.updateIdentifiers()
136
- self.assertTrue(ok)
137
- ok = pcewP.dump(contentType="identifiers")
138
- self.assertTrue(ok)
139
- ok = pcewP.toStash(contentType="identifiers", useStash=self.__testStashServer, useGit=self.__testStashGit)
140
- self.assertTrue(ok)
141
- except Exception as e:
142
- logger.exception("Failing with %s", str(e))
143
- self.fail()
144
-
145
-
146
- def pubChemEtlWrapperSuite():
147
- suiteSelect = unittest.TestSuite()
148
- suiteSelect.addTest(PubChemEtlWrapperTests("testAFromBootstrap"))
149
- suiteSelect.addTest(PubChemEtlWrapperTests("testBFromRestore"))
150
- return suiteSelect
151
-
152
-
153
- if __name__ == "__main__":
154
- mySuite = pubChemEtlWrapperSuite()
155
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,123 +0,0 @@
1
- ##
2
- # File: PubChemIndexCacheProviderTests.py
3
- # Author: J. Westbrook
4
- # Date: 16-Jul-2020
5
- #
6
- # Updates:
7
- # 13-Mar-2023 aae Fix tests after removing obsolete entries from test data
8
- ##
9
- """
10
- Tests for PubChem index cache maintenance operations
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
- import logging
19
- import os
20
- import platform
21
- import resource
22
- import time
23
- import unittest
24
-
25
- from rcsb.exdb.chemref.PubChemIndexCacheProvider import PubChemIndexCacheProvider
26
-
27
- from rcsb.utils.config.ConfigUtil import ConfigUtil
28
-
29
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
30
- logger = logging.getLogger()
31
-
32
- HERE = os.path.abspath(os.path.dirname(__file__))
33
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
34
-
35
-
36
- class PubChemIndexCacheProviderTests(unittest.TestCase):
37
- def __init__(self, methodName="runTest"):
38
- super(PubChemIndexCacheProviderTests, self).__init__(methodName)
39
- self.__verbose = True
40
-
41
- def setUp(self):
42
- #
43
- self.__dataPath = os.path.join(HERE, "test-data")
44
- self.__cachePath = os.path.join(HERE, "test-output", "CACHE")
45
- #
46
- # Site configuration used for database resource access -
47
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
48
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
49
- self.__configName = "site_info_configuration"
50
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=self.__configName, mockTopPath=self.__mockTopPath)
51
- #
52
- # These are test source files for chemical component/BIRD indices
53
- self.__ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
54
- self.__birdUrlTarget = os.path.join(self.__dataPath, "prdcc-abbrev.cif")
55
- self.__startTime = time.time()
56
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
57
-
58
- def tearDown(self):
59
- unitS = "MB" if platform.system() == "Darwin" else "GB"
60
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
61
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
62
- endTime = time.time()
63
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
64
-
65
- def testAPubChemIndexCacheProvider(self):
66
- """Test case - search, backup, restore and select PubChem correspondences for reference chemical definitions."""
67
- try:
68
- # -- Update/create mapping index cache ---
69
- numObj = 25
70
- pcicP = PubChemIndexCacheProvider(self.__cfgOb, self.__cachePath)
71
- pcicP.updateMissing(
72
- expireDays=0,
73
- cachePath=self.__cachePath,
74
- ccUrlTarget=self.__ccUrlTarget,
75
- birdUrlTarget=self.__birdUrlTarget,
76
- ccFileNamePrefix="cc-abbrev",
77
- exportPath=os.path.join(self.__cachePath, "PubChem"),
78
- rebuildChemIndices=False,
79
- fetchLimit=None,
80
- )
81
- matchD = pcicP.getMatchData(expireDays=0)
82
- logger.info("matchD (%d)", len(matchD))
83
- self.assertGreaterEqual(len(matchD), numObj)
84
- ok = pcicP.testCache()
85
- self.assertTrue(ok)
86
- #
87
- ok = pcicP.dump()
88
- self.assertTrue(ok)
89
- #
90
- mapD, extraMapD = pcicP.getSelectedMatches(exportPath=os.path.join(self.__cachePath, "mapping"))
91
- self.assertGreaterEqual(len(mapD), 20)
92
- logger.info("mapD (%d) extraMapD (%d) %r", len(mapD), len(extraMapD), extraMapD)
93
- self.assertGreaterEqual(len(extraMapD), 2)
94
- cidList = pcicP.getMatches()
95
- logger.info("cidList (%d)", len(cidList))
96
- self.assertGreaterEqual(len(cidList), 49)
97
- #
98
- except Exception as e:
99
- logger.exception("Failing with %s", str(e))
100
- self.fail()
101
-
102
- def testBPubChemIndexCacheProvider(self):
103
- """Test case - verify the PubChem index cache"""
104
- try:
105
- # -- check cache
106
- pcicP = PubChemIndexCacheProvider(self.__cfgOb, self.__cachePath)
107
- ok = pcicP.testCache()
108
- self.assertTrue(ok)
109
- except Exception as e:
110
- logger.exception("Failing with %s", str(e))
111
- self.fail()
112
-
113
-
114
- def pubChemIndexCacheProviderSuite():
115
- suiteSelect = unittest.TestSuite()
116
- suiteSelect.addTest(PubChemIndexCacheProviderTests("testAPubChemIndexCacheProvider"))
117
- suiteSelect.addTest(PubChemIndexCacheProviderTests("testBPubChemIndexCacheProviderCache"))
118
- return suiteSelect
119
-
120
-
121
- if __name__ == "__main__":
122
- mySuite = pubChemIndexCacheProviderSuite()
123
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,106 +0,0 @@
1
- ##
2
- # File: ReferenceSequenceAnnotationAdapterTests.py
3
- # Author: J. Westbrook
4
- # Date: 14-Feb-2020
5
- #
6
- # Updates:
7
- #
8
- ##
9
- """
10
- Tests of reference sequence annotation adapter.
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
- import logging
19
- import os
20
- import platform
21
- import resource
22
- import time
23
- import unittest
24
-
25
- from rcsb.exdb.seq.ReferenceSequenceAnnotationAdapter import ReferenceSequenceAnnotationAdapter
26
- from rcsb.exdb.seq.ReferenceSequenceAnnotationProvider import ReferenceSequenceAnnotationProvider
27
- from rcsb.exdb.utils.ObjectTransformer import ObjectTransformer
28
- from rcsb.utils.config.ConfigUtil import ConfigUtil
29
-
30
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
31
- logger = logging.getLogger()
32
-
33
- HERE = os.path.abspath(os.path.dirname(__file__))
34
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
35
-
36
-
37
- class ReferenceSequenceAnnotationAdapterTests(unittest.TestCase):
38
- def __init__(self, methodName="runTest"):
39
- super(ReferenceSequenceAnnotationAdapterTests, self).__init__(methodName)
40
- self.__verbose = True
41
-
42
- def setUp(self):
43
- #
44
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
45
- self.__cachePath = os.path.join(TOPDIR, "CACHE")
46
- self.__useCache = True
47
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
48
- configName = "site_info_configuration"
49
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
50
- #
51
- self.__resourceName = "MONGO_DB"
52
- self.__fetchLimit = None
53
- self.__startTime = time.time()
54
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
55
-
56
- def tearDown(self):
57
- unitS = "MB" if platform.system() == "Darwin" else "GB"
58
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
59
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
60
- endTime = time.time()
61
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
62
-
63
- # NOTE: IF YOU DISABLE THE TEST BELOW, THEN 'testReferenceCacheProvider' FAILS. CHECK WHETHER ALL 'Reference' PROVIDERS CAN BE DISABLED.
64
- # @unittest.skip("Disable test - no longer using in production, and fails too frequently with 'Bad xml text' when fetching from UniProt")
65
- def testAnnotationAdapter(self):
66
- """Test case - create and read cache reference sequences assignments and related data."""
67
- try:
68
- databaseName = "pdbx_core"
69
- collectionName = "pdbx_core_polymer_entity"
70
- polymerType = "Protein"
71
- # -- create cache ---
72
- rsaP = ReferenceSequenceAnnotationProvider(
73
- self.__cfgOb, databaseName, collectionName, polymerType, fetchLimit=self.__fetchLimit, siftsAbbreviated="TEST", cachePath=self.__cachePath, useCache=True
74
- )
75
- ok = rsaP.testCache(minMissing=10)
76
- self.assertTrue(ok)
77
- numRef1 = rsaP.getRefDataCount()
78
- #
79
- # --- Reload from cache ---
80
- rsaP = ReferenceSequenceAnnotationProvider(self.__cfgOb, databaseName, collectionName, polymerType, cachePath=self.__cachePath, useCache=True)
81
- ok = rsaP.testCache(minMissing=10)
82
- self.assertTrue(ok)
83
- numRef2 = rsaP.getRefDataCount()
84
- self.assertEqual(numRef1, numRef2)
85
- #
86
- rsa = ReferenceSequenceAnnotationAdapter(rsaP)
87
- obTr = ObjectTransformer(self.__cfgOb, objectAdapter=rsa)
88
- ok = obTr.doTransform(
89
- databaseName=databaseName, collectionName=collectionName, fetchLimit=self.__fetchLimit, selectionQuery={"entity_poly.rcsb_entity_polymer_type": polymerType}
90
- )
91
- self.assertTrue(ok)
92
-
93
- except Exception as e:
94
- logger.exception("Failing with %s", str(e))
95
- self.fail()
96
-
97
-
98
- def referenceSequenceAnnotationAdapterSuite():
99
- suiteSelect = unittest.TestSuite()
100
- suiteSelect.addTest(ReferenceSequenceAnnotationAdapterTests("testAnnotationAdapter"))
101
- return suiteSelect
102
-
103
-
104
- if __name__ == "__main__":
105
- mySuite = referenceSequenceAnnotationAdapterSuite()
106
- unittest.TextTestRunner(verbosity=2).run(mySuite)