rcsb.exdb 1.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rcsb/__init__.py +1 -0
- rcsb/exdb/__init__.py +1 -0
- rcsb/exdb/branch/BranchedEntityExtractor.py +82 -0
- rcsb/exdb/branch/GlycanProvider.py +116 -0
- rcsb/exdb/branch/GlycanUtils.py +114 -0
- rcsb/exdb/branch/__init__.py +0 -0
- rcsb/exdb/chemref/ChemRefEtlWorker.py +118 -0
- rcsb/exdb/chemref/ChemRefExtractor.py +70 -0
- rcsb/exdb/chemref/ChemRefMappingProvider.py +139 -0
- rcsb/exdb/chemref/PubChemDataCacheProvider.py +372 -0
- rcsb/exdb/chemref/PubChemEtlWrapper.py +280 -0
- rcsb/exdb/chemref/PubChemIndexCacheProvider.py +638 -0
- rcsb/exdb/chemref/__init__.py +0 -0
- rcsb/exdb/citation/CitationAdapter.py +91 -0
- rcsb/exdb/citation/CitationExtractor.py +190 -0
- rcsb/exdb/citation/CitationUtils.py +51 -0
- rcsb/exdb/citation/__init__.py +0 -0
- rcsb/exdb/cli/__init__.py +0 -0
- rcsb/exdb/entry/EntryInfoProvider.py +148 -0
- rcsb/exdb/entry/__init__.py +0 -0
- rcsb/exdb/examples-seq/EntityInstanceExtractor.py +557 -0
- rcsb/exdb/examples-seq/EntityPolymerExtractor.py +544 -0
- rcsb/exdb/examples-seq/EntityPolymerExtractorFullTests.py +176 -0
- rcsb/exdb/examples-seq/ReferenceSequenceAssignmentUpdater.py +449 -0
- rcsb/exdb/examples-seq/ReferenceSequenceUtils.py +123 -0
- rcsb/exdb/examples-seq/ReferenceSequenceUtilsTests.py +109 -0
- rcsb/exdb/examples-seq/exampleObjectExtractor.py +109 -0
- rcsb/exdb/examples-seq/fixtureEntityPolymerExtractor.py +85 -0
- rcsb/exdb/examples-seq/testEntityInstanceExtractor.py +170 -0
- rcsb/exdb/examples-seq/testEntityPolymerExtractor.py +171 -0
- rcsb/exdb/examples-seq/testReferenceSequenceAssignmentUpdater.py +79 -0
- rcsb/exdb/examples-seq/testReferenceSequenceUtils.py +108 -0
- rcsb/exdb/seq/AnnotationExtractor.py +76 -0
- rcsb/exdb/seq/LigandNeighborMappingExtractor.py +84 -0
- rcsb/exdb/seq/LigandNeighborMappingProvider.py +106 -0
- rcsb/exdb/seq/PolymerEntityExtractor.py +328 -0
- rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +598 -0
- rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +228 -0
- rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +534 -0
- rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +388 -0
- rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +397 -0
- rcsb/exdb/seq/TaxonomyExtractor.py +69 -0
- rcsb/exdb/seq/UniProtCoreEtlWorker.py +177 -0
- rcsb/exdb/seq/UniProtExtractor.py +80 -0
- rcsb/exdb/seq/__init__.py +0 -0
- rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +19 -0
- rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +12 -0
- rcsb/exdb/tests/__init__.py +0 -0
- rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +104 -0
- rcsb/exdb/tests/fixturePdbxLoader.py +298 -0
- rcsb/exdb/tests/test-data/components-abbrev.cif +2739 -0
- rcsb/exdb/tests/test-data/prdcc-abbrev.cif +9171 -0
- rcsb/exdb/tests/testAnnotationExtractor.py +79 -0
- rcsb/exdb/tests/testBranchedEntityExtractor.py +81 -0
- rcsb/exdb/tests/testChemRefLoader.py +106 -0
- rcsb/exdb/tests/testChemRefMappingProvider.py +95 -0
- rcsb/exdb/tests/testCitationAdapter.py +97 -0
- rcsb/exdb/tests/testCitationExtractor.py +93 -0
- rcsb/exdb/tests/testCitationUtils.py +92 -0
- rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +70 -0
- rcsb/exdb/tests/testEntryInfoProvider.py +97 -0
- rcsb/exdb/tests/testGlycanEtlWorkflow.py +70 -0
- rcsb/exdb/tests/testGlycanProvider.py +98 -0
- rcsb/exdb/tests/testGlycanUtils.py +64 -0
- rcsb/exdb/tests/testLigandNeighborMappingProvider.py +90 -0
- rcsb/exdb/tests/testObjectExtractor.py +342 -0
- rcsb/exdb/tests/testObjectTransformer.py +83 -0
- rcsb/exdb/tests/testObjectUpdater.py +120 -0
- rcsb/exdb/tests/testPolymerEntityExtractor.py +93 -0
- rcsb/exdb/tests/testPubChemDataCacheProvider.py +124 -0
- rcsb/exdb/tests/testPubChemEtlWorkflow.py +134 -0
- rcsb/exdb/tests/testPubChemEtlWrapper.py +155 -0
- rcsb/exdb/tests/testPubChemIndexCacheProvider.py +123 -0
- rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +106 -0
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +121 -0
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +122 -0
- rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +117 -0
- rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +94 -0
- rcsb/exdb/tests/testTaxonomyExtractor.py +75 -0
- rcsb/exdb/tests/testTreeNodeListWorker.py +111 -0
- rcsb/exdb/tests/testUniProtCoreEtlWorker.py +99 -0
- rcsb/exdb/tests/testUniProtExtractor.py +77 -0
- rcsb/exdb/tree/TreeNodeListWorker.py +228 -0
- rcsb/exdb/tree/__init__.py +0 -0
- rcsb/exdb/utils/ObjectAdapterBase.py +22 -0
- rcsb/exdb/utils/ObjectExtractor.py +286 -0
- rcsb/exdb/utils/ObjectTransformer.py +124 -0
- rcsb/exdb/utils/ObjectUpdater.py +121 -0
- rcsb/exdb/utils/ObjectValidator.py +160 -0
- rcsb/exdb/utils/__init__.py +0 -0
- rcsb/exdb/wf/EntryInfoEtlWorkflow.py +71 -0
- rcsb/exdb/wf/GlycanEtlWorkflow.py +76 -0
- rcsb/exdb/wf/PubChemEtlWorkflow.py +240 -0
- rcsb/exdb/wf/__init__.py +0 -0
- rcsb_exdb-1.31.dist-info/METADATA +103 -0
- rcsb_exdb-1.31.dist-info/RECORD +98 -0
- rcsb_exdb-1.31.dist-info/WHEEL +4 -0
- rcsb_exdb-1.31.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: AnnotationExtractorTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 26-Jan-2020
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests for extraction of annotation identifiers from the polymer entity collection.
|
|
11
|
+
Currently limited to the extraction of unique GO identifiers.
|
|
12
|
+
"""
|
|
13
|
+
__docformat__ = "google en"
|
|
14
|
+
__author__ = "John Westbrook"
|
|
15
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
+
__license__ = "Apache 2.0"
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import platform
|
|
21
|
+
import resource
|
|
22
|
+
import time
|
|
23
|
+
import unittest
|
|
24
|
+
|
|
25
|
+
from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
|
|
26
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class AnnotationExtractorTests(unittest.TestCase):
    """Tests for AnnotationExtractor using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(AnnotationExtractorTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object from the mock-data tree and start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testGetGoIds(self):
        """Test case - get the unique GO identifiers"""
        try:
            urs = AnnotationExtractor(self.__cfgOb)
            goIdL = urs.getUniqueIdentifiers("GO")
            logger.debug("goIdL %r", goIdL)
            logger.info("Unique GO ID count %d", len(goIdL))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def extractorSuite():
    """Return a test suite containing the AnnotationExtractorTests.testGetGoIds case."""
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTest(AnnotationExtractorTests("testGetGoIds"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = extractorSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: BranchedEntityExtractorTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 24-May-2021
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests for extraction of polymer entity sequence details from the ExDB core collections.
|
|
11
|
+
"""
|
|
12
|
+
__docformat__ = "google en"
|
|
13
|
+
__author__ = "John Westbrook"
|
|
14
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
15
|
+
__license__ = "Apache 2.0"
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
import os
|
|
19
|
+
import platform
|
|
20
|
+
import resource
|
|
21
|
+
import time
|
|
22
|
+
import unittest
|
|
23
|
+
|
|
24
|
+
from rcsb.exdb.branch.BranchedEntityExtractor import BranchedEntityExtractor
|
|
25
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BranchedEntityExtractorTests(unittest.TestCase):
    """Tests for BranchedEntityExtractor using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(BranchedEntityExtractorTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object, set the export path and start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        # Target file for the exported branched entity details
        self.__detailsPath = os.path.join(HERE, "test-output", "CACHE", "branched-entity-details.json")
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testGetBranchedEntityDetails(self):
        """Test case - get branched entity BIRD and WURCS details"""
        try:
            bEx = BranchedEntityExtractor(self.__cfgOb)
            rD = bEx.getBranchedDetails()
            self.assertGreaterEqual(len(rD), 12)
            logger.info("Branched entity descriptor count %d", len(rD))
            ok = bEx.exportBranchedEntityDetails(self.__detailsPath, fmt="json")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def extractorSuite():
    """Return a test suite containing the BranchedEntityExtractorTests.testGetBranchedEntityDetails case."""
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTest(BranchedEntityExtractorTests("testGetBranchedEntityDetails"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = extractorSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: ChemRefLoaderTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 18-Jun-2021
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests for loading chemical reference data and identifier mapping information.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
__docformat__ = "google en"
|
|
15
|
+
__author__ = "John Westbrook"
|
|
16
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
17
|
+
__license__ = "Apache 2.0"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import platform
|
|
23
|
+
import resource
|
|
24
|
+
import time
|
|
25
|
+
import unittest
|
|
26
|
+
|
|
27
|
+
from rcsb.exdb.chemref.ChemRefExtractor import ChemRefExtractor
|
|
28
|
+
from rcsb.exdb.chemref.ChemRefEtlWorker import ChemRefEtlWorker
|
|
29
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
30
|
+
from rcsb.utils.io.MarshalUtil import MarshalUtil
|
|
31
|
+
|
|
32
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class ChemRefLoaderTests(unittest.TestCase):
    """Tests for ChemRefEtlWorker and ChemRefExtractor using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(ChemRefLoaderTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object, cache path and update identifier, then start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        #
        # sample data set
        self.__updateId = "2021_10"
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testLoadIntegratedDrugBankData(self):
        """Test case - load integrated DrugBank chemical reference data -"""
        try:
            crw = ChemRefEtlWorker(self.__cfgOb, self.__cachePath)
            crExt = ChemRefExtractor(self.__cfgOb)

            idD = crExt.getChemCompAccessionMapping(referenceResourceName="DrugBank")
            logger.info("Mapping dictionary %r", len(idD))
            #
            ok = crw.load(self.__updateId, extResource="DrugBank", loadType="full")
            #
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))

    def testDrugBankDataMapping(self):
        """Test case - get DrugBank mapping -"""
        try:
            crExt = ChemRefExtractor(self.__cfgOb)
            idD = crExt.getChemCompAccessionMapping(referenceResourceName="DrugBank")
            logger.info("Mapping dictionary %r", len(idD))
            #
            mU = MarshalUtil()
            fp = os.path.join(HERE, "test-output", "drugbank-mapping.json")
            # NOTE(review): the export result is not asserted here - confirm whether
            # doExport() reports status and whether this test should check it.
            mU.doExport(fp, idD, fmt="json", indent=3)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def chemRefLoadSuite():
    """Return a test suite containing the ChemRefLoaderTests.testLoadIntegratedDrugBankData case."""
    suiteSelect = unittest.TestSuite()
    # NOTE(review): testDrugBankDataMapping is not part of this suite - confirm that is intended.
    suiteSelect.addTest(ChemRefLoaderTests("testLoadIntegratedDrugBankData"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = chemRefLoadSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: ChemRefMappingProviderTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 18-Jun-2021
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests for loading and accessing the chemical reference mapping cache.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
__docformat__ = "google en"
|
|
15
|
+
__author__ = "John Westbrook"
|
|
16
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
17
|
+
__license__ = "Apache 2.0"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import platform
|
|
23
|
+
import resource
|
|
24
|
+
import time
|
|
25
|
+
import unittest
|
|
26
|
+
|
|
27
|
+
from rcsb.exdb.chemref.ChemRefMappingProvider import ChemRefMappingProvider
|
|
28
|
+
|
|
29
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
30
|
+
|
|
31
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ChemRefMappingProviderTests(unittest.TestCase):
    """Tests for ChemRefMappingProvider using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(ChemRefMappingProviderTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object and cache path, then start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testChemRefMapping(self):
        """Test case - load and access mapping cache"""
        try:
            crmP = ChemRefMappingProvider(self.__cachePath, useCache=True)
            ok = crmP.testCache()
            self.assertTrue(ok)
            #
            ok = crmP.fetchChemRefMapping(self.__cfgOb, referenceResourceNameList=None)
            self.assertTrue(ok)
            # Re-create the provider and check the cache again after the fetch
            crmP = ChemRefMappingProvider(self.__cachePath, useCache=True)
            ok = crmP.testCache(minCount=2)
            self.assertTrue(ok)
            # tD = {"CHEMBL": ("CHEMBL14249", "ATP"), "DRUGBANK": ("DB00171", "ATP")}
            tD = {"DRUGBANK": ("DB00171", "ATP")}
            for refName, (refId, localId) in tD.items():
                tL = crmP.getReferenceIds(refName, localId)
                logger.info("tL %r", tL)
                self.assertIn(refId, tL)
                tL = crmP.getLocalIds(refName, refId)
                logger.info("tL %r", tL)
                self.assertIn(localId, tL)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def chemRefMappingSuite():
    """Return a test suite containing the ChemRefMappingProviderTests.testChemRefMapping case."""
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTest(ChemRefMappingProviderTests("testChemRefMapping"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = chemRefMappingSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: CitationAdapterTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 22-Nov-2019
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests of the citation adapter.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
__docformat__ = "google en"
|
|
14
|
+
__author__ = "John Westbrook"
|
|
15
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
+
__license__ = "Apache 2.0"
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
import platform
|
|
21
|
+
import resource
|
|
22
|
+
import time
|
|
23
|
+
import unittest
|
|
24
|
+
|
|
25
|
+
from rcsb.exdb.citation.CitationAdapter import CitationAdapter
|
|
26
|
+
from rcsb.exdb.utils.ObjectTransformer import ObjectTransformer
|
|
27
|
+
from rcsb.utils.citation.CitationReferenceProvider import CitationReferenceProvider
|
|
28
|
+
from rcsb.utils.citation.JournalTitleAbbreviationProvider import JournalTitleAbbreviationProvider
|
|
29
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
30
|
+
|
|
31
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class CitationAdapterTests(unittest.TestCase):
    """Tests for CitationAdapter applied through ObjectTransformer using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(CitationAdapterTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object and cache settings, then start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__resourceName = "MONGO_DB"
        self.__cachePath = os.path.join(TOPDIR, "CACHE", "cit_ref")
        self.__testEntityCacheKwargs = {"fmt": "json", "indent": 3}
        self.__fetchLimit = None
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testCitationAdapter(self):
        """Test case - apply the citation adapter to entry objects through the object transformer."""
        try:
            databaseName = "pdbx_core"
            collectionName = "pdbx_core_entry"
            useCache = False
            #
            crP = CitationReferenceProvider(cachePath=self.__cachePath, useCache=useCache)
            ok = crP.testCache()
            self.assertTrue(ok)
            jtaP = JournalTitleAbbreviationProvider(cachePath=self.__cachePath, useCache=useCache)
            ok = jtaP.testCache()
            self.assertTrue(ok)
            #
            ca = CitationAdapter(crP, jtaP)
            obTr = ObjectTransformer(self.__cfgOb, objectAdapter=ca)
            ok = obTr.doTransform(databaseName=databaseName, collectionName=collectionName, fetchLimit=self.__fetchLimit)
            self.assertTrue(ok)

        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def citationAdapterSuite():
    """Return a test suite containing the CitationAdapterTests.testCitationAdapter case."""
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTest(CitationAdapterTests("testCitationAdapter"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = citationAdapterSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: CitationExtractorTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 25-Apr-2019
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests for extracting selected values from collections (limited tests from mock-data repos)
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
__docformat__ = "google en"
|
|
15
|
+
__author__ = "John Westbrook"
|
|
16
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
17
|
+
__license__ = "Apache 2.0"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import os
|
|
22
|
+
import platform
|
|
23
|
+
import resource
|
|
24
|
+
import time
|
|
25
|
+
import unittest
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
from rcsb.exdb.citation.CitationExtractor import CitationExtractor
|
|
29
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
30
|
+
from rcsb.utils.io.MarshalUtil import MarshalUtil
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class CitationExtractorTests(unittest.TestCase):
    """Tests for CitationExtractor using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(CitationExtractorTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object, cache paths and entry limit, then start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        #
        self.__cacheKwargs = {"fmt": "json", "indent": 3}
        # Cache directory name is read from the EXDB_CACHE_DIR configuration option
        self.__exdbDirPath = os.path.join(self.__cachePath, self.__cfgOb.get("EXDB_CACHE_DIR", sectionName=configName))
        #
        self.__mU = MarshalUtil()
        self.__entryLimitTest = 20
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testExtractEntryCitations(self):
        """Test case - extract entry citations"""
        try:
            ce = CitationExtractor(self.__cfgOb, exdbDirPath=self.__exdbDirPath, useCache=True, cacheKwargs=self.__cacheKwargs, entryLimit=self.__entryLimitTest)
            eCount = ce.getEntryCount()
            self.assertGreaterEqual(eCount, self.__entryLimitTest)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def citationExtractorSuite():
    """Return a test suite containing the CitationExtractorTests.testExtractEntryCitations case."""
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTest(CitationExtractorTests("testExtractEntryCitations"))

    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = citationExtractorSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
##
|
|
2
|
+
# File: CitationUtilsTests.py
|
|
3
|
+
# Author: J. Westbrook
|
|
4
|
+
# Date: 25-Apr-2019
|
|
5
|
+
#
|
|
6
|
+
# Updates:
|
|
7
|
+
#
|
|
8
|
+
##
|
|
9
|
+
"""
|
|
10
|
+
Tests for citation process and normalization.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
__docformat__ = "google en"
|
|
14
|
+
__author__ = "John Westbrook"
|
|
15
|
+
__email__ = "jwest@rcsb.rutgers.edu"
|
|
16
|
+
__license__ = "Apache 2.0"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
import os
|
|
21
|
+
import platform
|
|
22
|
+
import resource
|
|
23
|
+
import time
|
|
24
|
+
import unittest
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
from rcsb.exdb.citation.CitationUtils import CitationUtils
|
|
28
|
+
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
29
|
+
from rcsb.utils.io.MarshalUtil import MarshalUtil
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Module-level test setup, reconstructed as valid Python (the diff-view gutter rows are removed).
# Root logger configuration shared by every test in this module.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
logger = logging.getLogger()

# HERE is the directory holding this test module; TOPDIR is three directory levels above it.
HERE = os.path.abspath(os.path.dirname(__file__))
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CitationUtilsTests(unittest.TestCase):
    """Tests for CitationUtils using the mock-data configuration."""

    def __init__(self, methodName="runTest"):
        super(CitationUtilsTests, self).__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Create the configuration object, cache paths and entry limit, then start the test timer."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)

        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        #
        self.__cacheKwargs = {"fmt": "json", "indent": 3}
        # Cache directory name is read from the EXDB_CACHE_DIR configuration option
        self.__exdbDirPath = os.path.join(self.__cachePath, self.__cfgOb.get("EXDB_CACHE_DIR", sectionName=configName))
        #
        self.__mU = MarshalUtil()
        self.__entryLimitTest = 20
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory of the process and the elapsed time of the test."""
        # The unit label is platform dependent because ru_maxrss is not reported
        # in the same unit on every platform (see getrusage documentation).
        unitS = "MB" if platform.system() == "Darwin" else "GB"
        rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testEntryCitationAccess(self):
        """Test case - access the citation entry count"""
        try:
            ce = CitationUtils(self.__cfgOb, exdbDirPath=self.__exdbDirPath, useCache=True, cacheKwargs=self.__cacheKwargs, entryLimit=self.__entryLimitTest)
            eCount = ce.getCitationEntryCount()
            self.assertGreaterEqual(eCount, self.__entryLimitTest)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Carry the cause into the test report instead of failing with an empty message.
            self.fail(str(e))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def citationUtilsSuite():
    """Return a test suite containing the CitationUtilsTests.testEntryCitationAccess case."""
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTest(CitationUtilsTests("testEntryCitationAccess"))
    return suiteSelect


if __name__ == "__main__":
    # Run the selected suite with verbose output when executed as a script.
    mySuite = citationUtilsSuite()
    unittest.TextTestRunner(verbosity=2).run(mySuite)
|