rcsb.exdb 1.31__py3-none-any.whl → 1.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/METADATA +1 -1
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/RECORD +4 -41
- rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
- rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
- rcsb/exdb/tests/__init__.py +0 -0
- rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
- rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
- rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
- rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
- rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
- rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
- rcsb/exdb/tests/testChemRefLoader.py +0 -106
- rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
- rcsb/exdb/tests/testCitationAdapter.py +0 -97
- rcsb/exdb/tests/testCitationExtractor.py +0 -93
- rcsb/exdb/tests/testCitationUtils.py +0 -92
- rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
- rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testGlycanProvider.py +0 -98
- rcsb/exdb/tests/testGlycanUtils.py +0 -64
- rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
- rcsb/exdb/tests/testObjectExtractor.py +0 -342
- rcsb/exdb/tests/testObjectTransformer.py +0 -83
- rcsb/exdb/tests/testObjectUpdater.py +0 -120
- rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
- rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
- rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
- rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
- rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
- rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
- rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
- rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
- rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
- rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
- rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
- rcsb/exdb/tests/testUniProtExtractor.py +0 -77
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/WHEEL +0 -0
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.32.dist-info}/licenses/LICENSE +0 -0
|
@@ -43,43 +43,6 @@ rcsb/exdb/seq/TaxonomyExtractor.py,sha256=I7jsb5Kanrnh4X-znl9kZPZMJ7o2dp4fsnp2IW
|
|
|
43
43
|
rcsb/exdb/seq/UniProtCoreEtlWorker.py,sha256=-fEojXF3lAJ1tbMsPIxT9In6ooiPThuKSoIRQ0YlZ1s,7590
|
|
44
44
|
rcsb/exdb/seq/UniProtExtractor.py,sha256=pR_A9e82YvbQ813M8rNPu1bCPOHMjGnCqJmLDMM23Qo,2695
|
|
45
45
|
rcsb/exdb/seq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
-
rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh,sha256=DWiM-DeyG_GFRki4WWSIl41ErYVxrp8t0lPypizterc,778
|
|
47
|
-
rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh,sha256=CPQ8rRpfBVIl5r6IFt2t-P4aBBIWUo4NioR7M0LM6Nc,769
|
|
48
|
-
rcsb/exdb/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
rcsb/exdb/tests/fixtureDictMethodResourceProvider.py,sha256=h_iRfmZR9UG0nFYnVoPDtAE3VPpyrnwJRXbrdfeh0gY,3999
|
|
50
|
-
rcsb/exdb/tests/fixturePdbxLoader.py,sha256=wfXyJ4k2Z-85J6sCSArf-eAuYQVbsTilwWmnm8cNPa4,10167
|
|
51
|
-
rcsb/exdb/tests/testAnnotationExtractor.py,sha256=rpeB4RtOcYD27S3ZwnePHHSaac2j4bmvqfu-wJ5254U,2657
|
|
52
|
-
rcsb/exdb/tests/testBranchedEntityExtractor.py,sha256=efchTzuQylD7L7Kc0j-rEvBeLjnPzvSWVLA8I7JIBys,2936
|
|
53
|
-
rcsb/exdb/tests/testChemRefLoader.py,sha256=pZqWiSIQZ9bfZMJplw_ym9swCe0N6rjq63mfcqWtNO0,3696
|
|
54
|
-
rcsb/exdb/tests/testChemRefMappingProvider.py,sha256=wxypTg-Y5DsAksT4a2x2yh1_JCf-7HqP9z0jKDmNcNg,3353
|
|
55
|
-
rcsb/exdb/tests/testCitationAdapter.py,sha256=ZlvG6aTHzKVnREj3LXQd1iniwC-KXR45vbyzBgdoiu8,3625
|
|
56
|
-
rcsb/exdb/tests/testCitationExtractor.py,sha256=7Xg75JHtnZkfrFnZVNggLRVFeRNW6V5O0tGse40C9fc,3141
|
|
57
|
-
rcsb/exdb/tests/testCitationUtils.py,sha256=lCn5n_49vAnQIaUd4fSY-sQ6Ld5r5G32WRaprErryo8,3064
|
|
58
|
-
rcsb/exdb/tests/testEntryInfoEtlWorkflow.py,sha256=TutfC949U0nDkFbmMdXo_MgbxMfvll4O-5C7N8Qj3TA,2340
|
|
59
|
-
rcsb/exdb/tests/testEntryInfoProvider.py,sha256=2Ti-o2A8QlB0reJHFqXHjyrvehlLRFtycFhCTBTY_d0,3360
|
|
60
|
-
rcsb/exdb/tests/testGlycanEtlWorkflow.py,sha256=Olml-71jiWAGOU-NQNHiRSPrew7RJtLK3R6u6WTlUSc,2295
|
|
61
|
-
rcsb/exdb/tests/testGlycanProvider.py,sha256=n15qTncV289o-E5J_AsizPW1tgeCHZH8A7_qXG2Afr4,3201
|
|
62
|
-
rcsb/exdb/tests/testGlycanUtils.py,sha256=h59g_iuEEzhcWRW50VdYlRHw3UrpqiRYgJZKDU4C9Wc,1953
|
|
63
|
-
rcsb/exdb/tests/testLigandNeighborMappingProvider.py,sha256=JjY4y5a4ZuZPJ4gdOxIwRLVz5U6F0f0K963OeDZmFAM,3144
|
|
64
|
-
rcsb/exdb/tests/testObjectExtractor.py,sha256=aHP6q9UQqDFd44u8KNQrnfTWy8Rf9dQ8FnHhUHAmqq0,14122
|
|
65
|
-
rcsb/exdb/tests/testObjectTransformer.py,sha256=7fPWTfN4G6wFj6MZkw_L0dtONMsVBlBCzoHvxzrgsUo,2944
|
|
66
|
-
rcsb/exdb/tests/testObjectUpdater.py,sha256=Kq-Mk4ZbD-eMoIsR3zGJvamFq-OEX4tFXyPB9FePWxc,4950
|
|
67
|
-
rcsb/exdb/tests/testPolymerEntityExtractor.py,sha256=RTsEWZDarrtdVn2Xy7fbPIrZTw7Nd9VJ21aBBr3QFU0,3536
|
|
68
|
-
rcsb/exdb/tests/testPubChemDataCacheProvider.py,sha256=K2Fg4Bibtdg9Z9zLwwJqZDa-XlIKHiSb9A6ewIa2-sM,4679
|
|
69
|
-
rcsb/exdb/tests/testPubChemEtlWorkflow.py,sha256=vENOnc7D-EukdjElvhsZqTL0h5N6alPso4Gn3rDNMXg,5187
|
|
70
|
-
rcsb/exdb/tests/testPubChemEtlWrapper.py,sha256=yGg0HvtYvFTvEEK9OsqvSSRVzCIG90UPEK_Lh7SifAk,6511
|
|
71
|
-
rcsb/exdb/tests/testPubChemIndexCacheProvider.py,sha256=KVPfgnMdp7V57LnuwLLkbcZ-DjGHdEx4q0V1RAyOCXw,4947
|
|
72
|
-
rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py,sha256=hABmh743REIhFe3vP_dKr0uUvUH6-Ci71fpHGUIzt6E,4495
|
|
73
|
-
rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py,sha256=g9-t6-TMZNqxtjgZy9cYCXA19ZkOfjb0un7o8sasuAA,4897
|
|
74
|
-
rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py,sha256=p3wPa-9_Il9eHP3fQGI1ag2FE0mvuOdeGu2uet6APOU,5005
|
|
75
|
-
rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py,sha256=6qzk-xt0QVrX4IC8Ml6HnDzFcee14vIUs14AKAWFg9w,4631
|
|
76
|
-
rcsb/exdb/tests/testReferenceSequenceCacheProvider.py,sha256=thQnpv1dxnL46PfZS6alhyS2N_rnATRmvh8SU9HJHAM,3469
|
|
77
|
-
rcsb/exdb/tests/testTaxonomyExtractor.py,sha256=NZv0UFBnnRUxQN3gzPIruOqyJEG6F81avI8tQu9ILao,2536
|
|
78
|
-
rcsb/exdb/tests/testTreeNodeListWorker.py,sha256=PiVaSOG0140fonhHsp0yxft3KlYaBzDb2hwmPKZxy5Y,3653
|
|
79
|
-
rcsb/exdb/tests/testUniProtCoreEtlWorker.py,sha256=8LyJjeMiKkGSYZ1641xpO1clKSJSESWrHywrNqMeJ9o,3403
|
|
80
|
-
rcsb/exdb/tests/testUniProtExtractor.py,sha256=jNNAI1GIedEbZhpdG1C9CjuZskUwxFg8jxgZJi1tr2U,2604
|
|
81
|
-
rcsb/exdb/tests/test-data/components-abbrev.cif,sha256=Vodm2R_8ipO2X4e8W3Wa1MiH3YF7RSczdtXC0TA13sM,139314
|
|
82
|
-
rcsb/exdb/tests/test-data/prdcc-abbrev.cif,sha256=pA2yCImadbuh51q59fdIOsGnyXVBPFv-8Z8Zrk2jUqA,415606
|
|
83
46
|
rcsb/exdb/tree/TreeNodeListWorker.py,sha256=VLd7MWCxw9fONoC3xYbjvARp2O2V8Vyy-kUZnwQWi30,10233
|
|
84
47
|
rcsb/exdb/tree/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
85
48
|
rcsb/exdb/utils/ObjectAdapterBase.py,sha256=w-MGvs-TFQXzfgOfAX3aNyCfaN9gY8WP-7MU2FcMAYs,466
|
|
@@ -92,7 +55,7 @@ rcsb/exdb/wf/EntryInfoEtlWorkflow.py,sha256=YVr75Wz1BPjLr_satd28B9BeD3QL6HwmkR17
|
|
|
92
55
|
rcsb/exdb/wf/GlycanEtlWorkflow.py,sha256=oJ6wf438K2e-eLmy8Ni3MCPxjAKgVJY38SWO885gnmg,2820
|
|
93
56
|
rcsb/exdb/wf/PubChemEtlWorkflow.py,sha256=fNX3A6kf0S1XiJMz7ywNpFuuua5lT3XaUFjcCJtvQsU,11235
|
|
94
57
|
rcsb/exdb/wf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
|
-
rcsb_exdb-1.
|
|
96
|
-
rcsb_exdb-1.
|
|
97
|
-
rcsb_exdb-1.
|
|
98
|
-
rcsb_exdb-1.
|
|
58
|
+
rcsb_exdb-1.32.dist-info/METADATA,sha256=ZKtnT7xFqpDHcUGISba091bqV1ckkjVPBKd-ot1ICJ4,3845
|
|
59
|
+
rcsb_exdb-1.32.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
60
|
+
rcsb_exdb-1.32.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
61
|
+
rcsb_exdb-1.32.dist-info/RECORD,,
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# File: TEST-EXDB-CLI-EXEC.sh
|
|
3
|
-
# Date: 3-Sep-2019 jdw
|
|
4
|
-
#
|
|
5
|
-
# Examples
|
|
6
|
-
#
|
|
7
|
-
# tree node list load
|
|
8
|
-
#
|
|
9
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGTREENODELIST
|
|
10
|
-
#
|
|
11
|
-
# Chemref load
|
|
12
|
-
#
|
|
13
|
-
exdb_exec_cli --mock --full --etl_chemref --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGCHEMREF
|
|
14
|
-
#
|
|
15
|
-
# Reference sequence update
|
|
16
|
-
#
|
|
17
|
-
exdb_exec_cli --mock --upd_ref_seq --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUPDREFSEQ
|
|
18
|
-
#
|
|
19
|
-
#
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
#!/bin/bash
|
|
2
|
-
# File: TEST-EXDB-CLI-REFSEQ-EXEC.sh
|
|
3
|
-
# Date: 17-Oct-2019 jdw
|
|
4
|
-
#
|
|
5
|
-
# Reference sequence update --mock is required for example SIFTS files -
|
|
6
|
-
#
|
|
7
|
-
exdb_exec_cli --mock --upd_ref_seq --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUPDREFSEQ
|
|
8
|
-
#
|
|
9
|
-
exdb_exec_cli --test_req_seq_cache --mock --upd_ref_seq --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUPDREFSEQTEST
|
|
10
|
-
#
|
|
11
|
-
exdb_exec_cli --mock --full --etl_uniprot --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUNIPROT
|
|
12
|
-
#
|
rcsb/exdb/tests/__init__.py
DELETED
|
File without changes
|
|
@@ -1,104 +0,0 @@
|
|
|
1
|
-
# File: DictMethodResourceProviderFixture.py
|
|
2
|
-
# Author: J. Westbrook
|
|
3
|
-
# Date: 12-Aug-2019
|
|
4
|
-
# Version: 0.001
|
|
5
|
-
#
|
|
6
|
-
# Update:
|
|
7
|
-
|
|
8
|
-
##
|
|
9
|
-
"""
|
|
10
|
-
Fixture for setting up cached resources for dictionary method helpers
|
|
11
|
-
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
__docformat__ = "google en"
|
|
15
|
-
__author__ = "John Westbrook"
|
|
16
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
17
|
-
__license__ = "Apache 2.0"
|
|
18
|
-
|
|
19
|
-
import logging
|
|
20
|
-
import os
|
|
21
|
-
import platform
|
|
22
|
-
import resource
|
|
23
|
-
import time
|
|
24
|
-
import unittest
|
|
25
|
-
|
|
26
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
27
|
-
from rcsb.utils.dictionary.DictMethodResourceProvider import DictMethodResourceProvider
|
|
28
|
-
|
|
29
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
30
|
-
logger = logging.getLogger()
|
|
31
|
-
logger.setLevel(logging.INFO)
|
|
32
|
-
|
|
33
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
34
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class DictMethodResourceProviderFixture(unittest.TestCase):
|
|
38
|
-
def setUp(self):
|
|
39
|
-
self.__cachePath = os.path.join(TOPDIR, "CACHE")
|
|
40
|
-
mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
41
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
42
|
-
configName = "site_info_configuration"
|
|
43
|
-
self.__configName = configName
|
|
44
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
|
|
45
|
-
|
|
46
|
-
self.__startTime = time.time()
|
|
47
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
48
|
-
|
|
49
|
-
def tearDown(self):
|
|
50
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
51
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
52
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
53
|
-
endTime = time.time()
|
|
54
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
55
|
-
|
|
56
|
-
def testBuildResourceCache(self):
|
|
57
|
-
"""Fixture - generate and check selected resource caches"""
|
|
58
|
-
try:
|
|
59
|
-
resourceNameL = [
|
|
60
|
-
"AtcProvider instance",
|
|
61
|
-
"DrugBankProvider instance",
|
|
62
|
-
"PubChemProvider instance",
|
|
63
|
-
"CitationReferenceProvider instance",
|
|
64
|
-
"JournalTitleAbbreviationProvider instance",
|
|
65
|
-
"EnzymeDatabaseProvider instance",
|
|
66
|
-
"PfamProvider instance",
|
|
67
|
-
"SiftsSummaryProvider instance",
|
|
68
|
-
"CathProvider instance",
|
|
69
|
-
"ScopProvider instance",
|
|
70
|
-
"EcodProvider instance",
|
|
71
|
-
"Scop2Provider instance",
|
|
72
|
-
"TaxonomyProvider instance",
|
|
73
|
-
]
|
|
74
|
-
rP = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath, restoreUseStash=False, restoreUseGit=True)
|
|
75
|
-
for resourceName in resourceNameL:
|
|
76
|
-
rP.getResource(resourceName, useCache=True, default=None, doRestore=True, doBackup=False)
|
|
77
|
-
#
|
|
78
|
-
except Exception as e:
|
|
79
|
-
logger.exception("Failing with %s", str(e))
|
|
80
|
-
self.fail()
|
|
81
|
-
|
|
82
|
-
@unittest.skip("Troubleshooting test")
|
|
83
|
-
def testRecoverResourceCache(self):
|
|
84
|
-
"""Fixture - generate and check resource caches"""
|
|
85
|
-
try:
|
|
86
|
-
rp = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath)
|
|
87
|
-
ret = rp.cacheResources(useCache=True)
|
|
88
|
-
self.assertTrue(ret)
|
|
89
|
-
except Exception as e:
|
|
90
|
-
logger.exception("Failing with %s", str(e))
|
|
91
|
-
self.fail()
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def dictMethodResourceProviderSuite():
|
|
95
|
-
suiteSelect = unittest.TestSuite()
|
|
96
|
-
suiteSelect.addTest(DictMethodResourceProviderFixture("testBuildResourceCache"))
|
|
97
|
-
# suiteSelect.addTest(DictMethodResourceProviderFixture("testRecoverResourceCache"))
|
|
98
|
-
return suiteSelect
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if __name__ == "__main__":
|
|
102
|
-
|
|
103
|
-
mySuite = dictMethodResourceProviderSuite()
|
|
104
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: PdbxLoaderFixture.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 4-Sep-2019
|
|
5
|
-
# Version: 0.001
|
|
6
|
-
#
|
|
7
|
-
# Updates:
|
|
8
|
-
#
|
|
9
|
-
##
|
|
10
|
-
"""
|
|
11
|
-
Fixture for loading the chemical reference and pdbx_core collections in a local mongo instance.
|
|
12
|
-
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
__docformat__ = "google en"
|
|
16
|
-
__author__ = "John Westbrook"
|
|
17
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
18
|
-
__license__ = "Apache 2.0"
|
|
19
|
-
|
|
20
|
-
# import glob
|
|
21
|
-
import logging
|
|
22
|
-
import os
|
|
23
|
-
import platform
|
|
24
|
-
import resource
|
|
25
|
-
import time
|
|
26
|
-
import unittest
|
|
27
|
-
|
|
28
|
-
from rcsb.db.mongo.DocumentLoader import DocumentLoader
|
|
29
|
-
from rcsb.db.mongo.PdbxLoader import PdbxLoader
|
|
30
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
31
|
-
# from rcsb.utils.io.FileUtil import FileUtil
|
|
32
|
-
|
|
33
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
34
|
-
logger = logging.getLogger()
|
|
35
|
-
logger.setLevel(logging.INFO)
|
|
36
|
-
|
|
37
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
38
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class PdbxLoaderFixture(unittest.TestCase):
|
|
42
|
-
|
|
43
|
-
def __init__(self, methodName="runTest"):
|
|
44
|
-
super(PdbxLoaderFixture, self).__init__(methodName)
|
|
45
|
-
self.__verbose = True
|
|
46
|
-
|
|
47
|
-
def setUp(self):
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
self.__isMac = platform.system() == "Darwin"
|
|
51
|
-
self.__excludeTypeL = None if self.__isMac else ["optional"]
|
|
52
|
-
self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
53
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
54
|
-
# configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example-local.yml")
|
|
55
|
-
# To Do: Investigate why GitUtil sometimes gives divergence error when using 'DISCOVERY_MODE: remote', but not with 'local':
|
|
56
|
-
# stderr: 'fatal: Need to specify how to reconcile divergent branches.'
|
|
57
|
-
# Behavior isn't entirely predictable, since it happens sometimes but not all the time.
|
|
58
|
-
# To fully debug, will need to add more logging statements to GitUtil, StashableBase, & StashUtil (in rcsb.utils.io)
|
|
59
|
-
# Or, can try to resolve error directly by specifying how to reconcile divergent branches in git.Repo class.
|
|
60
|
-
configName = "site_info_configuration"
|
|
61
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
|
|
62
|
-
#
|
|
63
|
-
self.__resourceName = "MONGO_DB"
|
|
64
|
-
self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
|
|
65
|
-
self.__cachePath = os.path.join(TOPDIR, "CACHE")
|
|
66
|
-
self.__readBackCheck = True
|
|
67
|
-
self.__numProc = 1
|
|
68
|
-
self.__chunkSize = 2
|
|
69
|
-
self.__fileLimit = 38
|
|
70
|
-
self.__documentStyle = "rowwise_by_name_with_cardinality"
|
|
71
|
-
#
|
|
72
|
-
self.__birdChemCompCoreIdList = [
|
|
73
|
-
"PRD_000010",
|
|
74
|
-
"PRD_000060",
|
|
75
|
-
"PRD_000220",
|
|
76
|
-
"PRD_000882",
|
|
77
|
-
"PRD_000154",
|
|
78
|
-
"PRD_000877",
|
|
79
|
-
"PRD_000198",
|
|
80
|
-
"PRD_000009",
|
|
81
|
-
"PRD_000979",
|
|
82
|
-
"PRDCC_000010",
|
|
83
|
-
"PRDCC_000220",
|
|
84
|
-
"PRDCC_000882",
|
|
85
|
-
"PRDCC_000154",
|
|
86
|
-
"PRDCC_000198",
|
|
87
|
-
"PRDCC_000009",
|
|
88
|
-
"FAM_000010",
|
|
89
|
-
"FAM_000210",
|
|
90
|
-
"FAM_000220",
|
|
91
|
-
"FAM_000001",
|
|
92
|
-
"FAM_000391",
|
|
93
|
-
"FAM_000093",
|
|
94
|
-
"FAM_000084",
|
|
95
|
-
"FAM_000016",
|
|
96
|
-
"FAM_000336",
|
|
97
|
-
"1G1",
|
|
98
|
-
"2RT",
|
|
99
|
-
"2XL",
|
|
100
|
-
"2XN",
|
|
101
|
-
"ATP",
|
|
102
|
-
"BJA",
|
|
103
|
-
"BM3",
|
|
104
|
-
"CNC",
|
|
105
|
-
"DAL",
|
|
106
|
-
"DDZ",
|
|
107
|
-
"DHA",
|
|
108
|
-
"DSN",
|
|
109
|
-
"GTP",
|
|
110
|
-
"HKL",
|
|
111
|
-
"NAC",
|
|
112
|
-
"NAG",
|
|
113
|
-
"NND",
|
|
114
|
-
"PTR",
|
|
115
|
-
"SEP",
|
|
116
|
-
"SMJ",
|
|
117
|
-
"STL",
|
|
118
|
-
"UNK",
|
|
119
|
-
"UNX",
|
|
120
|
-
"UVL",
|
|
121
|
-
]
|
|
122
|
-
#
|
|
123
|
-
self.__pdbIdList = [
|
|
124
|
-
"1AH1",
|
|
125
|
-
"1B5F",
|
|
126
|
-
"1BMV",
|
|
127
|
-
"1C58",
|
|
128
|
-
"1DSR",
|
|
129
|
-
"1DUL",
|
|
130
|
-
"1KQE",
|
|
131
|
-
"1O3Q",
|
|
132
|
-
"1SFO",
|
|
133
|
-
"2HW3",
|
|
134
|
-
"2HYV",
|
|
135
|
-
"2OSL",
|
|
136
|
-
"2VOO",
|
|
137
|
-
"2WMG",
|
|
138
|
-
"3AD7",
|
|
139
|
-
"3HYA",
|
|
140
|
-
"3IYD",
|
|
141
|
-
"3MBG",
|
|
142
|
-
"3RER",
|
|
143
|
-
"3VD8",
|
|
144
|
-
"3VFJ",
|
|
145
|
-
"3X11",
|
|
146
|
-
"3ZTJ",
|
|
147
|
-
"4E2O",
|
|
148
|
-
"4EN8",
|
|
149
|
-
"4MEY",
|
|
150
|
-
"5EU8",
|
|
151
|
-
"5KDS",
|
|
152
|
-
# "5TM0",
|
|
153
|
-
"5VH4",
|
|
154
|
-
# "5VP2",
|
|
155
|
-
# "6FSZ",
|
|
156
|
-
"6LU7",
|
|
157
|
-
"6NN7",
|
|
158
|
-
# "6Q20",
|
|
159
|
-
"6RFK",
|
|
160
|
-
"6RKU",
|
|
161
|
-
"6YRQ",
|
|
162
|
-
]
|
|
163
|
-
self.__ldList = [
|
|
164
|
-
{
|
|
165
|
-
# "databaseName": "dw",
|
|
166
|
-
"collectionGroupName": "core_chem_comp",
|
|
167
|
-
"contentType": "bird_chem_comp_core",
|
|
168
|
-
"collectionNameList": None,
|
|
169
|
-
"loadType": "full",
|
|
170
|
-
"mergeContentTypes": None,
|
|
171
|
-
"validationLevel": "min",
|
|
172
|
-
"inputIdCodeList": self.__birdChemCompCoreIdList
|
|
173
|
-
},
|
|
174
|
-
{
|
|
175
|
-
# "databaseName": "pdbx_core",
|
|
176
|
-
"collectionGroupName": "pdbx_core",
|
|
177
|
-
"contentType": "pdbx_core",
|
|
178
|
-
"collectionNameList": None,
|
|
179
|
-
"loadType": "replace",
|
|
180
|
-
"mergeContentTypes": ["vrpt"],
|
|
181
|
-
"validationLevel": "min",
|
|
182
|
-
"inputIdCodeList": self.__pdbIdList
|
|
183
|
-
},
|
|
184
|
-
# {
|
|
185
|
-
# "databaseName": "pdbx_comp_model_core",
|
|
186
|
-
# "collectionGroupName": "pdbx_comp_model_core",
|
|
187
|
-
# "contentType": "pdbx_comp_model_core",
|
|
188
|
-
# "collectionNameList": None,
|
|
189
|
-
# "loadType": "full",
|
|
190
|
-
# "mergeContentTypes": None,
|
|
191
|
-
# "validationLevel": "min",
|
|
192
|
-
# "inputIdCodeList": None
|
|
193
|
-
# },
|
|
194
|
-
]
|
|
195
|
-
#
|
|
196
|
-
# self.__modelFixture()
|
|
197
|
-
self.__startTime = time.time()
|
|
198
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
199
|
-
|
|
200
|
-
def tearDown(self):
|
|
201
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
202
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
203
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
204
|
-
endTime = time.time()
|
|
205
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
206
|
-
|
|
207
|
-
# def __modelFixture(self):
|
|
208
|
-
# fU = FileUtil()
|
|
209
|
-
# modelSourcePath = os.path.join(self.__mockTopPath, "AF")
|
|
210
|
-
# for iPath in glob.iglob(os.path.join(modelSourcePath, "*.cif.gz")):
|
|
211
|
-
# fn = os.path.basename(iPath)
|
|
212
|
-
# uId = fn.split("-")[1]
|
|
213
|
-
# h3 = uId[-2:]
|
|
214
|
-
# h2 = uId[-4:-2]
|
|
215
|
-
# h1 = uId[-6:-4]
|
|
216
|
-
# oPath = os.path.join(self.__cachePath, "computed-models", h1, h2, h3, fn)
|
|
217
|
-
# fU.put(iPath, oPath)
|
|
218
|
-
|
|
219
|
-
def testPdbxLoader(self):
|
|
220
|
-
#
|
|
221
|
-
for ld in self.__ldList:
|
|
222
|
-
ok = self.__pdbxLoaderWrapper(**ld)
|
|
223
|
-
self.assertTrue(ok)
|
|
224
|
-
|
|
225
|
-
def __pdbxLoaderWrapper(self, **kwargs):
|
|
226
|
-
"""Wrapper for the PDBx loader module"""
|
|
227
|
-
ok = False
|
|
228
|
-
try:
|
|
229
|
-
logger.info("Loading %s", kwargs["collectionGroupName"])
|
|
230
|
-
mw = PdbxLoader(
|
|
231
|
-
self.__cfgOb,
|
|
232
|
-
cachePath=self.__cachePath,
|
|
233
|
-
resourceName=self.__resourceName,
|
|
234
|
-
numProc=self.__numProc,
|
|
235
|
-
chunkSize=self.__chunkSize,
|
|
236
|
-
fileLimit=kwargs.get("fileLimit", self.__fileLimit),
|
|
237
|
-
verbose=self.__verbose,
|
|
238
|
-
readBackCheck=self.__readBackCheck,
|
|
239
|
-
maxStepLength=1000,
|
|
240
|
-
useSchemaCache=True,
|
|
241
|
-
rebuildSchemaFlag=False,
|
|
242
|
-
)
|
|
243
|
-
ok = mw.load(
|
|
244
|
-
collectionGroupName=kwargs["collectionGroupName"],
|
|
245
|
-
collectionLoadList=kwargs["collectionNameList"],
|
|
246
|
-
contentType=kwargs["contentType"],
|
|
247
|
-
loadType=kwargs["loadType"],
|
|
248
|
-
inputPathList=None,
|
|
249
|
-
inputIdCodeList=kwargs["inputIdCodeList"],
|
|
250
|
-
styleType=self.__documentStyle,
|
|
251
|
-
dataSelectors=["PUBLIC_RELEASE"],
|
|
252
|
-
failedFilePath=self.__failedFilePath,
|
|
253
|
-
saveInputFileListPath=None,
|
|
254
|
-
pruneDocumentSize=None,
|
|
255
|
-
logSize=False,
|
|
256
|
-
validationLevel=kwargs["validationLevel"],
|
|
257
|
-
mergeContentTypes=kwargs["mergeContentTypes"],
|
|
258
|
-
useNameFlag=False,
|
|
259
|
-
providerTypeExcludeL=self.__excludeTypeL,
|
|
260
|
-
restoreUseGit=True,
|
|
261
|
-
restoreUseStash=False,
|
|
262
|
-
)
|
|
263
|
-
self.assertTrue(ok)
|
|
264
|
-
ok = self.__loadStatus(mw.getLoadStatus())
|
|
265
|
-
self.assertTrue(ok)
|
|
266
|
-
except Exception as e:
|
|
267
|
-
logger.exception("Failing with %s", str(e))
|
|
268
|
-
self.fail()
|
|
269
|
-
return ok
|
|
270
|
-
|
|
271
|
-
def __loadStatus(self, statusList):
|
|
272
|
-
sectionName = "data_exchange_configuration"
|
|
273
|
-
dl = DocumentLoader(
|
|
274
|
-
self.__cfgOb,
|
|
275
|
-
self.__cachePath,
|
|
276
|
-
resourceName=self.__resourceName,
|
|
277
|
-
numProc=self.__numProc,
|
|
278
|
-
chunkSize=self.__chunkSize,
|
|
279
|
-
documentLimit=None,
|
|
280
|
-
verbose=self.__verbose,
|
|
281
|
-
readBackCheck=self.__readBackCheck,
|
|
282
|
-
)
|
|
283
|
-
#
|
|
284
|
-
databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
|
|
285
|
-
collectionName = self.__cfgOb.get("COLLECTION_UPDATE_STATUS", sectionName=sectionName)
|
|
286
|
-
ok = dl.load(databaseName, collectionName, loadType="append", documentList=statusList, indexAttributeList=["update_id", "database_name", "object_name"], keyNames=None)
|
|
287
|
-
return ok
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
def mongoLoadPdbxSuite():
|
|
291
|
-
suiteSelect = unittest.TestSuite()
|
|
292
|
-
suiteSelect.addTest(PdbxLoaderFixture("testPdbxLoader"))
|
|
293
|
-
return suiteSelect
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
if __name__ == "__main__":
|
|
297
|
-
mySuite = mongoLoadPdbxSuite()
|
|
298
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|