rcsb.exdb 1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. rcsb/__init__.py +1 -0
  2. rcsb/exdb/__init__.py +1 -0
  3. rcsb/exdb/branch/BranchedEntityExtractor.py +82 -0
  4. rcsb/exdb/branch/GlycanProvider.py +116 -0
  5. rcsb/exdb/branch/GlycanUtils.py +114 -0
  6. rcsb/exdb/branch/__init__.py +0 -0
  7. rcsb/exdb/chemref/ChemRefEtlWorker.py +118 -0
  8. rcsb/exdb/chemref/ChemRefExtractor.py +70 -0
  9. rcsb/exdb/chemref/ChemRefMappingProvider.py +139 -0
  10. rcsb/exdb/chemref/PubChemDataCacheProvider.py +372 -0
  11. rcsb/exdb/chemref/PubChemEtlWrapper.py +280 -0
  12. rcsb/exdb/chemref/PubChemIndexCacheProvider.py +638 -0
  13. rcsb/exdb/chemref/__init__.py +0 -0
  14. rcsb/exdb/citation/CitationAdapter.py +91 -0
  15. rcsb/exdb/citation/CitationExtractor.py +190 -0
  16. rcsb/exdb/citation/CitationUtils.py +51 -0
  17. rcsb/exdb/citation/__init__.py +0 -0
  18. rcsb/exdb/cli/__init__.py +0 -0
  19. rcsb/exdb/entry/EntryInfoProvider.py +148 -0
  20. rcsb/exdb/entry/__init__.py +0 -0
  21. rcsb/exdb/examples-seq/EntityInstanceExtractor.py +557 -0
  22. rcsb/exdb/examples-seq/EntityPolymerExtractor.py +544 -0
  23. rcsb/exdb/examples-seq/EntityPolymerExtractorFullTests.py +176 -0
  24. rcsb/exdb/examples-seq/ReferenceSequenceAssignmentUpdater.py +449 -0
  25. rcsb/exdb/examples-seq/ReferenceSequenceUtils.py +123 -0
  26. rcsb/exdb/examples-seq/ReferenceSequenceUtilsTests.py +109 -0
  27. rcsb/exdb/examples-seq/exampleObjectExtractor.py +109 -0
  28. rcsb/exdb/examples-seq/fixtureEntityPolymerExtractor.py +85 -0
  29. rcsb/exdb/examples-seq/testEntityInstanceExtractor.py +170 -0
  30. rcsb/exdb/examples-seq/testEntityPolymerExtractor.py +171 -0
  31. rcsb/exdb/examples-seq/testReferenceSequenceAssignmentUpdater.py +79 -0
  32. rcsb/exdb/examples-seq/testReferenceSequenceUtils.py +108 -0
  33. rcsb/exdb/seq/AnnotationExtractor.py +76 -0
  34. rcsb/exdb/seq/LigandNeighborMappingExtractor.py +84 -0
  35. rcsb/exdb/seq/LigandNeighborMappingProvider.py +106 -0
  36. rcsb/exdb/seq/PolymerEntityExtractor.py +328 -0
  37. rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +598 -0
  38. rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +228 -0
  39. rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +534 -0
  40. rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +388 -0
  41. rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +397 -0
  42. rcsb/exdb/seq/TaxonomyExtractor.py +69 -0
  43. rcsb/exdb/seq/UniProtCoreEtlWorker.py +177 -0
  44. rcsb/exdb/seq/UniProtExtractor.py +80 -0
  45. rcsb/exdb/seq/__init__.py +0 -0
  46. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +19 -0
  47. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +12 -0
  48. rcsb/exdb/tests/__init__.py +0 -0
  49. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +104 -0
  50. rcsb/exdb/tests/fixturePdbxLoader.py +298 -0
  51. rcsb/exdb/tests/test-data/components-abbrev.cif +2739 -0
  52. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +9171 -0
  53. rcsb/exdb/tests/testAnnotationExtractor.py +79 -0
  54. rcsb/exdb/tests/testBranchedEntityExtractor.py +81 -0
  55. rcsb/exdb/tests/testChemRefLoader.py +106 -0
  56. rcsb/exdb/tests/testChemRefMappingProvider.py +95 -0
  57. rcsb/exdb/tests/testCitationAdapter.py +97 -0
  58. rcsb/exdb/tests/testCitationExtractor.py +93 -0
  59. rcsb/exdb/tests/testCitationUtils.py +92 -0
  60. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +70 -0
  61. rcsb/exdb/tests/testEntryInfoProvider.py +97 -0
  62. rcsb/exdb/tests/testGlycanEtlWorkflow.py +70 -0
  63. rcsb/exdb/tests/testGlycanProvider.py +98 -0
  64. rcsb/exdb/tests/testGlycanUtils.py +64 -0
  65. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +90 -0
  66. rcsb/exdb/tests/testObjectExtractor.py +342 -0
  67. rcsb/exdb/tests/testObjectTransformer.py +83 -0
  68. rcsb/exdb/tests/testObjectUpdater.py +120 -0
  69. rcsb/exdb/tests/testPolymerEntityExtractor.py +93 -0
  70. rcsb/exdb/tests/testPubChemDataCacheProvider.py +124 -0
  71. rcsb/exdb/tests/testPubChemEtlWorkflow.py +134 -0
  72. rcsb/exdb/tests/testPubChemEtlWrapper.py +155 -0
  73. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +123 -0
  74. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +106 -0
  75. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +121 -0
  76. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +122 -0
  77. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +117 -0
  78. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +94 -0
  79. rcsb/exdb/tests/testTaxonomyExtractor.py +75 -0
  80. rcsb/exdb/tests/testTreeNodeListWorker.py +111 -0
  81. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +99 -0
  82. rcsb/exdb/tests/testUniProtExtractor.py +77 -0
  83. rcsb/exdb/tree/TreeNodeListWorker.py +228 -0
  84. rcsb/exdb/tree/__init__.py +0 -0
  85. rcsb/exdb/utils/ObjectAdapterBase.py +22 -0
  86. rcsb/exdb/utils/ObjectExtractor.py +286 -0
  87. rcsb/exdb/utils/ObjectTransformer.py +124 -0
  88. rcsb/exdb/utils/ObjectUpdater.py +121 -0
  89. rcsb/exdb/utils/ObjectValidator.py +160 -0
  90. rcsb/exdb/utils/__init__.py +0 -0
  91. rcsb/exdb/wf/EntryInfoEtlWorkflow.py +71 -0
  92. rcsb/exdb/wf/GlycanEtlWorkflow.py +76 -0
  93. rcsb/exdb/wf/PubChemEtlWorkflow.py +240 -0
  94. rcsb/exdb/wf/__init__.py +0 -0
  95. rcsb_exdb-1.31.dist-info/METADATA +103 -0
  96. rcsb_exdb-1.31.dist-info/RECORD +98 -0
  97. rcsb_exdb-1.31.dist-info/WHEEL +4 -0
  98. rcsb_exdb-1.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,80 @@
1
+ ##
2
+ # File: UniProtExtractor.py
3
+ # Date: 5-Dec-2020 jdw
4
+ #
5
+ # Utilities to extract selected details from the UniProt exchange collections.
6
+ #
7
+ ##
8
+ __docformat__ = "google en"
9
+ __author__ = "John Westbrook"
10
+ __email__ = "jwest@rcsb.rutgers.edu"
11
+ __license__ = "Apache 2.0"
12
+
13
+ import logging
14
+
15
+ from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
16
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class UniProtExtractor(object):
    """Utilities to extract selected details from the UniProt exchange collections."""

    def __init__(self, cfgOb):
        """Keep the configuration object that is handed to ObjectExtractor.

        Args:
            cfgOb: configuration object passed through unchanged to ObjectExtractor
        """
        self.__cfgOb = cfgOb

    def exportReferenceSequenceDetails(self, filePath, fmt="json"):
        """Extract the reference sequence details and write them to a file.

        Args:
            filePath (str): destination path passed to MarshalUtil.doExport()
            fmt (str, optional): serialization format passed to MarshalUtil.doExport(). Defaults to "json".

        Returns:
            the status value returned by MarshalUtil.doExport()
        """
        rD = self.getReferenceSequenceDetails()
        mU = MarshalUtil()
        ok = mU.doExport(filePath, rD, fmt=fmt, indent=3)
        logger.info("Exporting (%d) UniProt reference sequences (status=%r)", len(rD), ok)
        return ok

    def getReferenceSequenceDetails(self):
        """Get reference protein sequence essential details (sequence, taxonomy, name, gene, ...)

        Returns:
            dict: {<key>: {"accession": <key>, "taxId": ..., "scientific_name": ..., "gene": ..., "name": ..., "sequence": ...}, ...}
                  keyed by the identifiers returned by ObjectExtractor.getObjects(). "gene" is the
                  name of the first gene record of type "primary" (or None) and "name" is the first
                  name record of type "recommendedName" (or None).

                  Any exception is logged rather than raised; the entries collected before the
                  failure (possibly none) are returned.
        """
        # Bind the result before the try block so that a failure while creating the
        # extractor or fetching objects still returns a dictionary instead of raising
        # UnboundLocalError at the final return statement.
        rD = {}
        # Tracks the record being processed so the failure log can report it.
        uD = None
        try:
            obEx = ObjectExtractor(
                self.__cfgOb,
                databaseName="uniprot_exdb",
                collectionName="reference_entry",
                useCache=False,
                keyAttribute="uniprot",
                uniqueAttributes=["rcsb_id"],
                selectionQuery={},
                selectionList=[
                    "source_scientific",
                    "taxonomy_id",
                    "rcsb_id",
                    "gene",
                    "names",
                    "sequence",
                ],
            )
            #
            eCount = obEx.getCount()
            logger.info("Reference entry count is %d", eCount)
            objD = obEx.getObjects()
            for rId, uD in objD.items():
                taxId = uD["taxonomy_id"]
                sn = uD["source_scientific"]
                sequence = uD["sequence"]
                gn = None
                pn = None
                # The gene list is optional; take the first primary gene name when present.
                if "gene" in uD:
                    for tD in uD["gene"]:
                        if tD["type"] == "primary":
                            gn = tD["name"]
                            break
                # The names list is required here; take the first recommended name.
                for tD in uD["names"]:
                    if tD["nameType"] == "recommendedName":
                        pn = tD["name"]
                        break
                rD[rId] = {"accession": rId, "taxId": taxId, "scientific_name": sn, "gene": gn, "name": pn, "sequence": sequence}

        except Exception as e:
            logger.exception("Failing uD %r with %s", uD, str(e))
        #
        return rD
File without changes
@@ -0,0 +1,19 @@
1
+ #!/bin/bash
2
+ # File: TEST-EXDB-CLI-EXEC.sh
3
+ # Date: 3-Sep-2019 jdw
4
+ #
5
+ # Examples
6
+ #
7
+ # tree node list load
8
+ #
9
+ exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGTREENODELIST
10
+ #
11
+ # Chemref load
12
+ #
13
+ exdb_exec_cli --mock --full --etl_chemref --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGCHEMREF
14
+ #
15
+ # Reference sequence update
16
+ #
17
+ exdb_exec_cli --mock --upd_ref_seq --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUPDREFSEQ
18
+ #
19
+ #
@@ -0,0 +1,12 @@
1
+ #!/bin/bash
2
+ # File: TEST-EXDB-CLI-REFSEQ-EXEC.sh
3
+ # Date: 17-Oct-2019 jdw
4
+ #
5
+ # Reference sequence update --mock is required for example SIFTS files -
6
+ #
7
+ exdb_exec_cli --mock --upd_ref_seq --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUPDREFSEQ
8
+ #
9
+ exdb_exec_cli --test_req_seq_cache --mock --upd_ref_seq --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUPDREFSEQTEST
10
+ #
11
+ exdb_exec_cli --mock --full --etl_uniprot --cache_path ../../../CACHE --config_path ../../mock-data/config/dbload-setup-example.yml --config_name site_info_configuration >& ./test-output/LOGUNIPROT
12
+ #
File without changes
@@ -0,0 +1,104 @@
1
+ # File: DictMethodResourceProviderFixture.py
2
+ # Author: J. Westbrook
3
+ # Date: 12-Aug-2019
4
+ # Version: 0.001
5
+ #
6
+ # Update:
7
+
8
+ ##
9
+ """
10
+ Fixture for setting up cached resources for dictionary method helpers
11
+
12
+ """
13
+
14
+ __docformat__ = "google en"
15
+ __author__ = "John Westbrook"
16
+ __email__ = "jwest@rcsb.rutgers.edu"
17
+ __license__ = "Apache 2.0"
18
+
19
+ import logging
20
+ import os
21
+ import platform
22
+ import resource
23
+ import time
24
+ import unittest
25
+
26
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
27
+ from rcsb.utils.dictionary.DictMethodResourceProvider import DictMethodResourceProvider
28
+
29
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
30
+ logger = logging.getLogger()
31
+ logger.setLevel(logging.INFO)
32
+
33
+ HERE = os.path.abspath(os.path.dirname(__file__))
34
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
35
+
36
+
37
class DictMethodResourceProviderFixture(unittest.TestCase):
    """Fixture that populates cached resources through DictMethodResourceProvider."""

    def setUp(self):
        """Build the mock-data configuration object and record the start time."""
        mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(mockTopPath, "config", "dbload-setup-example.yml")
        self.__configName = "site_info_configuration"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=self.__configName, mockTopPath=mockTopPath)

        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory and the elapsed wall-clock time."""
        # The unit label is selected by platform; the raw ru_maxrss value gets the same scaling either way.
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        elapsed = time.time() - self.__startTime
        timeStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), timeStamp, elapsed)

    def testBuildResourceCache(self):
        """Fixture - generate and check selected resource caches"""
        # Resources are requested one by one, in the order listed here.
        requestedResources = [
            "AtcProvider instance",
            "DrugBankProvider instance",
            "PubChemProvider instance",
            "CitationReferenceProvider instance",
            "JournalTitleAbbreviationProvider instance",
            "EnzymeDatabaseProvider instance",
            "PfamProvider instance",
            "SiftsSummaryProvider instance",
            "CathProvider instance",
            "ScopProvider instance",
            "EcodProvider instance",
            "Scop2Provider instance",
            "TaxonomyProvider instance",
        ]
        try:
            provider = DictMethodResourceProvider(
                self.__cfgOb,
                configName=self.__configName,
                cachePath=self.__cachePath,
                restoreUseStash=False,
                restoreUseGit=True,
            )
            for name in requestedResources:
                provider.getResource(name, useCache=True, default=None, doRestore=True, doBackup=False)
        except Exception as e:
            # Any exception is logged with its traceback and reported as a test failure.
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skip("Troubleshooting test")
    def testRecoverResourceCache(self):
        """Fixture - generate and check resource caches"""
        try:
            provider = DictMethodResourceProvider(self.__cfgOb, configName=self.__configName, cachePath=self.__cachePath)
            self.assertTrue(provider.cacheResources(useCache=True))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
92
+
93
+
94
def dictMethodResourceProviderSuite():
    """Return a suite holding the resource cache build test.

    The skipped troubleshooting test (testRecoverResourceCache) is deliberately not included.
    """
    selectedTests = [DictMethodResourceProviderFixture("testBuildResourceCache")]
    return unittest.TestSuite(selectedTests)
99
+
100
+
101
if __name__ == "__main__":
    # Script entry point: run the selected fixture suite with verbose reporting.
    mySuite = dictMethodResourceProviderSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
@@ -0,0 +1,298 @@
1
+ ##
2
+ # File: PdbxLoaderFixture.py
3
+ # Author: J. Westbrook
4
+ # Date: 4-Sep-2019
5
+ # Version: 0.001
6
+ #
7
+ # Updates:
8
+ #
9
+ ##
10
+ """
11
+ Fixture for loading the chemical reference and pdbx_core collections in a local mongo instance.
12
+
13
+ """
14
+
15
+ __docformat__ = "google en"
16
+ __author__ = "John Westbrook"
17
+ __email__ = "jwest@rcsb.rutgers.edu"
18
+ __license__ = "Apache 2.0"
19
+
20
+ # import glob
21
+ import logging
22
+ import os
23
+ import platform
24
+ import resource
25
+ import time
26
+ import unittest
27
+
28
+ from rcsb.db.mongo.DocumentLoader import DocumentLoader
29
+ from rcsb.db.mongo.PdbxLoader import PdbxLoader
30
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
31
+ # from rcsb.utils.io.FileUtil import FileUtil
32
+
33
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
34
+ logger = logging.getLogger()
35
+ logger.setLevel(logging.INFO)
36
+
37
+ HERE = os.path.abspath(os.path.dirname(__file__))
38
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
39
+
40
+
41
class PdbxLoaderFixture(unittest.TestCase):
    """Fixture that runs PdbxLoader for the chemical reference and pdbx_core test data sets."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Prepare the configuration, loader settings and the list of load requests."""
        self.__isMac = platform.system() == "Darwin"
        # "optional" provider types are excluded everywhere except on macOS.
        if self.__isMac:
            self.__excludeTypeL = None
        else:
            self.__excludeTypeL = ["optional"]
        #
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(self.__mockTopPath, "config", "dbload-setup-example.yml")
        # configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example-local.yml")
        #
        # To Do: Investigate why GitUtil sometimes reports a divergence error with 'DISCOVERY_MODE: remote'
        #        but not with 'local':
        #            stderr: 'fatal: Need to specify how to reconcile divergent branches.'
        #        The behavior is not entirely predictable (it happens only some of the time).
        #        Fully debugging it will need more logging statements in GitUtil, StashableBase and
        #        StashUtil (in rcsb.utils.io). Alternatively, try to resolve the error directly by
        #        specifying how to reconcile divergent branches in the git.Repo class.
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName="site_info_configuration", mockTopPath=self.__mockTopPath)
        #
        # Loader settings shared by every load request
        self.__resourceName = "MONGO_DB"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
        self.__documentStyle = "rowwise_by_name_with_cardinality"
        self.__readBackCheck = True
        self.__numProc = 1
        self.__chunkSize = 2
        self.__fileLimit = 38
        #
        # BIRD (PRD, PRDCC, FAM) and chemical component identifiers
        self.__birdChemCompCoreIdList = [
            "PRD_000010", "PRD_000060", "PRD_000220", "PRD_000882", "PRD_000154", "PRD_000877",
            "PRD_000198", "PRD_000009", "PRD_000979",
            "PRDCC_000010", "PRDCC_000220", "PRDCC_000882", "PRDCC_000154", "PRDCC_000198", "PRDCC_000009",
            "FAM_000010", "FAM_000210", "FAM_000220", "FAM_000001", "FAM_000391", "FAM_000093",
            "FAM_000084", "FAM_000016", "FAM_000336",
            "1G1", "2RT", "2XL", "2XN", "ATP", "BJA", "BM3", "CNC", "DAL", "DDZ", "DHA", "DSN",
            "GTP", "HKL", "NAC", "NAG", "NND", "PTR", "SEP", "SMJ", "STL", "UNK", "UNX", "UVL",
        ]
        #
        # PDB entry identifiers (the commented entries are currently left out of the load)
        self.__pdbIdList = [
            "1AH1", "1B5F", "1BMV", "1C58", "1DSR", "1DUL", "1KQE", "1O3Q", "1SFO",
            "2HW3", "2HYV", "2OSL", "2VOO", "2WMG",
            "3AD7", "3HYA", "3IYD", "3MBG", "3RER", "3VD8", "3VFJ", "3X11", "3ZTJ",
            "4E2O", "4EN8", "4MEY",
            "5EU8", "5KDS",
            # "5TM0",
            "5VH4",
            # "5VP2",
            # "6FSZ",
            "6LU7", "6NN7",
            # "6Q20",
            "6RFK", "6RKU", "6YRQ",
        ]
        #
        # One dictionary per load request; each is expanded into keyword arguments for the loader wrapper.
        chemRefRequest = {
            # "databaseName": "dw",
            "collectionGroupName": "core_chem_comp",
            "contentType": "bird_chem_comp_core",
            "collectionNameList": None,
            "loadType": "full",
            "mergeContentTypes": None,
            "validationLevel": "min",
            "inputIdCodeList": self.__birdChemCompCoreIdList,
        }
        pdbxCoreRequest = {
            # "databaseName": "pdbx_core",
            "collectionGroupName": "pdbx_core",
            "contentType": "pdbx_core",
            "collectionNameList": None,
            "loadType": "replace",
            "mergeContentTypes": ["vrpt"],
            "validationLevel": "min",
            "inputIdCodeList": self.__pdbIdList,
        }
        # Disabled request for computed models:
        # {
        #     "databaseName": "pdbx_comp_model_core",
        #     "collectionGroupName": "pdbx_comp_model_core",
        #     "contentType": "pdbx_comp_model_core",
        #     "collectionNameList": None,
        #     "loadType": "full",
        #     "mergeContentTypes": None,
        #     "validationLevel": "min",
        #     "inputIdCodeList": None
        # },
        self.__ldList = [chemRefRequest, pdbxCoreRequest]
        #
        # self.__modelFixture()
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory and the elapsed wall-clock time."""
        # The unit label is selected by platform; the raw ru_maxrss value gets the same scaling either way.
        if platform.system() == "Darwin":
            unitS = "MB"
        else:
            unitS = "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        elapsed = time.time() - self.__startTime
        timeStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), timeStamp, elapsed)

    # Disabled helper that copies mock computed-model files into the cache directory tree:
    #
    # def __modelFixture(self):
    #     fU = FileUtil()
    #     modelSourcePath = os.path.join(self.__mockTopPath, "AF")
    #     for iPath in glob.iglob(os.path.join(modelSourcePath, "*.cif.gz")):
    #         fn = os.path.basename(iPath)
    #         uId = fn.split("-")[1]
    #         h3 = uId[-2:]
    #         h2 = uId[-4:-2]
    #         h1 = uId[-6:-4]
    #         oPath = os.path.join(self.__cachePath, "computed-models", h1, h2, h3, fn)
    #         fU.put(iPath, oPath)

    def testPdbxLoader(self):
        # Run every configured load request in order; each one must report success.
        for request in self.__ldList:
            status = self.__pdbxLoaderWrapper(**request)
            self.assertTrue(status)

    def __pdbxLoaderWrapper(self, **kwargs):
        """Wrapper for the PDBx loader module

        Keys read from kwargs: collectionGroupName, collectionNameList, contentType, loadType,
        inputIdCodeList, validationLevel, mergeContentTypes and (optionally) fileLimit.

        Returns:
            status of the load status update that follows a successful load
        """
        status = False
        try:
            logger.info("Loading %s", kwargs["collectionGroupName"])
            loaderOptions = {
                "cachePath": self.__cachePath,
                "resourceName": self.__resourceName,
                "numProc": self.__numProc,
                "chunkSize": self.__chunkSize,
                "fileLimit": kwargs.get("fileLimit", self.__fileLimit),
                "verbose": self.__verbose,
                "readBackCheck": self.__readBackCheck,
                "maxStepLength": 1000,
                "useSchemaCache": True,
                "rebuildSchemaFlag": False,
            }
            loader = PdbxLoader(self.__cfgOb, **loaderOptions)
            loadOptions = {
                "collectionGroupName": kwargs["collectionGroupName"],
                "collectionLoadList": kwargs["collectionNameList"],
                "contentType": kwargs["contentType"],
                "loadType": kwargs["loadType"],
                "inputPathList": None,
                "inputIdCodeList": kwargs["inputIdCodeList"],
                "styleType": self.__documentStyle,
                "dataSelectors": ["PUBLIC_RELEASE"],
                "failedFilePath": self.__failedFilePath,
                "saveInputFileListPath": None,
                "pruneDocumentSize": None,
                "logSize": False,
                "validationLevel": kwargs["validationLevel"],
                "mergeContentTypes": kwargs["mergeContentTypes"],
                "useNameFlag": False,
                "providerTypeExcludeL": self.__excludeTypeL,
                "restoreUseGit": True,
                "restoreUseStash": False,
            }
            status = loader.load(**loadOptions)
            self.assertTrue(status)
            # Store the loader's status records in the update status collection.
            status = self.__loadStatus(loader.getLoadStatus())
            self.assertTrue(status)
        except Exception as e:
            # Assertion failures raised above also arrive here; they are logged and reported as a failure.
            logger.exception("Failing with %s", str(e))
            self.fail()
        return status

    def __loadStatus(self, statusList):
        """Append the load status documents to the configured update status collection.

        Args:
            statusList: status documents as returned by PdbxLoader.getLoadStatus()

        Returns:
            the value returned by DocumentLoader.load()
        """
        sectionName = "data_exchange_configuration"
        docLoader = DocumentLoader(
            self.__cfgOb,
            self.__cachePath,
            resourceName=self.__resourceName,
            numProc=self.__numProc,
            chunkSize=self.__chunkSize,
            documentLimit=None,
            verbose=self.__verbose,
            readBackCheck=self.__readBackCheck,
        )
        #
        databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
        collectionName = self.__cfgOb.get("COLLECTION_UPDATE_STATUS", sectionName=sectionName)
        return docLoader.load(
            databaseName,
            collectionName,
            loadType="append",
            documentList=statusList,
            indexAttributeList=["update_id", "database_name", "object_name"],
            keyNames=None,
        )
288
+
289
+
290
def mongoLoadPdbxSuite():
    """Return a suite holding the single PdbxLoader fixture test."""
    selectedTests = [PdbxLoaderFixture("testPdbxLoader")]
    return unittest.TestSuite(selectedTests)
294
+
295
+
296
if __name__ == "__main__":
    # Script entry point: run the loader fixture suite with verbose reporting.
    mySuite = mongoLoadPdbxSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)