rcsb.exdb 1.31__py3-none-any.whl → 1.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rcsb/exdb/tree/TreeNodeListWorker.py +72 -49
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/METADATA +2 -2
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/RECORD +5 -42
- rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
- rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
- rcsb/exdb/tests/__init__.py +0 -0
- rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
- rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
- rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
- rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
- rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
- rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
- rcsb/exdb/tests/testChemRefLoader.py +0 -106
- rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
- rcsb/exdb/tests/testCitationAdapter.py +0 -97
- rcsb/exdb/tests/testCitationExtractor.py +0 -93
- rcsb/exdb/tests/testCitationUtils.py +0 -92
- rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
- rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
- rcsb/exdb/tests/testGlycanProvider.py +0 -98
- rcsb/exdb/tests/testGlycanUtils.py +0 -64
- rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
- rcsb/exdb/tests/testObjectExtractor.py +0 -342
- rcsb/exdb/tests/testObjectTransformer.py +0 -83
- rcsb/exdb/tests/testObjectUpdater.py +0 -120
- rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
- rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
- rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
- rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
- rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
- rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
- rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
- rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
- rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
- rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
- rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
- rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
- rcsb/exdb/tests/testUniProtExtractor.py +0 -77
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/WHEEL +0 -0
- {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,298 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: PdbxLoaderFixture.py
|
|
3
|
-
# Author: J. Westbrook
|
|
4
|
-
# Date: 4-Sep-2019
|
|
5
|
-
# Version: 0.001
|
|
6
|
-
#
|
|
7
|
-
# Updates:
|
|
8
|
-
#
|
|
9
|
-
##
|
|
10
|
-
"""
|
|
11
|
-
Fixture for loading the chemical reference and pdbx_core collections in a local mongo instance.
|
|
12
|
-
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
__docformat__ = "google en"
|
|
16
|
-
__author__ = "John Westbrook"
|
|
17
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
18
|
-
__license__ = "Apache 2.0"
|
|
19
|
-
|
|
20
|
-
# import glob
|
|
21
|
-
import logging
|
|
22
|
-
import os
|
|
23
|
-
import platform
|
|
24
|
-
import resource
|
|
25
|
-
import time
|
|
26
|
-
import unittest
|
|
27
|
-
|
|
28
|
-
from rcsb.db.mongo.DocumentLoader import DocumentLoader
|
|
29
|
-
from rcsb.db.mongo.PdbxLoader import PdbxLoader
|
|
30
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
31
|
-
# from rcsb.utils.io.FileUtil import FileUtil
|
|
32
|
-
|
|
33
|
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
|
|
34
|
-
logger = logging.getLogger()
|
|
35
|
-
logger.setLevel(logging.INFO)
|
|
36
|
-
|
|
37
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
38
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class PdbxLoaderFixture(unittest.TestCase):
|
|
42
|
-
|
|
43
|
-
def __init__(self, methodName="runTest"):
|
|
44
|
-
super(PdbxLoaderFixture, self).__init__(methodName)
|
|
45
|
-
self.__verbose = True
|
|
46
|
-
|
|
47
|
-
def setUp(self):
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
self.__isMac = platform.system() == "Darwin"
|
|
51
|
-
self.__excludeTypeL = None if self.__isMac else ["optional"]
|
|
52
|
-
self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
|
|
53
|
-
configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
|
|
54
|
-
# configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example-local.yml")
|
|
55
|
-
# To Do: Investigate why GitUtil sometimes gives divergence error when using 'DISCOVERY_MODE: remote', but not with 'local':
|
|
56
|
-
# stderr: 'fatal: Need to specify how to reconcile divergent branches.'
|
|
57
|
-
# Behavior isn't entirely predictable, since it happens sometimes but not all the time.
|
|
58
|
-
# To fully debug, will need to add more logging statements to GitUtil, StashableBase, & StashUtil (in rcsb.utils.io)
|
|
59
|
-
# Or, can try to resolve error directly by specifying how to reconcile divergent branches in git.Repo class.
|
|
60
|
-
configName = "site_info_configuration"
|
|
61
|
-
self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
|
|
62
|
-
#
|
|
63
|
-
self.__resourceName = "MONGO_DB"
|
|
64
|
-
self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
|
|
65
|
-
self.__cachePath = os.path.join(TOPDIR, "CACHE")
|
|
66
|
-
self.__readBackCheck = True
|
|
67
|
-
self.__numProc = 1
|
|
68
|
-
self.__chunkSize = 2
|
|
69
|
-
self.__fileLimit = 38
|
|
70
|
-
self.__documentStyle = "rowwise_by_name_with_cardinality"
|
|
71
|
-
#
|
|
72
|
-
self.__birdChemCompCoreIdList = [
|
|
73
|
-
"PRD_000010",
|
|
74
|
-
"PRD_000060",
|
|
75
|
-
"PRD_000220",
|
|
76
|
-
"PRD_000882",
|
|
77
|
-
"PRD_000154",
|
|
78
|
-
"PRD_000877",
|
|
79
|
-
"PRD_000198",
|
|
80
|
-
"PRD_000009",
|
|
81
|
-
"PRD_000979",
|
|
82
|
-
"PRDCC_000010",
|
|
83
|
-
"PRDCC_000220",
|
|
84
|
-
"PRDCC_000882",
|
|
85
|
-
"PRDCC_000154",
|
|
86
|
-
"PRDCC_000198",
|
|
87
|
-
"PRDCC_000009",
|
|
88
|
-
"FAM_000010",
|
|
89
|
-
"FAM_000210",
|
|
90
|
-
"FAM_000220",
|
|
91
|
-
"FAM_000001",
|
|
92
|
-
"FAM_000391",
|
|
93
|
-
"FAM_000093",
|
|
94
|
-
"FAM_000084",
|
|
95
|
-
"FAM_000016",
|
|
96
|
-
"FAM_000336",
|
|
97
|
-
"1G1",
|
|
98
|
-
"2RT",
|
|
99
|
-
"2XL",
|
|
100
|
-
"2XN",
|
|
101
|
-
"ATP",
|
|
102
|
-
"BJA",
|
|
103
|
-
"BM3",
|
|
104
|
-
"CNC",
|
|
105
|
-
"DAL",
|
|
106
|
-
"DDZ",
|
|
107
|
-
"DHA",
|
|
108
|
-
"DSN",
|
|
109
|
-
"GTP",
|
|
110
|
-
"HKL",
|
|
111
|
-
"NAC",
|
|
112
|
-
"NAG",
|
|
113
|
-
"NND",
|
|
114
|
-
"PTR",
|
|
115
|
-
"SEP",
|
|
116
|
-
"SMJ",
|
|
117
|
-
"STL",
|
|
118
|
-
"UNK",
|
|
119
|
-
"UNX",
|
|
120
|
-
"UVL",
|
|
121
|
-
]
|
|
122
|
-
#
|
|
123
|
-
self.__pdbIdList = [
|
|
124
|
-
"1AH1",
|
|
125
|
-
"1B5F",
|
|
126
|
-
"1BMV",
|
|
127
|
-
"1C58",
|
|
128
|
-
"1DSR",
|
|
129
|
-
"1DUL",
|
|
130
|
-
"1KQE",
|
|
131
|
-
"1O3Q",
|
|
132
|
-
"1SFO",
|
|
133
|
-
"2HW3",
|
|
134
|
-
"2HYV",
|
|
135
|
-
"2OSL",
|
|
136
|
-
"2VOO",
|
|
137
|
-
"2WMG",
|
|
138
|
-
"3AD7",
|
|
139
|
-
"3HYA",
|
|
140
|
-
"3IYD",
|
|
141
|
-
"3MBG",
|
|
142
|
-
"3RER",
|
|
143
|
-
"3VD8",
|
|
144
|
-
"3VFJ",
|
|
145
|
-
"3X11",
|
|
146
|
-
"3ZTJ",
|
|
147
|
-
"4E2O",
|
|
148
|
-
"4EN8",
|
|
149
|
-
"4MEY",
|
|
150
|
-
"5EU8",
|
|
151
|
-
"5KDS",
|
|
152
|
-
# "5TM0",
|
|
153
|
-
"5VH4",
|
|
154
|
-
# "5VP2",
|
|
155
|
-
# "6FSZ",
|
|
156
|
-
"6LU7",
|
|
157
|
-
"6NN7",
|
|
158
|
-
# "6Q20",
|
|
159
|
-
"6RFK",
|
|
160
|
-
"6RKU",
|
|
161
|
-
"6YRQ",
|
|
162
|
-
]
|
|
163
|
-
self.__ldList = [
|
|
164
|
-
{
|
|
165
|
-
# "databaseName": "dw",
|
|
166
|
-
"collectionGroupName": "core_chem_comp",
|
|
167
|
-
"contentType": "bird_chem_comp_core",
|
|
168
|
-
"collectionNameList": None,
|
|
169
|
-
"loadType": "full",
|
|
170
|
-
"mergeContentTypes": None,
|
|
171
|
-
"validationLevel": "min",
|
|
172
|
-
"inputIdCodeList": self.__birdChemCompCoreIdList
|
|
173
|
-
},
|
|
174
|
-
{
|
|
175
|
-
# "databaseName": "pdbx_core",
|
|
176
|
-
"collectionGroupName": "pdbx_core",
|
|
177
|
-
"contentType": "pdbx_core",
|
|
178
|
-
"collectionNameList": None,
|
|
179
|
-
"loadType": "replace",
|
|
180
|
-
"mergeContentTypes": ["vrpt"],
|
|
181
|
-
"validationLevel": "min",
|
|
182
|
-
"inputIdCodeList": self.__pdbIdList
|
|
183
|
-
},
|
|
184
|
-
# {
|
|
185
|
-
# "databaseName": "pdbx_comp_model_core",
|
|
186
|
-
# "collectionGroupName": "pdbx_comp_model_core",
|
|
187
|
-
# "contentType": "pdbx_comp_model_core",
|
|
188
|
-
# "collectionNameList": None,
|
|
189
|
-
# "loadType": "full",
|
|
190
|
-
# "mergeContentTypes": None,
|
|
191
|
-
# "validationLevel": "min",
|
|
192
|
-
# "inputIdCodeList": None
|
|
193
|
-
# },
|
|
194
|
-
]
|
|
195
|
-
#
|
|
196
|
-
# self.__modelFixture()
|
|
197
|
-
self.__startTime = time.time()
|
|
198
|
-
logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
|
|
199
|
-
|
|
200
|
-
def tearDown(self):
|
|
201
|
-
unitS = "MB" if platform.system() == "Darwin" else "GB"
|
|
202
|
-
rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
|
|
203
|
-
logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
|
|
204
|
-
endTime = time.time()
|
|
205
|
-
logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
|
|
206
|
-
|
|
207
|
-
# def __modelFixture(self):
|
|
208
|
-
# fU = FileUtil()
|
|
209
|
-
# modelSourcePath = os.path.join(self.__mockTopPath, "AF")
|
|
210
|
-
# for iPath in glob.iglob(os.path.join(modelSourcePath, "*.cif.gz")):
|
|
211
|
-
# fn = os.path.basename(iPath)
|
|
212
|
-
# uId = fn.split("-")[1]
|
|
213
|
-
# h3 = uId[-2:]
|
|
214
|
-
# h2 = uId[-4:-2]
|
|
215
|
-
# h1 = uId[-6:-4]
|
|
216
|
-
# oPath = os.path.join(self.__cachePath, "computed-models", h1, h2, h3, fn)
|
|
217
|
-
# fU.put(iPath, oPath)
|
|
218
|
-
|
|
219
|
-
def testPdbxLoader(self):
|
|
220
|
-
#
|
|
221
|
-
for ld in self.__ldList:
|
|
222
|
-
ok = self.__pdbxLoaderWrapper(**ld)
|
|
223
|
-
self.assertTrue(ok)
|
|
224
|
-
|
|
225
|
-
def __pdbxLoaderWrapper(self, **kwargs):
|
|
226
|
-
"""Wrapper for the PDBx loader module"""
|
|
227
|
-
ok = False
|
|
228
|
-
try:
|
|
229
|
-
logger.info("Loading %s", kwargs["collectionGroupName"])
|
|
230
|
-
mw = PdbxLoader(
|
|
231
|
-
self.__cfgOb,
|
|
232
|
-
cachePath=self.__cachePath,
|
|
233
|
-
resourceName=self.__resourceName,
|
|
234
|
-
numProc=self.__numProc,
|
|
235
|
-
chunkSize=self.__chunkSize,
|
|
236
|
-
fileLimit=kwargs.get("fileLimit", self.__fileLimit),
|
|
237
|
-
verbose=self.__verbose,
|
|
238
|
-
readBackCheck=self.__readBackCheck,
|
|
239
|
-
maxStepLength=1000,
|
|
240
|
-
useSchemaCache=True,
|
|
241
|
-
rebuildSchemaFlag=False,
|
|
242
|
-
)
|
|
243
|
-
ok = mw.load(
|
|
244
|
-
collectionGroupName=kwargs["collectionGroupName"],
|
|
245
|
-
collectionLoadList=kwargs["collectionNameList"],
|
|
246
|
-
contentType=kwargs["contentType"],
|
|
247
|
-
loadType=kwargs["loadType"],
|
|
248
|
-
inputPathList=None,
|
|
249
|
-
inputIdCodeList=kwargs["inputIdCodeList"],
|
|
250
|
-
styleType=self.__documentStyle,
|
|
251
|
-
dataSelectors=["PUBLIC_RELEASE"],
|
|
252
|
-
failedFilePath=self.__failedFilePath,
|
|
253
|
-
saveInputFileListPath=None,
|
|
254
|
-
pruneDocumentSize=None,
|
|
255
|
-
logSize=False,
|
|
256
|
-
validationLevel=kwargs["validationLevel"],
|
|
257
|
-
mergeContentTypes=kwargs["mergeContentTypes"],
|
|
258
|
-
useNameFlag=False,
|
|
259
|
-
providerTypeExcludeL=self.__excludeTypeL,
|
|
260
|
-
restoreUseGit=True,
|
|
261
|
-
restoreUseStash=False,
|
|
262
|
-
)
|
|
263
|
-
self.assertTrue(ok)
|
|
264
|
-
ok = self.__loadStatus(mw.getLoadStatus())
|
|
265
|
-
self.assertTrue(ok)
|
|
266
|
-
except Exception as e:
|
|
267
|
-
logger.exception("Failing with %s", str(e))
|
|
268
|
-
self.fail()
|
|
269
|
-
return ok
|
|
270
|
-
|
|
271
|
-
def __loadStatus(self, statusList):
|
|
272
|
-
sectionName = "data_exchange_configuration"
|
|
273
|
-
dl = DocumentLoader(
|
|
274
|
-
self.__cfgOb,
|
|
275
|
-
self.__cachePath,
|
|
276
|
-
resourceName=self.__resourceName,
|
|
277
|
-
numProc=self.__numProc,
|
|
278
|
-
chunkSize=self.__chunkSize,
|
|
279
|
-
documentLimit=None,
|
|
280
|
-
verbose=self.__verbose,
|
|
281
|
-
readBackCheck=self.__readBackCheck,
|
|
282
|
-
)
|
|
283
|
-
#
|
|
284
|
-
databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
|
|
285
|
-
collectionName = self.__cfgOb.get("COLLECTION_UPDATE_STATUS", sectionName=sectionName)
|
|
286
|
-
ok = dl.load(databaseName, collectionName, loadType="append", documentList=statusList, indexAttributeList=["update_id", "database_name", "object_name"], keyNames=None)
|
|
287
|
-
return ok
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
def mongoLoadPdbxSuite():
|
|
291
|
-
suiteSelect = unittest.TestSuite()
|
|
292
|
-
suiteSelect.addTest(PdbxLoaderFixture("testPdbxLoader"))
|
|
293
|
-
return suiteSelect
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
if __name__ == "__main__":
|
|
297
|
-
mySuite = mongoLoadPdbxSuite()
|
|
298
|
-
unittest.TextTestRunner(verbosity=2).run(mySuite)
|