rcsb.exdb 1.31__py3-none-any.whl → 1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. rcsb/exdb/tree/TreeNodeListWorker.py +72 -49
  2. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/METADATA +2 -2
  3. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/RECORD +5 -42
  4. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
  5. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
  6. rcsb/exdb/tests/__init__.py +0 -0
  7. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
  8. rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
  9. rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
  10. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
  11. rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
  12. rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
  13. rcsb/exdb/tests/testChemRefLoader.py +0 -106
  14. rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
  15. rcsb/exdb/tests/testCitationAdapter.py +0 -97
  16. rcsb/exdb/tests/testCitationExtractor.py +0 -93
  17. rcsb/exdb/tests/testCitationUtils.py +0 -92
  18. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
  19. rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
  20. rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
  21. rcsb/exdb/tests/testGlycanProvider.py +0 -98
  22. rcsb/exdb/tests/testGlycanUtils.py +0 -64
  23. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
  24. rcsb/exdb/tests/testObjectExtractor.py +0 -342
  25. rcsb/exdb/tests/testObjectTransformer.py +0 -83
  26. rcsb/exdb/tests/testObjectUpdater.py +0 -120
  27. rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
  28. rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
  29. rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
  30. rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
  31. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
  32. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
  33. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
  34. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
  35. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
  36. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
  37. rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
  38. rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
  39. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
  40. rcsb/exdb/tests/testUniProtExtractor.py +0 -77
  41. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/WHEEL +0 -0
  42. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/licenses/LICENSE +0 -0
@@ -1,298 +0,0 @@
1
- ##
2
- # File: PdbxLoaderFixture.py
3
- # Author: J. Westbrook
4
- # Date: 4-Sep-2019
5
- # Version: 0.001
6
- #
7
- # Updates:
8
- #
9
- ##
10
- """
11
- Fixture for loading the chemical reference and pdbx_core collections in a local mongo instance.
12
-
13
- """
14
-
15
- __docformat__ = "google en"
16
- __author__ = "John Westbrook"
17
- __email__ = "jwest@rcsb.rutgers.edu"
18
- __license__ = "Apache 2.0"
19
-
20
- # import glob
21
- import logging
22
- import os
23
- import platform
24
- import resource
25
- import time
26
- import unittest
27
-
28
- from rcsb.db.mongo.DocumentLoader import DocumentLoader
29
- from rcsb.db.mongo.PdbxLoader import PdbxLoader
30
- from rcsb.utils.config.ConfigUtil import ConfigUtil
31
- # from rcsb.utils.io.FileUtil import FileUtil
32
-
33
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
34
- logger = logging.getLogger()
35
- logger.setLevel(logging.INFO)
36
-
37
- HERE = os.path.abspath(os.path.dirname(__file__))
38
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
39
-
40
-
41
- class PdbxLoaderFixture(unittest.TestCase):
42
-
43
- def __init__(self, methodName="runTest"):
44
- super(PdbxLoaderFixture, self).__init__(methodName)
45
- self.__verbose = True
46
-
47
- def setUp(self):
48
- #
49
- #
50
- self.__isMac = platform.system() == "Darwin"
51
- self.__excludeTypeL = None if self.__isMac else ["optional"]
52
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
53
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
54
- # configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example-local.yml")
55
- # To Do: Investigate why GitUtil sometimes gives divergence error when using 'DISCOVERY_MODE: remote', but not with 'local':
56
- # stderr: 'fatal: Need to specify how to reconcile divergent branches.'
57
- # Behavior isn't entirely predictable, since it happens sometimes but not all the time.
58
- # To fully debug, will need to add more logging statements to GitUtil, StashableBase, & StashUtil (in rcsb.utils.io)
59
- # Or, can try to resolve error directly by specifying how to reconcile divergent branches in git.Repo class.
60
- configName = "site_info_configuration"
61
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
62
- #
63
- self.__resourceName = "MONGO_DB"
64
- self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
65
- self.__cachePath = os.path.join(TOPDIR, "CACHE")
66
- self.__readBackCheck = True
67
- self.__numProc = 1
68
- self.__chunkSize = 2
69
- self.__fileLimit = 38
70
- self.__documentStyle = "rowwise_by_name_with_cardinality"
71
- #
72
- self.__birdChemCompCoreIdList = [
73
- "PRD_000010",
74
- "PRD_000060",
75
- "PRD_000220",
76
- "PRD_000882",
77
- "PRD_000154",
78
- "PRD_000877",
79
- "PRD_000198",
80
- "PRD_000009",
81
- "PRD_000979",
82
- "PRDCC_000010",
83
- "PRDCC_000220",
84
- "PRDCC_000882",
85
- "PRDCC_000154",
86
- "PRDCC_000198",
87
- "PRDCC_000009",
88
- "FAM_000010",
89
- "FAM_000210",
90
- "FAM_000220",
91
- "FAM_000001",
92
- "FAM_000391",
93
- "FAM_000093",
94
- "FAM_000084",
95
- "FAM_000016",
96
- "FAM_000336",
97
- "1G1",
98
- "2RT",
99
- "2XL",
100
- "2XN",
101
- "ATP",
102
- "BJA",
103
- "BM3",
104
- "CNC",
105
- "DAL",
106
- "DDZ",
107
- "DHA",
108
- "DSN",
109
- "GTP",
110
- "HKL",
111
- "NAC",
112
- "NAG",
113
- "NND",
114
- "PTR",
115
- "SEP",
116
- "SMJ",
117
- "STL",
118
- "UNK",
119
- "UNX",
120
- "UVL",
121
- ]
122
- #
123
- self.__pdbIdList = [
124
- "1AH1",
125
- "1B5F",
126
- "1BMV",
127
- "1C58",
128
- "1DSR",
129
- "1DUL",
130
- "1KQE",
131
- "1O3Q",
132
- "1SFO",
133
- "2HW3",
134
- "2HYV",
135
- "2OSL",
136
- "2VOO",
137
- "2WMG",
138
- "3AD7",
139
- "3HYA",
140
- "3IYD",
141
- "3MBG",
142
- "3RER",
143
- "3VD8",
144
- "3VFJ",
145
- "3X11",
146
- "3ZTJ",
147
- "4E2O",
148
- "4EN8",
149
- "4MEY",
150
- "5EU8",
151
- "5KDS",
152
- # "5TM0",
153
- "5VH4",
154
- # "5VP2",
155
- # "6FSZ",
156
- "6LU7",
157
- "6NN7",
158
- # "6Q20",
159
- "6RFK",
160
- "6RKU",
161
- "6YRQ",
162
- ]
163
- self.__ldList = [
164
- {
165
- # "databaseName": "dw",
166
- "collectionGroupName": "core_chem_comp",
167
- "contentType": "bird_chem_comp_core",
168
- "collectionNameList": None,
169
- "loadType": "full",
170
- "mergeContentTypes": None,
171
- "validationLevel": "min",
172
- "inputIdCodeList": self.__birdChemCompCoreIdList
173
- },
174
- {
175
- # "databaseName": "pdbx_core",
176
- "collectionGroupName": "pdbx_core",
177
- "contentType": "pdbx_core",
178
- "collectionNameList": None,
179
- "loadType": "replace",
180
- "mergeContentTypes": ["vrpt"],
181
- "validationLevel": "min",
182
- "inputIdCodeList": self.__pdbIdList
183
- },
184
- # {
185
- # "databaseName": "pdbx_comp_model_core",
186
- # "collectionGroupName": "pdbx_comp_model_core",
187
- # "contentType": "pdbx_comp_model_core",
188
- # "collectionNameList": None,
189
- # "loadType": "full",
190
- # "mergeContentTypes": None,
191
- # "validationLevel": "min",
192
- # "inputIdCodeList": None
193
- # },
194
- ]
195
- #
196
- # self.__modelFixture()
197
- self.__startTime = time.time()
198
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
199
-
200
- def tearDown(self):
201
- unitS = "MB" if platform.system() == "Darwin" else "GB"
202
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
203
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
204
- endTime = time.time()
205
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
206
-
207
- # def __modelFixture(self):
208
- # fU = FileUtil()
209
- # modelSourcePath = os.path.join(self.__mockTopPath, "AF")
210
- # for iPath in glob.iglob(os.path.join(modelSourcePath, "*.cif.gz")):
211
- # fn = os.path.basename(iPath)
212
- # uId = fn.split("-")[1]
213
- # h3 = uId[-2:]
214
- # h2 = uId[-4:-2]
215
- # h1 = uId[-6:-4]
216
- # oPath = os.path.join(self.__cachePath, "computed-models", h1, h2, h3, fn)
217
- # fU.put(iPath, oPath)
218
-
219
- def testPdbxLoader(self):
220
- #
221
- for ld in self.__ldList:
222
- ok = self.__pdbxLoaderWrapper(**ld)
223
- self.assertTrue(ok)
224
-
225
- def __pdbxLoaderWrapper(self, **kwargs):
226
- """Wrapper for the PDBx loader module"""
227
- ok = False
228
- try:
229
- logger.info("Loading %s", kwargs["collectionGroupName"])
230
- mw = PdbxLoader(
231
- self.__cfgOb,
232
- cachePath=self.__cachePath,
233
- resourceName=self.__resourceName,
234
- numProc=self.__numProc,
235
- chunkSize=self.__chunkSize,
236
- fileLimit=kwargs.get("fileLimit", self.__fileLimit),
237
- verbose=self.__verbose,
238
- readBackCheck=self.__readBackCheck,
239
- maxStepLength=1000,
240
- useSchemaCache=True,
241
- rebuildSchemaFlag=False,
242
- )
243
- ok = mw.load(
244
- collectionGroupName=kwargs["collectionGroupName"],
245
- collectionLoadList=kwargs["collectionNameList"],
246
- contentType=kwargs["contentType"],
247
- loadType=kwargs["loadType"],
248
- inputPathList=None,
249
- inputIdCodeList=kwargs["inputIdCodeList"],
250
- styleType=self.__documentStyle,
251
- dataSelectors=["PUBLIC_RELEASE"],
252
- failedFilePath=self.__failedFilePath,
253
- saveInputFileListPath=None,
254
- pruneDocumentSize=None,
255
- logSize=False,
256
- validationLevel=kwargs["validationLevel"],
257
- mergeContentTypes=kwargs["mergeContentTypes"],
258
- useNameFlag=False,
259
- providerTypeExcludeL=self.__excludeTypeL,
260
- restoreUseGit=True,
261
- restoreUseStash=False,
262
- )
263
- self.assertTrue(ok)
264
- ok = self.__loadStatus(mw.getLoadStatus())
265
- self.assertTrue(ok)
266
- except Exception as e:
267
- logger.exception("Failing with %s", str(e))
268
- self.fail()
269
- return ok
270
-
271
- def __loadStatus(self, statusList):
272
- sectionName = "data_exchange_configuration"
273
- dl = DocumentLoader(
274
- self.__cfgOb,
275
- self.__cachePath,
276
- resourceName=self.__resourceName,
277
- numProc=self.__numProc,
278
- chunkSize=self.__chunkSize,
279
- documentLimit=None,
280
- verbose=self.__verbose,
281
- readBackCheck=self.__readBackCheck,
282
- )
283
- #
284
- databaseName = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
285
- collectionName = self.__cfgOb.get("COLLECTION_UPDATE_STATUS", sectionName=sectionName)
286
- ok = dl.load(databaseName, collectionName, loadType="append", documentList=statusList, indexAttributeList=["update_id", "database_name", "object_name"], keyNames=None)
287
- return ok
288
-
289
-
290
- def mongoLoadPdbxSuite():
291
- suiteSelect = unittest.TestSuite()
292
- suiteSelect.addTest(PdbxLoaderFixture("testPdbxLoader"))
293
- return suiteSelect
294
-
295
-
296
- if __name__ == "__main__":
297
- mySuite = mongoLoadPdbxSuite()
298
- unittest.TextTestRunner(verbosity=2).run(mySuite)