rcsb.exdb 1.31__py3-none-any.whl → 1.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. rcsb/exdb/tree/TreeNodeListWorker.py +72 -49
  2. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/METADATA +2 -2
  3. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/RECORD +5 -42
  4. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +0 -19
  5. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +0 -12
  6. rcsb/exdb/tests/__init__.py +0 -0
  7. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -104
  8. rcsb/exdb/tests/fixturePdbxLoader.py +0 -298
  9. rcsb/exdb/tests/test-data/components-abbrev.cif +0 -2739
  10. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +0 -9171
  11. rcsb/exdb/tests/testAnnotationExtractor.py +0 -79
  12. rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -81
  13. rcsb/exdb/tests/testChemRefLoader.py +0 -106
  14. rcsb/exdb/tests/testChemRefMappingProvider.py +0 -95
  15. rcsb/exdb/tests/testCitationAdapter.py +0 -97
  16. rcsb/exdb/tests/testCitationExtractor.py +0 -93
  17. rcsb/exdb/tests/testCitationUtils.py +0 -92
  18. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -70
  19. rcsb/exdb/tests/testEntryInfoProvider.py +0 -97
  20. rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -70
  21. rcsb/exdb/tests/testGlycanProvider.py +0 -98
  22. rcsb/exdb/tests/testGlycanUtils.py +0 -64
  23. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -90
  24. rcsb/exdb/tests/testObjectExtractor.py +0 -342
  25. rcsb/exdb/tests/testObjectTransformer.py +0 -83
  26. rcsb/exdb/tests/testObjectUpdater.py +0 -120
  27. rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -93
  28. rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -124
  29. rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -134
  30. rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -155
  31. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -123
  32. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -106
  33. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -121
  34. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -122
  35. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -117
  36. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -94
  37. rcsb/exdb/tests/testTaxonomyExtractor.py +0 -75
  38. rcsb/exdb/tests/testTreeNodeListWorker.py +0 -111
  39. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -99
  40. rcsb/exdb/tests/testUniProtExtractor.py +0 -77
  41. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/WHEEL +0 -0
  42. {rcsb_exdb-1.31.dist-info → rcsb_exdb-1.33.dist-info}/licenses/LICENSE +0 -0
@@ -1,342 +0,0 @@
1
- ##
2
- # File: ObjectExtractorTests.py
3
- # Author: J. Westbrook
4
- # Date: 25-Apr-2019
5
- #
6
- # Updates:
7
- #
8
- ##
9
- """
10
- Tests for extractor selected values from collections (limited tests from mock-data repos)
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
-
19
- import logging
20
- import os
21
- import platform
22
- import resource
23
- import pprint
24
- import time
25
- import unittest
26
- from collections import defaultdict
27
-
28
- from rcsb.db.mongo.Connection import Connection
29
- from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
30
- from rcsb.utils.config.ConfigUtil import ConfigUtil
31
- from rcsb.utils.io.TimeUtil import TimeUtil
32
-
33
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
34
- logger = logging.getLogger()
35
-
36
- HERE = os.path.abspath(os.path.dirname(__file__))
37
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
38
-
39
-
40
- class ObjectExtractorTests(unittest.TestCase):
41
- def __init__(self, methodName="runTest"):
42
- super(ObjectExtractorTests, self).__init__(methodName)
43
- self.__verbose = False
44
-
45
- def setUp(self):
46
- #
47
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
48
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
49
- #
50
- configName = "site_info_configuration"
51
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
52
- #
53
- self.__resourceName = "MONGO_DB"
54
- self.__workPath = os.path.join(TOPDIR, "CACHE", "exdb")
55
- #
56
- self.__testEntryCacheKwargs = {"fmt": "json", "indent": 3}
57
- self.__objectLimitTest = 5
58
- #
59
- self.__startTime = time.time()
60
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
61
-
62
- def tearDown(self):
63
- unitS = "MB" if platform.system() == "Darwin" else "GB"
64
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
65
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
66
- endTime = time.time()
67
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
68
-
69
- def testCreateMultipleConnections(self):
70
- """Test case - multiple connection creation"""
71
- try:
72
- for _ in range(5):
73
- with Connection(cfgOb=self.__cfgOb, resourceName=self.__resourceName) as client:
74
- self.assertNotEqual(client, None)
75
- except Exception as e:
76
- logger.exception("Failing with %s", str(e))
77
- self.fail()
78
-
79
- def testExtractDrugbankMapping(self):
80
- """Test case - extract Drugbank mapping"""
81
- try:
82
- obEx = ObjectExtractor(
83
- self.__cfgOb,
84
- databaseName="dw",
85
- collectionName="core_chem_comp",
86
- cacheFilePath=os.path.join(self.__workPath, "drugbank-mapping-cache.json"),
87
- useCache=False,
88
- cacheKwargs=self.__testEntryCacheKwargs,
89
- keyAttribute="chem_comp",
90
- uniqueAttributes=["rcsb_id"],
91
- selectionQuery={"rcsb_chem_comp_container_identifiers.drugbank_id": {"$exists": True}},
92
- selectionList=["rcsb_id", "rcsb_chem_comp_container_identifiers", "rcsb_chem_comp_related"],
93
- )
94
- eCount = obEx.getCount()
95
- logger.info("Component count ifs %d", eCount)
96
- self.assertGreaterEqual(eCount, 3)
97
- except Exception as e:
98
- logger.exception("Failing with %s", str(e))
99
- self.fail()
100
-
101
- def testExtractEntriesBefore(self):
102
- """Test case - extract entries subject to date restriction"""
103
- try:
104
- tU = TimeUtil()
105
- tS = tU.getTimestamp(useUtc=True, before={"days": 365 * 7})
106
- tD = tU.getDateTimeObj(tS)
107
- obEx = ObjectExtractor(
108
- self.__cfgOb,
109
- databaseName="pdbx_core",
110
- collectionName="pdbx_core_entry",
111
- useCache=False,
112
- keyAttribute="entry",
113
- uniqueAttributes=["rcsb_id"],
114
- selectionQuery={"rcsb_accession_info.initial_release_date": {"$gt": tD}},
115
- selectionList=["rcsb_id", "rcsb_accession_info"],
116
- )
117
- eD = obEx.getObjects()
118
- eCount = obEx.getCount()
119
- logger.info("Entry count is %d", eCount)
120
- logger.info("Entries are %r", list(eD.keys()))
121
- self.assertGreaterEqual(eCount, 5)
122
- except Exception as e:
123
- logger.exception("Failing with %s", str(e))
124
- self.fail()
125
-
126
- def testExtractEntries(self):
127
- """Test case - extract entries"""
128
- try:
129
- obEx = ObjectExtractor(
130
- self.__cfgOb,
131
- databaseName="pdbx_core",
132
- collectionName="pdbx_core_entry",
133
- cacheFilePath=os.path.join(self.__workPath, "entry-data-test-cache.json"),
134
- useCache=False,
135
- keyAttribute="entry",
136
- uniqueAttributes=["rcsb_id"],
137
- cacheKwargs=self.__testEntryCacheKwargs,
138
- objectLimit=self.__objectLimitTest,
139
- )
140
- eCount = obEx.getCount()
141
- logger.info("Entry count is %d", eCount)
142
- self.assertGreaterEqual(eCount, self.__objectLimitTest)
143
-
144
- objD = obEx.getObjects()
145
- for _, obj in objD.items():
146
- # obEx.genPathList(obj["software"], path=["software"])
147
- obEx.genPathList(obj, path=None)
148
-
149
- #
150
- pL = obEx.getPathList(filterList=True)
151
- obEx.setPathList(pL)
152
- if self.__verbose:
153
- for ky, obj in objD.items():
154
- obEx.genValueList(obj, path=None)
155
- tD = obEx.getValues()
156
- logger.debug("Index object %r %s", ky, pprint.pformat(tD, indent=3, width=120))
157
-
158
- except Exception as e:
159
- logger.exception("Failing with %s", str(e))
160
- self.fail()
161
-
162
- def testExtractEntities(self):
163
- """Test case - extract entities"""
164
- try:
165
- obEx = ObjectExtractor(
166
- self.__cfgOb,
167
- databaseName="pdbx_core",
168
- collectionName="pdbx_core_polymer_entity",
169
- cacheFilePath=os.path.join(self.__workPath, "entity-data-test-cache.json"),
170
- useCache=False,
171
- keyAttribute="entity",
172
- uniqueAttributes=["rcsb_id"],
173
- cacheKwargs=self.__testEntryCacheKwargs,
174
- objectLimit=self.__objectLimitTest,
175
- )
176
- eCount = obEx.getCount()
177
- logger.info("Entity count is %d", eCount)
178
- self.assertGreaterEqual(eCount, self.__objectLimitTest)
179
-
180
- objD = obEx.getObjects()
181
- for _, obj in objD.items():
182
- obEx.genPathList(obj, path=None)
183
- #
184
- pL = obEx.getPathList(filterList=False)
185
- logger.debug("Path list (unfiltered) %r", pL)
186
- #
187
- pL = obEx.getPathList()
188
- logger.debug("Path list %r", pL)
189
- obEx.setPathList(pL)
190
- if self.__verbose:
191
- for ky, obj in objD.items():
192
- obEx.genValueList(obj, path=None)
193
- tD = obEx.getValues()
194
- logger.info("Index object %r %s", ky, pprint.pformat(tD, indent=3, width=120))
195
-
196
- except Exception as e:
197
- logger.exception("Failing with %s", str(e))
198
- self.fail()
199
-
200
- def testExtractSelectedEntityContent(self):
201
- """Test case - extract selected entity content
202
-
203
- "reference_sequence_identifiers": [
204
- {
205
- "database_name": "UniProt",
206
- "database_accession": "Q5SHN1",
207
- "provenance_source": "SIFTS"
208
- },
209
- {
210
- "database_name": "UniProt",
211
- "database_accession": "Q5SHN1",
212
- "provenance_source": "PDB"
213
- }
214
- ]
215
- """
216
- try:
217
- obEx = ObjectExtractor(
218
- self.__cfgOb,
219
- databaseName="pdbx_core",
220
- collectionName="pdbx_core_polymer_entity",
221
- cacheFilePath=os.path.join(self.__workPath, "entity-selected-content-test-cache.json"),
222
- useCache=False,
223
- keyAttribute="entity",
224
- uniqueAttributes=["rcsb_id"],
225
- cacheKwargs=self.__testEntryCacheKwargs,
226
- # objectLimit=self.__objectLimitTest,
227
- objectLimit=None,
228
- selectionQuery={"entity_poly.rcsb_entity_polymer_type": "Protein"},
229
- selectionList=["rcsb_id", "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers"],
230
- )
231
- eCount = obEx.getCount()
232
- logger.info("Entity count is %d", eCount)
233
- #
234
- #
235
- if self.__objectLimitTest is not None:
236
- self.assertGreaterEqual(eCount, self.__objectLimitTest)
237
- objD = obEx.getObjects()
238
- for _, obj in objD.items():
239
- obEx.genPathList(obj, path=None)
240
- #
241
- pL = obEx.getPathList(filterList=False)
242
- logger.debug("Path list (unfiltered) %r", pL)
243
- #
244
- pL = obEx.getPathList()
245
- logger.debug("Path list %r", pL)
246
- obEx.setPathList(pL)
247
- if self.__verbose:
248
- for ky, obj in objD.items():
249
- obEx.genValueList(obj, path=None)
250
- tD = obEx.getValues()
251
- logger.info("Index object %r %s", ky, pprint.pformat(tD, indent=3, width=120))
252
-
253
- objD = obEx.getObjects()
254
- # logger.info("objD.keys() %r", list(objD.keys()))
255
- totCount = 0
256
- difCount = 0
257
- pdbUnpIdD = defaultdict(int)
258
- siftsUnpIdD = defaultdict(int)
259
- pdbDifUnpIdD = defaultdict(int)
260
- for entityKey, eD in objD.items():
261
- try:
262
- siftsS = set()
263
- pdbS = set()
264
- for tD in eD["rcsb_polymer_entity_container_identifiers"]["reference_sequence_identifiers"]:
265
- if tD["database_name"] == "UniProt":
266
- if tD["provenance_source"] == "SIFTS":
267
- siftsS.add(tD["database_accession"])
268
- siftsUnpIdD[tD["database_accession"]] += 1
269
- elif tD["provenance_source"] == "PDB":
270
- pdbS.add(tD["database_accession"])
271
- pdbUnpIdD[tD["database_accession"]] += 1
272
- else:
273
- logger.debug("No UniProt for %r", eD["rcsb_polymer_entity_container_identifiers"])
274
- logger.debug("PDB assigned sequence length %d", len(pdbS))
275
- logger.debug("SIFTS assigned sequence length %d", len(siftsS))
276
-
277
- if pdbS and siftsS:
278
- totCount += 1
279
- if pdbS != siftsS:
280
- difCount += 1
281
- for idV in pdbS:
282
- pdbDifUnpIdD[idV] += 1
283
-
284
- except Exception as e:
285
- logger.warning("No identifiers for %s with %s", entityKey, str(e))
286
- logger.info("Total %d differences %d", totCount, difCount)
287
- logger.info("Unique UniProt ids PDB %d SIFTS %d", len(pdbUnpIdD), len(siftsUnpIdD))
288
- logger.info("Unique UniProt differences %d ", len(pdbDifUnpIdD))
289
- except Exception as e:
290
- logger.exception("Failing with %s", str(e))
291
- self.fail()
292
-
293
- def testExtractEntityTaxonomyContent(self):
294
- """Test case - extract unique entity source and host taxonomies"""
295
- try:
296
- obEx = ObjectExtractor(
297
- self.__cfgOb,
298
- databaseName="pdbx_core",
299
- collectionName="pdbx_core_polymer_entity",
300
- cacheFilePath=os.path.join(self.__workPath, "entity-taxonomy-test-cache.json"),
301
- useCache=False,
302
- keyAttribute="entity",
303
- uniqueAttributes=["rcsb_id"],
304
- cacheKwargs=self.__testEntryCacheKwargs,
305
- # objectLimit=self.__objectLimitTest,
306
- objectLimit=None,
307
- selectionQuery=None,
308
- selectionList=["rcsb_id", "rcsb_entity_source_organism.ncbi_taxonomy_id", "rcsb_entity_host_organism.ncbi_taxonomy_id"],
309
- )
310
- eCount = obEx.getCount()
311
- logger.info("Polymer entity count is %d", eCount)
312
- taxIdS = set()
313
- objD = obEx.getObjects()
314
- for _, eD in objD.items():
315
- try:
316
- for tD in eD["rcsb_entity_source_organism"]:
317
- taxIdS.add(tD["ncbi_taxonomy_id"])
318
- except Exception:
319
- pass
320
- try:
321
- for tD in eD["rcsb_entity_host_organism"]:
322
- taxIdS.add(tD["ncbi_taxonomy_id"])
323
- except Exception:
324
- pass
325
-
326
- logger.info("Unique taxons %d", len(taxIdS))
327
- except Exception as e:
328
- logger.exception("Failing with %s", str(e))
329
- self.fail()
330
-
331
-
332
- def objectExtractorSuite():
333
- suiteSelect = unittest.TestSuite()
334
- suiteSelect.addTest(ObjectExtractorTests("testExtractEntries"))
335
- suiteSelect.addTest(ObjectExtractorTests("testExtractEntities"))
336
- suiteSelect.addTest(ObjectExtractorTests("testExtractSelectedEntityContent"))
337
- return suiteSelect
338
-
339
-
340
- if __name__ == "__main__":
341
- mySuite = objectExtractorSuite()
342
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,83 +0,0 @@
1
- ##
2
- # File: ObjectTransformerTests.py
3
- # Author: J. Westbrook
4
- # Date: 25-Apr-2019
5
- #
6
- # Updates:
7
- #
8
- ##
9
- """
10
- Tests for extractor and updater or selected values from collections (limited tests from mock-data repos)
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
- import logging
19
- import os
20
- import platform
21
- import resource
22
- import time
23
- import unittest
24
-
25
- from rcsb.exdb.utils.ObjectTransformer import ObjectTransformer
26
- from rcsb.utils.config.ConfigUtil import ConfigUtil
27
-
28
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
29
- logger = logging.getLogger()
30
-
31
- HERE = os.path.abspath(os.path.dirname(__file__))
32
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
33
-
34
-
35
- class ObjectTransformerTests(unittest.TestCase):
36
- def __init__(self, methodName="runTest"):
37
- super(ObjectTransformerTests, self).__init__(methodName)
38
- self.__verbose = True
39
-
40
- def setUp(self):
41
- #
42
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
43
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
44
- #
45
- configName = "site_info_configuration"
46
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
47
- #
48
- self.__fetchLimit = 5
49
- #
50
- self.__startTime = time.time()
51
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
52
-
53
- def tearDown(self):
54
- unitS = "MB" if platform.system() == "Darwin" else "GB"
55
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
56
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
57
- endTime = time.time()
58
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
59
-
60
- def testTranformEntityProteinContent(self):
61
- """Test case - transform selected entity protein documents"""
62
- try:
63
- databaseName = "pdbx_core"
64
- collectionName = "pdbx_core_polymer_entity"
65
- obTr = ObjectTransformer(self.__cfgOb)
66
- ok = obTr.doTransform(
67
- databaseName=databaseName, collectionName=collectionName, fetchLimit=self.__fetchLimit, selectionQuery={"entity_poly.rcsb_entity_polymer_type": "Protein"}
68
- )
69
- self.assertTrue(ok)
70
- except Exception as e:
71
- logger.exception("Failing with %s", str(e))
72
- self.fail()
73
-
74
-
75
- def objectTransformerSuite():
76
- suiteSelect = unittest.TestSuite()
77
- suiteSelect.addTest(ObjectTransformerTests("testTransformEntityProteinContent"))
78
- return suiteSelect
79
-
80
-
81
- if __name__ == "__main__":
82
- mySuite = objectTransformerSuite()
83
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,120 +0,0 @@
1
- ##
2
- # File: ObjectUpdaterTests.py
3
- # Author: J. Westbrook
4
- # Date: 25-Apr-2019
5
- #
6
- # Updates:
7
- #
8
- ##
9
- """
10
- Tests for extractor and updater or selected values from collections (limited tests from mock-data repos)
11
- """
12
-
13
- __docformat__ = "google en"
14
- __author__ = "John Westbrook"
15
- __email__ = "jwest@rcsb.rutgers.edu"
16
- __license__ = "Apache 2.0"
17
-
18
- import logging
19
- import os
20
- import platform
21
- import resource
22
- import time
23
- import unittest
24
-
25
- from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
26
- from rcsb.exdb.utils.ObjectUpdater import ObjectUpdater
27
- from rcsb.utils.config.ConfigUtil import ConfigUtil
28
-
29
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
30
- logger = logging.getLogger()
31
-
32
- HERE = os.path.abspath(os.path.dirname(__file__))
33
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
34
-
35
-
36
- class ObjectUpdaterTests(unittest.TestCase):
37
- def __init__(self, methodName="runTest"):
38
- super(ObjectUpdaterTests, self).__init__(methodName)
39
- self.__verbose = True
40
-
41
- def setUp(self):
42
- #
43
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
44
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
45
- #
46
- configName = "site_info_configuration"
47
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
48
- #
49
- self.__workPath = os.path.join(TOPDIR, "CACHE", "exdb")
50
- self.__testEntryCacheKwargs = {"fmt": "json", "indent": 3}
51
- self.__objectLimitTest = 5
52
- #
53
- self.__startTime = time.time()
54
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
55
-
56
- def tearDown(self):
57
- unitS = "MB" if platform.system() == "Darwin" else "GB"
58
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
59
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
60
- endTime = time.time()
61
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
62
-
63
- def testUpdateSelectedEntityContent(self):
64
- """Test case - update of selected entity reference sequence content"""
65
- try:
66
- databaseName = "pdbx_core"
67
- collectionName = "pdbx_core_polymer_entity"
68
- obEx = ObjectExtractor(
69
- self.__cfgOb,
70
- databaseName=databaseName,
71
- collectionName=collectionName,
72
- cacheFilePath=os.path.join(self.__workPath, "entity-selected-content-test-cache.json"),
73
- useCache=False,
74
- keyAttribute="entity",
75
- uniqueAttributes=["rcsb_id"],
76
- cacheKwargs=self.__testEntryCacheKwargs,
77
- objectLimit=self.__objectLimitTest,
78
- # objectLimit=None,
79
- selectionQuery={"entity_poly.rcsb_entity_polymer_type": "Protein"},
80
- selectionList=["rcsb_id", "rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers"],
81
- )
82
- eCount = obEx.getCount()
83
- logger.info("Entity count is %d", eCount)
84
- objD = obEx.getObjects()
85
- updateDL = []
86
- for entityKey, eD in objD.items():
87
- try:
88
- selectD = {"rcsb_id": entityKey}
89
- tL = (
90
- eD["rcsb_polymer_entity_container_identifiers"]["reference_sequence_identifiers"]
91
- if "reference_sequence_identifiers" in eD["rcsb_polymer_entity_container_identifiers"]
92
- else []
93
- )
94
- tL.append({"database_accession": "1111111", "database_name": "PDB", "provenance_source": "RCSB"})
95
- #
96
- updateD = {"rcsb_polymer_entity_container_identifiers.reference_sequence_identifiers": tL}
97
- updateDL.append({"selectD": selectD, "updateD": updateD})
98
- except Exception as e:
99
- logger.exception("Failing with %s", str(e))
100
- for ii, uD in enumerate(updateDL):
101
- logger.debug(" >>>> (%d) selectD %r updateD %r", ii, uD["selectD"], uD["updateD"])
102
- #
103
- obUpd = ObjectUpdater(self.__cfgOb)
104
- numUpd = obUpd.update(databaseName, collectionName, updateDL)
105
- self.assertGreaterEqual(numUpd, len(updateDL))
106
- logger.info("Update count is %d", numUpd)
107
- except Exception as e:
108
- logger.exception("Failing with %s", str(e))
109
- self.fail()
110
-
111
-
112
- def objectUpdaterSuite():
113
- suiteSelect = unittest.TestSuite()
114
- suiteSelect.addTest(ObjectUpdaterTests("testUpdateSelectedEntityContent"))
115
- return suiteSelect
116
-
117
-
118
- if __name__ == "__main__":
119
- mySuite = objectUpdaterSuite()
120
- unittest.TextTestRunner(verbosity=2).run(mySuite)
@@ -1,93 +0,0 @@
1
- ##
2
- # File: PolymerEntityExtractorTests.py
3
- # Author: J. Westbrook
4
- # Date: 5-Dec-2020
5
- #
6
- # Updates:
7
- #
8
- ##
9
- """
10
- Tests for extraction of polymer entity sequence details from the ExDB core collections.
11
- """
12
- __docformat__ = "google en"
13
- __author__ = "John Westbrook"
14
- __email__ = "jwest@rcsb.rutgers.edu"
15
- __license__ = "Apache 2.0"
16
-
17
- import logging
18
- import os
19
- import platform
20
- import resource
21
- import time
22
- import unittest
23
-
24
- from rcsb.exdb.seq.PolymerEntityExtractor import PolymerEntityExtractor
25
- from rcsb.utils.config.ConfigUtil import ConfigUtil
26
-
27
-
28
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
29
- logger = logging.getLogger()
30
-
31
- HERE = os.path.abspath(os.path.dirname(__file__))
32
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
33
-
34
-
35
- class PolymerEntityExtractorTests(unittest.TestCase):
36
- def __init__(self, methodName="runTest"):
37
- super(PolymerEntityExtractorTests, self).__init__(methodName)
38
- self.__verbose = True
39
-
40
- def setUp(self):
41
- #
42
- self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
43
- configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
44
- configName = "site_info_configuration"
45
- self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
46
- #
47
- self.__fastaPath = os.path.join(HERE, "test-output", "CACHE", "pdb-protein-entity.fa")
48
- self.__taxonPath = os.path.join(HERE, "test-output", "CACHE", "pdb-protein-entity-taxon.tdd")
49
- self.__detailsPath = os.path.join(HERE, "test-output", "CACHE", "pdb-protein-entity-details.json")
50
- #
51
- self.__startTime = time.time()
52
- logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
53
-
54
- def tearDown(self):
55
- unitS = "MB" if platform.system() == "Darwin" else "GB"
56
- rusageMax = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
57
- logger.info("Maximum resident memory size %.4f %s", rusageMax / 10 ** 6, unitS)
58
- endTime = time.time()
59
- logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
60
-
61
- def testGetProteinEntityDetails(self):
62
- """Test case - get protein entity sequences and essential details"""
63
- try:
64
- pEx = PolymerEntityExtractor(self.__cfgOb)
65
- pD, _ = pEx.getProteinSequenceDetails()
66
- #
67
- self.assertGreaterEqual(len(pD), 70)
68
- logger.info("Polymer entity count %d", len(pD))
69
- except Exception as e:
70
- logger.exception("Failing with %s", str(e))
71
- self.fail()
72
-
73
- def testExportProteinEntityFasta(self):
74
- """Test case - export protein entity sequence Fasta"""
75
- try:
76
- pEx = PolymerEntityExtractor(self.__cfgOb)
77
- ok = pEx.exportProteinEntityFasta(self.__fastaPath, self.__taxonPath, self.__detailsPath)
78
- self.assertTrue(ok)
79
- except Exception as e:
80
- logger.exception("Failing with %s", str(e))
81
- self.fail()
82
-
83
-
84
- def extractorSuite():
85
- suiteSelect = unittest.TestSuite()
86
- suiteSelect.addTest(PolymerEntityExtractorTests("testGetProteinEntityDetails"))
87
- suiteSelect.addTest(PolymerEntityExtractorTests("testExportProteinEntityFasta"))
88
- return suiteSelect
89
-
90
-
91
- if __name__ == "__main__":
92
- mySuite = extractorSuite()
93
- unittest.TextTestRunner(verbosity=2).run(mySuite)