rcsb.exdb 1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. rcsb/__init__.py +1 -0
  2. rcsb/exdb/__init__.py +1 -0
  3. rcsb/exdb/branch/BranchedEntityExtractor.py +82 -0
  4. rcsb/exdb/branch/GlycanProvider.py +116 -0
  5. rcsb/exdb/branch/GlycanUtils.py +114 -0
  6. rcsb/exdb/branch/__init__.py +0 -0
  7. rcsb/exdb/chemref/ChemRefEtlWorker.py +118 -0
  8. rcsb/exdb/chemref/ChemRefExtractor.py +70 -0
  9. rcsb/exdb/chemref/ChemRefMappingProvider.py +139 -0
  10. rcsb/exdb/chemref/PubChemDataCacheProvider.py +372 -0
  11. rcsb/exdb/chemref/PubChemEtlWrapper.py +280 -0
  12. rcsb/exdb/chemref/PubChemIndexCacheProvider.py +638 -0
  13. rcsb/exdb/chemref/__init__.py +0 -0
  14. rcsb/exdb/citation/CitationAdapter.py +91 -0
  15. rcsb/exdb/citation/CitationExtractor.py +190 -0
  16. rcsb/exdb/citation/CitationUtils.py +51 -0
  17. rcsb/exdb/citation/__init__.py +0 -0
  18. rcsb/exdb/cli/__init__.py +0 -0
  19. rcsb/exdb/entry/EntryInfoProvider.py +148 -0
  20. rcsb/exdb/entry/__init__.py +0 -0
  21. rcsb/exdb/examples-seq/EntityInstanceExtractor.py +557 -0
  22. rcsb/exdb/examples-seq/EntityPolymerExtractor.py +544 -0
  23. rcsb/exdb/examples-seq/EntityPolymerExtractorFullTests.py +176 -0
  24. rcsb/exdb/examples-seq/ReferenceSequenceAssignmentUpdater.py +449 -0
  25. rcsb/exdb/examples-seq/ReferenceSequenceUtils.py +123 -0
  26. rcsb/exdb/examples-seq/ReferenceSequenceUtilsTests.py +109 -0
  27. rcsb/exdb/examples-seq/exampleObjectExtractor.py +109 -0
  28. rcsb/exdb/examples-seq/fixtureEntityPolymerExtractor.py +85 -0
  29. rcsb/exdb/examples-seq/testEntityInstanceExtractor.py +170 -0
  30. rcsb/exdb/examples-seq/testEntityPolymerExtractor.py +171 -0
  31. rcsb/exdb/examples-seq/testReferenceSequenceAssignmentUpdater.py +79 -0
  32. rcsb/exdb/examples-seq/testReferenceSequenceUtils.py +108 -0
  33. rcsb/exdb/seq/AnnotationExtractor.py +76 -0
  34. rcsb/exdb/seq/LigandNeighborMappingExtractor.py +84 -0
  35. rcsb/exdb/seq/LigandNeighborMappingProvider.py +106 -0
  36. rcsb/exdb/seq/PolymerEntityExtractor.py +328 -0
  37. rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +598 -0
  38. rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +228 -0
  39. rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +534 -0
  40. rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +388 -0
  41. rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +397 -0
  42. rcsb/exdb/seq/TaxonomyExtractor.py +69 -0
  43. rcsb/exdb/seq/UniProtCoreEtlWorker.py +177 -0
  44. rcsb/exdb/seq/UniProtExtractor.py +80 -0
  45. rcsb/exdb/seq/__init__.py +0 -0
  46. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +19 -0
  47. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +12 -0
  48. rcsb/exdb/tests/__init__.py +0 -0
  49. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +104 -0
  50. rcsb/exdb/tests/fixturePdbxLoader.py +298 -0
  51. rcsb/exdb/tests/test-data/components-abbrev.cif +2739 -0
  52. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +9171 -0
  53. rcsb/exdb/tests/testAnnotationExtractor.py +79 -0
  54. rcsb/exdb/tests/testBranchedEntityExtractor.py +81 -0
  55. rcsb/exdb/tests/testChemRefLoader.py +106 -0
  56. rcsb/exdb/tests/testChemRefMappingProvider.py +95 -0
  57. rcsb/exdb/tests/testCitationAdapter.py +97 -0
  58. rcsb/exdb/tests/testCitationExtractor.py +93 -0
  59. rcsb/exdb/tests/testCitationUtils.py +92 -0
  60. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +70 -0
  61. rcsb/exdb/tests/testEntryInfoProvider.py +97 -0
  62. rcsb/exdb/tests/testGlycanEtlWorkflow.py +70 -0
  63. rcsb/exdb/tests/testGlycanProvider.py +98 -0
  64. rcsb/exdb/tests/testGlycanUtils.py +64 -0
  65. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +90 -0
  66. rcsb/exdb/tests/testObjectExtractor.py +342 -0
  67. rcsb/exdb/tests/testObjectTransformer.py +83 -0
  68. rcsb/exdb/tests/testObjectUpdater.py +120 -0
  69. rcsb/exdb/tests/testPolymerEntityExtractor.py +93 -0
  70. rcsb/exdb/tests/testPubChemDataCacheProvider.py +124 -0
  71. rcsb/exdb/tests/testPubChemEtlWorkflow.py +134 -0
  72. rcsb/exdb/tests/testPubChemEtlWrapper.py +155 -0
  73. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +123 -0
  74. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +106 -0
  75. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +121 -0
  76. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +122 -0
  77. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +117 -0
  78. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +94 -0
  79. rcsb/exdb/tests/testTaxonomyExtractor.py +75 -0
  80. rcsb/exdb/tests/testTreeNodeListWorker.py +111 -0
  81. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +99 -0
  82. rcsb/exdb/tests/testUniProtExtractor.py +77 -0
  83. rcsb/exdb/tree/TreeNodeListWorker.py +228 -0
  84. rcsb/exdb/tree/__init__.py +0 -0
  85. rcsb/exdb/utils/ObjectAdapterBase.py +22 -0
  86. rcsb/exdb/utils/ObjectExtractor.py +286 -0
  87. rcsb/exdb/utils/ObjectTransformer.py +124 -0
  88. rcsb/exdb/utils/ObjectUpdater.py +121 -0
  89. rcsb/exdb/utils/ObjectValidator.py +160 -0
  90. rcsb/exdb/utils/__init__.py +0 -0
  91. rcsb/exdb/wf/EntryInfoEtlWorkflow.py +71 -0
  92. rcsb/exdb/wf/GlycanEtlWorkflow.py +76 -0
  93. rcsb/exdb/wf/PubChemEtlWorkflow.py +240 -0
  94. rcsb/exdb/wf/__init__.py +0 -0
  95. rcsb_exdb-1.31.dist-info/METADATA +103 -0
  96. rcsb_exdb-1.31.dist-info/RECORD +98 -0
  97. rcsb_exdb-1.31.dist-info/WHEEL +4 -0
  98. rcsb_exdb-1.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,123 @@
1
+ ##
2
+ # File: ReferenceSequenceUtils.py
3
+ # Date: 28-Mar-2019 jdw
4
+ #
5
+ # Selected utilities to integrate reference sequence information with PDB polymer entity data.
6
+ #
7
+ # Updates:
8
+ # 21-Apr-2019 jdw refactor
9
+ #
10
+ ##
11
+ __docformat__ = "google en"
12
+ __author__ = "John Westbrook"
13
+ __email__ = "jwest@rcsb.rutgers.edu"
14
+ __license__ = "Apache 2.0"
15
+
16
+ import logging
17
+ import os
18
+
19
+ from rcsb.exdb.seq.EntityPolymerExtractor import EntityPolymerExtractor
20
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
21
+ from rcsb.utils.seq.UniProtUtils import UniProtUtils
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class ReferenceSequenceUtils(object):
    """Selected utilities to integrate reference sequence information with PDB polymer entity data.

    Construction collects the accessions assigned to the reference database from
    EntityPolymerExtractor and then loads the reference entries from a local cache
    file or fetches them (the fetch step only handles the "UNP" database name).
    """

    def __init__(self, cfgOb, refDbName, **kwargs):
        """Collect the reference accessions and load or rebuild the reference data.

        Args:
            cfgOb (object): configuration object handed to EntityPolymerExtractor
            refDbName (str): reference sequence database name (e.g. "UNP")
            **kwargs: optional settings read by the private helpers -
                exdbDirPath, cacheKwargs, useCache, entryLimit, fetchLimit, saveText
        """
        self.__cfgOb = cfgOb
        self.__refDbName = refDbName
        self.__mU = MarshalUtil()
        #
        self.__refIdList = self.__getReferenceAssignments(refDbName, **kwargs)
        self.__refD, self.__matchD = self.__rebuildCache(refDbName, self.__refIdList, **kwargs)

    def __getReferenceAssignments(self, refDbName, **kwargs):
        """Get all accessions assigned to input reference sequence database

        Args:
            refDbName (str): reference sequence database name
            **kwargs: exdbDirPath, cacheKwargs, useCache (default True) and entryLimit,
                all passed through to EntityPolymerExtractor

        Returns:
            list: value of EntityPolymerExtractor.getRefSeqAccessions() (empty list on failure)
        """
        rL = []
        exdbDirPath = kwargs.get("exdbDirPath", None)
        cacheKwargs = kwargs.get("cacheKwargs", None)
        useCache = kwargs.get("useCache", True)
        entryLimit = kwargs.get("entryLimit", None)

        try:
            epe = EntityPolymerExtractor(self.__cfgOb, exdbDirPath=exdbDirPath, useCache=useCache, cacheKwargs=cacheKwargs, entryLimit=entryLimit)
            eCount = epe.getEntryCount()
            rL = epe.getRefSeqAccessions(refDbName)
            logger.info("Reading polymer entity cache with repository entry count %d ref accession length %d ", eCount, len(rL))
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return rL

    def __rebuildCache(self, refDbName, idList, **kwargs):
        """Load the reference data cache file or rebuild it by fetching the input accessions.

        A cache file is only managed when both a cache directory (exdbDirPath) and
        serialization options (cacheKwargs) are supplied; otherwise the data are
        fetched and returned without touching the file system.

        Args:
            refDbName (str): reference sequence database name
            idList (list): accessions to fetch when no usable cache is available
            **kwargs: exdbDirPath, cacheKwargs, useCache (default True), fetchLimit
                and saveText (default False)

        Returns:
            (dict, dict): the "refDbCache" and "matchInfo" sections of the cache data
        """
        dirPath = kwargs.get("exdbDirPath", None)
        cacheKwargs = kwargs.get("cacheKwargs", None)
        useCache = kwargs.get("useCache", True)
        fetchLimit = kwargs.get("fetchLimit", None)
        saveText = kwargs.get("saveText", False)

        cacheFilePath = None
        if dirPath and cacheKwargs:
            ext = "pic" if cacheKwargs.get("fmt") == "pickle" else "json"
            fn = "ref-sequence-data-cache" + "." + ext
            cacheFilePath = os.path.join(dirPath, fn)
            self.__mU.mkdir(dirPath)
            if not useCache:
                # Best-effort removal of a stale cache file (it may not exist yet).
                try:
                    os.remove(cacheFilePath)
                except OSError:
                    pass
        #
        dD = None
        if useCache and cacheFilePath and self.__mU.exists(cacheFilePath):
            dD = self.__mU.doImport(cacheFilePath, **cacheKwargs)
            if not isinstance(dD, dict) or "refDbCache" not in dD or "matchInfo" not in dD:
                # Treat an unreadable or incomplete cache file like a missing one.
                logger.warning("Ignoring unusable reference cache file %s", cacheFilePath)
                dD = None
        if dD is None:
            dD = self.__fetchReferenceEntries(refDbName, idList, saveText=saveText, fetchLimit=fetchLimit)
            if cacheFilePath:
                ok = self.__mU.doExport(cacheFilePath, dD, **cacheKwargs)
                logger.info("Cache save status %r", ok)

        return dD["refDbCache"], dD["matchInfo"]

    def __fetchReferenceEntries(self, refDbName, idList, saveText=False, fetchLimit=None):
        """Fetch database entries from the input reference sequence database name.

        Args:
            refDbName (str): reference sequence database name (only "UNP" is fetched)
            idList (list): accessions to fetch
            saveText (bool, optional): passed to UniProtUtils. Defaults to False.
            fetchLimit (int, optional): fetch only the leading fetchLimit accessions. Defaults to None.

        Returns:
            dict: {"refDbName": ..., "refDbCache": ..., "matchInfo": ...} with empty
                  sections when nothing is fetched or the fetch fails
        """
        dD = {"refDbName": refDbName, "refDbCache": {}, "matchInfo": {}}

        try:
            idList = idList[:fetchLimit] if fetchLimit else idList
            logger.info("Starting fetch for %d %s entries", len(idList), refDbName)
            if refDbName == "UNP":
                fobj = UniProtUtils(saveText=saveText)
                refD, matchD = fobj.fetchList(idList)
                dD = {"refDbName": refDbName, "refDbCache": refD, "matchInfo": matchD}
            else:
                logger.warning("Unsupported reference database %r - no entries fetched", refDbName)

        except Exception as e:
            logger.exception("Failing with %s", str(e))

        return dD

    def __dumpEntries(self, refD):
        """Log every key/value pair of each entry in the input reference dictionary."""
        for (eId, eDict) in refD.items():
            logger.info("------ Entry id %s", eId)
            for k, v in eDict.items():
                logger.info("%-15s = %r", k, v)

    def getReferenceAccessionAlignSummary(self):
        """Summarize the alignment of PDB accession assignments with the current reference sequence database.

        Returns:
            (int, int, int): counts of match records marked "primary", marked "secondary",
                             and all remaining records (including those without a "matched" key)
        """
        numPrimary = 0
        numSecondary = 0
        numNone = 0
        for mD in self.__matchD.values():
            matched = mD.get("matched")
            if matched == "primary":
                numPrimary += 1
            elif matched == "secondary":
                numSecondary += 1
            else:
                numNone += 1
        logger.debug("Matched primary: %d secondary: %d none %d", numPrimary, numSecondary, numNone)
        return numPrimary, numSecondary, numNone
@@ -0,0 +1,109 @@
1
+ ##
2
+ # File: ReferenceSequenceUtilsTests.py
3
+ # Author: J. Westbrook
4
+ # Date: 22-Apr-2019
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests updating reference sequence cache
11
+
12
+ """
13
+
14
+ __docformat__ = "google en"
15
+ __author__ = "John Westbrook"
16
+ __email__ = "jwest@rcsb.rutgers.edu"
17
+ __license__ = "Apache 2.0"
18
+
19
+
20
+ import logging
21
+ import os
22
+ import time
23
+ import unittest
24
+
25
+
26
+ from rcsb.exdb.seq.ReferenceSequenceUtils import ReferenceSequenceUtils
27
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
28
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
29
+
30
+
31
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
32
+ logger = logging.getLogger()
33
+
34
+ HERE = os.path.abspath(os.path.dirname(__file__))
35
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
36
+
37
+
38
class ReferenceSequenceUtilsTests(unittest.TestCase):
    """Tests updating the reference sequence cache through ReferenceSequenceUtils."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Prepare the configuration object and the cache settings used by the test."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(self.__mockTopPath, "config", "dbload-setup-example.yml")
        #
        # Caution: this is very site specific setting !
        configName = "site_info_remote"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        if configName != "site_info_configuration":
            self.__cfgOb.replaceSectionName("site_info_configuration", configName)
        #
        self.__workPath = os.path.join(HERE, "test-cache-preserve")
        # Entity polymer cache settings
        self.__entityPolymerCachePath = os.path.join(self.__workPath, "entity-polymer-data-cache.pic")
        self.__entityPolymerCacheKwargs = {"fmt": "pickle"}
        self.__useEntityPolymerCache = True
        # Reference database cache settings
        self.__refDbCachePath = os.path.join(self.__workPath, "unp-data-test-cache.json")
        self.__refDbCacheKwargs = {"fmt": "json", "indent": 3}
        self.__refDbUseCache = True
        self.__fetchLimit = 500
        #
        self.__mU = MarshalUtil()
        #
        self.__startTime = time.time()
        startStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.debug("Starting %s at %s", self.id(), startStamp)

    def tearDown(self):
        """Log the completion time and elapsed seconds of the test."""
        elapsed = time.time() - self.__startTime
        endStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)\n", self.id(), endStamp, elapsed)

    def testUpdateUniProtCache(self):
        """Test case - extract entity polymer info and update reference sequence cache"""
        # NOTE(review): apart from fetchLimit, these keyword names differ from the ones read by
        # the ReferenceSequenceUtils class shipped alongside this test (exdbDirPath, cacheKwargs,
        # useCache, entryLimit, saveText) - confirm which API this test targets.
        settings = {
            "referenceCachePath": self.__refDbCachePath,
            "referenceCacheKwargs": self.__refDbCacheKwargs,
            "useReferenceCache": self.__refDbUseCache,
            "entityPolymerCachePath": self.__entityPolymerCachePath,
            "entityPolymerCacheKwargs": self.__entityPolymerCacheKwargs,
            "useEntityPolymerCache": self.__useEntityPolymerCache,
            "fetchLimit": self.__fetchLimit,
        }
        try:
            refDbName = "UNP"
            rsu = ReferenceSequenceUtils(self.__cfgOb, refDbName, **settings)
            summary = rsu.getReferenceAccessionAlignSummary()
            numPrimary, numSecondary, numNone = summary
            self.assertGreaterEqual(numPrimary, 70)
            logger.info("For %r matched primary: %d secondary: %d none %d", refDbName, numPrimary, numSecondary, numNone)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
98
+
99
+
100
def unpFetchSuite():
    """Return a suite holding the UniProt reference cache update test."""
    selected = [ReferenceSequenceUtilsTests("testUpdateUniProtCache")]
    return unittest.TestSuite(selected)
104
+
105
+
106
if __name__ == "__main__":
    # Run the UniProt cache update suite with verbose reporting.
    mySuite = unpFetchSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
@@ -0,0 +1,109 @@
1
+ ##
2
+ # File: ObjectExtractorTests.py
3
+ # Author: J. Westbrook
4
+ # Date: 25-Apr-2019
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests for the extractor of selected values from collections (limited tests against the mock-data repos)
11
+ """
12
+
13
+ __docformat__ = "google en"
14
+ __author__ = "John Westbrook"
15
+ __email__ = "jwest@rcsb.rutgers.edu"
16
+ __license__ = "Apache 2.0"
17
+
18
+
19
+ import logging
20
+ import os
21
+
22
+ import time
23
+ import unittest
24
+
25
+ from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
26
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
27
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
28
+
29
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
30
+ logger = logging.getLogger()
31
+
32
+ HERE = os.path.abspath(os.path.dirname(__file__))
33
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
34
+
35
+
36
class ObjectExtractorTests(unittest.TestCase):
    """Exercise ObjectExtractor on the pdbx_core polymer entity collection."""

    def __init__(self, methodName="runTest"):
        super(ObjectExtractorTests, self).__init__(methodName)
        self.__verbose = False

    def setUp(self):
        """Prepare the configuration object, marshalling utility and output path."""
        #
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        #
        configName = "site_info_remote_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__workPath = "."
        self.__mU = MarshalUtil(workPath=self.__workPath)
        self.__entityTaxonPath = os.path.join(self.__workPath, "entity_taxon.tdd")
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the completion time and elapsed seconds of the test."""
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)\n", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testExtractEntityTaxonomyContent(self):
        """Test case - extract unique entity source and host taxonomies

        The unique source taxonomy identifiers of each entity are exported as
        tab-delimited lines and the export status is asserted.
        """
        tL = []
        try:
            obEx = ObjectExtractor(
                self.__cfgOb,
                databaseName="pdbx_core",
                collectionName="pdbx_core_polymer_entity",
                useCache=False,
                keyAttribute="entity",
                uniqueAttributes=["rcsb_id"],
                selectionQuery=None,
                selectionList=["rcsb_id", "rcsb_entity_source_organism.ncbi_taxonomy_id", "rcsb_entity_host_organism.ncbi_taxonomy_id"],
            )
            eCount = obEx.getCount()
            logger.info("Polymer entity count is %d", eCount)
            objD = obEx.getObjects()
            sD = {}
            hD = {}
            for rId, eD in objD.items():
                # Entities may lack either organism category (missing key or null value).
                try:
                    for tD in eD["rcsb_entity_source_organism"]:
                        sD.setdefault(rId, []).append(str(tD["ncbi_taxonomy_id"]))
                except (KeyError, TypeError):
                    pass
                try:
                    for tD in eD["rcsb_entity_host_organism"]:
                        hD.setdefault(rId, []).append(str(tD["ncbi_taxonomy_id"]))
                except (KeyError, TypeError):
                    pass
            logger.info("Entities with source taxonomy %d and host taxonomy %d", len(sD), len(hD))
            for rId, taxIdL in sD.items():
                tS = "|".join(sorted(set(taxIdL)))
                if tS:
                    tL.append("%s\t%s" % (rId, tS))
            ok = self.__mU.doExport(self.__entityTaxonPath, tL, fmt="list")
            self.assertTrue(ok)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
99
+
100
+
101
def objectExtractorSuite():
    """Return a suite holding the entity taxonomy extraction test."""
    selected = [ObjectExtractorTests("testExtractEntityTaxonomyContent")]
    return unittest.TestSuite(selected)
105
+
106
+
107
if __name__ == "__main__":
    # Run the object extractor suite with verbose reporting.
    mySuite = objectExtractorSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
@@ -0,0 +1,85 @@
1
+ ##
2
+ # File: EntityPolymerExtractorFixture.py
3
+ # Author: J. Westbrook
4
+ # Date: 25-Mar-2019
5
+ #
6
+ # Updates:
7
+ # 21-Apr-2019 jdw Separate tests against the mock-data repo in this module
8
+ # 4-Sep-201 jdw make this a fixture
9
+ #
10
+ ##
11
+ """
12
+ Fixture extractor to preserve entity polymer data.
13
+
14
+ """
15
+
16
+ __docformat__ = "google en"
17
+ __author__ = "John Westbrook"
18
+ __email__ = "jwest@rcsb.rutgers.edu"
19
+ __license__ = "Apache 2.0"
20
+
21
+
22
+ import logging
23
+ import os
24
+ import time
25
+ import unittest
26
+
27
+ from rcsb.exdb.seq.EntityPolymerExtractor import EntityPolymerExtractor
28
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
29
+
30
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
31
+ logger = logging.getLogger()
32
+
33
+ HERE = os.path.abspath(os.path.dirname(__file__))
34
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
35
+
36
+
37
class EntityPolymerExtractorFixture(unittest.TestCase):
    """Fixture that extracts entity polymer data without using an existing cache."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Prepare the configuration object and the cache directory settings."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(self.__mockTopPath, "config", "dbload-setup-example.yml")
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__cacheKwargs = {"fmt": "pickle"}
        # Cache subdirectory name is read from the configuration section
        exdbDirName = self.__cfgOb.get("EXDB_CACHE_DIR", sectionName=configName)
        self.__exdbCacheDirPath = os.path.join(self.__cachePath, exdbDirName)
        #
        self.__entryLimitTest = None
        #
        self.__startTime = time.time()
        startStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.debug("Starting %s at %s", self.id(), startStamp)

    def tearDown(self):
        """Log the completion time and elapsed seconds of the fixture."""
        elapsed = time.time() - self.__startTime
        endStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.info("Completed %s at %s (%.4f seconds)\n", self.id(), endStamp, elapsed)

    def testExtractEntityPolymers(self):
        """Fixture - extract and save entity polymer info"""
        try:
            extractor = EntityPolymerExtractor(
                self.__cfgOb,
                exdbDirPath=self.__exdbCacheDirPath,
                useCache=False,
                cacheKwargs=self.__cacheKwargs,
                entryLimit=self.__entryLimitTest,
            )
            self.assertGreaterEqual(extractor.getEntryCount(), 10)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
75
+
76
+
77
def entityPolymerExtractSuite():
    """Return a suite holding the entity polymer extraction fixture."""
    selected = [EntityPolymerExtractorFixture("testExtractEntityPolymers")]
    return unittest.TestSuite(selected)
81
+
82
+
83
if __name__ == "__main__":
    # Run the entity polymer extraction fixture with verbose reporting.
    mySuite = entityPolymerExtractSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
@@ -0,0 +1,170 @@
1
+ ##
2
+ # File: EntityInstanceExtractorTests.py
3
+ # Author: J. Westbrook
4
+ # Date: 19-Dec-2019
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests for a preliminary version of the extractor of selected values from entity instance collections.
11
+
12
+ PRELIMINARY VERSION
13
+ """
14
+
15
+ __docformat__ = "google en"
16
+ __author__ = "John Westbrook"
17
+ __email__ = "jwest@rcsb.rutgers.edu"
18
+ __license__ = "Apache 2.0"
19
+
20
+
21
+ import logging
22
+ import os
23
+ import time
24
+ import unittest
25
+
26
+ from rcsb.exdb.seq.EntityInstanceExtractor import EntityInstanceExtractor
27
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
28
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
29
+
30
+
31
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
32
+ logger = logging.getLogger()
33
+
34
+ HERE = os.path.abspath(os.path.dirname(__file__))
35
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
36
+
37
+
38
class EntityInstanceExtractorTests(unittest.TestCase):
    """Tests for the preliminary EntityInstanceExtractor (entry, polymer entity and instance data)."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Prepare the configuration object, output paths and limits used by the tests."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(self.__mockTopPath, "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        # self.__cfgOb.dump()
        self.__resourceName = "MONGO_DB"
        self.__readBackCheck = True
        self.__numProc = 2
        self.__chunkSize = 10
        self.__documentLimit = None
        self.__filterType = "assign-dates"
        #
        # Output locations below the test directory
        self.__workPath = os.path.join(HERE, "test-output")
        self.__entitySavePath = os.path.join(self.__workPath, "entity-data-dictionary.json")
        self.__entrySavePath = os.path.join(self.__workPath, "entry-data-dictionary.json")
        self.__instanceSavePath = os.path.join(self.__workPath, "instance-data-dictionary.json")
        self.__saveKwargs = {"fmt": "json", "indent": 3}
        self.__mU = MarshalUtil()
        self.__entryLimit = 3
        #
        self.__startTime = time.time()
        startStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.debug("Starting %s at %s", self.id(), startStamp)

    def tearDown(self):
        """Log the completion time and elapsed seconds of the test."""
        elapsed = time.time() - self.__startTime
        endStamp = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        logger.debug("Completed %s at %s (%.4f seconds)\n", self.id(), endStamp, elapsed)

    def testExtractEntryInfo(self):
        """Test case - extract entry instance data -"""
        try:
            extractor = EntityInstanceExtractor(self.__cfgOb)
            entryD = extractor.getEntryInfo()
            self.assertTrue(len(entryD) > 15)
            saved = self.__mU.doExport(self.__entrySavePath, entryD, fmt="json")
            self.assertTrue(saved)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testExtractEntityPolymers(self):
        """Test case - extract entity polymer instance data -"""
        try:
            extractor = EntityInstanceExtractor(self.__cfgOb)
            entryD = extractor.getEntryInfo()
            self.assertTrue(len(entryD) > 15)
            saved = self.__mU.doExport(self.__entrySavePath, entryD, fmt="json")
            self.assertTrue(saved)
            logger.info("EntryD length %d", len(entryD))
            # Continue from the exported copy of the entry data
            entryD = self.__mU.doImport(self.__entrySavePath, fmt="json")
            entryD = extractor.getPolymerEntities(
                entryD,
                savePath=self.__entitySavePath,
                entryLimit=None,
                saveKwargs=self.__saveKwargs,
            )
            self.assertTrue(len(entryD) > 15)
            logger.info("EntryD + polymer entities length %d", len(entryD))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testExtractEntityInstances(self):
        """Test case - extract entity instance data -"""
        try:
            extractor = EntityInstanceExtractor(self.__cfgOb)
            entryD = extractor.getEntryInfo()
            self.assertTrue(len(entryD) > 15)
            #
            entryD = extractor.getPolymerEntities(
                entryD,
                savePath=self.__entitySavePath,
                entryLimit=None,
                saveKwargs=self.__saveKwargs,
            )
            self.assertTrue(len(entryD) > 15)
            #
            entryD = extractor.getEntityInstances(
                entryD,
                savePath=self.__instanceSavePath,
                entryLimit=self.__entryLimit,
                saveKwargs=self.__saveKwargs,
            )
            self.assertTrue(len(entryD) > 15)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    def testAnalEntityInstances(self):
        """Test case - analysis of entity instance data -"""
        try:
            extractor = EntityInstanceExtractor(self.__cfgOb)
            entryD = extractor.getEntryInfo()
            self.assertTrue(len(entryD) > 15)
            #
            entryD = extractor.getPolymerEntities(
                entryD,
                savePath=self.__entitySavePath,
                entryLimit=None,
                saveKwargs=self.__saveKwargs,
            )
            self.assertTrue(len(entryD) > 15)
            #
            entryD = extractor.getEntityInstances(
                entryD,
                savePath=self.__instanceSavePath,
                entryLimit=self.__entryLimit,
                saveKwargs=self.__saveKwargs,
            )
            self.assertTrue(len(entryD) > 15)

            logger.info("EntryD + polymer entities instances length %d", len(entryD))
            #
            # entryD = self.__mU.doImport(self.__instanceSavePath, fmt="json")
            # logger.info("entryD %r", entryD)
            for entryId in entryD:
                selectedD = entryD[entryId]["selected_polymer_entities"]
                for entityId, eD in selectedD.items():
                    # Entities without analysis results contribute nothing to the log
                    if "anal_instances" in eD:
                        analD = eD["anal_instances"]
                    else:
                        analD = {}
                    for asymId, aD in analD.items():
                        logger.debug("entryId %s entityId %s asymId %s analD: %r", entryId, entityId, asymId, aD)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
149
+
150
+
151
def entityInstanceExtractSuite():
    """Return a suite holding the polymer entity, instance and analysis tests (in that order)."""
    testNames = ("testExtractEntityPolymers", "testExtractEntityInstances", "testAnalEntityInstances")
    return unittest.TestSuite([EntityInstanceExtractorTests(name) for name in testNames])
157
+
158
+
159
def entryExtractSuite():
    """Return a suite holding the entry information extraction test."""
    selected = [EntityInstanceExtractorTests("testExtractEntryInfo")]
    return unittest.TestSuite(selected)
163
+
164
+
165
if __name__ == "__main__":
    # Run the entity instance suite first, then the entry information suite.
    for buildSuite in (entityInstanceExtractSuite, entryExtractSuite):
        mySuite = buildSuite()
        unittest.TextTestRunner(verbosity=2).run(mySuite)