rcsb.exdb 1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. rcsb/__init__.py +1 -0
  2. rcsb/exdb/__init__.py +1 -0
  3. rcsb/exdb/branch/BranchedEntityExtractor.py +82 -0
  4. rcsb/exdb/branch/GlycanProvider.py +116 -0
  5. rcsb/exdb/branch/GlycanUtils.py +114 -0
  6. rcsb/exdb/branch/__init__.py +0 -0
  7. rcsb/exdb/chemref/ChemRefEtlWorker.py +118 -0
  8. rcsb/exdb/chemref/ChemRefExtractor.py +70 -0
  9. rcsb/exdb/chemref/ChemRefMappingProvider.py +139 -0
  10. rcsb/exdb/chemref/PubChemDataCacheProvider.py +372 -0
  11. rcsb/exdb/chemref/PubChemEtlWrapper.py +280 -0
  12. rcsb/exdb/chemref/PubChemIndexCacheProvider.py +638 -0
  13. rcsb/exdb/chemref/__init__.py +0 -0
  14. rcsb/exdb/citation/CitationAdapter.py +91 -0
  15. rcsb/exdb/citation/CitationExtractor.py +190 -0
  16. rcsb/exdb/citation/CitationUtils.py +51 -0
  17. rcsb/exdb/citation/__init__.py +0 -0
  18. rcsb/exdb/cli/__init__.py +0 -0
  19. rcsb/exdb/entry/EntryInfoProvider.py +148 -0
  20. rcsb/exdb/entry/__init__.py +0 -0
  21. rcsb/exdb/examples-seq/EntityInstanceExtractor.py +557 -0
  22. rcsb/exdb/examples-seq/EntityPolymerExtractor.py +544 -0
  23. rcsb/exdb/examples-seq/EntityPolymerExtractorFullTests.py +176 -0
  24. rcsb/exdb/examples-seq/ReferenceSequenceAssignmentUpdater.py +449 -0
  25. rcsb/exdb/examples-seq/ReferenceSequenceUtils.py +123 -0
  26. rcsb/exdb/examples-seq/ReferenceSequenceUtilsTests.py +109 -0
  27. rcsb/exdb/examples-seq/exampleObjectExtractor.py +109 -0
  28. rcsb/exdb/examples-seq/fixtureEntityPolymerExtractor.py +85 -0
  29. rcsb/exdb/examples-seq/testEntityInstanceExtractor.py +170 -0
  30. rcsb/exdb/examples-seq/testEntityPolymerExtractor.py +171 -0
  31. rcsb/exdb/examples-seq/testReferenceSequenceAssignmentUpdater.py +79 -0
  32. rcsb/exdb/examples-seq/testReferenceSequenceUtils.py +108 -0
  33. rcsb/exdb/seq/AnnotationExtractor.py +76 -0
  34. rcsb/exdb/seq/LigandNeighborMappingExtractor.py +84 -0
  35. rcsb/exdb/seq/LigandNeighborMappingProvider.py +106 -0
  36. rcsb/exdb/seq/PolymerEntityExtractor.py +328 -0
  37. rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +598 -0
  38. rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +228 -0
  39. rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +534 -0
  40. rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +388 -0
  41. rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +397 -0
  42. rcsb/exdb/seq/TaxonomyExtractor.py +69 -0
  43. rcsb/exdb/seq/UniProtCoreEtlWorker.py +177 -0
  44. rcsb/exdb/seq/UniProtExtractor.py +80 -0
  45. rcsb/exdb/seq/__init__.py +0 -0
  46. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +19 -0
  47. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +12 -0
  48. rcsb/exdb/tests/__init__.py +0 -0
  49. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +104 -0
  50. rcsb/exdb/tests/fixturePdbxLoader.py +298 -0
  51. rcsb/exdb/tests/test-data/components-abbrev.cif +2739 -0
  52. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +9171 -0
  53. rcsb/exdb/tests/testAnnotationExtractor.py +79 -0
  54. rcsb/exdb/tests/testBranchedEntityExtractor.py +81 -0
  55. rcsb/exdb/tests/testChemRefLoader.py +106 -0
  56. rcsb/exdb/tests/testChemRefMappingProvider.py +95 -0
  57. rcsb/exdb/tests/testCitationAdapter.py +97 -0
  58. rcsb/exdb/tests/testCitationExtractor.py +93 -0
  59. rcsb/exdb/tests/testCitationUtils.py +92 -0
  60. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +70 -0
  61. rcsb/exdb/tests/testEntryInfoProvider.py +97 -0
  62. rcsb/exdb/tests/testGlycanEtlWorkflow.py +70 -0
  63. rcsb/exdb/tests/testGlycanProvider.py +98 -0
  64. rcsb/exdb/tests/testGlycanUtils.py +64 -0
  65. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +90 -0
  66. rcsb/exdb/tests/testObjectExtractor.py +342 -0
  67. rcsb/exdb/tests/testObjectTransformer.py +83 -0
  68. rcsb/exdb/tests/testObjectUpdater.py +120 -0
  69. rcsb/exdb/tests/testPolymerEntityExtractor.py +93 -0
  70. rcsb/exdb/tests/testPubChemDataCacheProvider.py +124 -0
  71. rcsb/exdb/tests/testPubChemEtlWorkflow.py +134 -0
  72. rcsb/exdb/tests/testPubChemEtlWrapper.py +155 -0
  73. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +123 -0
  74. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +106 -0
  75. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +121 -0
  76. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +122 -0
  77. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +117 -0
  78. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +94 -0
  79. rcsb/exdb/tests/testTaxonomyExtractor.py +75 -0
  80. rcsb/exdb/tests/testTreeNodeListWorker.py +111 -0
  81. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +99 -0
  82. rcsb/exdb/tests/testUniProtExtractor.py +77 -0
  83. rcsb/exdb/tree/TreeNodeListWorker.py +228 -0
  84. rcsb/exdb/tree/__init__.py +0 -0
  85. rcsb/exdb/utils/ObjectAdapterBase.py +22 -0
  86. rcsb/exdb/utils/ObjectExtractor.py +286 -0
  87. rcsb/exdb/utils/ObjectTransformer.py +124 -0
  88. rcsb/exdb/utils/ObjectUpdater.py +121 -0
  89. rcsb/exdb/utils/ObjectValidator.py +160 -0
  90. rcsb/exdb/utils/__init__.py +0 -0
  91. rcsb/exdb/wf/EntryInfoEtlWorkflow.py +71 -0
  92. rcsb/exdb/wf/GlycanEtlWorkflow.py +76 -0
  93. rcsb/exdb/wf/PubChemEtlWorkflow.py +240 -0
  94. rcsb/exdb/wf/__init__.py +0 -0
  95. rcsb_exdb-1.31.dist-info/METADATA +103 -0
  96. rcsb_exdb-1.31.dist-info/RECORD +98 -0
  97. rcsb_exdb-1.31.dist-info/WHEEL +4 -0
  98. rcsb_exdb-1.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,171 @@
1
+ ##
2
+ # File: EntityPolymerExtractorTests.py
3
+ # Author: J. Westbrook
4
+ # Date: 25-Mar-2019
5
+ #
6
+ # Updates:
7
+ # 21-Apr-2019 jdw Separate tests against the mock-data repo in this module
8
+ #
9
+ ##
10
+ """
11
+ Tests for extractor entity polymer collections (limited tests from mock-data repos)
12
+
13
+ """
14
+
15
+ __docformat__ = "google en"
16
+ __author__ = "John Westbrook"
17
+ __email__ = "jwest@rcsb.rutgers.edu"
18
+ __license__ = "Apache 2.0"
19
+
20
+
21
+ import logging
22
+ import os
23
+ import time
24
+ import unittest
25
+
26
+ from rcsb.exdb.seq.EntityPolymerExtractor import EntityPolymerExtractor
27
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
28
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
29
+ from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider
30
+
31
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
32
+ logger = logging.getLogger()
33
+
34
+ HERE = os.path.abspath(os.path.dirname(__file__))
35
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
36
+
37
+
38
class EntityPolymerExtractorTests(unittest.TestCase):
    """Exercise EntityPolymerExtractor using the mock-data example configuration.

    Every test constructs a new extractor with ``useCache=False``.  Unexpected
    exceptions (including assertion failures) are logged with a traceback and
    then reported as a test failure that carries the exception text.
    """

    def __init__(self, methodName="runTest"):
        super(EntityPolymerExtractorTests, self).__init__(methodName)
        # Not read anywhere in this class.
        self.__verbose = True

    def setUp(self):
        """Resolve the configuration object and the cache/work paths used by the tests."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__workPath = os.path.join(HERE, "test-output")
        self.__taxonomyDataPath = os.path.join(self.__cachePath, self.__cfgOb.get("NCBI_TAXONOMY_CACHE_DIR", sectionName=configName))
        #
        self.__cacheKwargs = {"fmt": "json", "indent": 3}
        self.__exdbCacheDirPath = os.path.join(self.__cachePath, self.__cfgOb.get("EXDB_CACHE_DIR", sectionName=configName))
        #
        self.__mU = MarshalUtil()
        # Lower bound asserted on the extracted entry count.
        self.__entryLimitTest = 18
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the elapsed wall-clock time for the completed test."""
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)\n", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testExtractEntityPolymers(self):
        """Test case - extract entity polymer info"""
        try:
            epe = EntityPolymerExtractor(self.__cfgOb, exdbDirPath=self.__exdbCacheDirPath, useCache=False, cacheKwargs=self.__cacheKwargs, entryLimit=self.__entryLimitTest)
            eCount = epe.getEntryCount()
            self.assertGreaterEqual(eCount, self.__entryLimitTest)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Pass the cause along so the unittest report is self-explanatory.
            self.fail(str(e))

    def testAccessEntityPolymerFeatures(self):
        """Test case - access reference sequence accessions in the extracted entity polymer info"""
        try:
            epe = EntityPolymerExtractor(self.__cfgOb, exdbDirPath=self.__exdbCacheDirPath, useCache=False, cacheKwargs=self.__cacheKwargs)
            eCount = epe.getEntryCount()
            logger.info("Entry count %d", eCount)
            self.assertGreaterEqual(eCount, self.__entryLimitTest)
            #
            unpL = epe.getRefSeqAccessions("UNP")
            logger.info("Ref seq count %d", len(unpL))
            self.assertGreaterEqual(len(unpL), 1)
            #
            for entryId in ["3RER"]:
                for entityId in ["1"]:
                    uL = epe.getEntityRefSeqAccessions("UNP", entryId, entityId)
                    logger.info("UNP for %s %s %r", entryId, entityId, uL)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail(str(e))

    def testTaxonomyReadCache(self):
        """Test case - list extracted taxonomy identifiers that map to a different merged identifier"""
        try:
            epe = EntityPolymerExtractor(self.__cfgOb, exdbDirPath=self.__exdbCacheDirPath, useCache=False, cacheKwargs=self.__cacheKwargs)
            logger.info("Cache entry count %d", epe.getEntryCount())
            #
            obsL = []
            tD = epe.getOrigTaxons()
            logger.info("Taxons %d", len(tD))

            tU = TaxonomyProvider(taxDirPath=self.__taxonomyDataPath, useCache=True)
            #
            for entryId, taxIdL in tD.items():
                for entityId, iTaxId in taxIdL:
                    mTaxId = tU.getMergedTaxId(iTaxId)
                    # Record only identifiers whose merged identifier differs.
                    if iTaxId != mTaxId:
                        obsL.append({"entryId": entryId, "entityId": entityId, "taxId": iTaxId, "replaceTaxId": mTaxId})
            logger.info("Obsolete list length %d", len(obsL))
            self.__mU.doExport(os.path.join(self.__workPath, "obsolete-taxons.json"), obsL, fmt="json", indent=3)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail(str(e))

    def testAccessEntityPolymerReadCache(self):
        """Test case - summarize reference sequence and taxonomy counts in the extracted entity polymer info"""
        try:
            epe = EntityPolymerExtractor(self.__cfgOb, exdbDirPath=self.__exdbCacheDirPath, useCache=False, cacheKwargs=self.__cacheKwargs)
            logger.info("Cache entry count %d", epe.getEntryCount())
            cD = epe.countRefSeqAccessions("UNP")
            self.assertGreaterEqual(len(cD), 2)
            logger.info("UNP reference sequences per entity %r", dict(sorted(cD.items())))
            logger.info("Reference sequences per entity %r", dict(sorted(epe.countRefSeqAccessionAny().items())))
            logger.info("Reference sequences per ref db %r", dict(sorted(epe.countRefSeqAccessionDbType().items())))
            #
            ok = epe.checkRefSeqAlignRange("UNP")
            self.assertTrue(ok)
            unpL = epe.getRefSeqAccessions("UNP")
            logger.info("Unique UNP reference sequences %d", len(unpL))
            # Check the accession list itself (previously `ok` was re-asserted here),
            # mirroring the assertion in testAccessEntityPolymerFeatures.
            self.assertGreaterEqual(len(unpL), 1)
            tD = epe.getUniqueTaxons()
            logger.info("Unique taxons %d", len(tD))
            tD = epe.countRefSeqAccessionByTaxon("UNP")
            logger.info("Unique taxons %d", len(tD))
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail(str(e))
150
+
151
+
152
def entityPolymerExtractSuite():
    """Build the suite of entity polymer extraction and access tests.

    Returns:
        unittest.TestSuite: the selected EntityPolymerExtractorTests cases, in run order
    """
    testNames = (
        "testExtractEntityPolymers",
        "testAccessEntityPolymerFeatures",
        "testAccessEntityPolymerReadCache",
    )
    suiteSelect = unittest.TestSuite()
    suiteSelect.addTests(EntityPolymerExtractorTests(testName) for testName in testNames)
    return suiteSelect
158
+
159
+
160
def entityTaxonomyExtractSuite():
    """Build the suite holding the single taxonomy test.

    Returns:
        unittest.TestSuite: suite containing testTaxonomyReadCache
    """
    return unittest.TestSuite([EntityPolymerExtractorTests("testTaxonomyReadCache")])
164
+
165
+
166
if __name__ == "__main__":
    # Run the polymer suite first and then the taxonomy suite, each with its own runner.
    for suiteBuilder in (entityPolymerExtractSuite, entityTaxonomyExtractSuite):
        unittest.TextTestRunner(verbosity=2).run(suiteBuilder())
@@ -0,0 +1,79 @@
1
+ ##
2
+ # File: UpdateReferenceSequencesTests.py
3
+ # Author: J. Westbrook
4
+ # Date: 12-Oct-2019
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests for reference sequence assignment update operations
11
+ """
12
+
13
+ __docformat__ = "google en"
14
+ __author__ = "John Westbrook"
15
+ __email__ = "jwest@rcsb.rutgers.edu"
16
+ __license__ = "Apache 2.0"
17
+
18
+ import logging
19
+ import os
20
+ import time
21
+ import unittest
22
+
23
+ from rcsb.exdb.seq.ReferenceSequenceAssignmentUpdater import ReferenceSequenceAssignmentUpdater
24
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
25
+
26
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
27
+ logger = logging.getLogger()
28
+
29
+ HERE = os.path.abspath(os.path.dirname(__file__))
30
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
31
+
32
+
33
class ReferenceSequenceAssignmentUpdaterTests(unittest.TestCase):
    """Exercise ReferenceSequenceAssignmentUpdater using the mock-data example configuration.

    Unexpected exceptions are logged with a traceback and then reported as a
    test failure that carries the exception text.
    """

    def __init__(self, methodName="runTest"):
        super(ReferenceSequenceAssignmentUpdaterTests, self).__init__(methodName)
        # Not read anywhere in this class.
        self.__verbose = True

    def setUp(self):
        """Resolve the configuration object and the cache path used by the test."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        # NOTE(review): __resourceName and __testEntityCacheKwargs are set but never read in this class.
        self.__resourceName = "MONGO_DB"
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        self.__testEntityCacheKwargs = {"fmt": "json", "indent": 3}
        # Passed through as fetchLimit to the updater.
        self.__fetchLimitTest = None
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the elapsed wall-clock time for the completed test."""
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)\n", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testUpdateAssignments(self):
        """Test case - get reference sequences and update candidates"""
        try:
            rsau = ReferenceSequenceAssignmentUpdater(self.__cfgOb, useCache=False, cachePath=self.__cachePath, fetchLimit=self.__fetchLimitTest, siftsAbbreviated="TEST")
            updateLimit = None
            updateId = "2019_01"
            lenUpd, numUpd = rsau.doUpdate(updateId, updateLimit=updateLimit)
            logger.info("Update length %d numUpd %d", lenUpd, numUpd)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Pass the cause along so the unittest report is self-explanatory.
            self.fail(str(e))
69
+
70
+
71
def referenceUpdaterSuite():
    """Build the suite holding the reference sequence assignment update test.

    Returns:
        unittest.TestSuite: suite containing testUpdateAssignments
    """
    return unittest.TestSuite([ReferenceSequenceAssignmentUpdaterTests("testUpdateAssignments")])
75
+
76
+
77
if __name__ == "__main__":
    # Run the updater suite with verbose per-test reporting.
    unittest.TextTestRunner(verbosity=2).run(referenceUpdaterSuite())
@@ -0,0 +1,108 @@
1
+ ##
2
+ # File: ReferenceSequenceUtilsTests.py
3
+ # Author: J. Westbrook
4
+ # Date: 25-Mar-2019
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests for accessing reference sequence data corresponding to polymer entity sequence assignments.
11
+
12
+ (Limited tests against the mock-data repos.)
13
+
14
+ """
15
+
16
+ __docformat__ = "google en"
17
+ __author__ = "John Westbrook"
18
+ __email__ = "jwest@rcsb.rutgers.edu"
19
+ __license__ = "Apache 2.0"
20
+
21
+
22
+ import logging
23
+ import os
24
+ import time
25
+ import unittest
26
+
27
+
28
+ from rcsb.exdb.seq.ReferenceSequenceUtils import ReferenceSequenceUtils
29
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
30
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
31
+
32
+
33
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
34
+ logger = logging.getLogger()
35
+
36
+ HERE = os.path.abspath(os.path.dirname(__file__))
37
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
38
+
39
+
40
class ReferenceSequenceUtilsTests(unittest.TestCase):
    """Exercise ReferenceSequenceUtils using the mock-data example configuration.

    Unexpected exceptions are logged with a traceback and then reported as a
    test failure that carries the exception text.
    """

    def __init__(self, methodName="runTest"):
        super(ReferenceSequenceUtilsTests, self).__init__(methodName)
        # Not read anywhere in this class.
        self.__verbose = True

    def setUp(self):
        """Resolve the configuration object, cache paths and limits used by the test."""
        self.__mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        configPath = os.path.join(TOPDIR, "rcsb", "mock-data", "config", "dbload-setup-example.yml")
        #
        configName = "site_info_configuration"
        self.__cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=self.__mockTopPath)
        #
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        #
        # Assigned once here; the original set the identical value twice.
        self.__cacheKwargs = {"fmt": "json", "indent": 3}
        self.__exdbCacheDirPath = os.path.join(self.__cachePath, self.__cfgOb.get("EXDB_CACHE_DIR", sectionName=configName))
        #
        # Reference sequence test data cache -
        # NOTE(review): __refDbCachePath and __mU are set but never read in this class.
        self.__refDbCachePath = os.path.join(HERE, "test-output", "unp-data-test-cache.json")
        self.__useCache = False
        self.__fetchLimit = None
        #
        # Entity polymer extracted data ...
        #
        self.__entryLimit = 500
        #
        self.__mU = MarshalUtil()
        #
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the elapsed wall-clock time for the completed test."""
        endTime = time.time()
        logger.info("Completed %s at %s (%.4f seconds)\n", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)

    def testFetchUnp(self):
        """Test case - summarize the alignment of UNP reference accessions for extracted polymer entities"""
        try:
            refDbName = "UNP"
            rsu = ReferenceSequenceUtils(
                self.__cfgOb,
                refDbName,
                exdbDirPath=self.__exdbCacheDirPath,
                cacheKwargs=self.__cacheKwargs,
                useCache=self.__useCache,
                entryLimit=self.__entryLimit,
                fetchLimit=self.__fetchLimit,
            )
            numPrimary, numSecondary, numNone = rsu.getReferenceAccessionAlignSummary()
            self.assertGreaterEqual(numPrimary, 70)
            logger.info("For %r matched primary: %d secondary: %d none %d", refDbName, numPrimary, numSecondary, numNone)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            # Pass the cause along so the unittest report is self-explanatory.
            self.fail(str(e))
98
+
99
+
100
def unpFetchSuite():
    """Build the suite holding the UNP reference sequence fetch test.

    Returns:
        unittest.TestSuite: suite containing testFetchUnp
    """
    return unittest.TestSuite([ReferenceSequenceUtilsTests("testFetchUnp")])
104
+
105
+
106
if __name__ == "__main__":
    # Run the UNP fetch suite with verbose per-test reporting.
    unittest.TextTestRunner(verbosity=2).run(unpFetchSuite())
@@ -0,0 +1,76 @@
1
+ ##
2
+ # File: AnnotationExtractor.py
3
+ # Date: 15-Oct-2019 jdw
4
+ #
5
+ # Utilities to extract selected annotation details from the exchange collections.
6
+ #
7
+ # Currently used to establish covered annotations for scoping tree browser displays
8
+ # for expansive annotation hierarchies.
9
+ #
10
+ # Updates:
11
+ #
12
+ ##
13
+ __docformat__ = "google en"
14
+ __author__ = "John Westbrook"
15
+ __email__ = "jwest@rcsb.rutgers.edu"
16
+ __license__ = "Apache 2.0"
17
+
18
+ import logging
19
+
20
+ from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class AnnotationExtractor(object):
    """Utilities to extract selected annotation details from the exchange collections."""

    def __init__(self, cfgOb):
        """
        Args:
            cfgOb (obj): configuration object handed unchanged to ObjectExtractor
        """
        self.__cfgOb = cfgOb

    def getUniqueIdentifiers(self, annotationType):
        """Extract unique rcsb_polymer_entity_annotation ids for the input annotation type.

        Args:
            annotationType (str): a value of rcsb_polymer_entity_annotation.type

        Returns:
            list: unique list of identifiers of annotationType (empty if nothing
                matches or if the extraction fails; failures are logged)
        """
        return self.__extractEntityAnnotationIdentifiers(annotationType)

    def __extractEntityAnnotationIdentifiers(self, annotationType):
        """Extract unique rcsb_polymer_entity_annotation ids for the input annotation type.

        All polymer entity objects are fetched (no selection query) and the
        annotation type is filtered here on the client side.

        Returns:
            list: unique annotation identifiers, or an empty list on failure
        """
        try:
            obEx = ObjectExtractor(
                self.__cfgOb,
                databaseName="pdbx_core",
                collectionName="pdbx_core_polymer_entity",
                cacheFilePath=None,
                useCache=False,
                keyAttribute="entity",
                uniqueAttributes=["rcsb_id"],
                cacheKwargs=None,
                objectLimit=None,
                selectionQuery=None,
                selectionList=["rcsb_id", "rcsb_polymer_entity_annotation.annotation_id", "rcsb_polymer_entity_annotation.type"],
            )
            eCount = obEx.getCount()
            logger.info("For type %r polymer entity annotation object count is %d", annotationType, eCount)
            idS = set()
            for eD in obEx.getObjects().values():
                try:
                    annL = eD["rcsb_polymer_entity_annotation"]
                except (KeyError, TypeError):
                    # Entities carrying no annotation category are simply skipped.
                    continue
                for tD in annL or []:
                    # Handle each record on its own so that one malformed record
                    # does not discard the remaining annotations of the entity.
                    try:
                        if tD["type"] == annotationType:
                            idS.add(tD["annotation_id"])
                    except (KeyError, TypeError) as e:
                        logger.debug("Skipping malformed annotation record %r (%s)", tD, str(e))
            logger.info("Unique identifiers %d", len(idS))
            return list(idS)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        # Keep the documented list return type on failure (previously None).
        return []
@@ -0,0 +1,84 @@
1
+ ##
2
+ # File: LigandNeighborMappingExtractor.py
3
+ # Date: 28-Jun-2021 jdw
4
+ #
5
+ # Utilities to extract ligand neighbor mapping details from the exchange collections.
6
+ #
7
+ # Updates:
8
+ # 17-Jul-2024 dwp Stop fetching and including rcsb_ligand_neighbors.ligand_is_bound, since no longer populating that field
9
+ #
10
+ ##
11
+ __docformat__ = "google en"
12
+ __author__ = "John Westbrook"
13
+ __email__ = "jwest@rcsb.rutgers.edu"
14
+ __license__ = "Apache 2.0"
15
+
16
+ import logging
17
+
18
+ from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class LigandNeighborMappingExtractor(object):
    """Utilities to extract ligand neighbor mapping details from the exchange collections."""

    def __init__(self, cfgOb):
        """
        Args:
            cfgOb (obj): configuration object handed unchanged to ObjectExtractor
        """
        self.__cfgOb = cfgOb

    def getLigandNeighbors(self):
        """Extract unique chemical component ids involved in neighbor interactions with each
        polymer and branched entity instance.

        Returns:
            dict: {'entryId_entityId': [chem_comp_id, ...], } (empty if the
                extraction fails; failures are logged)
        """
        return self.__extractLigandNeighbors()

    def __extractLigandNeighbors(self):
        """Extract unique chemical component ids involved in neighbor interactions with each
        polymer and branched entity instance.

        Component ids from all instances of an entity are merged under the key
        entryId + '_' + entityId.

        Returns:
            dict: {'entryId_entityId': [chem_comp_id, ...], } or an empty dict on failure
        """
        try:
            obEx = ObjectExtractor(
                self.__cfgOb,
                databaseName="pdbx_core",
                collectionName="pdbx_core_polymer_entity_instance",
                cacheFilePath=None,
                useCache=False,
                keyAttribute="rcsb_id",
                uniqueAttributes=["rcsb_id"],
                cacheKwargs=None,
                objectLimit=None,
                selectionQuery=None,
                selectionList=[
                    "rcsb_id",
                    "rcsb_polymer_entity_instance_container_identifiers.entry_id",
                    "rcsb_polymer_entity_instance_container_identifiers.entity_id",
                    "rcsb_polymer_entity_instance_container_identifiers.asym_id",
                    "rcsb_ligand_neighbors.ligand_comp_id",
                ],
            )
            eCount = obEx.getCount()
            logger.info("Total neighbor count (%d)", eCount)
            compIdSetD = {}
            for peiD in obEx.getObjects().values():
                # A malformed instance is logged and skipped; it does not abort the extraction.
                try:
                    idD = peiD["rcsb_polymer_entity_instance_container_identifiers"]
                    entryId = idD["entry_id"]
                    entityId = idD["entity_id"]
                    ky = entryId + "_" + entityId
                    for lnD in peiD.get("rcsb_ligand_neighbors") or []:
                        if "ligand_comp_id" in lnD:
                            compIdSetD.setdefault(ky, set()).add(lnD["ligand_comp_id"])
                        else:
                            logger.warning("%s %s missing details lnD %r", entryId, entityId, lnD)
                except Exception as e:
                    logger.exception("Failing with %s", str(e))
            # Sorted lists keep the returned mapping reproducible between runs.
            rD = {k: sorted(v) for k, v in compIdSetD.items()}
            logger.info("Unique instance %d", len(rD))
            return rD
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        # Keep the documented dict return type on failure (previously None).
        return {}
@@ -0,0 +1,106 @@
1
+ ##
2
+ # File: LigandNeighborMappingProvider.py
3
+ # Date: 28-Jun-2021 jdw
4
+ #
5
+ # Updated:
6
+ #
7
+ ##
8
+ """
9
+ Accessors for essential ligand neighbor mapping details associated with polymer and branched
10
+ entity instances.
11
+ """
12
+
13
+ import datetime
14
+ import logging
15
+ import os.path
16
+ import time
17
+
18
+ from rcsb.utils.io.FileUtil import FileUtil
19
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
20
+ from rcsb.utils.io.StashableBase import StashableBase
21
+ from rcsb.exdb.seq.LigandNeighborMappingExtractor import LigandNeighborMappingExtractor
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class LigandNeighborMappingProvider(StashableBase):
    """Accessors for essential ligand neighbor mapping details associated with polymer and branched
    entity instances."""

    def __init__(self, cachePath, useCache=True):
        """
        Args:
            cachePath (str): top-level cache directory; mapping data is kept in the
                'ligand-neighbor-mapping' subdirectory
            useCache (bool, optional): load an existing mapping file when True. Defaults to True.
        """
        self.__cachePath = cachePath
        self.__useCache = useCache
        self.__dirName = "ligand-neighbor-mapping"
        super(LigandNeighborMappingProvider, self).__init__(self.__cachePath, [self.__dirName])
        self.__dirPath = os.path.join(self.__cachePath, self.__dirName)
        #
        self.__mU = MarshalUtil(workPath=self.__dirPath)
        self.__mapD = self.__reload(self.__dirPath, useCache)

    def __getMapping(self):
        """Return the loaded mapping dictionary, or an empty dictionary when none is usable."""
        mapD = self.__mapD.get("mapping") if isinstance(self.__mapD, dict) else None
        return mapD if isinstance(mapD, dict) else {}

    def testCache(self, minCount=0):
        """Check that the loaded mapping holds at least minCount entries.

        Args:
            minCount (int, optional): minimum number of mapping keys required;
                0 disables the check. Defaults to 0.

        Returns:
            bool: True if minCount is 0 or the mapping is large enough
        """
        num = len(self.__getMapping())
        logger.info("Cached ligand neighbor mapping count %d", num)
        return minCount == 0 or (num > 0 and num >= minCount)

    def getLigandNeighbors(self, rcsbEntityId):
        """Get the unique list of ligand neighbors for the input polymer or branched entity instance.

        Args:
            rcsbEntityId (str): entryId '_' entityId

        Returns:
            list: [chem_comp_id, ... ] (empty when the identifier is unknown or invalid)
        """
        try:
            # Keys are looked up in upper case.
            return list(set(self.__getMapping()[rcsbEntityId.upper()]))
        except (KeyError, AttributeError, TypeError):
            return []

    def reload(self):
        """Reload the mapping from the cache file (always with useCache=True)."""
        self.__mapD = self.__reload(self.__dirPath, useCache=True)

    def __reload(self, dirPath, useCache):
        """Load the mapping file when permitted and present; otherwise ensure dirPath exists.

        Returns:
            dict: imported content with a 'mapping' dictionary, or an empty dictionary
        """
        startTime = time.time()
        retD = {}
        ok = False
        mappingPath = self.__getMappingDataPath()
        #
        logger.info("useCache %r mappingPath %r", useCache, mappingPath)
        if useCache and self.__mU.exists(mappingPath):
            impD = self.__mU.doImport(mappingPath, fmt="json")
            # Accept only a payload with a dictionary under 'mapping'; anything else
            # (failed import, null mapping) is treated as an empty cache instead of
            # raising later in the accessors.
            if isinstance(impD, dict) and isinstance(impD.get("mapping"), dict):
                retD = impD
                ok = True
            else:
                logger.warning("Ignoring unusable ligand neighbor mapping file %r", mappingPath)
        else:
            fU = FileUtil()
            fU.mkdir(dirPath)
        # ---
        num = len(retD["mapping"]) if "mapping" in retD else 0
        logger.info("Completed ligand mapping reload (%d) with status (%r) at %s (%.4f seconds)", num, ok, time.strftime("%Y %m %d %H:%M:%S", time.localtime()), time.time() - startTime)
        return retD

    def __getMappingDataPath(self):
        """Return the path of the JSON mapping file inside the provider directory."""
        return os.path.join(self.__dirPath, "ligand-neighbor-mapping-data.json")

    def fetchLigandNeighborMapping(self, cfgOb):
        """Fetch ligand neighbor mapping details and export them to the mapping file.

        The in-memory mapping is not refreshed here; call reload() afterwards.

        Args:
            cfgOb (obj): instance configuration class ConfigUtil()

        Returns:
            bool: True for success or False otherwise
        """
        try:
            lnmEx = LigandNeighborMappingExtractor(cfgOb)
            lnD = lnmEx.getLigandNeighbors()
            if lnD is None:
                # A failed extraction must not overwrite an existing mapping file.
                logger.error("Ligand neighbor extraction returned no data - mapping file not updated")
                return False
            fp = self.__getMappingDataPath()
            # Sample the clock once so 'version' and 'created' always agree.
            now = datetime.datetime.now()
            tS = now.isoformat()
            vS = now.strftime("%Y-%m-%d")
            ok = self.__mU.doExport(fp, {"version": vS, "created": tS, "mapping": lnD}, fmt="json", indent=3)
            return ok
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return False