rcsb.exdb 1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. rcsb/__init__.py +1 -0
  2. rcsb/exdb/__init__.py +1 -0
  3. rcsb/exdb/branch/BranchedEntityExtractor.py +82 -0
  4. rcsb/exdb/branch/GlycanProvider.py +116 -0
  5. rcsb/exdb/branch/GlycanUtils.py +114 -0
  6. rcsb/exdb/branch/__init__.py +0 -0
  7. rcsb/exdb/chemref/ChemRefEtlWorker.py +118 -0
  8. rcsb/exdb/chemref/ChemRefExtractor.py +70 -0
  9. rcsb/exdb/chemref/ChemRefMappingProvider.py +139 -0
  10. rcsb/exdb/chemref/PubChemDataCacheProvider.py +372 -0
  11. rcsb/exdb/chemref/PubChemEtlWrapper.py +280 -0
  12. rcsb/exdb/chemref/PubChemIndexCacheProvider.py +638 -0
  13. rcsb/exdb/chemref/__init__.py +0 -0
  14. rcsb/exdb/citation/CitationAdapter.py +91 -0
  15. rcsb/exdb/citation/CitationExtractor.py +190 -0
  16. rcsb/exdb/citation/CitationUtils.py +51 -0
  17. rcsb/exdb/citation/__init__.py +0 -0
  18. rcsb/exdb/cli/__init__.py +0 -0
  19. rcsb/exdb/entry/EntryInfoProvider.py +148 -0
  20. rcsb/exdb/entry/__init__.py +0 -0
  21. rcsb/exdb/examples-seq/EntityInstanceExtractor.py +557 -0
  22. rcsb/exdb/examples-seq/EntityPolymerExtractor.py +544 -0
  23. rcsb/exdb/examples-seq/EntityPolymerExtractorFullTests.py +176 -0
  24. rcsb/exdb/examples-seq/ReferenceSequenceAssignmentUpdater.py +449 -0
  25. rcsb/exdb/examples-seq/ReferenceSequenceUtils.py +123 -0
  26. rcsb/exdb/examples-seq/ReferenceSequenceUtilsTests.py +109 -0
  27. rcsb/exdb/examples-seq/exampleObjectExtractor.py +109 -0
  28. rcsb/exdb/examples-seq/fixtureEntityPolymerExtractor.py +85 -0
  29. rcsb/exdb/examples-seq/testEntityInstanceExtractor.py +170 -0
  30. rcsb/exdb/examples-seq/testEntityPolymerExtractor.py +171 -0
  31. rcsb/exdb/examples-seq/testReferenceSequenceAssignmentUpdater.py +79 -0
  32. rcsb/exdb/examples-seq/testReferenceSequenceUtils.py +108 -0
  33. rcsb/exdb/seq/AnnotationExtractor.py +76 -0
  34. rcsb/exdb/seq/LigandNeighborMappingExtractor.py +84 -0
  35. rcsb/exdb/seq/LigandNeighborMappingProvider.py +106 -0
  36. rcsb/exdb/seq/PolymerEntityExtractor.py +328 -0
  37. rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +598 -0
  38. rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +228 -0
  39. rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +534 -0
  40. rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +388 -0
  41. rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +397 -0
  42. rcsb/exdb/seq/TaxonomyExtractor.py +69 -0
  43. rcsb/exdb/seq/UniProtCoreEtlWorker.py +177 -0
  44. rcsb/exdb/seq/UniProtExtractor.py +80 -0
  45. rcsb/exdb/seq/__init__.py +0 -0
  46. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +19 -0
  47. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +12 -0
  48. rcsb/exdb/tests/__init__.py +0 -0
  49. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +104 -0
  50. rcsb/exdb/tests/fixturePdbxLoader.py +298 -0
  51. rcsb/exdb/tests/test-data/components-abbrev.cif +2739 -0
  52. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +9171 -0
  53. rcsb/exdb/tests/testAnnotationExtractor.py +79 -0
  54. rcsb/exdb/tests/testBranchedEntityExtractor.py +81 -0
  55. rcsb/exdb/tests/testChemRefLoader.py +106 -0
  56. rcsb/exdb/tests/testChemRefMappingProvider.py +95 -0
  57. rcsb/exdb/tests/testCitationAdapter.py +97 -0
  58. rcsb/exdb/tests/testCitationExtractor.py +93 -0
  59. rcsb/exdb/tests/testCitationUtils.py +92 -0
  60. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +70 -0
  61. rcsb/exdb/tests/testEntryInfoProvider.py +97 -0
  62. rcsb/exdb/tests/testGlycanEtlWorkflow.py +70 -0
  63. rcsb/exdb/tests/testGlycanProvider.py +98 -0
  64. rcsb/exdb/tests/testGlycanUtils.py +64 -0
  65. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +90 -0
  66. rcsb/exdb/tests/testObjectExtractor.py +342 -0
  67. rcsb/exdb/tests/testObjectTransformer.py +83 -0
  68. rcsb/exdb/tests/testObjectUpdater.py +120 -0
  69. rcsb/exdb/tests/testPolymerEntityExtractor.py +93 -0
  70. rcsb/exdb/tests/testPubChemDataCacheProvider.py +124 -0
  71. rcsb/exdb/tests/testPubChemEtlWorkflow.py +134 -0
  72. rcsb/exdb/tests/testPubChemEtlWrapper.py +155 -0
  73. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +123 -0
  74. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +106 -0
  75. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +121 -0
  76. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +122 -0
  77. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +117 -0
  78. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +94 -0
  79. rcsb/exdb/tests/testTaxonomyExtractor.py +75 -0
  80. rcsb/exdb/tests/testTreeNodeListWorker.py +111 -0
  81. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +99 -0
  82. rcsb/exdb/tests/testUniProtExtractor.py +77 -0
  83. rcsb/exdb/tree/TreeNodeListWorker.py +228 -0
  84. rcsb/exdb/tree/__init__.py +0 -0
  85. rcsb/exdb/utils/ObjectAdapterBase.py +22 -0
  86. rcsb/exdb/utils/ObjectExtractor.py +286 -0
  87. rcsb/exdb/utils/ObjectTransformer.py +124 -0
  88. rcsb/exdb/utils/ObjectUpdater.py +121 -0
  89. rcsb/exdb/utils/ObjectValidator.py +160 -0
  90. rcsb/exdb/utils/__init__.py +0 -0
  91. rcsb/exdb/wf/EntryInfoEtlWorkflow.py +71 -0
  92. rcsb/exdb/wf/GlycanEtlWorkflow.py +76 -0
  93. rcsb/exdb/wf/PubChemEtlWorkflow.py +240 -0
  94. rcsb/exdb/wf/__init__.py +0 -0
  95. rcsb_exdb-1.31.dist-info/METADATA +103 -0
  96. rcsb_exdb-1.31.dist-info/RECORD +98 -0
  97. rcsb_exdb-1.31.dist-info/WHEEL +4 -0
  98. rcsb_exdb-1.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,99 @@
1
+ ##
2
+ # File: testUniProtCoreEtlWorker.py
3
+ # Author: J. Westbrook
4
+ # Date: 9-Dec-2018
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests for loading UniProt core collection
11
+
12
+ """
13
+
14
+ __docformat__ = "google en"
15
+ __author__ = "John Westbrook"
16
+ __email__ = "jwest@rcsb.rutgers.edu"
17
+ __license__ = "Apache 2.0"
18
+
19
+
20
+ import logging
21
+ import os
22
+ import platform
23
+ import resource
24
+ import time
25
+ import unittest
26
+
27
+ from rcsb.exdb.seq.UniProtCoreEtlWorker import UniProtCoreEtlWorker
28
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
29
+
30
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
31
+ logger = logging.getLogger()
32
+
33
+ HERE = os.path.abspath(os.path.dirname(__file__))
34
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
35
+
36
+
37
class UniProtCoreEtlWorkerTests(unittest.TestCase):
    """Load and validation tests for the UniProt core collection (both currently skipped as deprecated)."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Build the configuration object from the mock-data tree and start the timer."""
        mockDataPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__mockTopPath = mockDataPath
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(mockDataPath, "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=mockDataPath,
        )
        self.__cachePath = os.path.join(TOPDIR, "CACHE")
        # Update identifier of the sample data set
        self.__updateId = "2018_23"
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory and the elapsed wall-clock time of the test."""
        # The unit label is chosen by platform; the raw ru_maxrss value is scaled by 10**6 either way.
        isDarwin = platform.system() == "Darwin"
        unitS = "MB" if isDarwin else "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        elapsed = time.time() - self.__startTime
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), elapsed)

    @unittest.skip("Disable test - deprecated")
    def testLoadUniProtCore(self):
        """Test case - load UniProt core collection reference data -"""
        try:
            worker = UniProtCoreEtlWorker(self.__cfgOb, self.__cachePath)
            status = worker.load(self.__updateId, extResource="UniProt", loadType="full")
            self.assertTrue(status)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()

    @unittest.skip("Disable test - deprecated")
    def testValidateUniProtCore(self):
        """Test case - validate UniProt core collection reference data -"""
        try:
            worker = UniProtCoreEtlWorker(self.__cfgOb, self.__cachePath, doValidate=True)
            status = worker.load(self.__updateId, extResource="UniProt", loadType="full")
            self.assertTrue(status)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
87
+
88
+
89
def uniProtCoreEtlWorkerSuite():
    """Assemble the suite run when this module is executed as a script.

    Returns:
        unittest.TestSuite: suite holding the load and validate test cases, in that order
    """
    selected = unittest.TestSuite()
    for methodName in ("testLoadUniProtCore", "testValidateUniProtCore"):
        selected.addTest(UniProtCoreEtlWorkerTests(methodName))
    return selected
94
+
95
+
96
if __name__ == "__main__":
    # Script entry point: run the selected suite with verbose reporting.
    mySuite = uniProtCoreEtlWorkerSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
@@ -0,0 +1,77 @@
1
+ ##
2
+ # File: testUniProtExtractor.py
3
+ # Author: J. Westbrook
4
+ # Date: 5-Dec-2020
5
+ #
6
+ # Updates:
7
+ #
8
+ ##
9
+ """
10
+ Tests for extraction of UniProt reference sequence details from the ExDB UniProt collection.
11
+ """
12
+ __docformat__ = "google en"
13
+ __author__ = "John Westbrook"
14
+ __email__ = "jwest@rcsb.rutgers.edu"
15
+ __license__ = "Apache 2.0"
16
+
17
+ import logging
18
+ import os
19
+ import platform
20
+ import resource
21
+ import time
22
+ import unittest
23
+
24
+ from rcsb.exdb.seq.UniProtExtractor import UniProtExtractor
25
+ from rcsb.utils.config.ConfigUtil import ConfigUtil
26
+
27
+
28
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
29
+ logger = logging.getLogger()
30
+
31
+ HERE = os.path.abspath(os.path.dirname(__file__))
32
+ TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
33
+
34
+
35
class UniProtExtractorTests(unittest.TestCase):
    """Tests for extracting UniProt reference sequence details via UniProtExtractor."""

    def __init__(self, methodName="runTest"):
        super().__init__(methodName)
        self.__verbose = True

    def setUp(self):
        """Build the configuration object from the mock-data tree and start the timer."""
        mockDataPath = os.path.join(TOPDIR, "rcsb", "mock-data")
        self.__mockTopPath = mockDataPath
        self.__cfgOb = ConfigUtil(
            configPath=os.path.join(mockDataPath, "config", "dbload-setup-example.yml"),
            defaultSectionName="site_info_configuration",
            mockTopPath=mockDataPath,
        )
        self.__startTime = time.time()
        logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))

    def tearDown(self):
        """Log the peak resident memory and the elapsed wall-clock time of the test."""
        # The unit label is chosen by platform; the raw ru_maxrss value is scaled by 10**6 either way.
        isDarwin = platform.system() == "Darwin"
        unitS = "MB" if isDarwin else "GB"
        peakRss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        logger.info("Maximum resident memory size %.4f %s", peakRss / 10 ** 6, unitS)
        elapsed = time.time() - self.__startTime
        logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), elapsed)

    def testGetUniProtDetails(self):
        """Test case - get UniProt reference sequences and essential details"""
        try:
            extractor = UniProtExtractor(self.__cfgOb)
            detailsD = extractor.getReferenceSequenceDetails()
            logger.info("UniProt count %d", len(detailsD))
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            self.fail()
67
+
68
+
69
def extractorSuite():
    """Assemble the suite run when this module is executed as a script.

    Returns:
        unittest.TestSuite: suite holding the UniProtExtractorTests case(s)
    """
    suiteSelect = unittest.TestSuite()
    # The name must match a method defined on UniProtExtractorTests; unittest.TestCase
    # raises ValueError at construction time for an unknown method name.
    suiteSelect.addTest(UniProtExtractorTests("testGetUniProtDetails"))
    return suiteSelect
73
+
74
+
75
if __name__ == "__main__":
    # Script entry point: run the selected suite with verbose reporting.
    mySuite = extractorSuite()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(mySuite)
@@ -0,0 +1,228 @@
1
+ ##
2
+ # File: TreeNodeListWorker.py
3
+ # Date: 9-Apr-2019 jdw
4
+ #
5
+ # Loading worker for tree node list data.
6
+ #
7
+ # Updates:
8
+ # 9-Sep-2019 jdw add AtcProvider() and ChemrefExtractor() for ATC tree.
9
+ # 12-Apr-2023 dwp add CARD ontology tree
10
+ # 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
11
+ # 27-Aug-2024 dwp Update CARD ontology tree loading
12
+ # 23-Jan-2025 dwp Change indexed field from 'update_id' to 'id'
13
+ # 7-Aug-2025 dwp Change target DB and collection names to "dw" and "tree_*" (via configuration file);
14
+ # Make use of configuration file for loading tree node lists and setting indexed fields
15
+ #
16
+ ##
17
+ __docformat__ = "google en"
18
+ __author__ = "John Westbrook"
19
+ __email__ = "jwest@rcsb.rutgers.edu"
20
+ __license__ = "Apache 2.0"
21
+
22
+ import logging
23
+ import os.path
24
+
25
+ from rcsb.db.mongo.DocumentLoader import DocumentLoader
26
+ from rcsb.db.processors.DataExchangeStatus import DataExchangeStatus
27
+ from rcsb.exdb.chemref.ChemRefExtractor import ChemRefExtractor
28
+ from rcsb.utils.chemref.AtcProvider import AtcProvider
29
+ from rcsb.utils.ec.EnzymeDatabaseProvider import EnzymeDatabaseProvider
30
+ from rcsb.utils.targets.CARDTargetOntologyProvider import CARDTargetOntologyProvider
31
+ from rcsb.utils.struct.CathClassificationProvider import CathClassificationProvider
32
+ from rcsb.utils.struct.EcodClassificationProvider import EcodClassificationProvider
33
+ from rcsb.utils.struct.ScopClassificationProvider import ScopClassificationProvider
34
+ from rcsb.utils.struct.Scop2ClassificationProvider import Scop2ClassificationProvider
35
+ from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider
36
+ from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
class TreeNodeListWorker(object):
    """Gather tree node lists (taxonomy, EC, SCOP, SCOP2, CATH, ECOD, ATC, CARD) and load them as document collections."""

    def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, maxStepLength=4000, readBackCheck=False, documentLimit=None, verbose=False, useCache=False, useFilteredLists=False):
        """Store the loader and provider settings.

        Args:
            cfgOb (object): configuration object; read for the "tree_node_lists_configuration" section and passed to the loader and extractors
            cachePath (str): cache directory path (stored as an absolute path)
            numProc (int, optional): forwarded to DocumentLoader. Defaults to 1.
            chunkSize (int, optional): forwarded to DocumentLoader. Defaults to 10.
            maxStepLength (int, optional): forwarded to DocumentLoader. Defaults to 4000.
            readBackCheck (bool, optional): forwarded to DocumentLoader. Defaults to False.
            documentLimit (int, optional): forwarded to DocumentLoader. Defaults to None.
            verbose (bool, optional): forwarded to DocumentLoader. Defaults to False.
            useCache (bool, optional): forwarded to each classification/ontology provider. Defaults to False.
            useFilteredLists (bool, optional): when True the taxonomy tree is restricted to the lineages
                of the taxa returned by TaxonomyExtractor.getUniqueTaxons(). Defaults to False.
        """
        self.__cfgOb = cfgOb
        self.__cachePath = os.path.abspath(cachePath)
        self.__readBackCheck = readBackCheck
        self.__numProc = numProc
        self.__chunkSize = chunkSize
        self.__maxStepLength = maxStepLength
        self.__documentLimit = documentLimit
        self.__resourceName = "MONGO_DB"
        self.__filterType = "assign-dates"
        self.__verbose = verbose
        self.__statusList = []
        self.__useCache = useCache
        self.__useFilteredLists = useFilteredLists

    def __updateStatus(self, updateId, databaseName, collectionName, status, startTimestamp):
        """Append a status record for one collection to the internal status list.

        Args:
            updateId (str): update identifier stored in the status record
            databaseName (str): database name stored in the status record
            collectionName (str): collection name stored in the status record
            status (bool): truthy is recorded as success flag "Y", falsy as "N"
            startTimestamp (object): start time as returned by DataExchangeStatus.setStartTime()

        Returns:
            bool: True if the record was appended, False if an exception was logged
        """
        try:
            sFlag = "Y" if status else "N"
            desp = DataExchangeStatus()
            desp.setStartTime(tS=startTimestamp)
            desp.setObject(databaseName, collectionName)
            desp.setStatus(updateId=updateId, successFlag=sFlag)
            desp.setEndTime()
            self.__statusList.append(desp.getStatus())
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return False

    def load(self, updateId, loadType="full", doLoad=True):
        """Load tree node lists and status data -

        Relevant configuration options:

        tree_node_lists_configuration:
            DATABASE_NAME: dw
            COLLECTION_VERSION_STRING: 2.1.0
            COLLECTION_NAME_LIST:
                - tree_taxonomy
                - tree_ec
                - tree_scop
                - tree_scop2
                - tree_cath
                - tree_atc
                - tree_card
                - tree_ecod
            COLLECTION_INDICES:
                - INDEX_NAME: primary
                  ATTRIBUTE_NAMES:
                      - id
                - INDEX_NAME: index_2
                  ATTRIBUTE_NAMES:
                      - parents

        Args:
            updateId (str): update identifier recorded with each collection status
            loadType (str, optional): load type forwarded to DocumentLoader.load(). Defaults to "full".
            doLoad (bool, optional): when False the node lists are gathered but nothing is loaded. Defaults to True.

        Returns:
            bool: True if processing completed without an exception, False otherwise
        """
        try:
            useCache = self.__useCache
            logger.info("Starting with cache path %r (useCache=%r)", self.__cachePath, useCache)
            #
            self.__statusList = []
            desp = DataExchangeStatus()
            statusStartTimestamp = desp.setStartTime()
            dl = DocumentLoader(
                self.__cfgOb,
                self.__cachePath,
                self.__resourceName,
                numProc=self.__numProc,
                chunkSize=self.__chunkSize,
                maxStepLength=self.__maxStepLength,
                documentLimit=self.__documentLimit,
                verbose=self.__verbose,
                readBackCheck=self.__readBackCheck,
            )
            #
            sectionName = "tree_node_lists_configuration"
            databaseNameMongo = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
            collectionNameList = self.__cfgOb.get("COLLECTION_NAME_LIST", sectionName=sectionName)
            collectionIndexList = self.__cfgOb.get("COLLECTION_INDICES", sectionName=sectionName)
            # COLLECTION_VERSION_STRING is not applied: no extra values are added to the documents.
            addValues = None

            ok = True
            for collectionName in collectionNameList:
                nL = self.__getTreeDocList(collectionName, useCache)
                if nL and doLoad:
                    # `ok` is cumulative: once one collection fails it stays False for the
                    # status records and log lines of every later collection.
                    ok = dl.load(
                        databaseNameMongo,
                        collectionName,
                        loadType=loadType,
                        documentList=nL,
                        keyNames=None,
                        addValues=addValues,
                        schemaLevel=None,
                        indexDL=collectionIndexList,
                    ) and ok
                    self.__updateStatus(updateId, databaseNameMongo, collectionName, ok, statusStartTimestamp)
                logger.info(
                    "Completed load of tree node list for database %r, collection %r, len(nL) %r (status %r)",
                    databaseNameMongo, collectionName, len(nL), ok
                )
            # ---
            logger.info("Completed tree node list loading operations with loadType %r (status %r)", loadType, ok)
            # NOTE(review): the aggregate status is logged but not returned; True is returned
            # whenever no exception escapes. Confirm with callers before switching to `return ok`.
            return True
        except Exception as e:
            logger.exception("Failing with %s", str(e))
            return False

    def __checkTaxonNodeList(self, nL):
        """Log taxonomy nodes that have no parents or whose first parent is not in the node list.

        Args:
            nL (list): node dictionaries, each with an "id" key and an optional "parents" list

        Returns:
            int: number of nodes whose first listed parent is not the "id" of any node in nL
        """
        knownIds = {dD["id"] for dD in nL}
        eCount = 0
        for dD in nL:
            parents = dD.get("parents")
            if not parents:
                # Covers both a missing key and an empty list (which would otherwise fail on [0]).
                logger.info("No parents for node %r", dD["id"])
                continue
            pId = parents[0]
            if pId not in knownIds:
                # %r keeps the message valid whether identifiers are integers or strings.
                logger.info("Missing parent for taxon %r", pId)
                eCount += 1
        return eCount

    def getLoadStatus(self):
        """Return the list of status records accumulated by the most recent load()."""
        return self.__statusList

    def __getTreeDocList(self, collectionName, useCache):
        """Gather the tree node list for one collection from the matching provider.

        Args:
            collectionName (str): collection name (matched case-insensitively), e.g. "tree_taxonomy"
            useCache (bool): forwarded to the provider constructor

        Returns:
            list: tree node documents; empty for an unsupported collection name or when
                the CARD ontology data cannot be built
        """
        nL = []
        treeName = collectionName.lower()
        if treeName == "tree_cath":
            ccu = CathClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = ccu.getTreeNodeList()
        elif treeName == "tree_scop2":
            scu2 = Scop2ClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = scu2.getTreeNodeList()
        elif treeName == "tree_scop":
            scu = ScopClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = scu.getTreeNodeList()
        elif treeName == "tree_ecod":
            ecu = EcodClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = ecu.getTreeNodeList()
        elif treeName == "tree_ec":
            edbu = EnzymeDatabaseProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = edbu.getTreeNodeList()
        elif treeName == "tree_card":
            cou = CARDTargetOntologyProvider(cachePath=self.__cachePath, useCache=useCache)
            okCou = cou.testCache()
            if not okCou:
                # Cache is not usable: try to build the ontology data once and re-test.
                ok = cou.buildOntologyData()
                cou.reload()
                okCou = bool(ok and cou.testCache())
                if not okCou:
                    logger.error("Skipping load of CARD Target Ontology tree data because it is missing.")
            if okCou:
                nL = cou.getTreeNodeList()
        elif treeName == "tree_taxonomy":
            tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache)
            if self.__useFilteredLists:
                # Get the taxon coverage in the current data set -
                epe = TaxonomyExtractor(self.__cfgOb)
                tL = epe.getUniqueTaxons()
                logger.info("Taxon coverage length %d", len(tL))
                # Filter set seeded with taxon 1, extended with every lineage member.
                fD = {1}
                for taxId in tL:
                    fD.update(tU.getLineage(taxId))
                logger.info("Taxon filter dictionary length %d", len(fD))
                if logger.isEnabledFor(logging.DEBUG):
                    # Sorting the full filter set is only worthwhile when it will be emitted.
                    logger.debug("fD %r", sorted(fD))
                #
                nL = tU.exportNodeList(filterD=fD)
            else:
                # Get the full taxon node list without filtering
                nL = tU.exportNodeList()
            self.__checkTaxonNodeList(nL)
        elif treeName == "tree_atc":
            crEx = ChemRefExtractor(self.__cfgOb)
            atcFilterD = crEx.getChemCompAccessionMapping("ATC")
            logger.info("Length of ATC filter %d", len(atcFilterD))
            atcP = AtcProvider(cachePath=self.__cachePath, useCache=useCache)
            nL = atcP.getTreeNodeList(filterD=atcFilterD)
        else:
            logger.error("Unsupported tree node collection %r", collectionName)
        #
        logger.info("Gathered tree nodes for loading collection %s (length %d)", collectionName, len(nL))
        return nL
File without changes
@@ -0,0 +1,22 @@
1
+ ##
2
+ # File: ObjectAdapterBase.py
3
+ # Date: 17-Oct-2019
4
+ #
5
+ ##
6
+
7
+
8
class ObjectAdapterBase(object):
    """Base class for object adapters; subclasses implement filter()."""

    def __init__(self, *args, **kwargs):
        """Accept and ignore any positional or keyword constructor arguments."""

    def filter(self, obj, **kwargs):
        """Operates on the input object and returns the transformed result.

        Args:
            obj (object): input object/document

        Returns:
            bool, object: filter status and transformed input object/document

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError