rcsb.exdb 1.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. rcsb/__init__.py +1 -0
  2. rcsb/exdb/__init__.py +1 -0
  3. rcsb/exdb/branch/BranchedEntityExtractor.py +82 -0
  4. rcsb/exdb/branch/GlycanProvider.py +116 -0
  5. rcsb/exdb/branch/GlycanUtils.py +114 -0
  6. rcsb/exdb/branch/__init__.py +0 -0
  7. rcsb/exdb/chemref/ChemRefEtlWorker.py +118 -0
  8. rcsb/exdb/chemref/ChemRefExtractor.py +70 -0
  9. rcsb/exdb/chemref/ChemRefMappingProvider.py +139 -0
  10. rcsb/exdb/chemref/PubChemDataCacheProvider.py +372 -0
  11. rcsb/exdb/chemref/PubChemEtlWrapper.py +280 -0
  12. rcsb/exdb/chemref/PubChemIndexCacheProvider.py +638 -0
  13. rcsb/exdb/chemref/__init__.py +0 -0
  14. rcsb/exdb/citation/CitationAdapter.py +91 -0
  15. rcsb/exdb/citation/CitationExtractor.py +190 -0
  16. rcsb/exdb/citation/CitationUtils.py +51 -0
  17. rcsb/exdb/citation/__init__.py +0 -0
  18. rcsb/exdb/cli/__init__.py +0 -0
  19. rcsb/exdb/entry/EntryInfoProvider.py +148 -0
  20. rcsb/exdb/entry/__init__.py +0 -0
  21. rcsb/exdb/examples-seq/EntityInstanceExtractor.py +557 -0
  22. rcsb/exdb/examples-seq/EntityPolymerExtractor.py +544 -0
  23. rcsb/exdb/examples-seq/EntityPolymerExtractorFullTests.py +176 -0
  24. rcsb/exdb/examples-seq/ReferenceSequenceAssignmentUpdater.py +449 -0
  25. rcsb/exdb/examples-seq/ReferenceSequenceUtils.py +123 -0
  26. rcsb/exdb/examples-seq/ReferenceSequenceUtilsTests.py +109 -0
  27. rcsb/exdb/examples-seq/exampleObjectExtractor.py +109 -0
  28. rcsb/exdb/examples-seq/fixtureEntityPolymerExtractor.py +85 -0
  29. rcsb/exdb/examples-seq/testEntityInstanceExtractor.py +170 -0
  30. rcsb/exdb/examples-seq/testEntityPolymerExtractor.py +171 -0
  31. rcsb/exdb/examples-seq/testReferenceSequenceAssignmentUpdater.py +79 -0
  32. rcsb/exdb/examples-seq/testReferenceSequenceUtils.py +108 -0
  33. rcsb/exdb/seq/AnnotationExtractor.py +76 -0
  34. rcsb/exdb/seq/LigandNeighborMappingExtractor.py +84 -0
  35. rcsb/exdb/seq/LigandNeighborMappingProvider.py +106 -0
  36. rcsb/exdb/seq/PolymerEntityExtractor.py +328 -0
  37. rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +598 -0
  38. rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +228 -0
  39. rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +534 -0
  40. rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +388 -0
  41. rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +397 -0
  42. rcsb/exdb/seq/TaxonomyExtractor.py +69 -0
  43. rcsb/exdb/seq/UniProtCoreEtlWorker.py +177 -0
  44. rcsb/exdb/seq/UniProtExtractor.py +80 -0
  45. rcsb/exdb/seq/__init__.py +0 -0
  46. rcsb/exdb/tests/TEST-EXDB-CLI-EXEC.sh +19 -0
  47. rcsb/exdb/tests/TEST-EXDB-CLI-REFSEQ-EXEC.sh +12 -0
  48. rcsb/exdb/tests/__init__.py +0 -0
  49. rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +104 -0
  50. rcsb/exdb/tests/fixturePdbxLoader.py +298 -0
  51. rcsb/exdb/tests/test-data/components-abbrev.cif +2739 -0
  52. rcsb/exdb/tests/test-data/prdcc-abbrev.cif +9171 -0
  53. rcsb/exdb/tests/testAnnotationExtractor.py +79 -0
  54. rcsb/exdb/tests/testBranchedEntityExtractor.py +81 -0
  55. rcsb/exdb/tests/testChemRefLoader.py +106 -0
  56. rcsb/exdb/tests/testChemRefMappingProvider.py +95 -0
  57. rcsb/exdb/tests/testCitationAdapter.py +97 -0
  58. rcsb/exdb/tests/testCitationExtractor.py +93 -0
  59. rcsb/exdb/tests/testCitationUtils.py +92 -0
  60. rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +70 -0
  61. rcsb/exdb/tests/testEntryInfoProvider.py +97 -0
  62. rcsb/exdb/tests/testGlycanEtlWorkflow.py +70 -0
  63. rcsb/exdb/tests/testGlycanProvider.py +98 -0
  64. rcsb/exdb/tests/testGlycanUtils.py +64 -0
  65. rcsb/exdb/tests/testLigandNeighborMappingProvider.py +90 -0
  66. rcsb/exdb/tests/testObjectExtractor.py +342 -0
  67. rcsb/exdb/tests/testObjectTransformer.py +83 -0
  68. rcsb/exdb/tests/testObjectUpdater.py +120 -0
  69. rcsb/exdb/tests/testPolymerEntityExtractor.py +93 -0
  70. rcsb/exdb/tests/testPubChemDataCacheProvider.py +124 -0
  71. rcsb/exdb/tests/testPubChemEtlWorkflow.py +134 -0
  72. rcsb/exdb/tests/testPubChemEtlWrapper.py +155 -0
  73. rcsb/exdb/tests/testPubChemIndexCacheProvider.py +123 -0
  74. rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +106 -0
  75. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +121 -0
  76. rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +122 -0
  77. rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +117 -0
  78. rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +94 -0
  79. rcsb/exdb/tests/testTaxonomyExtractor.py +75 -0
  80. rcsb/exdb/tests/testTreeNodeListWorker.py +111 -0
  81. rcsb/exdb/tests/testUniProtCoreEtlWorker.py +99 -0
  82. rcsb/exdb/tests/testUniProtExtractor.py +77 -0
  83. rcsb/exdb/tree/TreeNodeListWorker.py +228 -0
  84. rcsb/exdb/tree/__init__.py +0 -0
  85. rcsb/exdb/utils/ObjectAdapterBase.py +22 -0
  86. rcsb/exdb/utils/ObjectExtractor.py +286 -0
  87. rcsb/exdb/utils/ObjectTransformer.py +124 -0
  88. rcsb/exdb/utils/ObjectUpdater.py +121 -0
  89. rcsb/exdb/utils/ObjectValidator.py +160 -0
  90. rcsb/exdb/utils/__init__.py +0 -0
  91. rcsb/exdb/wf/EntryInfoEtlWorkflow.py +71 -0
  92. rcsb/exdb/wf/GlycanEtlWorkflow.py +76 -0
  93. rcsb/exdb/wf/PubChemEtlWorkflow.py +240 -0
  94. rcsb/exdb/wf/__init__.py +0 -0
  95. rcsb_exdb-1.31.dist-info/METADATA +103 -0
  96. rcsb_exdb-1.31.dist-info/RECORD +98 -0
  97. rcsb_exdb-1.31.dist-info/WHEEL +4 -0
  98. rcsb_exdb-1.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,91 @@
1
+ ##
2
+ # File: CitationAdapter.py
3
+ # Date: 21-Nov-2019 jdw
4
+ #
5
+ # Selected utilities to update entry citations in the core_entry collection.
6
+ #
7
+ # Updates:
8
+ #
9
+ ##
10
+ __docformat__ = "google en"
11
+ __author__ = "John Westbrook"
12
+ __email__ = "jwest@rcsb.rutgers.edu"
13
+ __license__ = "Apache 2.0"
14
+
15
+ import copy
16
+ import logging
17
+ from string import capwords
18
+
19
+ from rcsb.exdb.utils.ObjectAdapterBase import ObjectAdapterBase
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class CitationAdapter(ObjectAdapterBase):
    """Selected utilities to update entry citations in the core_entry collection.

    The adapter inspects the primary citation of an entry object and computes a
    revised journal abbreviation from the supplied reference providers.  In the
    current implementation the revised abbreviation is only logged; the input
    object is never modified.
    """

    def __init__(self, citationReferenceProvider, journalTitleAbbreviationProvider):
        """Store the providers used to look up journal titles and abbreviations.

        Args:
            citationReferenceProvider (object): provider exposing
                getMedlineJournalAbbreviation() and getCrossRefJournalTitle()
            journalTitleAbbreviationProvider (object): provider exposing
                getJournalAbbreviation()
        """
        super(CitationAdapter, self).__init__()
        #
        self.__crP = citationReferenceProvider
        self.__jtaP = journalTitleAbbreviationProvider

    def filter(self, obj, **kwargs):
        """Apply the citation filter to the input object.

        Args:
            obj (dict): entry object (expected to carry "rcsb_id" and optionally "citation")
            **kwargs: accepted for interface compatibility and not used here

        Returns:
            (bool, dict): status flag and the (possibly filtered) object
        """
        # Test mode is hard-wired on: the filter runs against a deep copy and the
        # caller always receives (True, <unmodified input object>).
        isTestMode = True
        if isTestMode:
            _, _ = self.__filter(copy.deepcopy(obj))
            return True, obj
        else:
            return self.__filter(obj)

    def __filter(self, obj):
        """Compute (and log) the revised journal abbreviation for the primary citation.

        Args:
            obj (dict): entry object

        Returns:
            (bool, dict): False if any exception was raised (True otherwise), and the input object
        """
        ok = True
        try:
            rcsbId = obj["rcsb_id"]
            if "citation" in obj:
                for citObj in obj["citation"]:
                    # Only the primary citation is examined.
                    if citObj["id"].upper() != "PRIMARY":
                        continue
                    issn = citObj.get("journal_id_ISSN")
                    curAbbrev = citObj.get("journal_abbrev")
                    revAbbrev = self.__updateJournalAbbreviation(rcsbId, issn, curAbbrev)
                    logger.debug("%s: revised: %r current: %r", rcsbId, revAbbrev, curAbbrev)

        except Exception as e:
            ok = False
            logger.exception("Filter adapter failing with error with %s", str(e))
        #
        return ok, obj

    def __updateJournalAbbreviation(self, rcsbId, issn, curAbbrev):
        """Return a revised journal abbreviation for the input citation details.

        Args:
            rcsbId (str): entry identifier (used for logging only)
            issn (str): journal ISSN or None
            curAbbrev (str): current journal abbreviation or None

        Returns:
            (str): revised abbreviation, or None/empty when nothing could be derived
        """
        # Initialized outside the try block so the return value is always defined.
        revAbbrev = None
        try:
            if issn:
                medlineAbbrev = self.__crP.getMedlineJournalAbbreviation(issn)
                # medlineIsoAbbrev = self.__crP.getMedlineJournalIsoAbbreviation(issn)
                crIssn = issn.replace("-", "")
                crTitle = self.__crP.getCrossRefJournalTitle(crIssn)
                #
                revAbbrev = medlineAbbrev
                if not medlineAbbrev and not crTitle:
                    logger.debug("%s: missing information for issn %r curAbbrev %r", rcsbId, issn, curAbbrev)
                    # Fall back to a normalized form of the current abbreviation, but only
                    # when there is one - a missing abbreviation previously raised here.
                    if curAbbrev is not None:
                        revAbbrev = capwords(curAbbrev.replace(".", " "))
                elif not medlineAbbrev:
                    revAbbrev = self.__jtaP.getJournalAbbreviation(crTitle, usePunctuation=False)
            elif not curAbbrev:
                # Neither an ISSN nor an abbreviation: nothing to normalize.  This check now
                # precedes any use of curAbbrev so that the message is actually reachable.
                logger.info("%r: missing issn and journal abbrev", rcsbId)
                revAbbrev = curAbbrev
            else:
                ucAbbrev = curAbbrev.upper()
                if ucAbbrev in ["TO BE PUBLISHED", "IN PREPARATION"]:
                    revAbbrev = "To be published"
                elif ucAbbrev.startswith("THESIS"):
                    revAbbrev = "Thesis"
                else:
                    revAbbrev = capwords(curAbbrev.replace(".", " "))
                    logger.debug("%r: missing issn and non-standard abbrev for %r", rcsbId, curAbbrev)
            #
            logger.debug("%s: revised: %r current: %r", rcsbId, revAbbrev, curAbbrev)
        except Exception as e:
            logger.exception("Failing on %r %r %r with %r", rcsbId, issn, curAbbrev, str(e))

        return revAbbrev
@@ -0,0 +1,190 @@
1
+ ##
2
+ # File: CitationExtractor.py
3
+ # Date: 19-Feb-2019 jdw
4
+ #
5
+ # Selected utilities to extract citation data from the core_entry exchange database schema.
6
+ #
7
+ # Updates:
8
+ #
9
+ #
10
+ ##
11
+ __docformat__ = "google en"
12
+ __author__ = "John Westbrook"
13
+ __email__ = "jwest@rcsb.rutgers.edu"
14
+ __license__ = "Apache 2.0"
15
+
16
+ # import copy
17
+ import logging
18
+ import os
19
+
20
+ from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
21
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class CitationExtractor(object):
    """Utilities to extract citation related data from the core_entry collection."""

    def __init__(self, cfgOb, **kwargs):
        """Load (or extract and cache) entry citation data and build summary indices.

        Args:
            cfgOb (object): configuration object passed through to ObjectExtractor
            **kwargs: optional settings -
                useCache (bool): reuse an existing cache file when readable (default True)
                exdbDirPath (str): directory holding the cache file (default ".")
                cacheKwargs (dict): MarshalUtil import/export options (default {"fmt": "pickle"})
        """
        self.__cfgOb = cfgOb
        self.__resourceName = "MONGO_DB"
        self.__databaseName = "pdbx_core"
        self.__collectionName = "pdbx_core_entry"
        #
        self.__mU = MarshalUtil()
        #
        self.__entryD = self.__rebuildCache(**kwargs)
        self.__idxD = self.__buildIndices(self.__entryD)
        #

    def __rebuildCache(self, **kwargs):
        """Return the entry citation dictionary from the cache file or from a fresh extraction.

        Returns:
            (dict): entry citation data keyed by entry identifier (empty on failure)
        """
        useCache = kwargs.get("useCache", True)
        # Callers may pass None explicitly for these options; fall back to the defaults
        # rather than failing on os.path.join(None, ...) or None["fmt"].
        dirPath = kwargs.get("exdbDirPath") or "."
        cacheKwargs = kwargs.get("cacheKwargs") or {"fmt": "pickle"}
        #
        ext = "pic" if cacheKwargs["fmt"] == "pickle" else "json"
        fn = "entry-citation-extracted-data-cache" + "." + ext
        cacheFilePath = os.path.join(dirPath, fn)

        entryD = {}
        try:
            if useCache and os.access(cacheFilePath, os.R_OK):
                logger.info("Using cached entry citation file %s", cacheFilePath)
                # NOTE(review): the default cache format is pickle - only load cache
                # files from trusted locations.
                cD = self.__mU.doImport(cacheFilePath, **cacheKwargs)
                if isinstance(cD, dict) and isinstance(cD.get("entryD"), dict):
                    entryD = cD["entryD"]
                else:
                    # A failed or malformed import previously raised outside this try block.
                    logger.warning("Unusable entry citation cache content in %s", cacheFilePath)
            else:
                entryD = self.__extractCitations()
                cD = {"entryD": entryD}
                ok = self.__mU.mkdir(dirPath)
                ok = self.__mU.doExport(cacheFilePath, cD, **cacheKwargs)
                logger.info("Saved entry citation results (%d) status %r in %s", len(entryD), ok, cacheFilePath)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return entryD

    def __buildIndices(self, entryD):
        """Tabulate and log summary statistics for the first citation of each entry.

        Args:
            entryD (dict): entry citation data keyed by entry identifier

        Returns:
            (dict): occurrence count for each journal abbreviation

        Example:
            "entryD": {
                "5KAL": {
                    "citation": [
                        {
                            "country": "UK",
                            "id": "primary",
                            "journal_abbrev": "Nucleic Acids Res.",
                            "journal_id_ASTM": "NARHAD",
                            "journal_id_CSD": "0389",
                            "journal_id_ISSN": "1362-4962",
                            "journal_volume": "44",
                            "page_first": "10862",
                            "page_last": "10878",
                            "title": "RNA Editing TUTase 1: structural foundation of substrate recognition, complex interactions and drug targeting.",
                            "year": 2016,
                            "pdbx_database_id_DOI": "10.1093/nar/gkw917",
                            "pdbx_database_id_PubMed": 27744351,
                            "rcsb_authors": [
                                "Rajappa-Titu, L.",
                                "Suematsu, T.",
                                "Munoz-Tello, P.",
                                "Long, M.",
                                "Demir, O.",
                                "Cheng, K.J.",
                                "Stagno, J.R.",
                                "Luecke, H.",
                                "Amaro, R.E.",
                                "Aphasizheva, I.",
                                "Aphasizhev, R.",
                                "Thore, S."
                            ]
                        }
                    ],
                    "_entry_id": "5KAL"
                },
        """
        indD = {}
        missingCitationCount = 0
        missingJournalName = 0
        numPubMed = 0
        numDOI = 0
        numCitations = 0
        mD = {}
        issnD = {}
        missingISSNCount = 0
        missingPubMedCount = 0
        try:
            for entryId, eD in entryD.items():
                cDL = eD.get("citation")
                if not cDL:
                    missingCitationCount += 1
                    continue
                # Only the first citation of each entry contributes to the statistics.
                for cD in cDL[:1]:
                    if not cD:
                        # An empty citation record lacks every tabulated item.
                        logger.info("Missing journal name in entryId %s %r ", entryId, cD)
                        missingJournalName += 1
                        missingPubMedCount += 1
                        missingISSNCount += 1
                        continue
                    #
                    hasJournal = "journal_abbrev" in cD
                    journalAbbrev = cD["journal_abbrev"] if hasJournal else None
                    if hasJournal:
                        indD[journalAbbrev] = indD.get(journalAbbrev, 0) + 1
                    else:
                        logger.info("Missing journal name in entryId %s %r ", entryId, cD)
                        missingJournalName += 1
                    #
                    if "pdbx_database_id_DOI" in cD:
                        numDOI += 1
                    #
                    if "pdbx_database_id_PubMed" in cD:
                        numPubMed += 1
                    else:
                        missingPubMedCount += 1
                        # The per-journal tally can only be updated when a journal name exists;
                        # an unguarded lookup here used to abort the whole tabulation.
                        if hasJournal:
                            mD[journalAbbrev] = mD.get(journalAbbrev, 0) + 1
                    #
                    issn = cD.get("journal_id_ISSN")
                    if issn and len(issn) > 7:
                        issnD[issn] = issnD.get(issn, 0) + 1
                    else:
                        missingISSNCount += 1
                    #
                    numCitations += 1
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        #
        logger.info("Number of citatons %d", numCitations)
        logger.info("Number of PubMed ids %d", numPubMed)
        logger.info("Number of DOIs %d", numDOI)
        logger.info("No citation category count %d missing journal name %d", missingCitationCount, missingJournalName)
        #
        logger.info("Journal index name length %d", len(indD))
        # logger.info("Journal name length %r",indD.items())
        #
        logger.info("Missing pubmed index length %d", len(mD))
        logger.info("Missing pubmed length %d", missingPubMedCount)
        logger.info("Missing PubMed %r", mD.items())
        #
        logger.info("ISSN dictionary length %d", len(issnD))
        logger.info("ISSN missing length %d", missingISSNCount)
        #
        return indD

    def getEntryCount(self):
        """Return the number of entries in the loaded citation dictionary."""
        return len(self.__entryD)

    def __extractCitations(self):
        """Extract the rcsb_id and citation content for all entries in the entry collection.

        Returns:
            (dict): extracted objects as returned by ObjectExtractor.getObjects() (empty on failure)
        """
        try:
            obEx = ObjectExtractor(
                self.__cfgOb,
                databaseName=self.__databaseName,
                collectionName=self.__collectionName,
                cacheFilePath=None,
                useCache=False,
                keyAttribute="entry",
                uniqueAttributes=["rcsb_id"],
                cacheKwargs=None,
                objectLimit=None,
                selectionQuery={},
                selectionList=["rcsb_id", "citation"],
            )
            eCount = obEx.getCount()
            logger.info("Entry count is %d", eCount)
            objD = obEx.getObjects()
            # for ky, eD in objD.items():
            #    logger.info("%s: %r", ky, eD)
            return objD
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return {}
@@ -0,0 +1,51 @@
1
+ ##
2
+ # File: CitationUtils.py
3
+ # Date: 19-Feb-2019 jdw
4
+ #
5
+ # Selected utilities to process and normalize PDB citation data.
6
+ #
7
+ # Updates:
8
+ #
9
+ #
10
+ ##
11
+ __docformat__ = "google en"
12
+ __author__ = "John Westbrook"
13
+ __email__ = "jwest@rcsb.rutgers.edu"
14
+ __license__ = "Apache 2.0"
15
+
16
+ import logging
17
+
18
+ from rcsb.exdb.citation.CitationExtractor import CitationExtractor
19
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class CitationUtils(object):
    """Utilities to process and normalize PDB citation data."""

    def __init__(self, cfgOb, **kwargs):
        """Create the underlying citation extractor.

        Args:
            cfgOb (object): configuration object passed through to CitationExtractor
            **kwargs: optional settings forwarded to the extractor -
                exdbDirPath (str): cache directory (default ".")
                cacheKwargs (dict): cache import/export options (default {"fmt": "pickle"})
                useCache (bool): reuse cached data (default True)
                entryLimit: forwarded unchanged (default True)
        """
        self.__cfgOb = cfgOb
        self.__mU = MarshalUtil()
        #
        self.__ce = self.__getEntryCitations(**kwargs)

    def getCitationEntryCount(self):
        """Return the number of entries with extracted citation data.

        Returns:
            (int): entry count, or 0 when the citation extractor could not be created
        """
        # The extractor is None when its construction failed; report an empty
        # result instead of raising AttributeError.
        if self.__ce is None:
            return 0
        return self.__ce.getEntryCount()

    def __getEntryCitations(self, **kwargs):
        """Extract entry citations.

        Returns:
            (object): CitationExtractor instance or None on failure
        """
        ce = None
        # A None path would be forwarded explicitly and defeat the extractor's own
        # default, so the current directory is used here as well.
        exdbDirPath = kwargs.get("exdbDirPath") or "."
        saveKwargs = kwargs.get("cacheKwargs", {"fmt": "pickle"})
        useCache = kwargs.get("useCache", True)
        # NOTE(review): a default of True for a limit looks unintended - confirm how
        # CitationExtractor is expected to interpret entryLimit.
        entryLimit = kwargs.get("entryLimit", True)
        try:
            ce = CitationExtractor(self.__cfgOb, exdbDirPath=exdbDirPath, useCache=useCache, cacheKwargs=saveKwargs, entryLimit=entryLimit)
            eCount = ce.getEntryCount()
            logger.info("Using citation data for %d entries", eCount)
            #
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return ce
File without changes
File without changes
@@ -0,0 +1,148 @@
1
+ ##
2
+ # File: EntryInfoProvider.py
3
+ # Date: 22-Sep-2021 jdw
4
+ #
5
+ # Updated:
6
+ #
7
+ ##
8
+ """
9
+ Accessors for entry-level annotations.
10
+
11
+ """
12
+
13
+ import logging
14
+ import os.path
15
+ import time
16
+
17
+ from rcsb.exdb.utils.ObjectExtractor import ObjectExtractor
18
+ from rcsb.utils.io.MarshalUtil import MarshalUtil
19
+ from rcsb.utils.io.StashableBase import StashableBase
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class EntryInfoProvider(StashableBase):
    """Accessors and generators for entry-level annotations."""

    def __init__(self, **kwargs):
        """Set up the cache location and load any cached entry-level annotations.

        Args:
            **kwargs: optional settings -
                cachePath (str): top-level cache directory (default ".")
                useCache (bool): read the existing cache file if present (default True)
        """
        #
        self.__version = "0.50"
        cachePath = kwargs.get("cachePath", ".")
        useCache = kwargs.get("useCache", True)
        self.__dirName = "rcsb_entry_info"
        self.__dirPath = os.path.join(cachePath, self.__dirName)
        super(EntryInfoProvider, self).__init__(cachePath, [self.__dirName])
        #
        self.__mU = MarshalUtil(workPath=self.__dirPath)
        self.__entryInfoD = self.__reload(fmt="json", useCache=useCache)
        #

    def testCache(self, minCount=1):
        """Test whether the loaded cache holds at least the minimum number of entries.

        Args:
            minCount (int, optional): minimum number of entries required (0 always passes). Defaults to 1.

        Returns:
            (bool): True if the cache satisfies the minimum count or False otherwise
        """
        if minCount == 0:
            return True
        # minCount is a minimum, so a cache holding exactly minCount entries passes.
        if self.__entryInfoD and minCount and "entryInfo" in self.__entryInfoD and len(self.__entryInfoD["entryInfo"]) >= minCount:
            logger.info("Entry annotations for (%d) entries", len(self.__entryInfoD["entryInfo"]))
            return True
        return False

    def getEntryInfo(self, entryId):
        """Return a dictionary of entry-level annotations.

        Args:
            entryId (str): entry identifier (matched in upper case)

        Returns:
            (dict): of entry-level annotations (empty if the entry is not found)
        """
        try:
            return self.__entryInfoD["entryInfo"].get(entryId.upper(), {})
        except Exception as e:
            logger.error("Failing with %r", str(e))
        return {}

    def getEntriesByPolymerEntityCount(self, count):
        """Return the identifiers of entries with the input polymer entity count.

        Args:
            count (int): polymer entity count to match

        Returns:
            (list): matching entry identifiers
        """
        oL = []
        try:
            for entryId, eD in self.__entryInfoD["entryInfo"].items():
                # Entries lacking the count are skipped rather than aborting the scan.
                if "polymer_entity_count" in eD and eD["polymer_entity_count"] == count:
                    oL.append(entryId)
        except Exception as e:
            logger.error("Failing with %r", str(e))
        return oL

    def __getEntryInfoFilePath(self, fmt="json"):
        """Return the cache file path for the input serialization format."""
        baseFileName = "entry_info_details"
        fExt = ".json" if fmt == "json" else ".pic"
        fp = os.path.join(self.__dirPath, baseFileName + fExt)
        return fp

    def update(self, cfgOb, fmt="json", indent=3):
        """Update the entry-level annotation cache from the entry collection.

        Args:
            cfgOb (object): ConfigInfo() object instance
            fmt (str, optional): cache serialization format. Defaults to "json".
            indent (int, optional): indentation applied when fmt is "json". Defaults to 3.

        Returns:
            (bool): True for success or False otherwise
        """
        ok = False
        try:
            entryInfoD = self.__updateEntryInfo(cfgOb)

            logger.info("Got entry_info for (%d)", len(entryInfoD))
            #
            tS = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
            self.__entryInfoD = {"version": self.__version, "created": tS, "entryInfo": entryInfoD}
            #
            infoFilePath = self.__getEntryInfoFilePath(fmt=fmt)
            kwargs = {"indent": indent} if fmt == "json" else {}
            ok = self.__mU.doExport(infoFilePath, self.__entryInfoD, fmt=fmt, **kwargs)
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return ok

    def reload(self):
        """Reload from the current cache file.

        Returns:
            (bool): True if a cache dictionary is available after reloading or False otherwise
        """
        ok = False
        try:
            self.__entryInfoD = self.__reload(fmt="json", useCache=True)
            ok = self.__entryInfoD is not None
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return ok

    def __reload(self, fmt="json", useCache=True):
        """Return the cached annotation dictionary, or an empty placeholder if no cache is used.

        Args:
            fmt (str, optional): cache serialization format. Defaults to "json".
            useCache (bool, optional): read the cache file when it exists. Defaults to True.

        Returns:
            (dict): cache content with keys "version", "created" and "entryInfo"
        """
        entryInfoFilePath = self.__getEntryInfoFilePath(fmt=fmt)
        tS = time.strftime("%Y %m %d %H:%M:%S", time.localtime())
        # The placeholder uses the same "entryInfo" key that the accessors read, so an
        # unpopulated provider returns empty results without logging lookup errors.
        pcD = {"version": self.__version, "created": tS, "entryInfo": {}}

        if useCache and self.__mU.exists(entryInfoFilePath):
            logger.info("Reading entry-info cached path %r", entryInfoFilePath)
            pcD = self.__mU.doImport(entryInfoFilePath, fmt=fmt)
        return pcD

    def __updateEntryInfo(self, cfgOb):
        """Get entry_info data.

        Args:
            cfgOb (object): ConfigInfo() object instance

        Returns:
            (dict): rcsb_entry_info content keyed by rcsb_id
        """
        rD = {}
        try:
            obEx = ObjectExtractor(
                cfgOb,
                databaseName="pdbx_core",
                collectionName="pdbx_core_entry",
                useCache=False,
                keyAttribute="entry",
                uniqueAttributes=["rcsb_id"],
                selectionQuery={},
                selectionList=["rcsb_id", "rcsb_entry_info.polymer_entity_count"],
            )
            #
            eCount = obEx.getCount()
            logger.info("Entry count is %d", eCount)

            objD = obEx.getObjects()
            for eD in objD.values():
                # Incomplete objects are skipped (and reported) instead of being
                # silently swallowed or aborting the remaining entries.
                if "rcsb_id" not in eD or "rcsb_entry_info" not in eD:
                    logger.debug("Skipping object without rcsb_id or rcsb_entry_info %r", eD)
                    continue
                rD[eD["rcsb_id"]] = eD["rcsb_entry_info"]
        except Exception as e:
            logger.exception("Failing with %s", str(e))
        return rD
File without changes