rcsb.exdb 1.28__tar.gz → 1.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/HISTORY.txt +1 -0
- {rcsb_exdb-1.28/rcsb.exdb.egg-info → rcsb_exdb-1.29}/PKG-INFO +3 -2
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +73 -72
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/cli/__init__.py +1 -1
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/PubChemEtlWorkflow.py +2 -2
- {rcsb_exdb-1.28 → rcsb_exdb-1.29/rcsb.exdb.egg-info}/PKG-INFO +3 -2
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/LICENSE +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/MANIFEST.in +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/README.md +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/GlycanProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/GlycanUtils.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationAdapter.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationUtils.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/entry/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationUtils.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tree/TreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tree/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectValidator.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/__init__.py +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/SOURCES.txt +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/not-zip-safe +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/requires.txt +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/top_level.txt +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/requirements.txt +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/setup.cfg +0 -0
- {rcsb_exdb-1.28 → rcsb_exdb-1.29}/setup.py +0 -0
|
@@ -110,3 +110,4 @@
|
|
|
110
110
|
Update Azure pipelines to run on latest macOS and ubuntu version
|
|
111
111
|
23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
|
|
112
112
|
11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
|
|
113
|
+
8-Apr-2025 V1.29 Add more logging to PubChemIndexCacheProvider and increase default numProc
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.28
|
|
3
|
+
Version: 1.29
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -41,6 +41,7 @@ Dynamic: description
|
|
|
41
41
|
Dynamic: description-content-type
|
|
42
42
|
Dynamic: home-page
|
|
43
43
|
Dynamic: license
|
|
44
|
+
Dynamic: license-file
|
|
44
45
|
Dynamic: provides-extra
|
|
45
46
|
Dynamic: requires-dist
|
|
46
47
|
Dynamic: summary
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
# 16-Jul-2020 jdw separate index and reference data management.
|
|
10
10
|
# 23-Jul-2021 jdw Make PubChemIndexCacheProvider a subclass of StashableBase()
|
|
11
11
|
# 2-Mar-2023 aae Return correct status from Single proc
|
|
12
|
+
# 8-Apr-2025 dwp Let MultiProc handle chunking; add more logging to debug slowness on west coast
|
|
12
13
|
#
|
|
13
14
|
##
|
|
14
15
|
__docformat__ = "google en"
|
|
@@ -100,84 +101,82 @@ class PubChemUpdateWorker(object):
|
|
|
100
101
|
#
|
|
101
102
|
"""
|
|
102
103
|
_ = workingDir
|
|
103
|
-
chunkSize = optionsD.get("chunkSize", 50)
|
|
104
104
|
matchIdOnly = optionsD.get("matchIdOnly", True)
|
|
105
105
|
# Path to store raw request data -
|
|
106
106
|
exportPath = optionsD.get("exportPath", None)
|
|
107
107
|
#
|
|
108
108
|
successList = []
|
|
109
|
-
retList1 = []
|
|
110
|
-
retList2 = []
|
|
111
109
|
diagList = []
|
|
112
|
-
|
|
110
|
+
failList = []
|
|
111
|
+
retList = []
|
|
113
112
|
#
|
|
114
113
|
try:
|
|
114
|
+
startTime = time.time()
|
|
115
115
|
tU = TimeUtil()
|
|
116
|
-
ccIdList = dataList
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
#
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
116
|
+
ccIdList = dataList # len(dataList) should be of size chunkSize
|
|
117
|
+
logger.info("%s search starting for %d reference definitions (matchIdOnly %r exportPath %r)", procName, len(ccIdList), matchIdOnly, exportPath)
|
|
118
|
+
tIdxDL = []
|
|
119
|
+
timeS = tU.getDateTimeObj(tU.getTimestamp())
|
|
120
|
+
for ccId in ccIdList:
|
|
121
|
+
# Get various forms from the search index -
|
|
122
|
+
chemIdList = self.__genChemIdList(ccId)
|
|
123
|
+
tIdxD = {"rcsb_id": ccId, "rcsb_last_update": timeS}
|
|
124
|
+
#
|
|
125
|
+
mL = []
|
|
126
|
+
for chemId in chemIdList:
|
|
127
|
+
stA = time.time()
|
|
128
|
+
ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
|
|
128
129
|
#
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
|
|
133
|
-
#
|
|
134
|
-
if not ok:
|
|
135
|
-
etA = time.time()
|
|
136
|
-
logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
|
|
137
|
-
|
|
138
|
-
#
|
|
139
|
-
if ok and refDL:
|
|
140
|
-
for tD in refDL:
|
|
141
|
-
pcId = tD["cid"]
|
|
142
|
-
inchiKey = (
|
|
143
|
-
self.__searchIdxD[chemId.indexName]["inchi-key"]
|
|
144
|
-
if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
|
|
145
|
-
else None
|
|
146
|
-
)
|
|
147
|
-
smiles = (
|
|
148
|
-
self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
|
|
149
|
-
)
|
|
150
|
-
mL.append(
|
|
151
|
-
{
|
|
152
|
-
"matched_id": pcId,
|
|
153
|
-
"search_id_type": chemId.identifierType,
|
|
154
|
-
"search_id_source": chemId.identifierSource,
|
|
155
|
-
"source_index_name": chemId.indexName,
|
|
156
|
-
"source_smiles": smiles,
|
|
157
|
-
"source_inchikey": inchiKey,
|
|
158
|
-
}
|
|
159
|
-
)
|
|
160
|
-
# tD.update({"rcsb_id": pcId, "rcsb_last_update": timeS})
|
|
161
|
-
# tDL.append(tD)
|
|
130
|
+
if not ok:
|
|
131
|
+
etA = time.time()
|
|
132
|
+
logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
|
|
162
133
|
#
|
|
163
|
-
if
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
134
|
+
if ok and refDL:
|
|
135
|
+
for tD in refDL:
|
|
136
|
+
pcId = tD["cid"]
|
|
137
|
+
inchiKey = (
|
|
138
|
+
self.__searchIdxD[chemId.indexName]["inchi-key"]
|
|
139
|
+
if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
|
|
140
|
+
else None
|
|
141
|
+
)
|
|
142
|
+
smiles = (
|
|
143
|
+
self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
|
|
144
|
+
)
|
|
145
|
+
mL.append(
|
|
146
|
+
{
|
|
147
|
+
"matched_id": pcId,
|
|
148
|
+
"search_id_type": chemId.identifierType,
|
|
149
|
+
"search_id_source": chemId.identifierSource,
|
|
150
|
+
"source_index_name": chemId.indexName,
|
|
151
|
+
"source_smiles": smiles,
|
|
152
|
+
"source_inchikey": inchiKey,
|
|
153
|
+
}
|
|
154
|
+
)
|
|
155
|
+
#
|
|
156
|
+
if mL:
|
|
157
|
+
tIdxD["matched_ids"] = mL
|
|
158
|
+
successList.append(ccId)
|
|
159
|
+
else:
|
|
160
|
+
logger.info("No match result for any form of %s", ccId)
|
|
161
|
+
#
|
|
162
|
+
tIdxDL.append(tIdxD)
|
|
163
|
+
# --
|
|
164
|
+
failList = sorted(set(dataList) - set(successList))
|
|
165
|
+
if failList:
|
|
166
|
+
logger.info("%s returns %d definitions with failures: %r", procName, len(failList), failList)
|
|
167
|
+
# --
|
|
168
|
+
endTime = time.time()
|
|
169
|
+
logger.info("%s completed updateList len %r duration %.3f secs", procName, len(ccIdList), endTime - startTime)
|
|
170
|
+
startTimeL = time.time()
|
|
171
|
+
logger.info("Saving dataList (len=%d)", len(ccIdList))
|
|
172
|
+
self.__updateObjectStore(self.__databaseName, self.__matchIndexCollectionName, tIdxDL)
|
|
173
|
+
endTimeL = time.time()
|
|
174
|
+
logger.info("Saved chunk (len=%d) in %.3f secs", len(ccIdList), endTimeL - startTimeL)
|
|
176
175
|
except Exception as e:
|
|
177
176
|
logger.exception("Failing %s for %d data items %s", procName, len(dataList), str(e))
|
|
178
|
-
logger.info("%s dataList length %d success length %d
|
|
177
|
+
logger.info("%s dataList length %d success length %d retList %d", procName, len(dataList), len(successList), len(retList))
|
|
179
178
|
#
|
|
180
|
-
return successList,
|
|
179
|
+
return successList, retList, diagList
|
|
181
180
|
|
|
182
181
|
def __updateObjectStore(self, databaseName, collectionName, objDL):
|
|
183
182
|
updateDL = []
|
|
@@ -196,10 +195,6 @@ class PubChemUpdateWorker(object):
|
|
|
196
195
|
ok = obUpd.createCollection(databaseName, collectionName, indexAttributeNames=indexAttributeNames, checkExists=True, bsonSchema=None)
|
|
197
196
|
return ok
|
|
198
197
|
|
|
199
|
-
def __chunker(self, iList, chunkSize):
|
|
200
|
-
chunkSize = max(1, chunkSize)
|
|
201
|
-
return (iList[i: i + chunkSize] for i in range(0, len(iList), chunkSize))
|
|
202
|
-
|
|
203
198
|
|
|
204
199
|
class PubChemIndexCacheProvider(StashableBase):
|
|
205
200
|
"""Utilities to manage chemical component/BIRD to PubChem compound identifier mapping data."""
|
|
@@ -515,7 +510,7 @@ class PubChemIndexCacheProvider(StashableBase):
|
|
|
515
510
|
Returns:
|
|
516
511
|
(bool, list): status flag, list of unmatched identifiers
|
|
517
512
|
"""
|
|
518
|
-
chunkSize =
|
|
513
|
+
chunkSize = 10
|
|
519
514
|
exportPath = kwargs.get("exportPath", None)
|
|
520
515
|
logger.info("Length starting list is %d", len(idList))
|
|
521
516
|
optD = {"chunkSize": chunkSize, "exportPath": exportPath, "matchIdOnly": True}
|
|
@@ -524,14 +519,20 @@ class PubChemIndexCacheProvider(StashableBase):
|
|
|
524
519
|
mpu = MultiProcUtil(verbose=True)
|
|
525
520
|
mpu.setOptions(optD)
|
|
526
521
|
mpu.set(workerObj=rWorker, workerMethod="updateList")
|
|
527
|
-
ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=
|
|
528
|
-
logger.info("Multi-proc %r failures %r result lengths %r
|
|
522
|
+
ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=1, chunkSize=chunkSize)
|
|
523
|
+
logger.info("Multi-proc %r failures %r result lengths %r", ok, len(failList), len(resultList[0]))
|
|
529
524
|
else:
|
|
530
|
-
successList, _, _
|
|
525
|
+
successList, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
|
|
531
526
|
failList = list(set(idList) - set(successList))
|
|
532
527
|
ok = len(failList) == 0
|
|
533
528
|
logger.info("Single-proc status %r failures %r", ok, len(failList))
|
|
534
529
|
#
|
|
530
|
+
if len(failList) > 0:
|
|
531
|
+
if len(failList) <= 100:
|
|
532
|
+
logger.info("failList: %r", failList)
|
|
533
|
+
else:
|
|
534
|
+
logger.info("failList[:100]: %r", failList[:100])
|
|
535
|
+
#
|
|
535
536
|
return ok, failList
|
|
536
537
|
|
|
537
538
|
def __reloadDump(self, objD, databaseName, collectionName, indexAttributeNames=None):
|
|
@@ -165,7 +165,7 @@ class PubChemEtlWorkflow(object):
|
|
|
165
165
|
birdUrlTarget = kwargs.get("birdUrlTarget", None)
|
|
166
166
|
ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc-full")
|
|
167
167
|
numProcChemComp = kwargs.get("numProcChemComp", 8)
|
|
168
|
-
numProc = kwargs.get("numProc",
|
|
168
|
+
numProc = kwargs.get("numProc", 4)
|
|
169
169
|
rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
|
|
170
170
|
exportPath = kwargs.get("exportPath", None)
|
|
171
171
|
useStash = kwargs.get("useStash", True)
|
|
@@ -209,7 +209,7 @@ class PubChemEtlWorkflow(object):
|
|
|
209
209
|
try:
|
|
210
210
|
ok1 = ok2 = ok3 = ok4 = ok5 = ok6 = False
|
|
211
211
|
# --
|
|
212
|
-
numProc = kwargs.get("numProc",
|
|
212
|
+
numProc = kwargs.get("numProc", 4)
|
|
213
213
|
useStash = kwargs.get("useStash", True)
|
|
214
214
|
useGit = kwargs.get("useGit", False)
|
|
215
215
|
#
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.28
|
|
3
|
+
Version: 1.29
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -41,6 +41,7 @@ Dynamic: description
|
|
|
41
41
|
Dynamic: description-content-type
|
|
42
42
|
Dynamic: home-page
|
|
43
43
|
Dynamic: license
|
|
44
|
+
Dynamic: license-file
|
|
44
45
|
Dynamic: provides-extra
|
|
45
46
|
Dynamic: requires-dist
|
|
46
47
|
Dynamic: summary
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py
RENAMED
|
File without changes
|
{rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|