rcsb.exdb 1.28__tar.gz → 1.29__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/HISTORY.txt +1 -0
  2. {rcsb_exdb-1.28/rcsb.exdb.egg-info → rcsb_exdb-1.29}/PKG-INFO +3 -2
  3. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +73 -72
  4. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/cli/__init__.py +1 -1
  5. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/PubChemEtlWorkflow.py +2 -2
  6. {rcsb_exdb-1.28 → rcsb_exdb-1.29/rcsb.exdb.egg-info}/PKG-INFO +3 -2
  7. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/LICENSE +0 -0
  8. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/MANIFEST.in +0 -0
  9. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/README.md +0 -0
  10. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/__init__.py +0 -0
  11. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/__init__.py +0 -0
  12. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
  13. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/GlycanProvider.py +0 -0
  14. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/GlycanUtils.py +0 -0
  15. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/branch/__init__.py +0 -0
  16. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
  17. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
  18. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
  19. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
  20. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
  21. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/chemref/__init__.py +0 -0
  22. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationAdapter.py +0 -0
  23. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationExtractor.py +0 -0
  24. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationUtils.py +0 -0
  25. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/citation/__init__.py +0 -0
  26. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
  27. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/entry/__init__.py +0 -0
  28. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
  29. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
  30. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
  31. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
  32. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
  33. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
  34. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
  35. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
  36. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
  37. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
  38. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
  39. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
  40. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/seq/__init__.py +0 -0
  41. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/__init__.py +0 -0
  42. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
  43. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
  44. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
  45. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
  46. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
  47. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
  48. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
  49. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
  50. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationUtils.py +0 -0
  51. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
  52. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
  53. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
  54. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
  55. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
  56. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
  57. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
  58. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
  59. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
  60. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
  61. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
  62. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
  63. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
  64. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
  65. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
  66. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
  67. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
  68. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
  69. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
  70. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
  71. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
  72. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
  73. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
  74. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tree/TreeNodeListWorker.py +0 -0
  75. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/tree/__init__.py +0 -0
  76. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
  77. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
  78. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
  79. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
  80. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectValidator.py +0 -0
  81. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/utils/__init__.py +0 -0
  82. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
  83. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
  84. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb/exdb/wf/__init__.py +0 -0
  85. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/SOURCES.txt +0 -0
  86. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
  87. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/not-zip-safe +0 -0
  88. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/requires.txt +0 -0
  89. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/top_level.txt +0 -0
  90. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/requirements.txt +0 -0
  91. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/setup.cfg +0 -0
  92. {rcsb_exdb-1.28 → rcsb_exdb-1.29}/setup.py +0 -0
@@ -110,3 +110,4 @@
110
110
  Update Azure pipelines to run on latest macOS and ubuntu version
111
111
  23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
112
112
  11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
113
+ 8-Apr-2025 V1.29 Add more logging to PubChemIndexCacheProvider and increase default numProc
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: rcsb.exdb
3
- Version: 1.28
3
+ Version: 1.29
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -41,6 +41,7 @@ Dynamic: description
41
41
  Dynamic: description-content-type
42
42
  Dynamic: home-page
43
43
  Dynamic: license
44
+ Dynamic: license-file
44
45
  Dynamic: provides-extra
45
46
  Dynamic: requires-dist
46
47
  Dynamic: summary
@@ -9,6 +9,7 @@
9
9
  # 16-Jul-2020 jdw separate index and reference data management.
10
10
  # 23-Jul-2021 jdw Make PubChemIndexCacheProvider a subclass of StashableBase()
11
11
  # 2-Mar-2023 aae Return correct status from Single proc
12
+ # 8-Apr-2025 dwp Let MultiProc handle chunking; add more logging to debug slowness on west coast
12
13
  #
13
14
  ##
14
15
  __docformat__ = "google en"
@@ -100,84 +101,82 @@ class PubChemUpdateWorker(object):
100
101
  #
101
102
  """
102
103
  _ = workingDir
103
- chunkSize = optionsD.get("chunkSize", 50)
104
104
  matchIdOnly = optionsD.get("matchIdOnly", True)
105
105
  # Path to store raw request data -
106
106
  exportPath = optionsD.get("exportPath", None)
107
107
  #
108
108
  successList = []
109
- retList1 = []
110
- retList2 = []
111
109
  diagList = []
112
- emptyList = []
110
+ failList = []
111
+ retList = []
113
112
  #
114
113
  try:
114
+ startTime = time.time()
115
115
  tU = TimeUtil()
116
- ccIdList = dataList
117
- numChunks = len(list(self.__chunker(ccIdList, chunkSize)))
118
- logger.info("%s search starting for %d reference definitions (in chunks of length %d)", procName, len(ccIdList), chunkSize)
119
- for ii, ccIdChunk in enumerate(self.__chunker(ccIdList, chunkSize), 1):
120
- logger.info("%s starting chunk for %d of %d", procName, ii, numChunks)
121
- # tDL = []
122
- tIdxDL = []
123
- timeS = tU.getDateTimeObj(tU.getTimestamp())
124
- for ccId in ccIdChunk:
125
- # Get various forms from the search index -
126
- chemIdList = self.__genChemIdList(ccId)
127
- tIdxD = {"rcsb_id": ccId, "rcsb_last_update": timeS}
116
+ ccIdList = dataList # len(dataList) should be of size chunkSize
117
+ logger.info("%s search starting for %d reference definitions (matchIdOnly %r exportPath %r)", procName, len(ccIdList), matchIdOnly, exportPath)
118
+ tIdxDL = []
119
+ timeS = tU.getDateTimeObj(tU.getTimestamp())
120
+ for ccId in ccIdList:
121
+ # Get various forms from the search index -
122
+ chemIdList = self.__genChemIdList(ccId)
123
+ tIdxD = {"rcsb_id": ccId, "rcsb_last_update": timeS}
124
+ #
125
+ mL = []
126
+ for chemId in chemIdList:
127
+ stA = time.time()
128
+ ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
128
129
  #
129
- mL = []
130
- for chemId in chemIdList:
131
- stA = time.time()
132
- ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
133
- #
134
- if not ok:
135
- etA = time.time()
136
- logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
137
-
138
- #
139
- if ok and refDL:
140
- for tD in refDL:
141
- pcId = tD["cid"]
142
- inchiKey = (
143
- self.__searchIdxD[chemId.indexName]["inchi-key"]
144
- if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
145
- else None
146
- )
147
- smiles = (
148
- self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
149
- )
150
- mL.append(
151
- {
152
- "matched_id": pcId,
153
- "search_id_type": chemId.identifierType,
154
- "search_id_source": chemId.identifierSource,
155
- "source_index_name": chemId.indexName,
156
- "source_smiles": smiles,
157
- "source_inchikey": inchiKey,
158
- }
159
- )
160
- # tD.update({"rcsb_id": pcId, "rcsb_last_update": timeS})
161
- # tDL.append(tD)
130
+ if not ok:
131
+ etA = time.time()
132
+ logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
162
133
  #
163
- if mL:
164
- tIdxD["matched_ids"] = mL
165
- successList.append(ccId)
166
- else:
167
- logger.info("No match result for any form of %s", ccId)
168
- #
169
- tIdxDL.append(tIdxD)
170
- # --
171
- startTimeL = time.time()
172
- logger.info("Saving chunk %d (len=%d)", ii, len(ccIdChunk))
173
- self.__updateObjectStore(self.__databaseName, self.__matchIndexCollectionName, tIdxDL)
174
- endTimeL = time.time()
175
- logger.info("Saved chunk %d (len=%d) in %.3f secs", ii, len(ccIdChunk), endTimeL - startTimeL)
134
+ if ok and refDL:
135
+ for tD in refDL:
136
+ pcId = tD["cid"]
137
+ inchiKey = (
138
+ self.__searchIdxD[chemId.indexName]["inchi-key"]
139
+ if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
140
+ else None
141
+ )
142
+ smiles = (
143
+ self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
144
+ )
145
+ mL.append(
146
+ {
147
+ "matched_id": pcId,
148
+ "search_id_type": chemId.identifierType,
149
+ "search_id_source": chemId.identifierSource,
150
+ "source_index_name": chemId.indexName,
151
+ "source_smiles": smiles,
152
+ "source_inchikey": inchiKey,
153
+ }
154
+ )
155
+ #
156
+ if mL:
157
+ tIdxD["matched_ids"] = mL
158
+ successList.append(ccId)
159
+ else:
160
+ logger.info("No match result for any form of %s", ccId)
161
+ #
162
+ tIdxDL.append(tIdxD)
163
+ # --
164
+ failList = sorted(set(dataList) - set(successList))
165
+ if failList:
166
+ logger.info("%s returns %d definitions with failures: %r", procName, len(failList), failList)
167
+ # --
168
+ endTime = time.time()
169
+ logger.info("%s completed updateList len %r duration %.3f secs", procName, len(ccIdList), endTime - startTime)
170
+ startTimeL = time.time()
171
+ logger.info("Saving dataList (len=%d)", len(ccIdList))
172
+ self.__updateObjectStore(self.__databaseName, self.__matchIndexCollectionName, tIdxDL)
173
+ endTimeL = time.time()
174
+ logger.info("Saved chunk (len=%d) in %.3f secs", len(ccIdList), endTimeL - startTimeL)
176
175
  except Exception as e:
177
176
  logger.exception("Failing %s for %d data items %s", procName, len(dataList), str(e))
178
- logger.info("%s dataList length %d success length %d rst1 %d rst2 %d", procName, len(dataList), len(successList), len(retList1), len(retList2))
177
+ logger.info("%s dataList length %d success length %d retList %d", procName, len(dataList), len(successList), len(retList))
179
178
  #
180
- return successList, emptyList, emptyList, diagList
179
+ return successList, retList, diagList
181
180
 
182
181
  def __updateObjectStore(self, databaseName, collectionName, objDL):
183
182
  updateDL = []
@@ -196,10 +195,6 @@ class PubChemUpdateWorker(object):
196
195
  ok = obUpd.createCollection(databaseName, collectionName, indexAttributeNames=indexAttributeNames, checkExists=True, bsonSchema=None)
197
196
  return ok
198
197
 
199
- def __chunker(self, iList, chunkSize):
200
- chunkSize = max(1, chunkSize)
201
- return (iList[i: i + chunkSize] for i in range(0, len(iList), chunkSize))
202
-
203
198
 
204
199
  class PubChemIndexCacheProvider(StashableBase):
205
200
  """Utilities to manage chemical component/BIRD to PubChem compound identifier mapping data."""
@@ -515,7 +510,7 @@ class PubChemIndexCacheProvider(StashableBase):
515
510
  Returns:
516
511
  (bool, list): status flag, list of unmatched identifiers
517
512
  """
518
- chunkSize = 50
513
+ chunkSize = 10
519
514
  exportPath = kwargs.get("exportPath", None)
520
515
  logger.info("Length starting list is %d", len(idList))
521
516
  optD = {"chunkSize": chunkSize, "exportPath": exportPath, "matchIdOnly": True}
@@ -524,14 +519,20 @@ class PubChemIndexCacheProvider(StashableBase):
524
519
  mpu = MultiProcUtil(verbose=True)
525
520
  mpu.setOptions(optD)
526
521
  mpu.set(workerObj=rWorker, workerMethod="updateList")
527
- ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=2, chunkSize=chunkSize)
528
- logger.info("Multi-proc %r failures %r result lengths %r %r", ok, len(failList), len(resultList[0]), len(resultList[1]))
522
+ ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=1, chunkSize=chunkSize)
523
+ logger.info("Multi-proc %r failures %r result lengths %r", ok, len(failList), len(resultList[0]))
529
524
  else:
530
- successList, _, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
525
+ successList, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
531
526
  failList = list(set(idList) - set(successList))
532
527
  ok = len(failList) == 0
533
528
  logger.info("Single-proc status %r failures %r", ok, len(failList))
534
529
  #
530
+ if len(failList) > 0:
531
+ if len(failList) <= 100:
532
+ logger.info("failList: %r", failList)
533
+ else:
534
+ logger.info("failList[:100]: %r", failList[:100])
535
+ #
535
536
  return ok, failList
536
537
 
537
538
  def __reloadDump(self, objD, databaseName, collectionName, indexAttributeNames=None):
@@ -2,4 +2,4 @@ __docformat__ = "google en"
2
2
  __author__ = "John Westbrook"
3
3
  __email__ = "john.westbrook@rcsb.org"
4
4
  __license__ = "Apache 2.0"
5
- __version__ = "1.28"
5
+ __version__ = "1.29"
@@ -165,7 +165,7 @@ class PubChemEtlWorkflow(object):
165
165
  birdUrlTarget = kwargs.get("birdUrlTarget", None)
166
166
  ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc-full")
167
167
  numProcChemComp = kwargs.get("numProcChemComp", 8)
168
- numProc = kwargs.get("numProc", 2)
168
+ numProc = kwargs.get("numProc", 4)
169
169
  rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
170
170
  exportPath = kwargs.get("exportPath", None)
171
171
  useStash = kwargs.get("useStash", True)
@@ -209,7 +209,7 @@ class PubChemEtlWorkflow(object):
209
209
  try:
210
210
  ok1 = ok2 = ok3 = ok4 = ok5 = ok6 = False
211
211
  # --
212
- numProc = kwargs.get("numProc", 2)
212
+ numProc = kwargs.get("numProc", 4)
213
213
  useStash = kwargs.get("useStash", True)
214
214
  useGit = kwargs.get("useGit", False)
215
215
  #
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: rcsb.exdb
3
- Version: 1.28
3
+ Version: 1.29
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -41,6 +41,7 @@ Dynamic: description
41
41
  Dynamic: description-content-type
42
42
  Dynamic: home-page
43
43
  Dynamic: license
44
+ Dynamic: license-file
44
45
  Dynamic: provides-extra
45
46
  Dynamic: requires-dist
46
47
  Dynamic: summary
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes