rcsb.exdb 1.28__tar.gz → 1.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/HISTORY.txt +2 -0
  2. {rcsb_exdb-1.28/rcsb.exdb.egg-info → rcsb_exdb-1.30}/PKG-INFO +5 -4
  3. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/ChemRefEtlWorker.py +9 -6
  4. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/ChemRefExtractor.py +3 -2
  5. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +73 -72
  6. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/cli/__init__.py +1 -1
  7. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/fixturePdbxLoader.py +11 -4
  8. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testObjectExtractor.py +2 -2
  9. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +1 -0
  10. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testTreeNodeListWorker.py +1 -1
  11. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tree/TreeNodeListWorker.py +87 -127
  12. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/wf/PubChemEtlWorkflow.py +2 -2
  13. {rcsb_exdb-1.28 → rcsb_exdb-1.30/rcsb.exdb.egg-info}/PKG-INFO +5 -4
  14. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb.exdb.egg-info/requires.txt +2 -2
  15. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/requirements.txt +2 -2
  16. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/LICENSE +0 -0
  17. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/MANIFEST.in +0 -0
  18. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/README.md +0 -0
  19. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/__init__.py +0 -0
  20. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/__init__.py +0 -0
  21. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
  22. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/branch/GlycanProvider.py +0 -0
  23. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/branch/GlycanUtils.py +0 -0
  24. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/branch/__init__.py +0 -0
  25. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
  26. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
  27. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
  28. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/chemref/__init__.py +0 -0
  29. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/citation/CitationAdapter.py +0 -0
  30. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/citation/CitationExtractor.py +0 -0
  31. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/citation/CitationUtils.py +0 -0
  32. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/citation/__init__.py +0 -0
  33. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
  34. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/entry/__init__.py +0 -0
  35. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
  36. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
  37. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
  38. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
  39. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
  40. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
  41. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
  42. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
  43. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
  44. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
  45. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
  46. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
  47. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/seq/__init__.py +0 -0
  48. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/__init__.py +0 -0
  49. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
  50. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
  51. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
  52. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
  53. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
  54. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
  55. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
  56. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testCitationUtils.py +0 -0
  57. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
  58. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
  59. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
  60. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
  61. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
  62. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
  63. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
  64. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
  65. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
  66. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
  67. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
  68. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
  69. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
  70. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
  71. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
  72. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
  73. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
  74. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
  75. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
  76. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
  77. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/tree/__init__.py +0 -0
  78. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
  79. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
  80. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
  81. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
  82. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/utils/ObjectValidator.py +0 -0
  83. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/utils/__init__.py +0 -0
  84. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
  85. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
  86. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb/exdb/wf/__init__.py +0 -0
  87. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb.exdb.egg-info/SOURCES.txt +0 -0
  88. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
  89. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb.exdb.egg-info/not-zip-safe +0 -0
  90. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/rcsb.exdb.egg-info/top_level.txt +0 -0
  91. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/setup.cfg +0 -0
  92. {rcsb_exdb-1.28 → rcsb_exdb-1.30}/setup.py +0 -0
@@ -110,3 +110,5 @@
110
110
  Update Azure pipelines to run on latest macOS and ubuntu version
111
111
  23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
112
112
  11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
113
+ 8-Apr-2025 V1.29 Add more logging to PubChemIndexCacheProvider and increase default numProc
114
+ 2-Oct-2025 V1.30 Make use of ExDB configuration file for loading drugbank and tree node list DBs/collections and setting indexed fields
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: rcsb.exdb
3
- Version: 1.28
3
+ Version: 1.30
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -19,8 +19,8 @@ License-File: LICENSE
19
19
  Requires-Dist: numpy
20
20
  Requires-Dist: jsonschema>=2.6.0
21
21
  Requires-Dist: rcsb.utils.io>=1.48
22
- Requires-Dist: rcsb.db>=1.800
23
- Requires-Dist: rcsb.utils.chem>=0.81
22
+ Requires-Dist: rcsb.db>=1.808
23
+ Requires-Dist: rcsb.utils.chem>=0.84
24
24
  Requires-Dist: rcsb.utils.chemref>=0.91
25
25
  Requires-Dist: rcsb.utils.config>=0.40
26
26
  Requires-Dist: rcsb.utils.ec>=0.25
@@ -41,6 +41,7 @@ Dynamic: description
41
41
  Dynamic: description-content-type
42
42
  Dynamic: home-page
43
43
  Dynamic: license
44
+ Dynamic: license-file
44
45
  Dynamic: provides-extra
45
46
  Dynamic: requires-dist
46
47
  Dynamic: summary
@@ -7,6 +7,8 @@
7
7
  # Updates:
8
8
  # 9-Dec-2018 jdw add validation methods
9
9
  # 3-Sep-2019 jdw move to rcsb.exdb.chemref
10
+ # 7-Aug-2025 dwp change target DB and collection from "drugbank_core" to "dw" and "core_drugbank" (as part of transition to DW);
11
+ # make use of configuration file for loading drugbank collection and setting indexed fields
10
12
  #
11
13
  ##
12
14
  __docformat__ = "google en"
@@ -66,9 +68,10 @@ class ChemRefEtlWorker(object):
66
68
  desp = DataExchangeStatus()
67
69
  statusStartTimestamp = desp.setStartTime()
68
70
  addValues = {}
71
+ collectionGroupName = "core_drugbank"
69
72
  #
70
73
  if extResource == "DrugBank":
71
- databaseName = "drugbank_core"
74
+ databaseNameMongo = self.__schP.getDatabaseMongoName(collectionGroupName=collectionGroupName)
72
75
  configName = self.__cfgOb.getDefaultSectionName()
73
76
  user = self.__cfgOb.get("_DRUGBANK_AUTH_USERNAME", sectionName=configName)
74
77
  pw = self.__cfgOb.get("_DRUGBANK_AUTH_PASSWORD", sectionName=configName)
@@ -81,10 +84,10 @@ class ChemRefEtlWorker(object):
81
84
  #
82
85
  logger.info("Resource %r extracted mapped document length %d", extResource, len(dList))
83
86
  logger.debug("Objects %r", dList[:2])
84
- sD, _, collectionList, _ = self.__schP.getSchemaInfo(databaseName)
87
+ _, _, collectionList, docIndexD = self.__schP.getSchemaInfo(collectionGroupName=collectionGroupName)
85
88
  collectionName = collectionList[0] if collectionList else "unassigned"
86
- indexL = sD.getDocumentIndex(collectionName, "primary")
87
- logger.info("Database %r collection %r index attributes %r", databaseName, collectionName, indexL)
89
+ indexDL = docIndexD[collectionName] if collectionName in docIndexD else []
90
+ logger.info("Database %r collection %r index attributes %r", databaseNameMongo, collectionName, indexDL)
88
91
  #
89
92
  # For some reason, 'addValues' was being overwritten with an empty dict (https://github.com/rcsb/py-rcsb_exdb/commit/26bd79e9a2fffc97c034b4116dece9248d1c1f39)
90
93
  # Will need to review this -- do we want to add the schema version values or not? (Also, see similar logic in UniProtCoreEtlWorker.py)
@@ -103,8 +106,8 @@ class ChemRefEtlWorker(object):
103
106
  readBackCheck=self.__readBackCheck,
104
107
  )
105
108
  #
106
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=dList, indexAttributeList=indexL, keyNames=None, addValues=addValues)
107
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
109
+ ok = dl.load(databaseNameMongo, collectionName, loadType=loadType, documentList=dList, keyNames=None, addValues=addValues, indexDL=indexDL)
110
+ self.__updateStatus(updateId, databaseNameMongo, collectionName, ok, statusStartTimestamp)
108
111
 
109
112
  return True
110
113
  except Exception as e:
@@ -7,6 +7,7 @@
7
7
  # Updates:
8
8
  # 7-Jan-2019 jdw moved from ChemRefEtlWorker.
9
9
  # 3-Sep-2019 jdw moved again to module rcsb.exdb.chemref
10
+ # 14-Aug-2025 dwp rename bird_chem_comp_core to core_chem_comp
10
11
  #
11
12
  ##
12
13
  __docformat__ = "google en"
@@ -42,8 +43,8 @@ class ChemRefExtractor(object):
42
43
  """
43
44
  idD = {}
44
45
  try:
45
- databaseName = "bird_chem_comp_core"
46
- collectionName = "bird_chem_comp_core"
46
+ databaseName = "dw"
47
+ collectionName = "core_chem_comp"
47
48
  selectD = {"rcsb_chem_comp_related.resource_name": referenceResourceName}
48
49
  selectionList = ["rcsb_id", "rcsb_chem_comp_related"]
49
50
  logger.info("Searching %s %s with selection query %r", databaseName, collectionName, selectD)
@@ -9,6 +9,7 @@
9
9
  # 16-Jul-2020 jdw separate index and reference data management.
10
10
  # 23-Jul-2021 jdw Make PubChemIndexCacheProvider a subclass of StashableBase()
11
11
  # 2-Mar-2023 aae Return correct status from Single proc
12
+ # 8-Apr-2025 dwp Let MultiProc handle chunking; add more logging to debug slowness on west coast
12
13
  #
13
14
  ##
14
15
  __docformat__ = "google en"
@@ -100,84 +101,82 @@ class PubChemUpdateWorker(object):
100
101
  #
101
102
  """
102
103
  _ = workingDir
103
- chunkSize = optionsD.get("chunkSize", 50)
104
104
  matchIdOnly = optionsD.get("matchIdOnly", True)
105
105
  # Path to store raw request data -
106
106
  exportPath = optionsD.get("exportPath", None)
107
107
  #
108
108
  successList = []
109
- retList1 = []
110
- retList2 = []
111
109
  diagList = []
112
- emptyList = []
110
+ failList = []
111
+ retList = []
113
112
  #
114
113
  try:
114
+ startTime = time.time()
115
115
  tU = TimeUtil()
116
- ccIdList = dataList
117
- numChunks = len(list(self.__chunker(ccIdList, chunkSize)))
118
- logger.info("%s search starting for %d reference definitions (in chunks of length %d)", procName, len(ccIdList), chunkSize)
119
- for ii, ccIdChunk in enumerate(self.__chunker(ccIdList, chunkSize), 1):
120
- logger.info("%s starting chunk for %d of %d", procName, ii, numChunks)
121
- # tDL = []
122
- tIdxDL = []
123
- timeS = tU.getDateTimeObj(tU.getTimestamp())
124
- for ccId in ccIdChunk:
125
- # Get various forms from the search index -
126
- chemIdList = self.__genChemIdList(ccId)
127
- tIdxD = {"rcsb_id": ccId, "rcsb_last_update": timeS}
116
+ ccIdList = dataList # len(dataList) should be of size chunkSize
117
+ logger.info("%s search starting for %d reference definitions (matchIdOnly %r exportPath %r)", procName, len(ccIdList), matchIdOnly, exportPath)
118
+ tIdxDL = []
119
+ timeS = tU.getDateTimeObj(tU.getTimestamp())
120
+ for ccId in ccIdList:
121
+ # Get various forms from the search index -
122
+ chemIdList = self.__genChemIdList(ccId)
123
+ tIdxD = {"rcsb_id": ccId, "rcsb_last_update": timeS}
124
+ #
125
+ mL = []
126
+ for chemId in chemIdList:
127
+ stA = time.time()
128
+ ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
128
129
  #
129
- mL = []
130
- for chemId in chemIdList:
131
- stA = time.time()
132
- ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
133
- #
134
- if not ok:
135
- etA = time.time()
136
- logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
137
-
138
- #
139
- if ok and refDL:
140
- for tD in refDL:
141
- pcId = tD["cid"]
142
- inchiKey = (
143
- self.__searchIdxD[chemId.indexName]["inchi-key"]
144
- if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
145
- else None
146
- )
147
- smiles = (
148
- self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
149
- )
150
- mL.append(
151
- {
152
- "matched_id": pcId,
153
- "search_id_type": chemId.identifierType,
154
- "search_id_source": chemId.identifierSource,
155
- "source_index_name": chemId.indexName,
156
- "source_smiles": smiles,
157
- "source_inchikey": inchiKey,
158
- }
159
- )
160
- # tD.update({"rcsb_id": pcId, "rcsb_last_update": timeS})
161
- # tDL.append(tD)
130
+ if not ok:
131
+ etA = time.time()
132
+ logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
162
133
  #
163
- if mL:
164
- tIdxD["matched_ids"] = mL
165
- successList.append(ccId)
166
- else:
167
- logger.info("No match result for any form of %s", ccId)
168
- #
169
- tIdxDL.append(tIdxD)
170
- # --
171
- startTimeL = time.time()
172
- logger.info("Saving chunk %d (len=%d)", ii, len(ccIdChunk))
173
- self.__updateObjectStore(self.__databaseName, self.__matchIndexCollectionName, tIdxDL)
174
- endTimeL = time.time()
175
- logger.info("Saved chunk %d (len=%d) in %.3f secs", ii, len(ccIdChunk), endTimeL - startTimeL)
134
+ if ok and refDL:
135
+ for tD in refDL:
136
+ pcId = tD["cid"]
137
+ inchiKey = (
138
+ self.__searchIdxD[chemId.indexName]["inchi-key"]
139
+ if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
140
+ else None
141
+ )
142
+ smiles = (
143
+ self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
144
+ )
145
+ mL.append(
146
+ {
147
+ "matched_id": pcId,
148
+ "search_id_type": chemId.identifierType,
149
+ "search_id_source": chemId.identifierSource,
150
+ "source_index_name": chemId.indexName,
151
+ "source_smiles": smiles,
152
+ "source_inchikey": inchiKey,
153
+ }
154
+ )
155
+ #
156
+ if mL:
157
+ tIdxD["matched_ids"] = mL
158
+ successList.append(ccId)
159
+ else:
160
+ logger.info("No match result for any form of %s", ccId)
161
+ #
162
+ tIdxDL.append(tIdxD)
163
+ # --
164
+ failList = sorted(set(dataList) - set(successList))
165
+ if failList:
166
+ logger.info("%s returns %d definitions with failures: %r", procName, len(failList), failList)
167
+ # --
168
+ endTime = time.time()
169
+ logger.info("%s completed updateList len %r duration %.3f secs", procName, len(ccIdList), endTime - startTime)
170
+ startTimeL = time.time()
171
+ logger.info("Saving dataList (len=%d)", len(ccIdList))
172
+ self.__updateObjectStore(self.__databaseName, self.__matchIndexCollectionName, tIdxDL)
173
+ endTimeL = time.time()
174
+ logger.info("Saved chunk (len=%d) in %.3f secs", len(ccIdList), endTimeL - startTimeL)
176
175
  except Exception as e:
177
176
  logger.exception("Failing %s for %d data items %s", procName, len(dataList), str(e))
178
- logger.info("%s dataList length %d success length %d rst1 %d rst2 %d", procName, len(dataList), len(successList), len(retList1), len(retList2))
177
+ logger.info("%s dataList length %d success length %d retList %d", procName, len(dataList), len(successList), len(retList))
179
178
  #
180
- return successList, emptyList, emptyList, diagList
179
+ return successList, retList, diagList
181
180
 
182
181
  def __updateObjectStore(self, databaseName, collectionName, objDL):
183
182
  updateDL = []
@@ -196,10 +195,6 @@ class PubChemUpdateWorker(object):
196
195
  ok = obUpd.createCollection(databaseName, collectionName, indexAttributeNames=indexAttributeNames, checkExists=True, bsonSchema=None)
197
196
  return ok
198
197
 
199
- def __chunker(self, iList, chunkSize):
200
- chunkSize = max(1, chunkSize)
201
- return (iList[i: i + chunkSize] for i in range(0, len(iList), chunkSize))
202
-
203
198
 
204
199
  class PubChemIndexCacheProvider(StashableBase):
205
200
  """Utilities to manage chemical component/BIRD to PubChem compound identifier mapping data."""
@@ -515,7 +510,7 @@ class PubChemIndexCacheProvider(StashableBase):
515
510
  Returns:
516
511
  (bool, list): status flag, list of unmatched identifiers
517
512
  """
518
- chunkSize = 50
513
+ chunkSize = 10
519
514
  exportPath = kwargs.get("exportPath", None)
520
515
  logger.info("Length starting list is %d", len(idList))
521
516
  optD = {"chunkSize": chunkSize, "exportPath": exportPath, "matchIdOnly": True}
@@ -524,14 +519,20 @@ class PubChemIndexCacheProvider(StashableBase):
524
519
  mpu = MultiProcUtil(verbose=True)
525
520
  mpu.setOptions(optD)
526
521
  mpu.set(workerObj=rWorker, workerMethod="updateList")
527
- ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=2, chunkSize=chunkSize)
528
- logger.info("Multi-proc %r failures %r result lengths %r %r", ok, len(failList), len(resultList[0]), len(resultList[1]))
522
+ ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=1, chunkSize=chunkSize)
523
+ logger.info("Multi-proc %r failures %r result lengths %r", ok, len(failList), len(resultList[0]))
529
524
  else:
530
- successList, _, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
525
+ successList, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
531
526
  failList = list(set(idList) - set(successList))
532
527
  ok = len(failList) == 0
533
528
  logger.info("Single-proc status %r failures %r", ok, len(failList))
534
529
  #
530
+ if len(failList) > 0:
531
+ if len(failList) <= 100:
532
+ logger.info("failList: %r", failList)
533
+ else:
534
+ logger.info("failList[:100]: %r", failList[:100])
535
+ #
535
536
  return ok, failList
536
537
 
537
538
  def __reloadDump(self, objD, databaseName, collectionName, indexAttributeNames=None):
@@ -2,4 +2,4 @@ __docformat__ = "google en"
2
2
  __author__ = "John Westbrook"
3
3
  __email__ = "john.westbrook@rcsb.org"
4
4
  __license__ = "Apache 2.0"
5
- __version__ = "1.28"
5
+ __version__ = "1.30"
@@ -162,7 +162,9 @@ class PdbxLoaderFixture(unittest.TestCase):
162
162
  ]
163
163
  self.__ldList = [
164
164
  {
165
- "databaseName": "bird_chem_comp_core",
165
+ # "databaseName": "dw",
166
+ "collectionGroupName": "core_chem_comp",
167
+ "contentType": "bird_chem_comp_core",
166
168
  "collectionNameList": None,
167
169
  "loadType": "full",
168
170
  "mergeContentTypes": None,
@@ -170,7 +172,9 @@ class PdbxLoaderFixture(unittest.TestCase):
170
172
  "inputIdCodeList": self.__birdChemCompCoreIdList
171
173
  },
172
174
  {
173
- "databaseName": "pdbx_core",
175
+ # "databaseName": "pdbx_core",
176
+ "collectionGroupName": "pdbx_core",
177
+ "contentType": "pdbx_core",
174
178
  "collectionNameList": None,
175
179
  "loadType": "replace",
176
180
  "mergeContentTypes": ["vrpt"],
@@ -179,6 +183,8 @@ class PdbxLoaderFixture(unittest.TestCase):
179
183
  },
180
184
  # {
181
185
  # "databaseName": "pdbx_comp_model_core",
186
+ # "collectionGroupName": "pdbx_comp_model_core",
187
+ # "contentType": "pdbx_comp_model_core",
182
188
  # "collectionNameList": None,
183
189
  # "loadType": "full",
184
190
  # "mergeContentTypes": None,
@@ -220,7 +226,7 @@ class PdbxLoaderFixture(unittest.TestCase):
220
226
  """Wrapper for the PDBx loader module"""
221
227
  ok = False
222
228
  try:
223
- logger.info("Loading %s", kwargs["databaseName"])
229
+ logger.info("Loading %s", kwargs["collectionGroupName"])
224
230
  mw = PdbxLoader(
225
231
  self.__cfgOb,
226
232
  cachePath=self.__cachePath,
@@ -235,8 +241,9 @@ class PdbxLoaderFixture(unittest.TestCase):
235
241
  rebuildSchemaFlag=False,
236
242
  )
237
243
  ok = mw.load(
238
- kwargs["databaseName"],
244
+ collectionGroupName=kwargs["collectionGroupName"],
239
245
  collectionLoadList=kwargs["collectionNameList"],
246
+ contentType=kwargs["contentType"],
240
247
  loadType=kwargs["loadType"],
241
248
  inputPathList=None,
242
249
  inputIdCodeList=kwargs["inputIdCodeList"],
@@ -81,8 +81,8 @@ class ObjectExtractorTests(unittest.TestCase):
81
81
  try:
82
82
  obEx = ObjectExtractor(
83
83
  self.__cfgOb,
84
- databaseName="bird_chem_comp_core",
85
- collectionName="bird_chem_comp_core",
84
+ databaseName="dw",
85
+ collectionName="core_chem_comp",
86
86
  cacheFilePath=os.path.join(self.__workPath, "drugbank-mapping-cache.json"),
87
87
  useCache=False,
88
88
  cacheKwargs=self.__testEntryCacheKwargs,
@@ -60,6 +60,7 @@ class ReferenceSequenceAnnotationAdapterTests(unittest.TestCase):
60
60
  endTime = time.time()
61
61
  logger.info("Completed %s at %s (%.4f seconds)", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()), endTime - self.__startTime)
62
62
 
63
+ # NOTE: IF YOU DISABLE THE TEST BELOW, THEN 'testReferenceCacheProvider' FAILS. CHECK WHETHER ALL 'Reference' PROVIDERS CAN BE DISABLED.
63
64
  # @unittest.skip("Disable test - no longer using in production, and fails too frequently with 'Bad xml text' when fetching from UniProt")
64
65
  def testAnnotationAdapter(self):
65
66
  """Test case - create and read cache reference sequences assignments and related data."""
@@ -1,5 +1,5 @@
1
1
  ##
2
- # File: TreeNodeListWorkerTests.py
2
+ # File: testTreeNodeListWorker.py
3
3
  # Author: J. Westbrook
4
4
  # Date: 23-Apr-2019
5
5
  #
@@ -10,6 +10,8 @@
10
10
  # 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
11
11
  # 27-Aug-2024 dwp Update CARD ontology tree loading
12
12
  # 23-Jan-2025 dwp Change indexed field from 'update_id' to 'id'
13
+ # 7-Aug-2025 dwp Change target DB and collection names to "dw" and "tree_*" (via configuration file);
14
+ # Make use of configuration file for loading tree node lists and setting indexed fields
13
15
  #
14
16
  ##
15
17
  __docformat__ = "google en"
@@ -32,8 +34,6 @@ from rcsb.utils.struct.ScopClassificationProvider import ScopClassificationProvi
32
34
  from rcsb.utils.struct.Scop2ClassificationProvider import Scop2ClassificationProvider
33
35
  from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider
34
36
  from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
35
- # from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
36
- # from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
37
37
 
38
38
  logger = logging.getLogger(__name__)
39
39
 
@@ -76,37 +76,28 @@ class TreeNodeListWorker(object):
76
76
  Relevant configuration options:
77
77
 
78
78
  tree_node_lists_configuration:
79
- DATABASE_NAME: tree_node_lists
80
- DATABASE_VERSION_STRING: v5
81
- COLLECTION_VERSION_STRING: 1.0.0
82
- COLLECTION_TAXONOMY: tree_taxonomy_node_list
83
- COLLECTION_ENZYME: tree_ec_node_list
84
- COLLECTION_SCOP: tree_scop_node_list
85
- COLLECTION_CATH: tree_cath_node_list
86
-
79
+ DATABASE_NAME: dw
80
+ COLLECTION_VERSION_STRING: 2.1.0
81
+ COLLECTION_NAME_LIST:
82
+ - tree_taxonomy
83
+ - tree_ec
84
+ - tree_scop
85
+ - tree_scop2
86
+ - tree_cath
87
+ - tree_atc
88
+ - tree_card
89
+ - tree_ecod
90
+ COLLECTION_INDICES:
91
+ - INDEX_NAME: primary
92
+ ATTRIBUTE_NAMES:
93
+ - id
94
+ - INDEX_NAME: index_2
95
+ ATTRIBUTE_NAMES:
96
+ - parents
87
97
  """
88
98
  try:
89
99
  useCache = self.__useCache
90
100
  #
91
- # if not useCache:
92
- # cDL = ["domains_struct", "NCBI", "ec", "go", "atc"]
93
- # for cD in cDL:
94
- # try:
95
- # cfp = os.path.join(self.__cachePath, cD)
96
- # os.makedirs(cfp, 0o755)
97
- # except Exception:
98
- # pass
99
- # #
100
- # try:
101
- # cfp = os.path.join(self.__cachePath, cD)
102
- # fpL = glob.glob(os.path.join(cfp, "*"))
103
- # if fpL:
104
- # for fp in fpL:
105
- # os.remove(fp)
106
- # except Exception:
107
- # pass
108
- #
109
- #
110
101
  logger.info("Starting with cache path %r (useCache=%r)", self.__cachePath, useCache)
111
102
  #
112
103
  self.__statusList = []
@@ -124,65 +115,77 @@ class TreeNodeListWorker(object):
124
115
  readBackCheck=self.__readBackCheck,
125
116
  )
126
117
  #
127
- databaseName = "tree_node_lists"
118
+ sectionName = "tree_node_lists_configuration"
119
+ databaseNameMongo = self.__cfgOb.get("DATABASE_NAME", sectionName=sectionName)
120
+ collectionNameList = self.__cfgOb.get("COLLECTION_NAME_LIST", sectionName=sectionName)
121
+ collectionIndexList = self.__cfgOb.get("COLLECTION_INDICES", sectionName=sectionName)
122
+ # databaseNameMongo = 'dw'
123
+ # collectionNameList = ['tree_taxonomy', 'tree_ec', 'tree_scop', 'tree_scop2', 'tree_cath', 'tree_atc', 'tree_card', 'tree_ecod', 'tree_go']
124
+ # collectionIndexList = [{'INDEX_NAME': 'primary', 'ATTRIBUTE_NAMES': ['id']}, {'INDEX_NAME': 'index_2', 'ATTRIBUTE_NAMES': ['parents']}]
125
+
128
126
  # collectionVersion = self.__cfgOb.get("COLLECTION_VERSION_STRING", sectionName=sectionName)
129
127
  # addValues = {"_schema_version": collectionVersion}
130
128
  addValues = None
131
- #
132
- # --- GO - TURNED OFF 08 Aug 2023 dwp (tree is now loaded in DW)
133
- # goP = GeneOntologyProvider(goDirPath=os.path.join(self.__cachePath, "go"), useCache=useCache)
134
- # ok = goP.testCache()
135
- # anEx = AnnotationExtractor(self.__cfgOb)
136
- # goIdL = anEx.getUniqueIdentifiers("GO")
137
- # logger.info("Unique GO assignments %d", len(goIdL))
138
- # nL = goP.exportTreeNodeList(goIdL)
139
- # logger.info("GO tree node list length %d", len(nL))
140
- # if doLoad:
141
- # collectionName = "tree_go_node_list"
142
- # ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
143
- # self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
144
- #
145
- # ---- CATH
129
+
130
+ ok = True
131
+ for collectionName in collectionNameList:
132
+ nL = self.__getTreeDocList(collectionName, useCache)
133
+ if nL and doLoad:
134
+ ok = dl.load(
135
+ databaseNameMongo,
136
+ collectionName,
137
+ loadType=loadType,
138
+ documentList=nL,
139
+ keyNames=None,
140
+ addValues=addValues,
141
+ schemaLevel=None,
142
+ indexDL=collectionIndexList
143
+ ) and ok
144
+ self.__updateStatus(updateId, databaseNameMongo, collectionName, ok, statusStartTimestamp)
145
+ logger.info(
146
+ "Completed load of tree node list for database %r, collection %r, len(nL) %r (status %r)",
147
+ databaseNameMongo, collectionName, len(nL), ok
148
+ )
149
+ # ---
150
+ logger.info("Completed tree node list loading operations with loadType %r (status %r)", loadType, ok)
151
+ return True
152
+ except Exception as e:
153
+ logger.exception("Failing with %s", str(e))
154
+ return False
155
+
156
+ def __checkTaxonNodeList(self, nL):
157
+ eCount = 0
158
+ tD = {dD["id"]: True for dD in nL}
159
+ for dD in nL:
160
+ if "parents" in dD:
161
+ pId = dD["parents"][0]
162
+ if pId not in tD:
163
+ logger.info("Missing parent for taxon %d", pId)
164
+ eCount += 1
165
+ else:
166
+ logger.info("No parents for node %r", dD["id"])
167
+
168
+ def getLoadStatus(self):
169
+ return self.__statusList
170
+
171
+ def __getTreeDocList(self, collectionName, useCache):
172
+ nL = []
173
+ if collectionName.lower() == "tree_cath":
146
174
  ccu = CathClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
147
175
  nL = ccu.getTreeNodeList()
148
- logger.info("Starting load SCOP node tree length %d", len(nL))
149
- if doLoad:
150
- collectionName = "tree_cath_node_list"
151
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
152
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
153
- # ---- SCOP
176
+ elif collectionName.lower() == "tree_scop2":
177
+ scu2 = Scop2ClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
178
+ nL = scu2.getTreeNodeList()
179
+ elif collectionName.lower() == "tree_scop":
154
180
  scu = ScopClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
155
181
  nL = scu.getTreeNodeList()
156
- logger.info("Starting load SCOP node tree length %d", len(nL))
157
- if doLoad:
158
- collectionName = "tree_scop_node_list"
159
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
160
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
161
- # --- SCOP2
162
- scu = Scop2ClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
163
- nL = scu.getTreeNodeList()
164
- logger.info("Starting load SCOP2 node tree length %d", len(nL))
165
- if doLoad:
166
- collectionName = "tree_scop2_node_list"
167
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
168
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
169
- # ---- Ecod
182
+ elif collectionName.lower() == "tree_ecod":
170
183
  ecu = EcodClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
171
184
  nL = ecu.getTreeNodeList()
172
- logger.info("Starting load ECOD node tree length %d", len(nL))
173
- if doLoad:
174
- collectionName = "tree_ecod_node_list"
175
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
176
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
177
- # ---- EC
185
+ elif collectionName.lower() == "tree_ec":
178
186
  edbu = EnzymeDatabaseProvider(cachePath=self.__cachePath, useCache=useCache)
179
187
  nL = edbu.getTreeNodeList()
180
- logger.info("Starting load of EC node tree length %d", len(nL))
181
- if doLoad:
182
- collectionName = "tree_ec_node_list"
183
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
184
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
185
- # ---- CARD
188
+ elif collectionName.lower() == "tree_card":
186
189
  okCou = True
187
190
  cou = CARDTargetOntologyProvider(cachePath=self.__cachePath, useCache=useCache)
188
191
  if not cou.testCache():
@@ -193,21 +196,7 @@ class TreeNodeListWorker(object):
193
196
  okCou = False
194
197
  if okCou:
195
198
  nL = cou.getTreeNodeList()
196
- logger.info("Starting load of CARD ontology node tree length %d", len(nL))
197
- if doLoad:
198
- collectionName = "tree_card_node_list"
199
- ok = dl.load(
200
- databaseName,
201
- collectionName,
202
- loadType=loadType,
203
- documentList=nL,
204
- indexAttributeList=["id"],
205
- keyNames=None,
206
- addValues=addValues,
207
- schemaLevel=None
208
- )
209
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
210
- # ---- Taxonomy
199
+ elif collectionName.lower() == "tree_taxonomy":
211
200
  tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache)
212
201
  if self.__useFilteredLists:
213
202
  # Get the taxon coverage in the current data set -
@@ -226,43 +215,14 @@ class TreeNodeListWorker(object):
226
215
  # Get the full taxon node list without filtering
227
216
  nL = tU.exportNodeList()
228
217
  self.__checkTaxonNodeList(nL)
229
- logger.info("Starting load of taxonomy node tree length %d", len(nL))
230
- if doLoad:
231
- collectionName = "tree_taxonomy_node_list"
232
- logger.debug("Taxonomy nodes (%d) %r", len(nL), nL[:5])
233
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
234
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
235
- logger.info("Tree loading operations completed.")
236
- #
237
- # --- ATC
218
+ elif collectionName.lower() == "tree_atc":
238
219
  crEx = ChemRefExtractor(self.__cfgOb)
239
220
  atcFilterD = crEx.getChemCompAccessionMapping("ATC")
240
221
  logger.info("Length of ATC filter %d", len(atcFilterD))
241
222
  atcP = AtcProvider(cachePath=self.__cachePath, useCache=useCache)
242
223
  nL = atcP.getTreeNodeList(filterD=atcFilterD)
243
- collectionName = "tree_atc_node_list"
244
- logger.debug("ATC node list length %d %r", len(nL), nL[:5])
245
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
246
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
247
- #
248
- # ---
249
- logger.info("Completed tree node list loading operations.\n")
250
- return True
251
- except Exception as e:
252
- logger.exception("Failing with %s", str(e))
253
- return False
254
-
255
- def __checkTaxonNodeList(self, nL):
256
- eCount = 0
257
- tD = {dD["id"]: True for dD in nL}
258
- for dD in nL:
259
- if "parents" in dD:
260
- pId = dD["parents"][0]
261
- if pId not in tD:
262
- logger.info("Missing parent for taxon %d", pId)
263
- eCount += 1
264
- else:
265
- logger.info("No parents for node %r", dD["id"])
266
-
267
- def getLoadStatus(self):
268
- return self.__statusList
224
+ else:
225
+ logger.error("Unsupported tree node collection %r", collectionName)
226
+ #
227
+ logger.info("Gathered tree nodes for loading collection %s (length %d)", collectionName, len(nL))
228
+ return nL
@@ -165,7 +165,7 @@ class PubChemEtlWorkflow(object):
165
165
  birdUrlTarget = kwargs.get("birdUrlTarget", None)
166
166
  ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc-full")
167
167
  numProcChemComp = kwargs.get("numProcChemComp", 8)
168
- numProc = kwargs.get("numProc", 2)
168
+ numProc = kwargs.get("numProc", 4)
169
169
  rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
170
170
  exportPath = kwargs.get("exportPath", None)
171
171
  useStash = kwargs.get("useStash", True)
@@ -209,7 +209,7 @@ class PubChemEtlWorkflow(object):
209
209
  try:
210
210
  ok1 = ok2 = ok3 = ok4 = ok5 = ok6 = False
211
211
  # --
212
- numProc = kwargs.get("numProc", 2)
212
+ numProc = kwargs.get("numProc", 4)
213
213
  useStash = kwargs.get("useStash", True)
214
214
  useGit = kwargs.get("useGit", False)
215
215
  #
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: rcsb.exdb
3
- Version: 1.28
3
+ Version: 1.30
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -19,8 +19,8 @@ License-File: LICENSE
19
19
  Requires-Dist: numpy
20
20
  Requires-Dist: jsonschema>=2.6.0
21
21
  Requires-Dist: rcsb.utils.io>=1.48
22
- Requires-Dist: rcsb.db>=1.800
23
- Requires-Dist: rcsb.utils.chem>=0.81
22
+ Requires-Dist: rcsb.db>=1.808
23
+ Requires-Dist: rcsb.utils.chem>=0.84
24
24
  Requires-Dist: rcsb.utils.chemref>=0.91
25
25
  Requires-Dist: rcsb.utils.config>=0.40
26
26
  Requires-Dist: rcsb.utils.ec>=0.25
@@ -41,6 +41,7 @@ Dynamic: description
41
41
  Dynamic: description-content-type
42
42
  Dynamic: home-page
43
43
  Dynamic: license
44
+ Dynamic: license-file
44
45
  Dynamic: provides-extra
45
46
  Dynamic: requires-dist
46
47
  Dynamic: summary
@@ -1,8 +1,8 @@
1
1
  numpy
2
2
  jsonschema>=2.6.0
3
3
  rcsb.utils.io>=1.48
4
- rcsb.db>=1.800
5
- rcsb.utils.chem>=0.81
4
+ rcsb.db>=1.808
5
+ rcsb.utils.chem>=0.84
6
6
  rcsb.utils.chemref>=0.91
7
7
  rcsb.utils.config>=0.40
8
8
  rcsb.utils.ec>=0.25
@@ -4,8 +4,8 @@
4
4
  numpy
5
5
  jsonschema >= 2.6.0
6
6
  rcsb.utils.io >= 1.48
7
- rcsb.db >= 1.800
8
- rcsb.utils.chem >= 0.81
7
+ rcsb.db >= 1.808
8
+ rcsb.utils.chem >= 0.84
9
9
  rcsb.utils.chemref >= 0.91
10
10
  rcsb.utils.config >= 0.40
11
11
  rcsb.utils.ec >= 0.25
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes