rcsb.exdb 0.96__tar.gz → 0.98__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/HISTORY.txt +4 -1
  2. {rcsb.exdb-0.96/rcsb.exdb.egg-info → rcsb.exdb-0.98}/PKG-INFO +1 -1
  3. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/cli/__init__.py +1 -1
  4. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/fixturePdbxLoader.py +4 -4
  5. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testExDbWorkflow.py +1 -1
  6. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testTreeNodeListWorker.py +2 -0
  7. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tree/TreeNodeListWorker.py +37 -29
  8. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +2 -1
  9. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/ExDbWorkflow.py +2 -0
  10. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/GlycanEtlWorkflow.py +2 -1
  11. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/PubChemEtlWorkflow.py +3 -2
  12. {rcsb.exdb-0.96 → rcsb.exdb-0.98/rcsb.exdb.egg-info}/PKG-INFO +1 -1
  13. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/LICENSE +0 -0
  14. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/MANIFEST.in +0 -0
  15. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/README.md +0 -0
  16. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/__init__.py +0 -0
  17. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/__init__.py +0 -0
  18. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
  19. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/GlycanProvider.py +0 -0
  20. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/GlycanUtils.py +0 -0
  21. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/branch/__init__.py +0 -0
  22. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
  23. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
  24. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
  25. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
  26. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
  27. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
  28. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/chemref/__init__.py +0 -0
  29. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationAdapter.py +0 -0
  30. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationExtractor.py +0 -0
  31. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/CitationUtils.py +0 -0
  32. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/citation/__init__.py +0 -0
  33. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/cli/ExDbExec.py +0 -0
  34. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
  35. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/entry/__init__.py +0 -0
  36. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
  37. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
  38. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
  39. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
  40. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
  41. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
  42. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
  43. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
  44. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
  45. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
  46. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
  47. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
  48. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/seq/__init__.py +0 -0
  49. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/__init__.py +0 -0
  50. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
  51. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
  52. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
  53. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
  54. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
  55. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
  56. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
  57. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testCitationUtils.py +0 -0
  58. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
  59. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
  60. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
  61. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
  62. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
  63. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
  64. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
  65. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
  66. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
  67. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
  68. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
  69. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
  70. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
  71. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
  72. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
  73. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
  74. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
  75. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
  76. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
  77. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
  78. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
  79. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
  80. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/tree/__init__.py +0 -0
  81. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
  82. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
  83. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
  84. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
  85. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/ObjectValidator.py +0 -0
  86. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/utils/__init__.py +0 -0
  87. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb/exdb/wf/__init__.py +0 -0
  88. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/SOURCES.txt +0 -0
  89. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
  90. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/entry_points.txt +0 -0
  91. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/not-zip-safe +0 -0
  92. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/requires.txt +0 -0
  93. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/rcsb.exdb.egg-info/top_level.txt +0 -0
  94. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/requirements.txt +0 -0
  95. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/setup.cfg +0 -0
  96. {rcsb.exdb-0.96 → rcsb.exdb-0.98}/setup.py +0 -0
@@ -89,4 +89,7 @@
89
89
  9-Mar-2023 V0.94 Update ExDbWorkflow to make use of multiple processors for 'upd_ref_seq' operation;
90
90
  Lower refChunkSize to 10 for requests to UniProt API
91
91
  13-Mar-2023 V0.95 Updates to PubChem workflow to use multiprocess count, disable git stash testing, remove obsolete entries from test data
92
- 12-Apr-2023 V0.96 Add CARD ontology data to tree builder
92
+ 12-Apr-2023 V0.96 Add CARD ontology data to tree builder
93
+ 1-Jun-2023 V0.97 Don't back up resources to GitHub during cache update workflows
94
+ 8-Aug-2023 V0.98 Reduce memory and cpu footprint for Azure test cases;
95
+ Load full (unfiltered) taxonomy tree node list, and stop loading GO tree
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rcsb.exdb
3
- Version: 0.96
3
+ Version: 0.98
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -2,4 +2,4 @@ __docformat__ = "google en"
2
2
  __author__ = "John Westbrook"
3
3
  __email__ = "john.westbrook@rcsb.org"
4
4
  __license__ = "Apache 2.0"
5
- __version__ = "0.96"
5
+ __version__ = "0.98"
@@ -64,8 +64,8 @@ class PdbxLoaderFixture(unittest.TestCase):
64
64
  self.__failedFilePath = os.path.join(HERE, "test-output", "failed-list.txt")
65
65
  self.__cachePath = os.path.join(TOPDIR, "CACHE")
66
66
  self.__readBackCheck = True
67
- self.__numProc = 2
68
- self.__chunkSize = 10
67
+ self.__numProc = 1
68
+ self.__chunkSize = 5
69
69
  self.__fileLimit = 38
70
70
  self.__documentStyle = "rowwise_by_name_with_cardinality"
71
71
  #
@@ -172,7 +172,7 @@ class PdbxLoaderFixture(unittest.TestCase):
172
172
  {
173
173
  "databaseName": "pdbx_core",
174
174
  "collectionNameList": None,
175
- "loadType": "full",
175
+ "loadType": "replace",
176
176
  "mergeContentTypes": ["vrpt"],
177
177
  "validationLevel": "min",
178
178
  "inputIdCodeList": self.__pdbIdList
@@ -228,7 +228,7 @@ class PdbxLoaderFixture(unittest.TestCase):
228
228
  fileLimit=kwargs.get("fileLimit", self.__fileLimit),
229
229
  verbose=self.__verbose,
230
230
  readBackCheck=self.__readBackCheck,
231
- maxStepLength=2000,
231
+ maxStepLength=1000,
232
232
  useSchemaCache=True,
233
233
  rebuildSchemaFlag=False,
234
234
  )
@@ -59,7 +59,7 @@ class ExDbWorkflowTests(unittest.TestCase):
59
59
  "restoreUseGit": True,
60
60
  "restoreUseStash": False,
61
61
  }
62
- self.__loadCommonD = {"readBackCheck": True, "numProc": 2, "chunkSize": 5, "refChunkSize": 5, "loadType": "full"}
62
+ self.__loadCommonD = {"readBackCheck": True, "numProc": 2, "chunkSize": 5, "refChunkSize": 5, "loadType": "full", "useFilteredLists": True}
63
63
  #
64
64
  # These are test source files for chemical component/BIRD indices
65
65
  ccUrlTarget = os.path.join(self.__dataPath, "components-abbrev.cif")
@@ -63,6 +63,7 @@ class TreeNodeListWorkerTests(unittest.TestCase):
63
63
  self.__debugFlag = False
64
64
  self.__loadType = "full"
65
65
  self.__useCache = True
66
+ self.__useFilteredLists = True
66
67
  #
67
68
  self.__startTime = time.time()
68
69
  logger.debug("Starting %s at %s", self.id(), time.strftime("%Y %m %d %H:%M:%S", time.localtime()))
@@ -88,6 +89,7 @@ class TreeNodeListWorkerTests(unittest.TestCase):
88
89
  verbose=self.__debugFlag,
89
90
  readBackCheck=self.__readBackCheck,
90
91
  useCache=self.__useCache,
92
+ useFilteredLists=self.__useFilteredLists,
91
93
  )
92
94
  #
93
95
  ok = rhw.load(updateId, loadType=self.__loadType, doLoad=self.__doLoad)
@@ -7,6 +7,7 @@
7
7
  # Updates:
8
8
  # 9-Sep-2019 jdw add AtcProvider() and ChemrefExtractor() for ATC tree.
9
9
  # 12-Apr-2023 dwp add CARD ontology tree
10
+ # 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
10
11
  #
11
12
  ##
12
13
  __docformat__ = "google en"
@@ -20,17 +21,17 @@ import os.path
20
21
  from rcsb.db.mongo.DocumentLoader import DocumentLoader
21
22
  from rcsb.db.processors.DataExchangeStatus import DataExchangeStatus
22
23
  from rcsb.exdb.chemref.ChemRefExtractor import ChemRefExtractor
23
- from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
24
- from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
25
24
  from rcsb.utils.chemref.AtcProvider import AtcProvider
26
25
  from rcsb.utils.ec.EnzymeDatabaseProvider import EnzymeDatabaseProvider
27
26
  from rcsb.utils.targets.CARDTargetOntologyProvider import CARDTargetOntologyProvider
28
- from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
29
27
  from rcsb.utils.struct.CathClassificationProvider import CathClassificationProvider
30
28
  from rcsb.utils.struct.EcodClassificationProvider import EcodClassificationProvider
31
29
  from rcsb.utils.struct.ScopClassificationProvider import ScopClassificationProvider
32
30
  from rcsb.utils.struct.Scop2ClassificationProvider import Scop2ClassificationProvider
33
31
  from rcsb.utils.taxonomy.TaxonomyProvider import TaxonomyProvider
32
+ from rcsb.exdb.seq.TaxonomyExtractor import TaxonomyExtractor
33
+ # from rcsb.utils.go.GeneOntologyProvider import GeneOntologyProvider
34
+ # from rcsb.exdb.seq.AnnotationExtractor import AnnotationExtractor
34
35
 
35
36
  logger = logging.getLogger(__name__)
36
37
 
@@ -38,7 +39,7 @@ logger = logging.getLogger(__name__)
38
39
  class TreeNodeListWorker(object):
39
40
  """Prepare and load repository holdings and repository update data."""
40
41
 
41
- def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, readBackCheck=False, documentLimit=None, verbose=False, useCache=False):
42
+ def __init__(self, cfgOb, cachePath, numProc=1, chunkSize=10, readBackCheck=False, documentLimit=None, verbose=False, useCache=False, useFilteredLists=False):
42
43
  self.__cfgOb = cfgOb
43
44
  self.__cachePath = os.path.abspath(cachePath)
44
45
  self.__readBackCheck = readBackCheck
@@ -50,6 +51,7 @@ class TreeNodeListWorker(object):
50
51
  self.__verbose = verbose
51
52
  self.__statusList = []
52
53
  self.__useCache = useCache
54
+ self.__useFilteredLists = useFilteredLists
53
55
 
54
56
  def __updateStatus(self, updateId, databaseName, collectionName, status, startTimestamp):
55
57
  try:
@@ -122,19 +124,21 @@ class TreeNodeListWorker(object):
122
124
  # collectionVersion = self.__cfgOb.get("COLLECTION_VERSION_STRING", sectionName=sectionName)
123
125
  # addValues = {"_schema_version": collectionVersion}
124
126
  addValues = None
125
- # --- GO
126
- goP = GeneOntologyProvider(goDirPath=os.path.join(self.__cachePath, "go"), useCache=useCache)
127
- ok = goP.testCache()
128
- anEx = AnnotationExtractor(self.__cfgOb)
129
- goIdL = anEx.getUniqueIdentifiers("GO")
130
- logger.info("Unique GO assignments %d", len(goIdL))
131
- nL = goP.exportTreeNodeList(goIdL)
132
- logger.info("GO tree node list length %d", len(nL))
133
- if doLoad:
134
- collectionName = "tree_go_node_list"
135
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
136
- self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
137
- # ---- CATH
127
+ #
128
+ # --- GO - TURNED OFF 08 Aug 2023 dwp (tree is now loaded in DW)
129
+ # goP = GeneOntologyProvider(goDirPath=os.path.join(self.__cachePath, "go"), useCache=useCache)
130
+ # ok = goP.testCache()
131
+ # anEx = AnnotationExtractor(self.__cfgOb)
132
+ # goIdL = anEx.getUniqueIdentifiers("GO")
133
+ # logger.info("Unique GO assignments %d", len(goIdL))
134
+ # nL = goP.exportTreeNodeList(goIdL)
135
+ # logger.info("GO tree node list length %d", len(nL))
136
+ # if doLoad:
137
+ # collectionName = "tree_go_node_list"
138
+ # ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
139
+ # self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
140
+ #
141
+ # ---- CATH
138
142
  ccu = CathClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
139
143
  nL = ccu.getTreeNodeList()
140
144
  logger.info("Starting load SCOP node tree length %d", len(nL))
@@ -183,19 +187,23 @@ class TreeNodeListWorker(object):
183
187
  ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
184
188
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
185
189
  # ---- Taxonomy
186
- # Get the taxon coverage in the current data set -
187
- epe = TaxonomyExtractor(self.__cfgOb)
188
- tL = epe.getUniqueTaxons()
189
- logger.info("Taxon coverage length %d", len(tL))
190
- #
191
190
  tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache)
192
- fD = {1}
193
- for taxId in tL:
194
- fD.update({k: True for k in tU.getLineage(taxId)})
195
- logger.info("Taxon filter dictionary length %d", len(fD))
196
- # logger.info("fD %r" % sorted(fD))
197
- #
198
- nL = tU.exportNodeList(filterD=fD)
191
+ if self.__useFilteredLists:
192
+ # Get the taxon coverage in the current data set -
193
+ epe = TaxonomyExtractor(self.__cfgOb)
194
+ tL = epe.getUniqueTaxons()
195
+ logger.info("Taxon coverage length %d", len(tL))
196
+ #
197
+ fD = {1}
198
+ for taxId in tL:
199
+ fD.update({k: True for k in tU.getLineage(taxId)})
200
+ logger.info("Taxon filter dictionary length %d", len(fD))
201
+ logger.debug("fD %r", sorted(fD))
202
+ #
203
+ nL = tU.exportNodeList(filterD=fD)
204
+ else:
205
+ # Get the full taxon node list without filtering
206
+ nL = tU.exportNodeList()
199
207
  self.__checkTaxonNodeList(nL)
200
208
  logger.info("Starting load of taxonomy node tree length %d", len(nL))
201
209
  if doLoad:
@@ -5,6 +5,7 @@
5
5
  # Workflow wrapper -- Entry-level annotations extracted from ExDB
6
6
  #
7
7
  # Updates:
8
+ # 1-Jun-2023 aae Don't back up resources to GitHub during cache update workflows
8
9
  #
9
10
  ##
10
11
  __docformat__ = "google en"
@@ -59,7 +60,7 @@ class EntryInfoEtlWorkflow(object):
59
60
  eiP.update(self.__cfgOb, fmt="json", indent=3)
60
61
  #
61
62
  if backup:
62
- ok = eiP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=True, useStash=True)
63
+ ok = eiP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=False, useStash=True)
63
64
  logger.info("Backup entry-level annotations (%r)", ok)
64
65
  else:
65
66
  ok = True
@@ -91,6 +91,7 @@ class ExDbWorkflow(object):
91
91
  rebuildSequenceCache = kwargs.get("rebuildSequenceCache", False)
92
92
  useSequenceCache = not rebuildSequenceCache
93
93
  #
94
+ useFilteredLists = kwargs.get("useFilteredLists", False)
94
95
  except Exception as e:
95
96
  logger.exception("Argument or configuration processing failing with %s", str(e))
96
97
  return False
@@ -107,6 +108,7 @@ class ExDbWorkflow(object):
107
108
  verbose=self.__debugFlag,
108
109
  readBackCheck=readBackCheck,
109
110
  useCache=self.__useCache,
111
+ useFilteredLists=useFilteredLists,
110
112
  )
111
113
  ok = rhw.load(dataSetId, loadType=loadType)
112
114
  okS = self.loadStatus(rhw.getLoadStatus(), readBackCheck=readBackCheck)
@@ -5,6 +5,7 @@
5
5
  # Workflow wrapper -- Glycan ETL utilities
6
6
  #
7
7
  # Updates:
8
+ # 1-Jun-2023 aae Don't back up resources to GitHub during cache update workflows
8
9
  #
9
10
  ##
10
11
  __docformat__ = "google en"
@@ -64,7 +65,7 @@ class GlycanEtlWorkflow(object):
64
65
  logger.info("Matched glycan identifiers (%d)", len(riD))
65
66
  #
66
67
  if backup:
67
- ok2 = gP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=True, useStash=True)
68
+ ok2 = gP.backup(self.__cfgOb, self.__configName, self.__stashRemotePrefix, useGit=False, useStash=True)
68
69
  logger.info("Backup matched glycan identifiers (%r)", ok2)
69
70
  else:
70
71
  ok2 = True
@@ -6,6 +6,7 @@
6
6
  #
7
7
  # Updates:
8
8
  # 13-Mar-2023 aae Updates to use multiprocess count, disable git stash testing
9
+ # 1-Jun-2023 aae Don't back up resources to GitHub during cache update workflows
9
10
  ##
10
11
  __docformat__ = "google en"
11
12
  __author__ = "John Westbrook"
@@ -168,7 +169,7 @@ class PubChemEtlWorkflow(object):
168
169
  rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
169
170
  exportPath = kwargs.get("exportPath", None)
170
171
  useStash = kwargs.get("useStash", True)
171
- useGit = kwargs.get("useGit", True)
172
+ useGit = kwargs.get("useGit", False)
172
173
  #
173
174
  pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
174
175
  ok1 = pcewP.updateIndex(
@@ -207,7 +208,7 @@ class PubChemEtlWorkflow(object):
207
208
  # --
208
209
  numProc = kwargs.get("numProc", 2)
209
210
  useStash = kwargs.get("useStash", True)
210
- useGit = kwargs.get("useGit", True)
211
+ useGit = kwargs.get("useGit", False)
211
212
  #
212
213
  pcewP = PubChemEtlWrapper(self.__cfgOb, self.__cachePath, stashRemotePrefix=self.__stashRemotePrefix)
213
214
  ok1 = pcewP.updateMatchedData(numProc=numProc)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rcsb.exdb
3
- Version: 0.96
3
+ Version: 0.98
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes