rcsb.exdb 1.26__tar.gz → 1.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/HISTORY.txt +2 -0
  2. {rcsb_exdb-1.26/rcsb.exdb.egg-info → rcsb_exdb-1.28}/PKG-INFO +14 -63
  3. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/README.md +0 -54
  4. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/cli/__init__.py +1 -1
  5. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tree/TreeNodeListWorker.py +10 -9
  6. {rcsb_exdb-1.26 → rcsb_exdb-1.28/rcsb.exdb.egg-info}/PKG-INFO +14 -63
  7. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/SOURCES.txt +0 -4
  8. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/requires.txt +2 -9
  9. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/requirements.txt +4 -7
  10. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/setup.py +1 -2
  11. rcsb_exdb-1.26/rcsb/exdb/cli/ExDbExec.py +0 -239
  12. rcsb_exdb-1.26/rcsb/exdb/tests/testExDbWorkflow.py +0 -145
  13. rcsb_exdb-1.26/rcsb/exdb/wf/ExDbWorkflow.py +0 -521
  14. rcsb_exdb-1.26/rcsb.exdb.egg-info/entry_points.txt +0 -2
  15. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/LICENSE +0 -0
  16. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/MANIFEST.in +0 -0
  17. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/__init__.py +0 -0
  18. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/__init__.py +0 -0
  19. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
  20. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanProvider.py +0 -0
  21. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanUtils.py +0 -0
  22. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/__init__.py +0 -0
  23. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
  24. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
  25. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
  26. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
  27. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
  28. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
  29. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/__init__.py +0 -0
  30. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationAdapter.py +0 -0
  31. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationExtractor.py +0 -0
  32. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationUtils.py +0 -0
  33. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/__init__.py +0 -0
  34. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
  35. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/entry/__init__.py +0 -0
  36. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
  37. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
  38. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
  39. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
  40. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
  41. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
  42. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
  43. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
  44. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
  45. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
  46. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
  47. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
  48. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/__init__.py +0 -0
  49. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/__init__.py +0 -0
  50. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
  51. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
  52. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
  53. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
  54. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
  55. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
  56. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
  57. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
  58. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationUtils.py +0 -0
  59. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
  60. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
  61. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
  62. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
  63. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
  64. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
  65. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
  66. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
  67. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
  68. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
  69. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
  70. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
  71. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
  72. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
  73. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
  74. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
  75. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
  76. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
  77. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
  78. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
  79. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
  80. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
  81. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
  82. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tree/__init__.py +0 -0
  83. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
  84. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
  85. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
  86. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
  87. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectValidator.py +0 -0
  88. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/__init__.py +0 -0
  89. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
  90. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
  91. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/PubChemEtlWorkflow.py +0 -0
  92. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/__init__.py +0 -0
  93. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
  94. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/not-zip-safe +0 -0
  95. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/top_level.txt +0 -0
  96. {rcsb_exdb-1.26 → rcsb_exdb-1.28}/setup.cfg +0 -0
@@ -108,3 +108,5 @@
108
108
  Update CI/CD to python 3.10
109
109
  10-Dec-2024 V1.26 Update PolymerEntityExtractor to sort extracted sequence data;
110
110
  Update Azure pipelines to run on latest macOS and ubuntu version
111
+ 23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
112
+ 11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: rcsb.exdb
3
- Version: 1.26
3
+ Version: 1.28
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -16,29 +16,34 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Classifier: Programming Language :: Python :: 3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: OpenEye-toolkits>=2024.1.1
20
19
  Requires-Dist: numpy
21
20
  Requires-Dist: jsonschema>=2.6.0
22
21
  Requires-Dist: rcsb.utils.io>=1.48
23
- Requires-Dist: rcsb.db>=1.725
24
- Requires-Dist: rcsb.utils.chem>=0.79
22
+ Requires-Dist: rcsb.db>=1.800
23
+ Requires-Dist: rcsb.utils.chem>=0.81
25
24
  Requires-Dist: rcsb.utils.chemref>=0.91
26
- Requires-Dist: rcsb.utils.citation>=0.22
27
25
  Requires-Dist: rcsb.utils.config>=0.40
28
26
  Requires-Dist: rcsb.utils.ec>=0.25
29
27
  Requires-Dist: rcsb.utils.go>=0.18
30
28
  Requires-Dist: rcsb.utils.seq>=0.82
31
- Requires-Dist: rcsb.utils.seqalign>=0.31
32
29
  Requires-Dist: rcsb.utils.targets>=0.82
33
30
  Requires-Dist: rcsb.utils.struct>=0.47
34
31
  Requires-Dist: rcsb.utils.taxonomy>=0.43
35
32
  Requires-Dist: rcsb.utils.dictionary>=1.27
36
- Requires-Dist: rcsb.workflow>=0.46
37
- Requires-Dist: statistics; python_version < "3.0"
38
33
  Provides-Extra: dev
39
34
  Requires-Dist: check-manifest; extra == "dev"
40
35
  Provides-Extra: test
41
36
  Requires-Dist: coverage; extra == "test"
37
+ Dynamic: author
38
+ Dynamic: author-email
39
+ Dynamic: classifier
40
+ Dynamic: description
41
+ Dynamic: description-content-type
42
+ Dynamic: home-page
43
+ Dynamic: license
44
+ Dynamic: provides-extra
45
+ Dynamic: requires-dist
46
+ Dynamic: summary
42
47
 
43
48
  # py-rcsb_exdb
44
49
 
@@ -105,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
105
110
  [MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
106
111
  are required to support the ExDB tools. HomeBrew also provides a variety of options for
107
112
  managing [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
108
-
109
- ### Command Line Interfaces
110
-
111
- A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
112
-
113
- ```bash
114
- exdb_exec_cli --help
115
-
116
- usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
117
- [--etl_tree_node_lists] [--config_path CONFIG_PATH]
118
- [--config_name CONFIG_NAME] [--db_type DB_TYPE]
119
- [--read_back_check] [--num_proc NUM_PROC]
120
- [--chunk_size CHUNK_SIZE]
121
- [--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
122
- [--cache_path CACHE_PATH] [--rebuild_cache]
123
-
124
- optional arguments:
125
- -h, --help show this help message and exit
126
- --data_set_id DATA_SET_ID
127
- Data set identifier (default= 2019_14 for current
128
- week)
129
- --full Fresh full load in a new tables/collections (Default)
130
- --etl_chemref ETL integrated chemical reference data
131
- --etl_tree_node_lists
132
- ETL tree node lists
133
- --config_path CONFIG_PATH
134
- Path to configuration options file
135
- --config_name CONFIG_NAME
136
- Configuration section name
137
- --db_type DB_TYPE Database server type (default=mongo)
138
- --read_back_check Perform read back check on all documents
139
- --num_proc NUM_PROC Number of processes to execute (default=2)
140
- --chunk_size CHUNK_SIZE
141
- Number of files loaded per process
142
- --document_limit DOCUMENT_LIMIT
143
- Load document limit for testing
144
- --debug Turn on verbose logging
145
- --mock Use MOCK repository configuration for testing
146
- --cache_path CACHE_PATH
147
- Top cache path for external and local resource files
148
- --rebuild_cache Rebuild cached files from remote resources
149
- ________________________________________________________________________________
150
-
151
- ```
152
-
153
- For example, to construct and load tree nodes list data collections, the following
154
- command may be used:
155
-
156
- ```bash
157
- exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
158
- --cache_path ./CACHE \
159
- --config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
160
- --config_name site_info_configuration >& LOGTREE \
161
- ```
@@ -63,57 +63,3 @@ install this system. Once HomeBrew is installed, you can further install the
63
63
  [MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
64
64
  are required to support the ExDB tools. HomeBrew also provides a variety of options for
65
65
  managing [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
66
-
67
- ### Command Line Interfaces
68
-
69
- A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
70
-
71
- ```bash
72
- exdb_exec_cli --help
73
-
74
- usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
75
- [--etl_tree_node_lists] [--config_path CONFIG_PATH]
76
- [--config_name CONFIG_NAME] [--db_type DB_TYPE]
77
- [--read_back_check] [--num_proc NUM_PROC]
78
- [--chunk_size CHUNK_SIZE]
79
- [--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
80
- [--cache_path CACHE_PATH] [--rebuild_cache]
81
-
82
- optional arguments:
83
- -h, --help show this help message and exit
84
- --data_set_id DATA_SET_ID
85
- Data set identifier (default= 2019_14 for current
86
- week)
87
- --full Fresh full load in a new tables/collections (Default)
88
- --etl_chemref ETL integrated chemical reference data
89
- --etl_tree_node_lists
90
- ETL tree node lists
91
- --config_path CONFIG_PATH
92
- Path to configuration options file
93
- --config_name CONFIG_NAME
94
- Configuration section name
95
- --db_type DB_TYPE Database server type (default=mongo)
96
- --read_back_check Perform read back check on all documents
97
- --num_proc NUM_PROC Number of processes to execute (default=2)
98
- --chunk_size CHUNK_SIZE
99
- Number of files loaded per process
100
- --document_limit DOCUMENT_LIMIT
101
- Load document limit for testing
102
- --debug Turn on verbose logging
103
- --mock Use MOCK repository configuration for testing
104
- --cache_path CACHE_PATH
105
- Top cache path for external and local resource files
106
- --rebuild_cache Rebuild cached files from remote resources
107
- ________________________________________________________________________________
108
-
109
- ```
110
-
111
- For example, to construct and load tree nodes list data collections, the following
112
- command may be used:
113
-
114
- ```bash
115
- exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
116
- --cache_path ./CACHE \
117
- --config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
118
- --config_name site_info_configuration >& LOGTREE \
119
- ```
@@ -2,4 +2,4 @@ __docformat__ = "google en"
2
2
  __author__ = "John Westbrook"
3
3
  __email__ = "john.westbrook@rcsb.org"
4
4
  __license__ = "Apache 2.0"
5
- __version__ = "1.26"
5
+ __version__ = "1.28"
@@ -9,6 +9,7 @@
9
9
  # 12-Apr-2023 dwp add CARD ontology tree
10
10
  # 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
11
11
  # 27-Aug-2024 dwp Update CARD ontology tree loading
12
+ # 23-Jan-2025 dwp Change indexed field from 'update_id' to 'id'
12
13
  #
13
14
  ##
14
15
  __docformat__ = "google en"
@@ -138,7 +139,7 @@ class TreeNodeListWorker(object):
138
139
  # logger.info("GO tree node list length %d", len(nL))
139
140
  # if doLoad:
140
141
  # collectionName = "tree_go_node_list"
141
- # ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
142
+ # ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
142
143
  # self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
143
144
  #
144
145
  # ---- CATH
@@ -147,7 +148,7 @@ class TreeNodeListWorker(object):
147
148
  logger.info("Starting load SCOP node tree length %d", len(nL))
148
149
  if doLoad:
149
150
  collectionName = "tree_cath_node_list"
150
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
151
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
151
152
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
152
153
  # ---- SCOP
153
154
  scu = ScopClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
@@ -155,7 +156,7 @@ class TreeNodeListWorker(object):
155
156
  logger.info("Starting load SCOP node tree length %d", len(nL))
156
157
  if doLoad:
157
158
  collectionName = "tree_scop_node_list"
158
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
159
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
159
160
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
160
161
  # --- SCOP2
161
162
  scu = Scop2ClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
@@ -163,7 +164,7 @@ class TreeNodeListWorker(object):
163
164
  logger.info("Starting load SCOP2 node tree length %d", len(nL))
164
165
  if doLoad:
165
166
  collectionName = "tree_scop2_node_list"
166
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
167
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
167
168
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
168
169
  # ---- Ecod
169
170
  ecu = EcodClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
@@ -171,7 +172,7 @@ class TreeNodeListWorker(object):
171
172
  logger.info("Starting load ECOD node tree length %d", len(nL))
172
173
  if doLoad:
173
174
  collectionName = "tree_ecod_node_list"
174
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
175
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
175
176
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
176
177
  # ---- EC
177
178
  edbu = EnzymeDatabaseProvider(cachePath=self.__cachePath, useCache=useCache)
@@ -179,7 +180,7 @@ class TreeNodeListWorker(object):
179
180
  logger.info("Starting load of EC node tree length %d", len(nL))
180
181
  if doLoad:
181
182
  collectionName = "tree_ec_node_list"
182
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
183
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
183
184
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
184
185
  # ---- CARD
185
186
  okCou = True
@@ -200,7 +201,7 @@ class TreeNodeListWorker(object):
200
201
  collectionName,
201
202
  loadType=loadType,
202
203
  documentList=nL,
203
- indexAttributeList=["update_id"],
204
+ indexAttributeList=["id"],
204
205
  keyNames=None,
205
206
  addValues=addValues,
206
207
  schemaLevel=None
@@ -229,7 +230,7 @@ class TreeNodeListWorker(object):
229
230
  if doLoad:
230
231
  collectionName = "tree_taxonomy_node_list"
231
232
  logger.debug("Taxonomy nodes (%d) %r", len(nL), nL[:5])
232
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
233
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
233
234
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
234
235
  logger.info("Tree loading operations completed.")
235
236
  #
@@ -241,7 +242,7 @@ class TreeNodeListWorker(object):
241
242
  nL = atcP.getTreeNodeList(filterD=atcFilterD)
242
243
  collectionName = "tree_atc_node_list"
243
244
  logger.debug("ATC node list length %d %r", len(nL), nL[:5])
244
- ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
245
+ ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
245
246
  self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
246
247
  #
247
248
  # ---
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: rcsb.exdb
3
- Version: 1.26
3
+ Version: 1.28
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -16,29 +16,34 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Classifier: Programming Language :: Python :: 3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: OpenEye-toolkits>=2024.1.1
20
19
  Requires-Dist: numpy
21
20
  Requires-Dist: jsonschema>=2.6.0
22
21
  Requires-Dist: rcsb.utils.io>=1.48
23
- Requires-Dist: rcsb.db>=1.725
24
- Requires-Dist: rcsb.utils.chem>=0.79
22
+ Requires-Dist: rcsb.db>=1.800
23
+ Requires-Dist: rcsb.utils.chem>=0.81
25
24
  Requires-Dist: rcsb.utils.chemref>=0.91
26
- Requires-Dist: rcsb.utils.citation>=0.22
27
25
  Requires-Dist: rcsb.utils.config>=0.40
28
26
  Requires-Dist: rcsb.utils.ec>=0.25
29
27
  Requires-Dist: rcsb.utils.go>=0.18
30
28
  Requires-Dist: rcsb.utils.seq>=0.82
31
- Requires-Dist: rcsb.utils.seqalign>=0.31
32
29
  Requires-Dist: rcsb.utils.targets>=0.82
33
30
  Requires-Dist: rcsb.utils.struct>=0.47
34
31
  Requires-Dist: rcsb.utils.taxonomy>=0.43
35
32
  Requires-Dist: rcsb.utils.dictionary>=1.27
36
- Requires-Dist: rcsb.workflow>=0.46
37
- Requires-Dist: statistics; python_version < "3.0"
38
33
  Provides-Extra: dev
39
34
  Requires-Dist: check-manifest; extra == "dev"
40
35
  Provides-Extra: test
41
36
  Requires-Dist: coverage; extra == "test"
37
+ Dynamic: author
38
+ Dynamic: author-email
39
+ Dynamic: classifier
40
+ Dynamic: description
41
+ Dynamic: description-content-type
42
+ Dynamic: home-page
43
+ Dynamic: license
44
+ Dynamic: provides-extra
45
+ Dynamic: requires-dist
46
+ Dynamic: summary
42
47
 
43
48
  # py-rcsb_exdb
44
49
 
@@ -105,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
105
110
  [MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
106
111
  are required to support the ExDB tools. HomeBrew also provides a variety of options for
107
112
  managing [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
108
-
109
- ### Command Line Interfaces
110
-
111
- A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
112
-
113
- ```bash
114
- exdb_exec_cli --help
115
-
116
- usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
117
- [--etl_tree_node_lists] [--config_path CONFIG_PATH]
118
- [--config_name CONFIG_NAME] [--db_type DB_TYPE]
119
- [--read_back_check] [--num_proc NUM_PROC]
120
- [--chunk_size CHUNK_SIZE]
121
- [--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
122
- [--cache_path CACHE_PATH] [--rebuild_cache]
123
-
124
- optional arguments:
125
- -h, --help show this help message and exit
126
- --data_set_id DATA_SET_ID
127
- Data set identifier (default= 2019_14 for current
128
- week)
129
- --full Fresh full load in a new tables/collections (Default)
130
- --etl_chemref ETL integrated chemical reference data
131
- --etl_tree_node_lists
132
- ETL tree node lists
133
- --config_path CONFIG_PATH
134
- Path to configuration options file
135
- --config_name CONFIG_NAME
136
- Configuration section name
137
- --db_type DB_TYPE Database server type (default=mongo)
138
- --read_back_check Perform read back check on all documents
139
- --num_proc NUM_PROC Number of processes to execute (default=2)
140
- --chunk_size CHUNK_SIZE
141
- Number of files loaded per process
142
- --document_limit DOCUMENT_LIMIT
143
- Load document limit for testing
144
- --debug Turn on verbose logging
145
- --mock Use MOCK repository configuration for testing
146
- --cache_path CACHE_PATH
147
- Top cache path for external and local resource files
148
- --rebuild_cache Rebuild cached files from remote resources
149
- ________________________________________________________________________________
150
-
151
- ```
152
-
153
- For example, to construct and load tree nodes list data collections, the following
154
- command may be used:
155
-
156
- ```bash
157
- exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
158
- --cache_path ./CACHE \
159
- --config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
160
- --config_name site_info_configuration >& LOGTREE \
161
- ```
@@ -9,7 +9,6 @@ rcsb/__init__.py
9
9
  rcsb.exdb.egg-info/PKG-INFO
10
10
  rcsb.exdb.egg-info/SOURCES.txt
11
11
  rcsb.exdb.egg-info/dependency_links.txt
12
- rcsb.exdb.egg-info/entry_points.txt
13
12
  rcsb.exdb.egg-info/not-zip-safe
14
13
  rcsb.exdb.egg-info/requires.txt
15
14
  rcsb.exdb.egg-info/top_level.txt
@@ -29,7 +28,6 @@ rcsb/exdb/citation/CitationAdapter.py
29
28
  rcsb/exdb/citation/CitationExtractor.py
30
29
  rcsb/exdb/citation/CitationUtils.py
31
30
  rcsb/exdb/citation/__init__.py
32
- rcsb/exdb/cli/ExDbExec.py
33
31
  rcsb/exdb/cli/__init__.py
34
32
  rcsb/exdb/entry/EntryInfoProvider.py
35
33
  rcsb/exdb/entry/__init__.py
@@ -58,7 +56,6 @@ rcsb/exdb/tests/testCitationExtractor.py
58
56
  rcsb/exdb/tests/testCitationUtils.py
59
57
  rcsb/exdb/tests/testEntryInfoEtlWorkflow.py
60
58
  rcsb/exdb/tests/testEntryInfoProvider.py
61
- rcsb/exdb/tests/testExDbWorkflow.py
62
59
  rcsb/exdb/tests/testGlycanEtlWorkflow.py
63
60
  rcsb/exdb/tests/testGlycanProvider.py
64
61
  rcsb/exdb/tests/testGlycanUtils.py
@@ -89,7 +86,6 @@ rcsb/exdb/utils/ObjectUpdater.py
89
86
  rcsb/exdb/utils/ObjectValidator.py
90
87
  rcsb/exdb/utils/__init__.py
91
88
  rcsb/exdb/wf/EntryInfoEtlWorkflow.py
92
- rcsb/exdb/wf/ExDbWorkflow.py
93
89
  rcsb/exdb/wf/GlycanEtlWorkflow.py
94
90
  rcsb/exdb/wf/PubChemEtlWorkflow.py
95
91
  rcsb/exdb/wf/__init__.py
@@ -1,24 +1,17 @@
1
- OpenEye-toolkits>=2024.1.1
2
1
  numpy
3
2
  jsonschema>=2.6.0
4
3
  rcsb.utils.io>=1.48
5
- rcsb.db>=1.725
6
- rcsb.utils.chem>=0.79
4
+ rcsb.db>=1.800
5
+ rcsb.utils.chem>=0.81
7
6
  rcsb.utils.chemref>=0.91
8
- rcsb.utils.citation>=0.22
9
7
  rcsb.utils.config>=0.40
10
8
  rcsb.utils.ec>=0.25
11
9
  rcsb.utils.go>=0.18
12
10
  rcsb.utils.seq>=0.82
13
- rcsb.utils.seqalign>=0.31
14
11
  rcsb.utils.targets>=0.82
15
12
  rcsb.utils.struct>=0.47
16
13
  rcsb.utils.taxonomy>=0.43
17
14
  rcsb.utils.dictionary>=1.27
18
- rcsb.workflow>=0.46
19
-
20
- [:python_version < "3.0"]
21
- statistics
22
15
 
23
16
  [dev]
24
17
  check-manifest
@@ -1,20 +1,17 @@
1
1
  --extra-index-url https://pypi.anaconda.org/OpenEye/simple
2
- OpenEye-toolkits >= 2024.1.1
2
+ # Above line may be needed despite the OpenEye package not being a direct requirement of this package (it's used by rcsb.utils.chem)
3
+ # OpenEye-toolkits >= 2024.1.1
3
4
  numpy
4
5
  jsonschema >= 2.6.0
5
6
  rcsb.utils.io >= 1.48
6
- rcsb.db >= 1.725
7
- rcsb.utils.chem >= 0.79
7
+ rcsb.db >= 1.800
8
+ rcsb.utils.chem >= 0.81
8
9
  rcsb.utils.chemref >= 0.91
9
- rcsb.utils.citation >= 0.22
10
10
  rcsb.utils.config >= 0.40
11
11
  rcsb.utils.ec >= 0.25
12
12
  rcsb.utils.go >= 0.18
13
13
  rcsb.utils.seq >= 0.82
14
- rcsb.utils.seqalign >= 0.31
15
14
  rcsb.utils.targets >= 0.82
16
15
  rcsb.utils.struct >= 0.47
17
16
  rcsb.utils.taxonomy >= 0.43
18
17
  rcsb.utils.dictionary >= 1.27
19
- rcsb.workflow >= 0.46
20
- statistics; python_version < "3.0"
@@ -47,7 +47,6 @@ setup(
47
47
  "Programming Language :: Python :: 3.9",
48
48
  "Programming Language :: Python :: 3.10",
49
49
  ],
50
- entry_points={"console_scripts": ["exdb_exec_cli=rcsb.exdb.cli.ExDbExec:main"]},
51
50
  #
52
51
  install_requires=packagesRequired[1:],
53
52
  packages=find_packages(exclude=["rcsb.mock-data", "rcsb.exdb.tests-anal", "rcsb.exdb.tests-*", "tests.*"]),
@@ -57,7 +56,7 @@ setup(
57
56
  },
58
57
  #
59
58
  test_suite="rcsb.exdb.tests",
60
- tests_require=["tox"],
59
+ tests_require=["tox", "rcsb.utils.citation >= 0.22"],
61
60
  #
62
61
  # Not configured ...
63
62
  extras_require={"dev": ["check-manifest"], "test": ["coverage"]},