rcsb.exdb 1.27__tar.gz → 1.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/HISTORY.txt +3 -1
- {rcsb_exdb-1.27/rcsb.exdb.egg-info → rcsb_exdb-1.29}/PKG-INFO +5 -63
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/README.md +0 -54
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +73 -72
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/cli/__init__.py +1 -1
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/wf/PubChemEtlWorkflow.py +2 -2
- {rcsb_exdb-1.27 → rcsb_exdb-1.29/rcsb.exdb.egg-info}/PKG-INFO +5 -63
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/SOURCES.txt +0 -4
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/requires.txt +2 -9
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/requirements.txt +4 -7
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/setup.py +1 -2
- rcsb_exdb-1.27/rcsb/exdb/cli/ExDbExec.py +0 -239
- rcsb_exdb-1.27/rcsb/exdb/tests/testExDbWorkflow.py +0 -145
- rcsb_exdb-1.27/rcsb/exdb/wf/ExDbWorkflow.py +0 -521
- rcsb_exdb-1.27/rcsb.exdb.egg-info/entry_points.txt +0 -2
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/LICENSE +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/MANIFEST.in +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/branch/GlycanProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/branch/GlycanUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/branch/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/chemref/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/citation/CitationUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/citation/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/entry/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/seq/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testCitationUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tree/TreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/tree/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/utils/ObjectValidator.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/utils/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb/exdb/wf/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/not-zip-safe +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/rcsb.exdb.egg-info/top_level.txt +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.29}/setup.cfg +0 -0
|
@@ -108,4 +108,6 @@
|
|
|
108
108
|
Update CI/CD to python 3.10
|
|
109
109
|
10-Dec-2024 V1.26 Update PolymerEntityExtractor to sort extracted sequence data;
|
|
110
110
|
Update Azure pipelines to run on latest macOS and ubuntu version
|
|
111
|
-
23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
|
|
111
|
+
23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
|
|
112
|
+
11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
|
|
113
|
+
8-Apr-2025 V1.29 Add more logging to PubChemIndexCacheProvider and increase default numProc
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.27
|
|
3
|
+
Version: 1.29
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -16,25 +16,20 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: OpenEye-toolkits>=2024.1.1
|
|
20
19
|
Requires-Dist: numpy
|
|
21
20
|
Requires-Dist: jsonschema>=2.6.0
|
|
22
21
|
Requires-Dist: rcsb.utils.io>=1.48
|
|
23
|
-
Requires-Dist: rcsb.db>=1.
|
|
24
|
-
Requires-Dist: rcsb.utils.chem>=0.
|
|
22
|
+
Requires-Dist: rcsb.db>=1.800
|
|
23
|
+
Requires-Dist: rcsb.utils.chem>=0.81
|
|
25
24
|
Requires-Dist: rcsb.utils.chemref>=0.91
|
|
26
|
-
Requires-Dist: rcsb.utils.citation>=0.22
|
|
27
25
|
Requires-Dist: rcsb.utils.config>=0.40
|
|
28
26
|
Requires-Dist: rcsb.utils.ec>=0.25
|
|
29
27
|
Requires-Dist: rcsb.utils.go>=0.18
|
|
30
28
|
Requires-Dist: rcsb.utils.seq>=0.82
|
|
31
|
-
Requires-Dist: rcsb.utils.seqalign>=0.31
|
|
32
29
|
Requires-Dist: rcsb.utils.targets>=0.82
|
|
33
30
|
Requires-Dist: rcsb.utils.struct>=0.47
|
|
34
31
|
Requires-Dist: rcsb.utils.taxonomy>=0.43
|
|
35
32
|
Requires-Dist: rcsb.utils.dictionary>=1.27
|
|
36
|
-
Requires-Dist: rcsb.workflow>=0.46
|
|
37
|
-
Requires-Dist: statistics; python_version < "3.0"
|
|
38
33
|
Provides-Extra: dev
|
|
39
34
|
Requires-Dist: check-manifest; extra == "dev"
|
|
40
35
|
Provides-Extra: test
|
|
@@ -46,6 +41,7 @@ Dynamic: description
|
|
|
46
41
|
Dynamic: description-content-type
|
|
47
42
|
Dynamic: home-page
|
|
48
43
|
Dynamic: license
|
|
44
|
+
Dynamic: license-file
|
|
49
45
|
Dynamic: provides-extra
|
|
50
46
|
Dynamic: requires-dist
|
|
51
47
|
Dynamic: summary
|
|
@@ -115,57 +111,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
115
111
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
116
112
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
117
113
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
118
|
-
|
|
119
|
-
### Command Line Interfaces
|
|
120
|
-
|
|
121
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
122
|
-
|
|
123
|
-
```bash
|
|
124
|
-
exdb_exec_cli --help
|
|
125
|
-
|
|
126
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
127
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
128
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
129
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
130
|
-
[--chunk_size CHUNK_SIZE]
|
|
131
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
132
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
133
|
-
|
|
134
|
-
optional arguments:
|
|
135
|
-
-h, --help show this help message and exit
|
|
136
|
-
--data_set_id DATA_SET_ID
|
|
137
|
-
Data set identifier (default= 2019_14 for current
|
|
138
|
-
week)
|
|
139
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
140
|
-
--etl_chemref ETL integrated chemical reference data
|
|
141
|
-
--etl_tree_node_lists
|
|
142
|
-
ETL tree node lists
|
|
143
|
-
--config_path CONFIG_PATH
|
|
144
|
-
Path to configuration options file
|
|
145
|
-
--config_name CONFIG_NAME
|
|
146
|
-
Configuration section name
|
|
147
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
148
|
-
--read_back_check Perform read back check on all documents
|
|
149
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
150
|
-
--chunk_size CHUNK_SIZE
|
|
151
|
-
Number of files loaded per process
|
|
152
|
-
--document_limit DOCUMENT_LIMIT
|
|
153
|
-
Load document limit for testing
|
|
154
|
-
--debug Turn on verbose logging
|
|
155
|
-
--mock Use MOCK repository configuration for testing
|
|
156
|
-
--cache_path CACHE_PATH
|
|
157
|
-
Top cache path for external and local resource files
|
|
158
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
159
|
-
________________________________________________________________________________
|
|
160
|
-
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
164
|
-
command may be used:
|
|
165
|
-
|
|
166
|
-
```bash
|
|
167
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
168
|
-
--cache_path ./CACHE \
|
|
169
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
170
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
171
|
-
```
|
|
@@ -63,57 +63,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
63
63
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
64
64
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
65
65
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
66
|
-
|
|
67
|
-
### Command Line Interfaces
|
|
68
|
-
|
|
69
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
70
|
-
|
|
71
|
-
```bash
|
|
72
|
-
exdb_exec_cli --help
|
|
73
|
-
|
|
74
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
75
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
76
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
77
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
78
|
-
[--chunk_size CHUNK_SIZE]
|
|
79
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
80
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
81
|
-
|
|
82
|
-
optional arguments:
|
|
83
|
-
-h, --help show this help message and exit
|
|
84
|
-
--data_set_id DATA_SET_ID
|
|
85
|
-
Data set identifier (default= 2019_14 for current
|
|
86
|
-
week)
|
|
87
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
88
|
-
--etl_chemref ETL integrated chemical reference data
|
|
89
|
-
--etl_tree_node_lists
|
|
90
|
-
ETL tree node lists
|
|
91
|
-
--config_path CONFIG_PATH
|
|
92
|
-
Path to configuration options file
|
|
93
|
-
--config_name CONFIG_NAME
|
|
94
|
-
Configuration section name
|
|
95
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
96
|
-
--read_back_check Perform read back check on all documents
|
|
97
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
98
|
-
--chunk_size CHUNK_SIZE
|
|
99
|
-
Number of files loaded per process
|
|
100
|
-
--document_limit DOCUMENT_LIMIT
|
|
101
|
-
Load document limit for testing
|
|
102
|
-
--debug Turn on verbose logging
|
|
103
|
-
--mock Use MOCK repository configuration for testing
|
|
104
|
-
--cache_path CACHE_PATH
|
|
105
|
-
Top cache path for external and local resource files
|
|
106
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
107
|
-
________________________________________________________________________________
|
|
108
|
-
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
112
|
-
command may be used:
|
|
113
|
-
|
|
114
|
-
```bash
|
|
115
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
116
|
-
--cache_path ./CACHE \
|
|
117
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
118
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
119
|
-
```
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
# 16-Jul-2020 jdw separate index and reference data management.
|
|
10
10
|
# 23-Jul-2021 jdw Make PubChemIndexCacheProvider a subclass of StashableBase()
|
|
11
11
|
# 2-Mar-2023 aae Return correct status from Single proc
|
|
12
|
+
# 8-Apr-2025 dwp Let MultiProc handle chunking; add more logging to debug slowness on west coast
|
|
12
13
|
#
|
|
13
14
|
##
|
|
14
15
|
__docformat__ = "google en"
|
|
@@ -100,84 +101,82 @@ class PubChemUpdateWorker(object):
|
|
|
100
101
|
#
|
|
101
102
|
"""
|
|
102
103
|
_ = workingDir
|
|
103
|
-
chunkSize = optionsD.get("chunkSize", 50)
|
|
104
104
|
matchIdOnly = optionsD.get("matchIdOnly", True)
|
|
105
105
|
# Path to store raw request data -
|
|
106
106
|
exportPath = optionsD.get("exportPath", None)
|
|
107
107
|
#
|
|
108
108
|
successList = []
|
|
109
|
-
retList1 = []
|
|
110
|
-
retList2 = []
|
|
111
109
|
diagList = []
|
|
112
|
-
|
|
110
|
+
failList = []
|
|
111
|
+
retList = []
|
|
113
112
|
#
|
|
114
113
|
try:
|
|
114
|
+
startTime = time.time()
|
|
115
115
|
tU = TimeUtil()
|
|
116
|
-
ccIdList = dataList
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
#
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
116
|
+
ccIdList = dataList # len(dataList) should be of size chunkSize
|
|
117
|
+
logger.info("%s search starting for %d reference definitions (matchIdOnly %r exportPath %r)", procName, len(ccIdList), matchIdOnly, exportPath)
|
|
118
|
+
tIdxDL = []
|
|
119
|
+
timeS = tU.getDateTimeObj(tU.getTimestamp())
|
|
120
|
+
for ccId in ccIdList:
|
|
121
|
+
# Get various forms from the search index -
|
|
122
|
+
chemIdList = self.__genChemIdList(ccId)
|
|
123
|
+
tIdxD = {"rcsb_id": ccId, "rcsb_last_update": timeS}
|
|
124
|
+
#
|
|
125
|
+
mL = []
|
|
126
|
+
for chemId in chemIdList:
|
|
127
|
+
stA = time.time()
|
|
128
|
+
ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
|
|
128
129
|
#
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
ok, refDL = self.__pcU.assemble(chemId, exportPath=exportPath, matchIdOnly=matchIdOnly)
|
|
133
|
-
#
|
|
134
|
-
if not ok:
|
|
135
|
-
etA = time.time()
|
|
136
|
-
logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
|
|
137
|
-
|
|
138
|
-
#
|
|
139
|
-
if ok and refDL:
|
|
140
|
-
for tD in refDL:
|
|
141
|
-
pcId = tD["cid"]
|
|
142
|
-
inchiKey = (
|
|
143
|
-
self.__searchIdxD[chemId.indexName]["inchi-key"]
|
|
144
|
-
if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
|
|
145
|
-
else None
|
|
146
|
-
)
|
|
147
|
-
smiles = (
|
|
148
|
-
self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
|
|
149
|
-
)
|
|
150
|
-
mL.append(
|
|
151
|
-
{
|
|
152
|
-
"matched_id": pcId,
|
|
153
|
-
"search_id_type": chemId.identifierType,
|
|
154
|
-
"search_id_source": chemId.identifierSource,
|
|
155
|
-
"source_index_name": chemId.indexName,
|
|
156
|
-
"source_smiles": smiles,
|
|
157
|
-
"source_inchikey": inchiKey,
|
|
158
|
-
}
|
|
159
|
-
)
|
|
160
|
-
# tD.update({"rcsb_id": pcId, "rcsb_last_update": timeS})
|
|
161
|
-
# tDL.append(tD)
|
|
130
|
+
if not ok:
|
|
131
|
+
etA = time.time()
|
|
132
|
+
logger.debug("Failing %s search source %s for %s (%.4f secs)", chemId.identifierType, chemId.identifierSource, chemId.idCode, etA - stA)
|
|
162
133
|
#
|
|
163
|
-
if
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
134
|
+
if ok and refDL:
|
|
135
|
+
for tD in refDL:
|
|
136
|
+
pcId = tD["cid"]
|
|
137
|
+
inchiKey = (
|
|
138
|
+
self.__searchIdxD[chemId.indexName]["inchi-key"]
|
|
139
|
+
if chemId.indexName in self.__searchIdxD and "inchi-key" in self.__searchIdxD[chemId.indexName]
|
|
140
|
+
else None
|
|
141
|
+
)
|
|
142
|
+
smiles = (
|
|
143
|
+
self.__searchIdxD[chemId.indexName]["smiles"] if chemId.indexName in self.__searchIdxD and "smiles" in self.__searchIdxD[chemId.indexName] else None
|
|
144
|
+
)
|
|
145
|
+
mL.append(
|
|
146
|
+
{
|
|
147
|
+
"matched_id": pcId,
|
|
148
|
+
"search_id_type": chemId.identifierType,
|
|
149
|
+
"search_id_source": chemId.identifierSource,
|
|
150
|
+
"source_index_name": chemId.indexName,
|
|
151
|
+
"source_smiles": smiles,
|
|
152
|
+
"source_inchikey": inchiKey,
|
|
153
|
+
}
|
|
154
|
+
)
|
|
155
|
+
#
|
|
156
|
+
if mL:
|
|
157
|
+
tIdxD["matched_ids"] = mL
|
|
158
|
+
successList.append(ccId)
|
|
159
|
+
else:
|
|
160
|
+
logger.info("No match result for any form of %s", ccId)
|
|
161
|
+
#
|
|
162
|
+
tIdxDL.append(tIdxD)
|
|
163
|
+
# --
|
|
164
|
+
failList = sorted(set(dataList) - set(successList))
|
|
165
|
+
if failList:
|
|
166
|
+
logger.info("%s returns %d definitions with failures: %r", procName, len(failList), failList)
|
|
167
|
+
# --
|
|
168
|
+
endTime = time.time()
|
|
169
|
+
logger.info("%s completed updateList len %r duration %.3f secs", procName, len(ccIdList), endTime - startTime)
|
|
170
|
+
startTimeL = time.time()
|
|
171
|
+
logger.info("Saving dataList (len=%d)", len(ccIdList))
|
|
172
|
+
self.__updateObjectStore(self.__databaseName, self.__matchIndexCollectionName, tIdxDL)
|
|
173
|
+
endTimeL = time.time()
|
|
174
|
+
logger.info("Saved chunk (len=%d) in %.3f secs", len(ccIdList), endTimeL - startTimeL)
|
|
176
175
|
except Exception as e:
|
|
177
176
|
logger.exception("Failing %s for %d data items %s", procName, len(dataList), str(e))
|
|
178
|
-
logger.info("%s dataList length %d success length %d
|
|
177
|
+
logger.info("%s dataList length %d success length %d retList %d", procName, len(dataList), len(successList), len(retList))
|
|
179
178
|
#
|
|
180
|
-
return successList,
|
|
179
|
+
return successList, retList, diagList
|
|
181
180
|
|
|
182
181
|
def __updateObjectStore(self, databaseName, collectionName, objDL):
|
|
183
182
|
updateDL = []
|
|
@@ -196,10 +195,6 @@ class PubChemUpdateWorker(object):
|
|
|
196
195
|
ok = obUpd.createCollection(databaseName, collectionName, indexAttributeNames=indexAttributeNames, checkExists=True, bsonSchema=None)
|
|
197
196
|
return ok
|
|
198
197
|
|
|
199
|
-
def __chunker(self, iList, chunkSize):
|
|
200
|
-
chunkSize = max(1, chunkSize)
|
|
201
|
-
return (iList[i: i + chunkSize] for i in range(0, len(iList), chunkSize))
|
|
202
|
-
|
|
203
198
|
|
|
204
199
|
class PubChemIndexCacheProvider(StashableBase):
|
|
205
200
|
"""Utilities to manage chemical component/BIRD to PubChem compound identifier mapping data."""
|
|
@@ -515,7 +510,7 @@ class PubChemIndexCacheProvider(StashableBase):
|
|
|
515
510
|
Returns:
|
|
516
511
|
(bool, list): status flag, list of unmatched identifiers
|
|
517
512
|
"""
|
|
518
|
-
chunkSize =
|
|
513
|
+
chunkSize = 10
|
|
519
514
|
exportPath = kwargs.get("exportPath", None)
|
|
520
515
|
logger.info("Length starting list is %d", len(idList))
|
|
521
516
|
optD = {"chunkSize": chunkSize, "exportPath": exportPath, "matchIdOnly": True}
|
|
@@ -524,14 +519,20 @@ class PubChemIndexCacheProvider(StashableBase):
|
|
|
524
519
|
mpu = MultiProcUtil(verbose=True)
|
|
525
520
|
mpu.setOptions(optD)
|
|
526
521
|
mpu.set(workerObj=rWorker, workerMethod="updateList")
|
|
527
|
-
ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=
|
|
528
|
-
logger.info("Multi-proc %r failures %r result lengths %r
|
|
522
|
+
ok, failList, resultList, _ = mpu.runMulti(dataList=idList, numProc=numProc, numResults=1, chunkSize=chunkSize)
|
|
523
|
+
logger.info("Multi-proc %r failures %r result lengths %r", ok, len(failList), len(resultList[0]))
|
|
529
524
|
else:
|
|
530
|
-
successList, _, _
|
|
525
|
+
successList, _, _ = rWorker.updateList(idList, "SingleProc", optD, self.__dirPath)
|
|
531
526
|
failList = list(set(idList) - set(successList))
|
|
532
527
|
ok = len(failList) == 0
|
|
533
528
|
logger.info("Single-proc status %r failures %r", ok, len(failList))
|
|
534
529
|
#
|
|
530
|
+
if len(failList) > 0:
|
|
531
|
+
if len(failList) <= 100:
|
|
532
|
+
logger.info("failList: %r", failList)
|
|
533
|
+
else:
|
|
534
|
+
logger.info("failList[:100]: %r", failList[:100])
|
|
535
|
+
#
|
|
535
536
|
return ok, failList
|
|
536
537
|
|
|
537
538
|
def __reloadDump(self, objD, databaseName, collectionName, indexAttributeNames=None):
|
|
@@ -165,7 +165,7 @@ class PubChemEtlWorkflow(object):
|
|
|
165
165
|
birdUrlTarget = kwargs.get("birdUrlTarget", None)
|
|
166
166
|
ccFileNamePrefix = kwargs.get("ccFileNamePrefix", "cc-full")
|
|
167
167
|
numProcChemComp = kwargs.get("numProcChemComp", 8)
|
|
168
|
-
numProc = kwargs.get("numProc",
|
|
168
|
+
numProc = kwargs.get("numProc", 4)
|
|
169
169
|
rebuildChemIndices = kwargs.get("rebuildChemIndices", True)
|
|
170
170
|
exportPath = kwargs.get("exportPath", None)
|
|
171
171
|
useStash = kwargs.get("useStash", True)
|
|
@@ -209,7 +209,7 @@ class PubChemEtlWorkflow(object):
|
|
|
209
209
|
try:
|
|
210
210
|
ok1 = ok2 = ok3 = ok4 = ok5 = ok6 = False
|
|
211
211
|
# --
|
|
212
|
-
numProc = kwargs.get("numProc",
|
|
212
|
+
numProc = kwargs.get("numProc", 4)
|
|
213
213
|
useStash = kwargs.get("useStash", True)
|
|
214
214
|
useGit = kwargs.get("useGit", False)
|
|
215
215
|
#
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.27
|
|
3
|
+
Version: 1.29
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -16,25 +16,20 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: OpenEye-toolkits>=2024.1.1
|
|
20
19
|
Requires-Dist: numpy
|
|
21
20
|
Requires-Dist: jsonschema>=2.6.0
|
|
22
21
|
Requires-Dist: rcsb.utils.io>=1.48
|
|
23
|
-
Requires-Dist: rcsb.db>=1.
|
|
24
|
-
Requires-Dist: rcsb.utils.chem>=0.
|
|
22
|
+
Requires-Dist: rcsb.db>=1.800
|
|
23
|
+
Requires-Dist: rcsb.utils.chem>=0.81
|
|
25
24
|
Requires-Dist: rcsb.utils.chemref>=0.91
|
|
26
|
-
Requires-Dist: rcsb.utils.citation>=0.22
|
|
27
25
|
Requires-Dist: rcsb.utils.config>=0.40
|
|
28
26
|
Requires-Dist: rcsb.utils.ec>=0.25
|
|
29
27
|
Requires-Dist: rcsb.utils.go>=0.18
|
|
30
28
|
Requires-Dist: rcsb.utils.seq>=0.82
|
|
31
|
-
Requires-Dist: rcsb.utils.seqalign>=0.31
|
|
32
29
|
Requires-Dist: rcsb.utils.targets>=0.82
|
|
33
30
|
Requires-Dist: rcsb.utils.struct>=0.47
|
|
34
31
|
Requires-Dist: rcsb.utils.taxonomy>=0.43
|
|
35
32
|
Requires-Dist: rcsb.utils.dictionary>=1.27
|
|
36
|
-
Requires-Dist: rcsb.workflow>=0.46
|
|
37
|
-
Requires-Dist: statistics; python_version < "3.0"
|
|
38
33
|
Provides-Extra: dev
|
|
39
34
|
Requires-Dist: check-manifest; extra == "dev"
|
|
40
35
|
Provides-Extra: test
|
|
@@ -46,6 +41,7 @@ Dynamic: description
|
|
|
46
41
|
Dynamic: description-content-type
|
|
47
42
|
Dynamic: home-page
|
|
48
43
|
Dynamic: license
|
|
44
|
+
Dynamic: license-file
|
|
49
45
|
Dynamic: provides-extra
|
|
50
46
|
Dynamic: requires-dist
|
|
51
47
|
Dynamic: summary
|
|
@@ -115,57 +111,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
115
111
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
116
112
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
117
113
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
118
|
-
|
|
119
|
-
### Command Line Interfaces
|
|
120
|
-
|
|
121
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
122
|
-
|
|
123
|
-
```bash
|
|
124
|
-
exdb_exec_cli --help
|
|
125
|
-
|
|
126
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
127
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
128
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
129
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
130
|
-
[--chunk_size CHUNK_SIZE]
|
|
131
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
132
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
133
|
-
|
|
134
|
-
optional arguments:
|
|
135
|
-
-h, --help show this help message and exit
|
|
136
|
-
--data_set_id DATA_SET_ID
|
|
137
|
-
Data set identifier (default= 2019_14 for current
|
|
138
|
-
week)
|
|
139
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
140
|
-
--etl_chemref ETL integrated chemical reference data
|
|
141
|
-
--etl_tree_node_lists
|
|
142
|
-
ETL tree node lists
|
|
143
|
-
--config_path CONFIG_PATH
|
|
144
|
-
Path to configuration options file
|
|
145
|
-
--config_name CONFIG_NAME
|
|
146
|
-
Configuration section name
|
|
147
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
148
|
-
--read_back_check Perform read back check on all documents
|
|
149
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
150
|
-
--chunk_size CHUNK_SIZE
|
|
151
|
-
Number of files loaded per process
|
|
152
|
-
--document_limit DOCUMENT_LIMIT
|
|
153
|
-
Load document limit for testing
|
|
154
|
-
--debug Turn on verbose logging
|
|
155
|
-
--mock Use MOCK repository configuration for testing
|
|
156
|
-
--cache_path CACHE_PATH
|
|
157
|
-
Top cache path for external and local resource files
|
|
158
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
159
|
-
________________________________________________________________________________
|
|
160
|
-
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
164
|
-
command may be used:
|
|
165
|
-
|
|
166
|
-
```bash
|
|
167
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
168
|
-
--cache_path ./CACHE \
|
|
169
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
170
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
171
|
-
```
|
|
@@ -9,7 +9,6 @@ rcsb/__init__.py
|
|
|
9
9
|
rcsb.exdb.egg-info/PKG-INFO
|
|
10
10
|
rcsb.exdb.egg-info/SOURCES.txt
|
|
11
11
|
rcsb.exdb.egg-info/dependency_links.txt
|
|
12
|
-
rcsb.exdb.egg-info/entry_points.txt
|
|
13
12
|
rcsb.exdb.egg-info/not-zip-safe
|
|
14
13
|
rcsb.exdb.egg-info/requires.txt
|
|
15
14
|
rcsb.exdb.egg-info/top_level.txt
|
|
@@ -29,7 +28,6 @@ rcsb/exdb/citation/CitationAdapter.py
|
|
|
29
28
|
rcsb/exdb/citation/CitationExtractor.py
|
|
30
29
|
rcsb/exdb/citation/CitationUtils.py
|
|
31
30
|
rcsb/exdb/citation/__init__.py
|
|
32
|
-
rcsb/exdb/cli/ExDbExec.py
|
|
33
31
|
rcsb/exdb/cli/__init__.py
|
|
34
32
|
rcsb/exdb/entry/EntryInfoProvider.py
|
|
35
33
|
rcsb/exdb/entry/__init__.py
|
|
@@ -58,7 +56,6 @@ rcsb/exdb/tests/testCitationExtractor.py
|
|
|
58
56
|
rcsb/exdb/tests/testCitationUtils.py
|
|
59
57
|
rcsb/exdb/tests/testEntryInfoEtlWorkflow.py
|
|
60
58
|
rcsb/exdb/tests/testEntryInfoProvider.py
|
|
61
|
-
rcsb/exdb/tests/testExDbWorkflow.py
|
|
62
59
|
rcsb/exdb/tests/testGlycanEtlWorkflow.py
|
|
63
60
|
rcsb/exdb/tests/testGlycanProvider.py
|
|
64
61
|
rcsb/exdb/tests/testGlycanUtils.py
|
|
@@ -89,7 +86,6 @@ rcsb/exdb/utils/ObjectUpdater.py
|
|
|
89
86
|
rcsb/exdb/utils/ObjectValidator.py
|
|
90
87
|
rcsb/exdb/utils/__init__.py
|
|
91
88
|
rcsb/exdb/wf/EntryInfoEtlWorkflow.py
|
|
92
|
-
rcsb/exdb/wf/ExDbWorkflow.py
|
|
93
89
|
rcsb/exdb/wf/GlycanEtlWorkflow.py
|
|
94
90
|
rcsb/exdb/wf/PubChemEtlWorkflow.py
|
|
95
91
|
rcsb/exdb/wf/__init__.py
|
|
@@ -1,24 +1,17 @@
|
|
|
1
|
-
OpenEye-toolkits>=2024.1.1
|
|
2
1
|
numpy
|
|
3
2
|
jsonschema>=2.6.0
|
|
4
3
|
rcsb.utils.io>=1.48
|
|
5
|
-
rcsb.db>=1.
|
|
6
|
-
rcsb.utils.chem>=0.
|
|
4
|
+
rcsb.db>=1.800
|
|
5
|
+
rcsb.utils.chem>=0.81
|
|
7
6
|
rcsb.utils.chemref>=0.91
|
|
8
|
-
rcsb.utils.citation>=0.22
|
|
9
7
|
rcsb.utils.config>=0.40
|
|
10
8
|
rcsb.utils.ec>=0.25
|
|
11
9
|
rcsb.utils.go>=0.18
|
|
12
10
|
rcsb.utils.seq>=0.82
|
|
13
|
-
rcsb.utils.seqalign>=0.31
|
|
14
11
|
rcsb.utils.targets>=0.82
|
|
15
12
|
rcsb.utils.struct>=0.47
|
|
16
13
|
rcsb.utils.taxonomy>=0.43
|
|
17
14
|
rcsb.utils.dictionary>=1.27
|
|
18
|
-
rcsb.workflow>=0.46
|
|
19
|
-
|
|
20
|
-
[:python_version < "3.0"]
|
|
21
|
-
statistics
|
|
22
15
|
|
|
23
16
|
[dev]
|
|
24
17
|
check-manifest
|
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
--extra-index-url https://pypi.anaconda.org/OpenEye/simple
|
|
2
|
-
OpenEye
|
|
2
|
+
# Above line may be needed despite the OpenEye package not being a direct requirement of this package (it's used by rcsb.utils.chem)
|
|
3
|
+
# OpenEye-toolkits >= 2024.1.1
|
|
3
4
|
numpy
|
|
4
5
|
jsonschema >= 2.6.0
|
|
5
6
|
rcsb.utils.io >= 1.48
|
|
6
|
-
rcsb.db >= 1.
|
|
7
|
-
rcsb.utils.chem >= 0.
|
|
7
|
+
rcsb.db >= 1.800
|
|
8
|
+
rcsb.utils.chem >= 0.81
|
|
8
9
|
rcsb.utils.chemref >= 0.91
|
|
9
|
-
rcsb.utils.citation >= 0.22
|
|
10
10
|
rcsb.utils.config >= 0.40
|
|
11
11
|
rcsb.utils.ec >= 0.25
|
|
12
12
|
rcsb.utils.go >= 0.18
|
|
13
13
|
rcsb.utils.seq >= 0.82
|
|
14
|
-
rcsb.utils.seqalign >= 0.31
|
|
15
14
|
rcsb.utils.targets >= 0.82
|
|
16
15
|
rcsb.utils.struct >= 0.47
|
|
17
16
|
rcsb.utils.taxonomy >= 0.43
|
|
18
17
|
rcsb.utils.dictionary >= 1.27
|
|
19
|
-
rcsb.workflow >= 0.46
|
|
20
|
-
statistics; python_version < "3.0"
|
|
@@ -47,7 +47,6 @@ setup(
|
|
|
47
47
|
"Programming Language :: Python :: 3.9",
|
|
48
48
|
"Programming Language :: Python :: 3.10",
|
|
49
49
|
],
|
|
50
|
-
entry_points={"console_scripts": ["exdb_exec_cli=rcsb.exdb.cli.ExDbExec:main"]},
|
|
51
50
|
#
|
|
52
51
|
install_requires=packagesRequired[1:],
|
|
53
52
|
packages=find_packages(exclude=["rcsb.mock-data", "rcsb.exdb.tests-anal", "rcsb.exdb.tests-*", "tests.*"]),
|
|
@@ -57,7 +56,7 @@ setup(
|
|
|
57
56
|
},
|
|
58
57
|
#
|
|
59
58
|
test_suite="rcsb.exdb.tests",
|
|
60
|
-
tests_require=["tox"],
|
|
59
|
+
tests_require=["tox", "rcsb.utils.citation >= 0.22"],
|
|
61
60
|
#
|
|
62
61
|
# Not configured ...
|
|
63
62
|
extras_require={"dev": ["check-manifest"], "test": ["coverage"]},
|