rcsb.exdb 1.27__tar.gz → 1.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/HISTORY.txt +2 -1
- {rcsb_exdb-1.27/rcsb.exdb.egg-info → rcsb_exdb-1.28}/PKG-INFO +3 -62
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/README.md +0 -54
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/cli/__init__.py +1 -1
- {rcsb_exdb-1.27 → rcsb_exdb-1.28/rcsb.exdb.egg-info}/PKG-INFO +3 -62
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/SOURCES.txt +0 -4
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/requires.txt +2 -9
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/requirements.txt +4 -7
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/setup.py +1 -2
- rcsb_exdb-1.27/rcsb/exdb/cli/ExDbExec.py +0 -239
- rcsb_exdb-1.27/rcsb/exdb/tests/testExDbWorkflow.py +0 -145
- rcsb_exdb-1.27/rcsb/exdb/wf/ExDbWorkflow.py +0 -521
- rcsb_exdb-1.27/rcsb.exdb.egg-info/entry_points.txt +0 -2
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/LICENSE +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/MANIFEST.in +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/entry/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tree/TreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tree/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectValidator.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/PubChemEtlWorkflow.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/__init__.py +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/not-zip-safe +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/top_level.txt +0 -0
- {rcsb_exdb-1.27 → rcsb_exdb-1.28}/setup.cfg +0 -0
|
@@ -108,4 +108,5 @@
|
|
|
108
108
|
Update CI/CD to python 3.10
|
|
109
109
|
10-Dec-2024 V1.26 Update PolymerEntityExtractor to sort extracted sequence data;
|
|
110
110
|
Update Azure pipelines to run on latest macOS and ubuntu version
|
|
111
|
-
23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
|
|
111
|
+
23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
|
|
112
|
+
11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.27
|
|
3
|
+
Version: 1.28
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -16,25 +16,20 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: OpenEye-toolkits>=2024.1.1
|
|
20
19
|
Requires-Dist: numpy
|
|
21
20
|
Requires-Dist: jsonschema>=2.6.0
|
|
22
21
|
Requires-Dist: rcsb.utils.io>=1.48
|
|
23
|
-
Requires-Dist: rcsb.db>=1.
|
|
24
|
-
Requires-Dist: rcsb.utils.chem>=0.
|
|
22
|
+
Requires-Dist: rcsb.db>=1.800
|
|
23
|
+
Requires-Dist: rcsb.utils.chem>=0.81
|
|
25
24
|
Requires-Dist: rcsb.utils.chemref>=0.91
|
|
26
|
-
Requires-Dist: rcsb.utils.citation>=0.22
|
|
27
25
|
Requires-Dist: rcsb.utils.config>=0.40
|
|
28
26
|
Requires-Dist: rcsb.utils.ec>=0.25
|
|
29
27
|
Requires-Dist: rcsb.utils.go>=0.18
|
|
30
28
|
Requires-Dist: rcsb.utils.seq>=0.82
|
|
31
|
-
Requires-Dist: rcsb.utils.seqalign>=0.31
|
|
32
29
|
Requires-Dist: rcsb.utils.targets>=0.82
|
|
33
30
|
Requires-Dist: rcsb.utils.struct>=0.47
|
|
34
31
|
Requires-Dist: rcsb.utils.taxonomy>=0.43
|
|
35
32
|
Requires-Dist: rcsb.utils.dictionary>=1.27
|
|
36
|
-
Requires-Dist: rcsb.workflow>=0.46
|
|
37
|
-
Requires-Dist: statistics; python_version < "3.0"
|
|
38
33
|
Provides-Extra: dev
|
|
39
34
|
Requires-Dist: check-manifest; extra == "dev"
|
|
40
35
|
Provides-Extra: test
|
|
@@ -115,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
115
110
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
116
111
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
117
112
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
118
|
-
|
|
119
|
-
### Command Line Interfaces
|
|
120
|
-
|
|
121
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
122
|
-
|
|
123
|
-
```bash
|
|
124
|
-
exdb_exec_cli --help
|
|
125
|
-
|
|
126
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
127
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
128
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
129
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
130
|
-
[--chunk_size CHUNK_SIZE]
|
|
131
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
132
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
133
|
-
|
|
134
|
-
optional arguments:
|
|
135
|
-
-h, --help show this help message and exit
|
|
136
|
-
--data_set_id DATA_SET_ID
|
|
137
|
-
Data set identifier (default= 2019_14 for current
|
|
138
|
-
week)
|
|
139
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
140
|
-
--etl_chemref ETL integrated chemical reference data
|
|
141
|
-
--etl_tree_node_lists
|
|
142
|
-
ETL tree node lists
|
|
143
|
-
--config_path CONFIG_PATH
|
|
144
|
-
Path to configuration options file
|
|
145
|
-
--config_name CONFIG_NAME
|
|
146
|
-
Configuration section name
|
|
147
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
148
|
-
--read_back_check Perform read back check on all documents
|
|
149
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
150
|
-
--chunk_size CHUNK_SIZE
|
|
151
|
-
Number of files loaded per process
|
|
152
|
-
--document_limit DOCUMENT_LIMIT
|
|
153
|
-
Load document limit for testing
|
|
154
|
-
--debug Turn on verbose logging
|
|
155
|
-
--mock Use MOCK repository configuration for testing
|
|
156
|
-
--cache_path CACHE_PATH
|
|
157
|
-
Top cache path for external and local resource files
|
|
158
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
159
|
-
________________________________________________________________________________
|
|
160
|
-
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
164
|
-
command may be used:
|
|
165
|
-
|
|
166
|
-
```bash
|
|
167
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
168
|
-
--cache_path ./CACHE \
|
|
169
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
170
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
171
|
-
```
|
|
@@ -63,57 +63,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
63
63
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
64
64
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
65
65
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
66
|
-
|
|
67
|
-
### Command Line Interfaces
|
|
68
|
-
|
|
69
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
70
|
-
|
|
71
|
-
```bash
|
|
72
|
-
exdb_exec_cli --help
|
|
73
|
-
|
|
74
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
75
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
76
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
77
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
78
|
-
[--chunk_size CHUNK_SIZE]
|
|
79
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
80
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
81
|
-
|
|
82
|
-
optional arguments:
|
|
83
|
-
-h, --help show this help message and exit
|
|
84
|
-
--data_set_id DATA_SET_ID
|
|
85
|
-
Data set identifier (default= 2019_14 for current
|
|
86
|
-
week)
|
|
87
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
88
|
-
--etl_chemref ETL integrated chemical reference data
|
|
89
|
-
--etl_tree_node_lists
|
|
90
|
-
ETL tree node lists
|
|
91
|
-
--config_path CONFIG_PATH
|
|
92
|
-
Path to configuration options file
|
|
93
|
-
--config_name CONFIG_NAME
|
|
94
|
-
Configuration section name
|
|
95
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
96
|
-
--read_back_check Perform read back check on all documents
|
|
97
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
98
|
-
--chunk_size CHUNK_SIZE
|
|
99
|
-
Number of files loaded per process
|
|
100
|
-
--document_limit DOCUMENT_LIMIT
|
|
101
|
-
Load document limit for testing
|
|
102
|
-
--debug Turn on verbose logging
|
|
103
|
-
--mock Use MOCK repository configuration for testing
|
|
104
|
-
--cache_path CACHE_PATH
|
|
105
|
-
Top cache path for external and local resource files
|
|
106
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
107
|
-
________________________________________________________________________________
|
|
108
|
-
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
112
|
-
command may be used:
|
|
113
|
-
|
|
114
|
-
```bash
|
|
115
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
116
|
-
--cache_path ./CACHE \
|
|
117
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
118
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
119
|
-
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.27
|
|
3
|
+
Version: 1.28
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -16,25 +16,20 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: OpenEye-toolkits>=2024.1.1
|
|
20
19
|
Requires-Dist: numpy
|
|
21
20
|
Requires-Dist: jsonschema>=2.6.0
|
|
22
21
|
Requires-Dist: rcsb.utils.io>=1.48
|
|
23
|
-
Requires-Dist: rcsb.db>=1.
|
|
24
|
-
Requires-Dist: rcsb.utils.chem>=0.
|
|
22
|
+
Requires-Dist: rcsb.db>=1.800
|
|
23
|
+
Requires-Dist: rcsb.utils.chem>=0.81
|
|
25
24
|
Requires-Dist: rcsb.utils.chemref>=0.91
|
|
26
|
-
Requires-Dist: rcsb.utils.citation>=0.22
|
|
27
25
|
Requires-Dist: rcsb.utils.config>=0.40
|
|
28
26
|
Requires-Dist: rcsb.utils.ec>=0.25
|
|
29
27
|
Requires-Dist: rcsb.utils.go>=0.18
|
|
30
28
|
Requires-Dist: rcsb.utils.seq>=0.82
|
|
31
|
-
Requires-Dist: rcsb.utils.seqalign>=0.31
|
|
32
29
|
Requires-Dist: rcsb.utils.targets>=0.82
|
|
33
30
|
Requires-Dist: rcsb.utils.struct>=0.47
|
|
34
31
|
Requires-Dist: rcsb.utils.taxonomy>=0.43
|
|
35
32
|
Requires-Dist: rcsb.utils.dictionary>=1.27
|
|
36
|
-
Requires-Dist: rcsb.workflow>=0.46
|
|
37
|
-
Requires-Dist: statistics; python_version < "3.0"
|
|
38
33
|
Provides-Extra: dev
|
|
39
34
|
Requires-Dist: check-manifest; extra == "dev"
|
|
40
35
|
Provides-Extra: test
|
|
@@ -115,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
115
110
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
116
111
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
117
112
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
118
|
-
|
|
119
|
-
### Command Line Interfaces
|
|
120
|
-
|
|
121
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
122
|
-
|
|
123
|
-
```bash
|
|
124
|
-
exdb_exec_cli --help
|
|
125
|
-
|
|
126
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
127
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
128
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
129
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
130
|
-
[--chunk_size CHUNK_SIZE]
|
|
131
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
132
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
133
|
-
|
|
134
|
-
optional arguments:
|
|
135
|
-
-h, --help show this help message and exit
|
|
136
|
-
--data_set_id DATA_SET_ID
|
|
137
|
-
Data set identifier (default= 2019_14 for current
|
|
138
|
-
week)
|
|
139
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
140
|
-
--etl_chemref ETL integrated chemical reference data
|
|
141
|
-
--etl_tree_node_lists
|
|
142
|
-
ETL tree node lists
|
|
143
|
-
--config_path CONFIG_PATH
|
|
144
|
-
Path to configuration options file
|
|
145
|
-
--config_name CONFIG_NAME
|
|
146
|
-
Configuration section name
|
|
147
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
148
|
-
--read_back_check Perform read back check on all documents
|
|
149
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
150
|
-
--chunk_size CHUNK_SIZE
|
|
151
|
-
Number of files loaded per process
|
|
152
|
-
--document_limit DOCUMENT_LIMIT
|
|
153
|
-
Load document limit for testing
|
|
154
|
-
--debug Turn on verbose logging
|
|
155
|
-
--mock Use MOCK repository configuration for testing
|
|
156
|
-
--cache_path CACHE_PATH
|
|
157
|
-
Top cache path for external and local resource files
|
|
158
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
159
|
-
________________________________________________________________________________
|
|
160
|
-
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
164
|
-
command may be used:
|
|
165
|
-
|
|
166
|
-
```bash
|
|
167
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
168
|
-
--cache_path ./CACHE \
|
|
169
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
170
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
171
|
-
```
|
|
@@ -9,7 +9,6 @@ rcsb/__init__.py
|
|
|
9
9
|
rcsb.exdb.egg-info/PKG-INFO
|
|
10
10
|
rcsb.exdb.egg-info/SOURCES.txt
|
|
11
11
|
rcsb.exdb.egg-info/dependency_links.txt
|
|
12
|
-
rcsb.exdb.egg-info/entry_points.txt
|
|
13
12
|
rcsb.exdb.egg-info/not-zip-safe
|
|
14
13
|
rcsb.exdb.egg-info/requires.txt
|
|
15
14
|
rcsb.exdb.egg-info/top_level.txt
|
|
@@ -29,7 +28,6 @@ rcsb/exdb/citation/CitationAdapter.py
|
|
|
29
28
|
rcsb/exdb/citation/CitationExtractor.py
|
|
30
29
|
rcsb/exdb/citation/CitationUtils.py
|
|
31
30
|
rcsb/exdb/citation/__init__.py
|
|
32
|
-
rcsb/exdb/cli/ExDbExec.py
|
|
33
31
|
rcsb/exdb/cli/__init__.py
|
|
34
32
|
rcsb/exdb/entry/EntryInfoProvider.py
|
|
35
33
|
rcsb/exdb/entry/__init__.py
|
|
@@ -58,7 +56,6 @@ rcsb/exdb/tests/testCitationExtractor.py
|
|
|
58
56
|
rcsb/exdb/tests/testCitationUtils.py
|
|
59
57
|
rcsb/exdb/tests/testEntryInfoEtlWorkflow.py
|
|
60
58
|
rcsb/exdb/tests/testEntryInfoProvider.py
|
|
61
|
-
rcsb/exdb/tests/testExDbWorkflow.py
|
|
62
59
|
rcsb/exdb/tests/testGlycanEtlWorkflow.py
|
|
63
60
|
rcsb/exdb/tests/testGlycanProvider.py
|
|
64
61
|
rcsb/exdb/tests/testGlycanUtils.py
|
|
@@ -89,7 +86,6 @@ rcsb/exdb/utils/ObjectUpdater.py
|
|
|
89
86
|
rcsb/exdb/utils/ObjectValidator.py
|
|
90
87
|
rcsb/exdb/utils/__init__.py
|
|
91
88
|
rcsb/exdb/wf/EntryInfoEtlWorkflow.py
|
|
92
|
-
rcsb/exdb/wf/ExDbWorkflow.py
|
|
93
89
|
rcsb/exdb/wf/GlycanEtlWorkflow.py
|
|
94
90
|
rcsb/exdb/wf/PubChemEtlWorkflow.py
|
|
95
91
|
rcsb/exdb/wf/__init__.py
|
|
@@ -1,24 +1,17 @@
|
|
|
1
|
-
OpenEye-toolkits>=2024.1.1
|
|
2
1
|
numpy
|
|
3
2
|
jsonschema>=2.6.0
|
|
4
3
|
rcsb.utils.io>=1.48
|
|
5
|
-
rcsb.db>=1.
|
|
6
|
-
rcsb.utils.chem>=0.
|
|
4
|
+
rcsb.db>=1.800
|
|
5
|
+
rcsb.utils.chem>=0.81
|
|
7
6
|
rcsb.utils.chemref>=0.91
|
|
8
|
-
rcsb.utils.citation>=0.22
|
|
9
7
|
rcsb.utils.config>=0.40
|
|
10
8
|
rcsb.utils.ec>=0.25
|
|
11
9
|
rcsb.utils.go>=0.18
|
|
12
10
|
rcsb.utils.seq>=0.82
|
|
13
|
-
rcsb.utils.seqalign>=0.31
|
|
14
11
|
rcsb.utils.targets>=0.82
|
|
15
12
|
rcsb.utils.struct>=0.47
|
|
16
13
|
rcsb.utils.taxonomy>=0.43
|
|
17
14
|
rcsb.utils.dictionary>=1.27
|
|
18
|
-
rcsb.workflow>=0.46
|
|
19
|
-
|
|
20
|
-
[:python_version < "3.0"]
|
|
21
|
-
statistics
|
|
22
15
|
|
|
23
16
|
[dev]
|
|
24
17
|
check-manifest
|
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
--extra-index-url https://pypi.anaconda.org/OpenEye/simple
|
|
2
|
-
OpenEye
|
|
2
|
+
# Above line may be needed despite the OpenEye package not being a direct requirement of this package (it's used by rcsb.utils.chem)
|
|
3
|
+
# OpenEye-toolkits >= 2024.1.1
|
|
3
4
|
numpy
|
|
4
5
|
jsonschema >= 2.6.0
|
|
5
6
|
rcsb.utils.io >= 1.48
|
|
6
|
-
rcsb.db >= 1.
|
|
7
|
-
rcsb.utils.chem >= 0.
|
|
7
|
+
rcsb.db >= 1.800
|
|
8
|
+
rcsb.utils.chem >= 0.81
|
|
8
9
|
rcsb.utils.chemref >= 0.91
|
|
9
|
-
rcsb.utils.citation >= 0.22
|
|
10
10
|
rcsb.utils.config >= 0.40
|
|
11
11
|
rcsb.utils.ec >= 0.25
|
|
12
12
|
rcsb.utils.go >= 0.18
|
|
13
13
|
rcsb.utils.seq >= 0.82
|
|
14
|
-
rcsb.utils.seqalign >= 0.31
|
|
15
14
|
rcsb.utils.targets >= 0.82
|
|
16
15
|
rcsb.utils.struct >= 0.47
|
|
17
16
|
rcsb.utils.taxonomy >= 0.43
|
|
18
17
|
rcsb.utils.dictionary >= 1.27
|
|
19
|
-
rcsb.workflow >= 0.46
|
|
20
|
-
statistics; python_version < "3.0"
|
|
@@ -47,7 +47,6 @@ setup(
|
|
|
47
47
|
"Programming Language :: Python :: 3.9",
|
|
48
48
|
"Programming Language :: Python :: 3.10",
|
|
49
49
|
],
|
|
50
|
-
entry_points={"console_scripts": ["exdb_exec_cli=rcsb.exdb.cli.ExDbExec:main"]},
|
|
51
50
|
#
|
|
52
51
|
install_requires=packagesRequired[1:],
|
|
53
52
|
packages=find_packages(exclude=["rcsb.mock-data", "rcsb.exdb.tests-anal", "rcsb.exdb.tests-*", "tests.*"]),
|
|
@@ -57,7 +56,7 @@ setup(
|
|
|
57
56
|
},
|
|
58
57
|
#
|
|
59
58
|
test_suite="rcsb.exdb.tests",
|
|
60
|
-
tests_require=["tox"],
|
|
59
|
+
tests_require=["tox", "rcsb.utils.citation >= 0.22"],
|
|
61
60
|
#
|
|
62
61
|
# Not configured ...
|
|
63
62
|
extras_require={"dev": ["check-manifest"], "test": ["coverage"]},
|
|
@@ -1,239 +0,0 @@
|
|
|
1
|
-
##
|
|
2
|
-
# File: ExDbExec.py
|
|
3
|
-
# Date: 22-Apr-2019 jdw
|
|
4
|
-
#
|
|
5
|
-
# Execution wrapper -- for extract and load operations -
|
|
6
|
-
#
|
|
7
|
-
# Updates:
|
|
8
|
-
# 4-Sep-2019 jdw add Tree and Drugbank loaders
|
|
9
|
-
# 14-Feb-2020 jdw change over to ReferenceSequenceAnnotationProvider/Adapter
|
|
10
|
-
# 9-Mar-2023 dwp Lower refChunkSize to 10 (UniProt API having trouble streaming XML responses)
|
|
11
|
-
# 25-Apr-2024 dwp Add arguments and logic to support CLI usage from weekly-update workflow;
|
|
12
|
-
# Add support for logging output to a specific file
|
|
13
|
-
# 20-Aug-2024 dwp Add load_target_cofactors operation; change name of upd_targets_cofactors to upd_targets
|
|
14
|
-
# 22-Oct-2024 dwp Add ccd_img_gen and ccd_file_gen operations
|
|
15
|
-
# (latter will only be used briefly, as will stop generating SDF and Mol2 files in Dec 2024)
|
|
16
|
-
##
|
|
17
|
-
__docformat__ = "google en"
|
|
18
|
-
__author__ = "John Westbrook"
|
|
19
|
-
__email__ = "jwest@rcsb.rutgers.edu"
|
|
20
|
-
__license__ = "Apache 2.0"
|
|
21
|
-
|
|
22
|
-
import os
|
|
23
|
-
import sys
|
|
24
|
-
import argparse
|
|
25
|
-
import logging
|
|
26
|
-
|
|
27
|
-
from rcsb.utils.config.ConfigUtil import ConfigUtil
|
|
28
|
-
from rcsb.exdb.wf.ExDbWorkflow import ExDbWorkflow
|
|
29
|
-
|
|
30
|
-
HERE = os.path.abspath(os.path.dirname(__file__))
|
|
31
|
-
TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
|
|
32
|
-
|
|
33
|
-
# logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s", stream=sys.stdout)
|
|
34
|
-
logger = logging.getLogger()
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def main():
|
|
38
|
-
parser = argparse.ArgumentParser()
|
|
39
|
-
#
|
|
40
|
-
parser.add_argument(
|
|
41
|
-
"--op",
|
|
42
|
-
default=None,
|
|
43
|
-
required=True,
|
|
44
|
-
help="Loading operation to perform",
|
|
45
|
-
choices=[
|
|
46
|
-
"etl_chemref", # ETL integrated chemical reference data
|
|
47
|
-
"etl_uniprot_core", # ETL UniProt core reference data
|
|
48
|
-
"etl_tree_node_lists", # ETL tree node lists
|
|
49
|
-
"upd_ref_seq", # Update reference sequence assignments
|
|
50
|
-
"upd_neighbor_interactions",
|
|
51
|
-
"upd_uniprot_taxonomy",
|
|
52
|
-
"upd_targets",
|
|
53
|
-
"load_target_cofactors",
|
|
54
|
-
"upd_pubchem",
|
|
55
|
-
"upd_entry_info",
|
|
56
|
-
"upd_glycan_idx",
|
|
57
|
-
"upd_resource_stash",
|
|
58
|
-
"ccd_img_gen",
|
|
59
|
-
"ccd_file_gen",
|
|
60
|
-
]
|
|
61
|
-
)
|
|
62
|
-
parser.add_argument(
|
|
63
|
-
"--load_type",
|
|
64
|
-
default="full",
|
|
65
|
-
help="Type of load ('full' for complete and fresh single-worker load, 'replace' for incremental and multi-worker load)",
|
|
66
|
-
choices=["full", "replace"],
|
|
67
|
-
)
|
|
68
|
-
#
|
|
69
|
-
parser.add_argument("--config_path", default=None, help="Path to configuration options file")
|
|
70
|
-
parser.add_argument("--config_name", default="site_info_remote_configuration", help="Configuration section name")
|
|
71
|
-
parser.add_argument("--cache_path", default=None, help="Cache path for resource files")
|
|
72
|
-
parser.add_argument("--num_proc", default=2, help="Number of processes to execute (default=2)")
|
|
73
|
-
parser.add_argument("--chunk_size", default=10, help="Number of files loaded per process")
|
|
74
|
-
parser.add_argument("--max_step_length", default=500, help="Maximum subList size (default=500)")
|
|
75
|
-
parser.add_argument("--db_type", default="mongo", help="Database server type (default=mongo)")
|
|
76
|
-
parser.add_argument("--document_limit", default=None, help="Load document limit for testing")
|
|
77
|
-
#
|
|
78
|
-
parser.add_argument("--rebuild_cache", default=False, action="store_true", help="Rebuild cached resource files")
|
|
79
|
-
parser.add_argument("--rebuild_sequence_cache", default=False, action="store_true", help="Rebuild cached resource files for reference sequence updates")
|
|
80
|
-
parser.add_argument("--provider_types_exclude", default=None, help="Resource provider types to exclude")
|
|
81
|
-
parser.add_argument("--use_filtered_tax_list", default=False, action="store_true", help="Use filtered list for taxonomy tree loading")
|
|
82
|
-
parser.add_argument("--disable_read_back_check", default=False, action="store_true", help="Disable read back check on all documents")
|
|
83
|
-
parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
|
|
84
|
-
parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for testing")
|
|
85
|
-
parser.add_argument("--log_file_path", default=None, help="Path to runtime log file output.")
|
|
86
|
-
#
|
|
87
|
-
# Arguments specific for op == 'upd_ref_seq'
|
|
88
|
-
parser.add_argument("--ref_chunk_size", default=10, help="Max chunk size for reference sequence updates (for op 'upd_ref_seq')")
|
|
89
|
-
parser.add_argument("--min_missing", default=0, help="Minimum number of allowed missing reference sequences (for op 'upd_ref_seq')")
|
|
90
|
-
parser.add_argument("--min_match_primary_percent", default=None, help="Minimum reference sequence match percentage (for op 'upd_ref_seq')")
|
|
91
|
-
parser.add_argument("--test_mode", default=False, action="store_true", help="Test mode for reference sequence updates (for op 'upd_ref_seq')")
|
|
92
|
-
#
|
|
93
|
-
# Arguments specific for op == 'ccd_img_gen' or 'ccd_file_gen'
|
|
94
|
-
parser.add_argument("--cc_output_path", default=None, help="The base local directory path where chemical component files (image, coordinates) are written (for op 'ccd_img_gen')")
|
|
95
|
-
parser.add_argument("--cc_cache_path", default=None, help="The base local directory path where chemical component cache data are written (for op 'ccd_img_gen')")
|
|
96
|
-
parser.add_argument("--oe_license_path", default=None, help="Path to OpenEye license file")
|
|
97
|
-
#
|
|
98
|
-
# Arguments buildExdbResources
|
|
99
|
-
parser.add_argument("--rebuild_all_neighbor_interactions", default=False, action="store_true", help="Rebuild all neighbor interactions from scratch (default is incrementally)")
|
|
100
|
-
parser.add_argument("--cc_file_prefix", default="cc-full", help="File name discriminator for index sets")
|
|
101
|
-
parser.add_argument("--cc_url_target", default=None, help="target url for chemical component dictionary resource file (default: None=all public)")
|
|
102
|
-
parser.add_argument("--bird_url_target", default=None, help="target url for bird dictionary resource file (cc format) (default: None=all public)")
|
|
103
|
-
#
|
|
104
|
-
args = parser.parse_args()
|
|
105
|
-
#
|
|
106
|
-
try:
|
|
107
|
-
op, commonD, loadD = processArguments(args)
|
|
108
|
-
except Exception as err:
|
|
109
|
-
logger.exception("Argument processing problem %s", str(err))
|
|
110
|
-
raise ValueError("Argument processing problem") from err
|
|
111
|
-
#
|
|
112
|
-
#
|
|
113
|
-
# Log input arguments
|
|
114
|
-
loadLogD = {k: v for d in [commonD, loadD] for k, v in d.items() if k != "inputIdCodeList"}
|
|
115
|
-
logger.info("running load op %r on loadLogD %r:", op, loadLogD)
|
|
116
|
-
#
|
|
117
|
-
# Run the operation
|
|
118
|
-
okR = False
|
|
119
|
-
exWf = ExDbWorkflow(**commonD)
|
|
120
|
-
if op in ["etl_chemref", "etl_uniprot_core", "etl_tree_node_lists", "upd_ref_seq"]:
|
|
121
|
-
okR = exWf.load(op, **loadD)
|
|
122
|
-
elif op in [
|
|
123
|
-
"upd_neighbor_interactions",
|
|
124
|
-
"upd_uniprot_taxonomy",
|
|
125
|
-
"upd_targets",
|
|
126
|
-
"load_target_cofactors",
|
|
127
|
-
"upd_pubchem",
|
|
128
|
-
"upd_entry_info",
|
|
129
|
-
"upd_glycan_idx",
|
|
130
|
-
"upd_resource_stash",
|
|
131
|
-
]:
|
|
132
|
-
okR = exWf.buildExdbResource(op, **loadD)
|
|
133
|
-
elif op in [
|
|
134
|
-
"ccd_img_gen",
|
|
135
|
-
"ccd_file_gen",
|
|
136
|
-
]:
|
|
137
|
-
okR = exWf.generateCcdFiles(op, **loadD)
|
|
138
|
-
else:
|
|
139
|
-
logger.error("Unsupported op %r", op)
|
|
140
|
-
#
|
|
141
|
-
logger.info("Operation %r completed with status %r", op, okR)
|
|
142
|
-
#
|
|
143
|
-
if not okR:
|
|
144
|
-
logger.error("Operation %r failed with status %r", op, okR)
|
|
145
|
-
raise ValueError("Operation %r failed" % op)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def processArguments(args):
    """Validate parsed command-line arguments and package them for the workflow.

    Sets the module logger level (and optionally attaches a file handler),
    verifies that the configuration file can be read and is non-empty, checks
    the requested operation and database type, and collects the remaining
    options into two keyword-argument dictionaries.

    Args:
        args (argparse.Namespace): parsed command-line arguments

    Returns:
        tuple: (op, commonD, loadD) where ``op`` is the requested operation
            name, ``commonD`` holds the configuration/cache options and
            ``loadD`` holds the per-operation options.

    Raises:
        ValueError: if the configuration path or name is missing, the exported
            configuration is empty, no operation is given, or the database
            type is not 'mongo'.  The int()/float() conversions of the numeric
            options may also raise on malformed values.
    """
    # Logging details
    logFilePath = args.log_file_path
    debugFlag = args.debug
    logLevel = logging.DEBUG if debugFlag else logging.INFO
    logger.setLevel(logLevel)
    if logFilePath:
        logDir = os.path.dirname(logFilePath)
        # A bare file name has an empty dirname; os.makedirs("") would raise,
        # so only create the directory when a directory component is present.
        if logDir and not os.path.isdir(logDir):
            os.makedirs(logDir, exist_ok=True)
        handler = logging.FileHandler(logFilePath, mode="a")
        handler.setLevel(logLevel)
        formatter = logging.Formatter("%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    #
    # Configuration details
    configPath = args.config_path
    configName = args.config_name
    if not (configPath and configName):
        logger.error("Config path and/or name not provided: %r, %r", configPath, configName)
        raise ValueError("Config path and/or name not provided: %r, %r" % (configPath, configName))
    mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
    logger.info("Using configuration file %r (section %r)", configPath, configName)
    cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
    # Export the configuration only to confirm that the file was readable and non-empty
    cfgObTmp = cfgOb.exportConfig()
    logger.info("Length of config object (%r)", len(cfgObTmp))
    if len(cfgObTmp) == 0:
        logger.error("Missing or access issue for config file %r", configPath)
        raise ValueError("Missing or access issue for config file %r" % configPath)
    del cfgObTmp
    #
    # Do any additional argument checking
    op = args.op
    if not op:
        raise ValueError("Must supply a value to '--op' argument")
    #
    # Default the cache location to the current directory
    cachePath = os.path.abspath(args.cache_path if args.cache_path else ".")

    if args.db_type != "mongo":
        logger.error("Unsupported database type %r (must be 'mongo')", args.db_type)
        raise ValueError("Unsupported database type %r (must be 'mongo')" % args.db_type)

    # Now collect arguments into dictionaries
    commonD = {
        "configPath": configPath,
        "configName": configName,
        "cachePath": cachePath,
        "mockTopPath": mockTopPath,
        "debugFlag": debugFlag,
        "rebuildCache": args.rebuild_cache,
        "providerTypeExcludeL": args.provider_types_exclude,
    }
    loadD = {
        "loadType": args.load_type,
        "numProc": int(args.num_proc),
        "chunkSize": int(args.chunk_size),
        "maxStepLength": int(args.max_step_length),
        "dbType": args.db_type,
        "documentLimit": int(args.document_limit) if args.document_limit else None,
        "readBackCheck": not args.disable_read_back_check,
        "rebuildSequenceCache": args.rebuild_sequence_cache,
        "useFilteredLists": args.use_filtered_tax_list,
        "refChunkSize": int(args.ref_chunk_size),
        "minMissing": int(args.min_missing),
        "minMatchPrimaryPercent": float(args.min_match_primary_percent) if args.min_match_primary_percent else None,
        "testMode": args.test_mode,
        "rebuildAllNeighborInteractions": args.rebuild_all_neighbor_interactions,
        "ccFileNamePrefix": args.cc_file_prefix,
        "ccUrlTarget": args.cc_url_target,
        "birdUrlTarget": args.bird_url_target,
        "ccOutputPath": args.cc_output_path,
        "ccCachePath": args.cc_cache_path,
        "licenseFilePath": args.oe_license_path,
    }

    return op, commonD, loadD
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
if __name__ == "__main__":
    # Script entry point: run the command-line driver and convert any
    # failure into a logged traceback plus a non-zero exit status.
    try:
        main()
    except Exception as err:
        logger.exception("Run failed %s", str(err))
        sys.exit(1)
|