rcsb.exdb 1.26__tar.gz → 1.28__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/HISTORY.txt +2 -0
- {rcsb_exdb-1.26/rcsb.exdb.egg-info → rcsb_exdb-1.28}/PKG-INFO +14 -63
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/README.md +0 -54
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/cli/__init__.py +1 -1
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tree/TreeNodeListWorker.py +10 -9
- {rcsb_exdb-1.26 → rcsb_exdb-1.28/rcsb.exdb.egg-info}/PKG-INFO +14 -63
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/SOURCES.txt +0 -4
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/requires.txt +2 -9
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/requirements.txt +4 -7
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/setup.py +1 -2
- rcsb_exdb-1.26/rcsb/exdb/cli/ExDbExec.py +0 -239
- rcsb_exdb-1.26/rcsb/exdb/tests/testExDbWorkflow.py +0 -145
- rcsb_exdb-1.26/rcsb/exdb/wf/ExDbWorkflow.py +0 -521
- rcsb_exdb-1.26/rcsb.exdb.egg-info/entry_points.txt +0 -2
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/LICENSE +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/MANIFEST.in +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanUtils.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/branch/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/chemref/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationAdapter.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationUtils.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/citation/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/entry/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/seq/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationUtils.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/tree/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectValidator.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/utils/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/PubChemEtlWorkflow.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb/exdb/wf/__init__.py +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/not-zip-safe +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/top_level.txt +0 -0
- {rcsb_exdb-1.26 → rcsb_exdb-1.28}/setup.cfg +0 -0
|
@@ -108,3 +108,5 @@
|
|
|
108
108
|
Update CI/CD to python 3.10
|
|
109
109
|
10-Dec-2024 V1.26 Update PolymerEntityExtractor to sort extracted sequence data;
|
|
110
110
|
Update Azure pipelines to run on latest macOS and ubuntu version
|
|
111
|
+
23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
|
|
112
|
+
11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.26
|
|
3
|
+
Version: 1.28
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -16,29 +16,34 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: OpenEye-toolkits>=2024.1.1
|
|
20
19
|
Requires-Dist: numpy
|
|
21
20
|
Requires-Dist: jsonschema>=2.6.0
|
|
22
21
|
Requires-Dist: rcsb.utils.io>=1.48
|
|
23
|
-
Requires-Dist: rcsb.db>=1.
|
|
24
|
-
Requires-Dist: rcsb.utils.chem>=0.
|
|
22
|
+
Requires-Dist: rcsb.db>=1.800
|
|
23
|
+
Requires-Dist: rcsb.utils.chem>=0.81
|
|
25
24
|
Requires-Dist: rcsb.utils.chemref>=0.91
|
|
26
|
-
Requires-Dist: rcsb.utils.citation>=0.22
|
|
27
25
|
Requires-Dist: rcsb.utils.config>=0.40
|
|
28
26
|
Requires-Dist: rcsb.utils.ec>=0.25
|
|
29
27
|
Requires-Dist: rcsb.utils.go>=0.18
|
|
30
28
|
Requires-Dist: rcsb.utils.seq>=0.82
|
|
31
|
-
Requires-Dist: rcsb.utils.seqalign>=0.31
|
|
32
29
|
Requires-Dist: rcsb.utils.targets>=0.82
|
|
33
30
|
Requires-Dist: rcsb.utils.struct>=0.47
|
|
34
31
|
Requires-Dist: rcsb.utils.taxonomy>=0.43
|
|
35
32
|
Requires-Dist: rcsb.utils.dictionary>=1.27
|
|
36
|
-
Requires-Dist: rcsb.workflow>=0.46
|
|
37
|
-
Requires-Dist: statistics; python_version < "3.0"
|
|
38
33
|
Provides-Extra: dev
|
|
39
34
|
Requires-Dist: check-manifest; extra == "dev"
|
|
40
35
|
Provides-Extra: test
|
|
41
36
|
Requires-Dist: coverage; extra == "test"
|
|
37
|
+
Dynamic: author
|
|
38
|
+
Dynamic: author-email
|
|
39
|
+
Dynamic: classifier
|
|
40
|
+
Dynamic: description
|
|
41
|
+
Dynamic: description-content-type
|
|
42
|
+
Dynamic: home-page
|
|
43
|
+
Dynamic: license
|
|
44
|
+
Dynamic: provides-extra
|
|
45
|
+
Dynamic: requires-dist
|
|
46
|
+
Dynamic: summary
|
|
42
47
|
|
|
43
48
|
# py-rcsb_exdb
|
|
44
49
|
|
|
@@ -105,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
105
110
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
106
111
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
107
112
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
108
|
-
|
|
109
|
-
### Command Line Interfaces
|
|
110
|
-
|
|
111
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
112
|
-
|
|
113
|
-
```bash
|
|
114
|
-
exdb_exec_cli --help
|
|
115
|
-
|
|
116
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
117
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
118
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
119
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
120
|
-
[--chunk_size CHUNK_SIZE]
|
|
121
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
122
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
123
|
-
|
|
124
|
-
optional arguments:
|
|
125
|
-
-h, --help show this help message and exit
|
|
126
|
-
--data_set_id DATA_SET_ID
|
|
127
|
-
Data set identifier (default= 2019_14 for current
|
|
128
|
-
week)
|
|
129
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
130
|
-
--etl_chemref ETL integrated chemical reference data
|
|
131
|
-
--etl_tree_node_lists
|
|
132
|
-
ETL tree node lists
|
|
133
|
-
--config_path CONFIG_PATH
|
|
134
|
-
Path to configuration options file
|
|
135
|
-
--config_name CONFIG_NAME
|
|
136
|
-
Configuration section name
|
|
137
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
138
|
-
--read_back_check Perform read back check on all documents
|
|
139
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
140
|
-
--chunk_size CHUNK_SIZE
|
|
141
|
-
Number of files loaded per process
|
|
142
|
-
--document_limit DOCUMENT_LIMIT
|
|
143
|
-
Load document limit for testing
|
|
144
|
-
--debug Turn on verbose logging
|
|
145
|
-
--mock Use MOCK repository configuration for testing
|
|
146
|
-
--cache_path CACHE_PATH
|
|
147
|
-
Top cache path for external and local resource files
|
|
148
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
149
|
-
________________________________________________________________________________
|
|
150
|
-
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
154
|
-
command may be used:
|
|
155
|
-
|
|
156
|
-
```bash
|
|
157
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
158
|
-
--cache_path ./CACHE \
|
|
159
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
160
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
161
|
-
```
|
|
@@ -63,57 +63,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
63
63
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
64
64
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
65
65
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
66
|
-
|
|
67
|
-
### Command Line Interfaces
|
|
68
|
-
|
|
69
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
70
|
-
|
|
71
|
-
```bash
|
|
72
|
-
exdb_exec_cli --help
|
|
73
|
-
|
|
74
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
75
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
76
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
77
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
78
|
-
[--chunk_size CHUNK_SIZE]
|
|
79
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
80
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
81
|
-
|
|
82
|
-
optional arguments:
|
|
83
|
-
-h, --help show this help message and exit
|
|
84
|
-
--data_set_id DATA_SET_ID
|
|
85
|
-
Data set identifier (default= 2019_14 for current
|
|
86
|
-
week)
|
|
87
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
88
|
-
--etl_chemref ETL integrated chemical reference data
|
|
89
|
-
--etl_tree_node_lists
|
|
90
|
-
ETL tree node lists
|
|
91
|
-
--config_path CONFIG_PATH
|
|
92
|
-
Path to configuration options file
|
|
93
|
-
--config_name CONFIG_NAME
|
|
94
|
-
Configuration section name
|
|
95
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
96
|
-
--read_back_check Perform read back check on all documents
|
|
97
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
98
|
-
--chunk_size CHUNK_SIZE
|
|
99
|
-
Number of files loaded per process
|
|
100
|
-
--document_limit DOCUMENT_LIMIT
|
|
101
|
-
Load document limit for testing
|
|
102
|
-
--debug Turn on verbose logging
|
|
103
|
-
--mock Use MOCK repository configuration for testing
|
|
104
|
-
--cache_path CACHE_PATH
|
|
105
|
-
Top cache path for external and local resource files
|
|
106
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
107
|
-
________________________________________________________________________________
|
|
108
|
-
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
112
|
-
command may be used:
|
|
113
|
-
|
|
114
|
-
```bash
|
|
115
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
116
|
-
--cache_path ./CACHE \
|
|
117
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
118
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
119
|
-
```
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
# 12-Apr-2023 dwp add CARD ontology tree
|
|
10
10
|
# 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead)
|
|
11
11
|
# 27-Aug-2024 dwp Update CARD ontology tree loading
|
|
12
|
+
# 23-Jan-2025 dwp Change indexed field from 'update_id' to 'id'
|
|
12
13
|
#
|
|
13
14
|
##
|
|
14
15
|
__docformat__ = "google en"
|
|
@@ -138,7 +139,7 @@ class TreeNodeListWorker(object):
|
|
|
138
139
|
# logger.info("GO tree node list length %d", len(nL))
|
|
139
140
|
# if doLoad:
|
|
140
141
|
# collectionName = "tree_go_node_list"
|
|
141
|
-
# ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
142
|
+
# ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
142
143
|
# self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
143
144
|
#
|
|
144
145
|
# ---- CATH
|
|
@@ -147,7 +148,7 @@ class TreeNodeListWorker(object):
|
|
|
147
148
|
logger.info("Starting load SCOP node tree length %d", len(nL))
|
|
148
149
|
if doLoad:
|
|
149
150
|
collectionName = "tree_cath_node_list"
|
|
150
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
151
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
151
152
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
152
153
|
# ---- SCOP
|
|
153
154
|
scu = ScopClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
@@ -155,7 +156,7 @@ class TreeNodeListWorker(object):
|
|
|
155
156
|
logger.info("Starting load SCOP node tree length %d", len(nL))
|
|
156
157
|
if doLoad:
|
|
157
158
|
collectionName = "tree_scop_node_list"
|
|
158
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
159
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
159
160
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
160
161
|
# --- SCOP2
|
|
161
162
|
scu = Scop2ClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
@@ -163,7 +164,7 @@ class TreeNodeListWorker(object):
|
|
|
163
164
|
logger.info("Starting load SCOP2 node tree length %d", len(nL))
|
|
164
165
|
if doLoad:
|
|
165
166
|
collectionName = "tree_scop2_node_list"
|
|
166
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
167
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
167
168
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
168
169
|
# ---- Ecod
|
|
169
170
|
ecu = EcodClassificationProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
@@ -171,7 +172,7 @@ class TreeNodeListWorker(object):
|
|
|
171
172
|
logger.info("Starting load ECOD node tree length %d", len(nL))
|
|
172
173
|
if doLoad:
|
|
173
174
|
collectionName = "tree_ecod_node_list"
|
|
174
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
175
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
175
176
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
176
177
|
# ---- EC
|
|
177
178
|
edbu = EnzymeDatabaseProvider(cachePath=self.__cachePath, useCache=useCache)
|
|
@@ -179,7 +180,7 @@ class TreeNodeListWorker(object):
|
|
|
179
180
|
logger.info("Starting load of EC node tree length %d", len(nL))
|
|
180
181
|
if doLoad:
|
|
181
182
|
collectionName = "tree_ec_node_list"
|
|
182
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
183
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
183
184
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
184
185
|
# ---- CARD
|
|
185
186
|
okCou = True
|
|
@@ -200,7 +201,7 @@ class TreeNodeListWorker(object):
|
|
|
200
201
|
collectionName,
|
|
201
202
|
loadType=loadType,
|
|
202
203
|
documentList=nL,
|
|
203
|
-
indexAttributeList=["update_id"],
|
|
204
|
+
indexAttributeList=["id"],
|
|
204
205
|
keyNames=None,
|
|
205
206
|
addValues=addValues,
|
|
206
207
|
schemaLevel=None
|
|
@@ -229,7 +230,7 @@ class TreeNodeListWorker(object):
|
|
|
229
230
|
if doLoad:
|
|
230
231
|
collectionName = "tree_taxonomy_node_list"
|
|
231
232
|
logger.debug("Taxonomy nodes (%d) %r", len(nL), nL[:5])
|
|
232
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
233
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
233
234
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
234
235
|
logger.info("Tree loading operations completed.")
|
|
235
236
|
#
|
|
@@ -241,7 +242,7 @@ class TreeNodeListWorker(object):
|
|
|
241
242
|
nL = atcP.getTreeNodeList(filterD=atcFilterD)
|
|
242
243
|
collectionName = "tree_atc_node_list"
|
|
243
244
|
logger.debug("ATC node list length %d %r", len(nL), nL[:5])
|
|
244
|
-
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
245
|
+
ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["id"], keyNames=None, addValues=addValues, schemaLevel=None)
|
|
245
246
|
self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp)
|
|
246
247
|
#
|
|
247
248
|
# ---
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: rcsb.exdb
|
|
3
|
-
Version: 1.26
|
|
3
|
+
Version: 1.28
|
|
4
4
|
Summary: RCSB Python ExDB data extraction and loading workflows
|
|
5
5
|
Home-page: https://github.com/rcsb/py-rcsb_exdb
|
|
6
6
|
Author: John Westbrook
|
|
@@ -16,29 +16,34 @@ Classifier: Programming Language :: Python :: 3.9
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
|
-
Requires-Dist: OpenEye-toolkits>=2024.1.1
|
|
20
19
|
Requires-Dist: numpy
|
|
21
20
|
Requires-Dist: jsonschema>=2.6.0
|
|
22
21
|
Requires-Dist: rcsb.utils.io>=1.48
|
|
23
|
-
Requires-Dist: rcsb.db>=1.
|
|
24
|
-
Requires-Dist: rcsb.utils.chem>=0.
|
|
22
|
+
Requires-Dist: rcsb.db>=1.800
|
|
23
|
+
Requires-Dist: rcsb.utils.chem>=0.81
|
|
25
24
|
Requires-Dist: rcsb.utils.chemref>=0.91
|
|
26
|
-
Requires-Dist: rcsb.utils.citation>=0.22
|
|
27
25
|
Requires-Dist: rcsb.utils.config>=0.40
|
|
28
26
|
Requires-Dist: rcsb.utils.ec>=0.25
|
|
29
27
|
Requires-Dist: rcsb.utils.go>=0.18
|
|
30
28
|
Requires-Dist: rcsb.utils.seq>=0.82
|
|
31
|
-
Requires-Dist: rcsb.utils.seqalign>=0.31
|
|
32
29
|
Requires-Dist: rcsb.utils.targets>=0.82
|
|
33
30
|
Requires-Dist: rcsb.utils.struct>=0.47
|
|
34
31
|
Requires-Dist: rcsb.utils.taxonomy>=0.43
|
|
35
32
|
Requires-Dist: rcsb.utils.dictionary>=1.27
|
|
36
|
-
Requires-Dist: rcsb.workflow>=0.46
|
|
37
|
-
Requires-Dist: statistics; python_version < "3.0"
|
|
38
33
|
Provides-Extra: dev
|
|
39
34
|
Requires-Dist: check-manifest; extra == "dev"
|
|
40
35
|
Provides-Extra: test
|
|
41
36
|
Requires-Dist: coverage; extra == "test"
|
|
37
|
+
Dynamic: author
|
|
38
|
+
Dynamic: author-email
|
|
39
|
+
Dynamic: classifier
|
|
40
|
+
Dynamic: description
|
|
41
|
+
Dynamic: description-content-type
|
|
42
|
+
Dynamic: home-page
|
|
43
|
+
Dynamic: license
|
|
44
|
+
Dynamic: provides-extra
|
|
45
|
+
Dynamic: requires-dist
|
|
46
|
+
Dynamic: summary
|
|
42
47
|
|
|
43
48
|
# py-rcsb_exdb
|
|
44
49
|
|
|
@@ -105,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
|
|
|
105
110
|
[MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
|
|
106
111
|
are required to support the ExDB tools. HomeBrew also provides a variety of options for
|
|
107
112
|
managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
|
|
108
|
-
|
|
109
|
-
### Command Line Interfaces
|
|
110
|
-
|
|
111
|
-
A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
|
|
112
|
-
|
|
113
|
-
```bash
|
|
114
|
-
exdb_exec_cli --help
|
|
115
|
-
|
|
116
|
-
usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
|
|
117
|
-
[--etl_tree_node_lists] [--config_path CONFIG_PATH]
|
|
118
|
-
[--config_name CONFIG_NAME] [--db_type DB_TYPE]
|
|
119
|
-
[--read_back_check] [--num_proc NUM_PROC]
|
|
120
|
-
[--chunk_size CHUNK_SIZE]
|
|
121
|
-
[--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
|
|
122
|
-
[--cache_path CACHE_PATH] [--rebuild_cache]
|
|
123
|
-
|
|
124
|
-
optional arguments:
|
|
125
|
-
-h, --help show this help message and exit
|
|
126
|
-
--data_set_id DATA_SET_ID
|
|
127
|
-
Data set identifier (default= 2019_14 for current
|
|
128
|
-
week)
|
|
129
|
-
--full Fresh full load in a new tables/collections (Default)
|
|
130
|
-
--etl_chemref ETL integrated chemical reference data
|
|
131
|
-
--etl_tree_node_lists
|
|
132
|
-
ETL tree node lists
|
|
133
|
-
--config_path CONFIG_PATH
|
|
134
|
-
Path to configuration options file
|
|
135
|
-
--config_name CONFIG_NAME
|
|
136
|
-
Configuration section name
|
|
137
|
-
--db_type DB_TYPE Database server type (default=mongo)
|
|
138
|
-
--read_back_check Perform read back check on all documents
|
|
139
|
-
--num_proc NUM_PROC Number of processes to execute (default=2)
|
|
140
|
-
--chunk_size CHUNK_SIZE
|
|
141
|
-
Number of files loaded per process
|
|
142
|
-
--document_limit DOCUMENT_LIMIT
|
|
143
|
-
Load document limit for testing
|
|
144
|
-
--debug Turn on verbose logging
|
|
145
|
-
--mock Use MOCK repository configuration for testing
|
|
146
|
-
--cache_path CACHE_PATH
|
|
147
|
-
Top cache path for external and local resource files
|
|
148
|
-
--rebuild_cache Rebuild cached files from remote resources
|
|
149
|
-
________________________________________________________________________________
|
|
150
|
-
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
For example, to construct and load tree nodes list data collections, the following
|
|
154
|
-
command may be used:
|
|
155
|
-
|
|
156
|
-
```bash
|
|
157
|
-
exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
|
|
158
|
-
--cache_path ./CACHE \
|
|
159
|
-
--config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
|
|
160
|
-
--config_name site_info_configuration >& LOGTREE \
|
|
161
|
-
```
|
|
@@ -9,7 +9,6 @@ rcsb/__init__.py
|
|
|
9
9
|
rcsb.exdb.egg-info/PKG-INFO
|
|
10
10
|
rcsb.exdb.egg-info/SOURCES.txt
|
|
11
11
|
rcsb.exdb.egg-info/dependency_links.txt
|
|
12
|
-
rcsb.exdb.egg-info/entry_points.txt
|
|
13
12
|
rcsb.exdb.egg-info/not-zip-safe
|
|
14
13
|
rcsb.exdb.egg-info/requires.txt
|
|
15
14
|
rcsb.exdb.egg-info/top_level.txt
|
|
@@ -29,7 +28,6 @@ rcsb/exdb/citation/CitationAdapter.py
|
|
|
29
28
|
rcsb/exdb/citation/CitationExtractor.py
|
|
30
29
|
rcsb/exdb/citation/CitationUtils.py
|
|
31
30
|
rcsb/exdb/citation/__init__.py
|
|
32
|
-
rcsb/exdb/cli/ExDbExec.py
|
|
33
31
|
rcsb/exdb/cli/__init__.py
|
|
34
32
|
rcsb/exdb/entry/EntryInfoProvider.py
|
|
35
33
|
rcsb/exdb/entry/__init__.py
|
|
@@ -58,7 +56,6 @@ rcsb/exdb/tests/testCitationExtractor.py
|
|
|
58
56
|
rcsb/exdb/tests/testCitationUtils.py
|
|
59
57
|
rcsb/exdb/tests/testEntryInfoEtlWorkflow.py
|
|
60
58
|
rcsb/exdb/tests/testEntryInfoProvider.py
|
|
61
|
-
rcsb/exdb/tests/testExDbWorkflow.py
|
|
62
59
|
rcsb/exdb/tests/testGlycanEtlWorkflow.py
|
|
63
60
|
rcsb/exdb/tests/testGlycanProvider.py
|
|
64
61
|
rcsb/exdb/tests/testGlycanUtils.py
|
|
@@ -89,7 +86,6 @@ rcsb/exdb/utils/ObjectUpdater.py
|
|
|
89
86
|
rcsb/exdb/utils/ObjectValidator.py
|
|
90
87
|
rcsb/exdb/utils/__init__.py
|
|
91
88
|
rcsb/exdb/wf/EntryInfoEtlWorkflow.py
|
|
92
|
-
rcsb/exdb/wf/ExDbWorkflow.py
|
|
93
89
|
rcsb/exdb/wf/GlycanEtlWorkflow.py
|
|
94
90
|
rcsb/exdb/wf/PubChemEtlWorkflow.py
|
|
95
91
|
rcsb/exdb/wf/__init__.py
|
|
@@ -1,24 +1,17 @@
|
|
|
1
|
-
OpenEye-toolkits>=2024.1.1
|
|
2
1
|
numpy
|
|
3
2
|
jsonschema>=2.6.0
|
|
4
3
|
rcsb.utils.io>=1.48
|
|
5
|
-
rcsb.db>=1.
|
|
6
|
-
rcsb.utils.chem>=0.
|
|
4
|
+
rcsb.db>=1.800
|
|
5
|
+
rcsb.utils.chem>=0.81
|
|
7
6
|
rcsb.utils.chemref>=0.91
|
|
8
|
-
rcsb.utils.citation>=0.22
|
|
9
7
|
rcsb.utils.config>=0.40
|
|
10
8
|
rcsb.utils.ec>=0.25
|
|
11
9
|
rcsb.utils.go>=0.18
|
|
12
10
|
rcsb.utils.seq>=0.82
|
|
13
|
-
rcsb.utils.seqalign>=0.31
|
|
14
11
|
rcsb.utils.targets>=0.82
|
|
15
12
|
rcsb.utils.struct>=0.47
|
|
16
13
|
rcsb.utils.taxonomy>=0.43
|
|
17
14
|
rcsb.utils.dictionary>=1.27
|
|
18
|
-
rcsb.workflow>=0.46
|
|
19
|
-
|
|
20
|
-
[:python_version < "3.0"]
|
|
21
|
-
statistics
|
|
22
15
|
|
|
23
16
|
[dev]
|
|
24
17
|
check-manifest
|
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
--extra-index-url https://pypi.anaconda.org/OpenEye/simple
|
|
2
|
-
OpenEye
|
|
2
|
+
# Above line may be needed despite the OpenEye package not being a direct requirement of this package (it's used by rcsb.utils.chem)
|
|
3
|
+
# OpenEye-toolkits >= 2024.1.1
|
|
3
4
|
numpy
|
|
4
5
|
jsonschema >= 2.6.0
|
|
5
6
|
rcsb.utils.io >= 1.48
|
|
6
|
-
rcsb.db >= 1.
|
|
7
|
-
rcsb.utils.chem >= 0.
|
|
7
|
+
rcsb.db >= 1.800
|
|
8
|
+
rcsb.utils.chem >= 0.81
|
|
8
9
|
rcsb.utils.chemref >= 0.91
|
|
9
|
-
rcsb.utils.citation >= 0.22
|
|
10
10
|
rcsb.utils.config >= 0.40
|
|
11
11
|
rcsb.utils.ec >= 0.25
|
|
12
12
|
rcsb.utils.go >= 0.18
|
|
13
13
|
rcsb.utils.seq >= 0.82
|
|
14
|
-
rcsb.utils.seqalign >= 0.31
|
|
15
14
|
rcsb.utils.targets >= 0.82
|
|
16
15
|
rcsb.utils.struct >= 0.47
|
|
17
16
|
rcsb.utils.taxonomy >= 0.43
|
|
18
17
|
rcsb.utils.dictionary >= 1.27
|
|
19
|
-
rcsb.workflow >= 0.46
|
|
20
|
-
statistics; python_version < "3.0"
|
|
@@ -47,7 +47,6 @@ setup(
|
|
|
47
47
|
"Programming Language :: Python :: 3.9",
|
|
48
48
|
"Programming Language :: Python :: 3.10",
|
|
49
49
|
],
|
|
50
|
-
entry_points={"console_scripts": ["exdb_exec_cli=rcsb.exdb.cli.ExDbExec:main"]},
|
|
51
50
|
#
|
|
52
51
|
install_requires=packagesRequired[1:],
|
|
53
52
|
packages=find_packages(exclude=["rcsb.mock-data", "rcsb.exdb.tests-anal", "rcsb.exdb.tests-*", "tests.*"]),
|
|
@@ -57,7 +56,7 @@ setup(
|
|
|
57
56
|
},
|
|
58
57
|
#
|
|
59
58
|
test_suite="rcsb.exdb.tests",
|
|
60
|
-
tests_require=["tox"],
|
|
59
|
+
tests_require=["tox", "rcsb.utils.citation >= 0.22"],
|
|
61
60
|
#
|
|
62
61
|
# Not configured ...
|
|
63
62
|
extras_require={"dev": ["check-manifest"], "test": ["coverage"]},
|