rcsb.exdb 1.27__tar.gz → 1.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/HISTORY.txt +2 -1
  2. {rcsb_exdb-1.27/rcsb.exdb.egg-info → rcsb_exdb-1.28}/PKG-INFO +3 -62
  3. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/README.md +0 -54
  4. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/cli/__init__.py +1 -1
  5. {rcsb_exdb-1.27 → rcsb_exdb-1.28/rcsb.exdb.egg-info}/PKG-INFO +3 -62
  6. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/SOURCES.txt +0 -4
  7. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/requires.txt +2 -9
  8. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/requirements.txt +4 -7
  9. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/setup.py +1 -2
  10. rcsb_exdb-1.27/rcsb/exdb/cli/ExDbExec.py +0 -239
  11. rcsb_exdb-1.27/rcsb/exdb/tests/testExDbWorkflow.py +0 -145
  12. rcsb_exdb-1.27/rcsb/exdb/wf/ExDbWorkflow.py +0 -521
  13. rcsb_exdb-1.27/rcsb.exdb.egg-info/entry_points.txt +0 -2
  14. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/LICENSE +0 -0
  15. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/MANIFEST.in +0 -0
  16. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/__init__.py +0 -0
  17. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/__init__.py +0 -0
  18. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/BranchedEntityExtractor.py +0 -0
  19. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanProvider.py +0 -0
  20. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/GlycanUtils.py +0 -0
  21. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/branch/__init__.py +0 -0
  22. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefEtlWorker.py +0 -0
  23. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefExtractor.py +0 -0
  24. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/ChemRefMappingProvider.py +0 -0
  25. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemDataCacheProvider.py +0 -0
  26. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemEtlWrapper.py +0 -0
  27. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/PubChemIndexCacheProvider.py +0 -0
  28. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/chemref/__init__.py +0 -0
  29. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationAdapter.py +0 -0
  30. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationExtractor.py +0 -0
  31. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/CitationUtils.py +0 -0
  32. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/citation/__init__.py +0 -0
  33. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/entry/EntryInfoProvider.py +0 -0
  34. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/entry/__init__.py +0 -0
  35. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/AnnotationExtractor.py +0 -0
  36. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingExtractor.py +0 -0
  37. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/LigandNeighborMappingProvider.py +0 -0
  38. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/PolymerEntityExtractor.py +0 -0
  39. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationAdapter.py +0 -0
  40. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAnnotationProvider.py +0 -0
  41. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentAdapter.py +0 -0
  42. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceAssignmentProvider.py +0 -0
  43. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/ReferenceSequenceCacheProvider.py +0 -0
  44. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/TaxonomyExtractor.py +0 -0
  45. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtCoreEtlWorker.py +0 -0
  46. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/UniProtExtractor.py +0 -0
  47. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/seq/__init__.py +0 -0
  48. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/__init__.py +0 -0
  49. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixtureDictMethodResourceProvider.py +0 -0
  50. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/fixturePdbxLoader.py +0 -0
  51. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testAnnotationExtractor.py +0 -0
  52. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testBranchedEntityExtractor.py +0 -0
  53. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefLoader.py +0 -0
  54. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testChemRefMappingProvider.py +0 -0
  55. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationAdapter.py +0 -0
  56. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationExtractor.py +0 -0
  57. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testCitationUtils.py +0 -0
  58. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoEtlWorkflow.py +0 -0
  59. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testEntryInfoProvider.py +0 -0
  60. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanEtlWorkflow.py +0 -0
  61. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanProvider.py +0 -0
  62. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testGlycanUtils.py +0 -0
  63. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testLigandNeighborMappingProvider.py +0 -0
  64. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectExtractor.py +0 -0
  65. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectTransformer.py +0 -0
  66. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testObjectUpdater.py +0 -0
  67. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPolymerEntityExtractor.py +0 -0
  68. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemDataCacheProvider.py +0 -0
  69. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWorkflow.py +0 -0
  70. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemEtlWrapper.py +0 -0
  71. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testPubChemIndexCacheProvider.py +0 -0
  72. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAnnotationAdapter.py +0 -0
  73. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapter.py +0 -0
  74. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentAdapterValidate.py +0 -0
  75. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceAssignmentProvider.py +0 -0
  76. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testReferenceSequenceCacheProvider.py +0 -0
  77. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTaxonomyExtractor.py +0 -0
  78. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testTreeNodeListWorker.py +0 -0
  79. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtCoreEtlWorker.py +0 -0
  80. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tests/testUniProtExtractor.py +0 -0
  81. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tree/TreeNodeListWorker.py +0 -0
  82. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/tree/__init__.py +0 -0
  83. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectAdapterBase.py +0 -0
  84. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectExtractor.py +0 -0
  85. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectTransformer.py +0 -0
  86. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectUpdater.py +0 -0
  87. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/ObjectValidator.py +0 -0
  88. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/utils/__init__.py +0 -0
  89. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/EntryInfoEtlWorkflow.py +0 -0
  90. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/GlycanEtlWorkflow.py +0 -0
  91. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/PubChemEtlWorkflow.py +0 -0
  92. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb/exdb/wf/__init__.py +0 -0
  93. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/dependency_links.txt +0 -0
  94. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/not-zip-safe +0 -0
  95. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/rcsb.exdb.egg-info/top_level.txt +0 -0
  96. {rcsb_exdb-1.27 → rcsb_exdb-1.28}/setup.cfg +0 -0
@@ -108,4 +108,5 @@
108
108
  Update CI/CD to python 3.10
109
109
  10-Dec-2024 V1.26 Update PolymerEntityExtractor to sort extracted sequence data;
110
110
  Update Azure pipelines to run on latest macOS and ubuntu version
111
- 23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
111
+ 23-Jan-2025 V1.27 Update TreeNodeListWorker to index 'id' field
112
+ 11-Feb-2025 V1.28 Move ExDB CLI code (workflow, exec, and tests) and Dockerfile to rcsb.workflow to avoid circular imports
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rcsb.exdb
3
- Version: 1.27
3
+ Version: 1.28
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -16,25 +16,20 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Classifier: Programming Language :: Python :: 3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: OpenEye-toolkits>=2024.1.1
20
19
  Requires-Dist: numpy
21
20
  Requires-Dist: jsonschema>=2.6.0
22
21
  Requires-Dist: rcsb.utils.io>=1.48
23
- Requires-Dist: rcsb.db>=1.725
24
- Requires-Dist: rcsb.utils.chem>=0.79
22
+ Requires-Dist: rcsb.db>=1.800
23
+ Requires-Dist: rcsb.utils.chem>=0.81
25
24
  Requires-Dist: rcsb.utils.chemref>=0.91
26
- Requires-Dist: rcsb.utils.citation>=0.22
27
25
  Requires-Dist: rcsb.utils.config>=0.40
28
26
  Requires-Dist: rcsb.utils.ec>=0.25
29
27
  Requires-Dist: rcsb.utils.go>=0.18
30
28
  Requires-Dist: rcsb.utils.seq>=0.82
31
- Requires-Dist: rcsb.utils.seqalign>=0.31
32
29
  Requires-Dist: rcsb.utils.targets>=0.82
33
30
  Requires-Dist: rcsb.utils.struct>=0.47
34
31
  Requires-Dist: rcsb.utils.taxonomy>=0.43
35
32
  Requires-Dist: rcsb.utils.dictionary>=1.27
36
- Requires-Dist: rcsb.workflow>=0.46
37
- Requires-Dist: statistics; python_version < "3.0"
38
33
  Provides-Extra: dev
39
34
  Requires-Dist: check-manifest; extra == "dev"
40
35
  Provides-Extra: test
@@ -115,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
115
110
  [MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
116
111
  are required to support the ExDB tools. HomeBrew also provides a variety of options for
117
112
  managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
118
-
119
- ### Command Line Interfaces
120
-
121
- A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
122
-
123
- ```bash
124
- exdb_exec_cli --help
125
-
126
- usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
127
- [--etl_tree_node_lists] [--config_path CONFIG_PATH]
128
- [--config_name CONFIG_NAME] [--db_type DB_TYPE]
129
- [--read_back_check] [--num_proc NUM_PROC]
130
- [--chunk_size CHUNK_SIZE]
131
- [--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
132
- [--cache_path CACHE_PATH] [--rebuild_cache]
133
-
134
- optional arguments:
135
- -h, --help show this help message and exit
136
- --data_set_id DATA_SET_ID
137
- Data set identifier (default= 2019_14 for current
138
- week)
139
- --full Fresh full load in a new tables/collections (Default)
140
- --etl_chemref ETL integrated chemical reference data
141
- --etl_tree_node_lists
142
- ETL tree node lists
143
- --config_path CONFIG_PATH
144
- Path to configuration options file
145
- --config_name CONFIG_NAME
146
- Configuration section name
147
- --db_type DB_TYPE Database server type (default=mongo)
148
- --read_back_check Perform read back check on all documents
149
- --num_proc NUM_PROC Number of processes to execute (default=2)
150
- --chunk_size CHUNK_SIZE
151
- Number of files loaded per process
152
- --document_limit DOCUMENT_LIMIT
153
- Load document limit for testing
154
- --debug Turn on verbose logging
155
- --mock Use MOCK repository configuration for testing
156
- --cache_path CACHE_PATH
157
- Top cache path for external and local resource files
158
- --rebuild_cache Rebuild cached files from remote resources
159
- ________________________________________________________________________________
160
-
161
- ```
162
-
163
- For example, to construct and load tree nodes list data collections, the following
164
- command may be used:
165
-
166
- ```bash
167
- exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
168
- --cache_path ./CACHE \
169
- --config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
170
- --config_name site_info_configuration >& LOGTREE \
171
- ```
@@ -63,57 +63,3 @@ install this system. Once HomeBrew is installed, you can further install the
63
63
  [MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
64
64
  are required to support the ExDB tools. HomeBrew also provides a variety of options for
65
65
  managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
66
-
67
- ### Command Line Interfaces
68
-
69
- A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
70
-
71
- ```bash
72
- exdb_exec_cli --help
73
-
74
- usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
75
- [--etl_tree_node_lists] [--config_path CONFIG_PATH]
76
- [--config_name CONFIG_NAME] [--db_type DB_TYPE]
77
- [--read_back_check] [--num_proc NUM_PROC]
78
- [--chunk_size CHUNK_SIZE]
79
- [--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
80
- [--cache_path CACHE_PATH] [--rebuild_cache]
81
-
82
- optional arguments:
83
- -h, --help show this help message and exit
84
- --data_set_id DATA_SET_ID
85
- Data set identifier (default= 2019_14 for current
86
- week)
87
- --full Fresh full load in a new tables/collections (Default)
88
- --etl_chemref ETL integrated chemical reference data
89
- --etl_tree_node_lists
90
- ETL tree node lists
91
- --config_path CONFIG_PATH
92
- Path to configuration options file
93
- --config_name CONFIG_NAME
94
- Configuration section name
95
- --db_type DB_TYPE Database server type (default=mongo)
96
- --read_back_check Perform read back check on all documents
97
- --num_proc NUM_PROC Number of processes to execute (default=2)
98
- --chunk_size CHUNK_SIZE
99
- Number of files loaded per process
100
- --document_limit DOCUMENT_LIMIT
101
- Load document limit for testing
102
- --debug Turn on verbose logging
103
- --mock Use MOCK repository configuration for testing
104
- --cache_path CACHE_PATH
105
- Top cache path for external and local resource files
106
- --rebuild_cache Rebuild cached files from remote resources
107
- ________________________________________________________________________________
108
-
109
- ```
110
-
111
- For example, to construct and load tree nodes list data collections, the following
112
- command may be used:
113
-
114
- ```bash
115
- exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
116
- --cache_path ./CACHE \
117
- --config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
118
- --config_name site_info_configuration >& LOGTREE \
119
- ```
@@ -2,4 +2,4 @@ __docformat__ = "google en"
2
2
  __author__ = "John Westbrook"
3
3
  __email__ = "john.westbrook@rcsb.org"
4
4
  __license__ = "Apache 2.0"
5
- __version__ = "1.27"
5
+ __version__ = "1.28"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: rcsb.exdb
3
- Version: 1.27
3
+ Version: 1.28
4
4
  Summary: RCSB Python ExDB data extraction and loading workflows
5
5
  Home-page: https://github.com/rcsb/py-rcsb_exdb
6
6
  Author: John Westbrook
@@ -16,25 +16,20 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Classifier: Programming Language :: Python :: 3.10
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
- Requires-Dist: OpenEye-toolkits>=2024.1.1
20
19
  Requires-Dist: numpy
21
20
  Requires-Dist: jsonschema>=2.6.0
22
21
  Requires-Dist: rcsb.utils.io>=1.48
23
- Requires-Dist: rcsb.db>=1.725
24
- Requires-Dist: rcsb.utils.chem>=0.79
22
+ Requires-Dist: rcsb.db>=1.800
23
+ Requires-Dist: rcsb.utils.chem>=0.81
25
24
  Requires-Dist: rcsb.utils.chemref>=0.91
26
- Requires-Dist: rcsb.utils.citation>=0.22
27
25
  Requires-Dist: rcsb.utils.config>=0.40
28
26
  Requires-Dist: rcsb.utils.ec>=0.25
29
27
  Requires-Dist: rcsb.utils.go>=0.18
30
28
  Requires-Dist: rcsb.utils.seq>=0.82
31
- Requires-Dist: rcsb.utils.seqalign>=0.31
32
29
  Requires-Dist: rcsb.utils.targets>=0.82
33
30
  Requires-Dist: rcsb.utils.struct>=0.47
34
31
  Requires-Dist: rcsb.utils.taxonomy>=0.43
35
32
  Requires-Dist: rcsb.utils.dictionary>=1.27
36
- Requires-Dist: rcsb.workflow>=0.46
37
- Requires-Dist: statistics; python_version < "3.0"
38
33
  Provides-Extra: dev
39
34
  Requires-Dist: check-manifest; extra == "dev"
40
35
  Provides-Extra: test
@@ -115,57 +110,3 @@ install this system. Once HomeBrew is installed, you can further install the
115
110
  [MongoDB](https://docs.mongodb.com/manual/tutorial/install-mongodb-on-os-x/) packages which
116
111
  are required to support the ExDB tools. HomeBrew also provides a variety of options for
117
112
  managing a [Python virtual environments](https://gist.github.com/Geoyi/f55ed54d24cc9ff1c14bd95fac21c042).
118
-
119
- ### Command Line Interfaces
120
-
121
- A convenience CLI `exdb_exec_cli` is provided for performing update and loading operations.
122
-
123
- ```bash
124
- exdb_exec_cli --help
125
-
126
- usage: exdb_exec_cli [-h] [--data_set_id DATA_SET_ID] [--full] [--etl_chemref]
127
- [--etl_tree_node_lists] [--config_path CONFIG_PATH]
128
- [--config_name CONFIG_NAME] [--db_type DB_TYPE]
129
- [--read_back_check] [--num_proc NUM_PROC]
130
- [--chunk_size CHUNK_SIZE]
131
- [--document_limit DOCUMENT_LIMIT] [--debug] [--mock]
132
- [--cache_path CACHE_PATH] [--rebuild_cache]
133
-
134
- optional arguments:
135
- -h, --help show this help message and exit
136
- --data_set_id DATA_SET_ID
137
- Data set identifier (default= 2019_14 for current
138
- week)
139
- --full Fresh full load in a new tables/collections (Default)
140
- --etl_chemref ETL integrated chemical reference data
141
- --etl_tree_node_lists
142
- ETL tree node lists
143
- --config_path CONFIG_PATH
144
- Path to configuration options file
145
- --config_name CONFIG_NAME
146
- Configuration section name
147
- --db_type DB_TYPE Database server type (default=mongo)
148
- --read_back_check Perform read back check on all documents
149
- --num_proc NUM_PROC Number of processes to execute (default=2)
150
- --chunk_size CHUNK_SIZE
151
- Number of files loaded per process
152
- --document_limit DOCUMENT_LIMIT
153
- Load document limit for testing
154
- --debug Turn on verbose logging
155
- --mock Use MOCK repository configuration for testing
156
- --cache_path CACHE_PATH
157
- Top cache path for external and local resource files
158
- --rebuild_cache Rebuild cached files from remote resources
159
- ________________________________________________________________________________
160
-
161
- ```
162
-
163
- For example, to construct and load tree nodes list data collections, the following
164
- command may be used:
165
-
166
- ```bash
167
- exdb_exec_cli --mock --full --etl_tree_node_lists --rebuild_cache \
168
- --cache_path ./CACHE \
169
- --config_path ./rcsb/mock-data/config/dbload-setup-example.yml \
170
- --config_name site_info_configuration >& LOGTREE \
171
- ```
@@ -9,7 +9,6 @@ rcsb/__init__.py
9
9
  rcsb.exdb.egg-info/PKG-INFO
10
10
  rcsb.exdb.egg-info/SOURCES.txt
11
11
  rcsb.exdb.egg-info/dependency_links.txt
12
- rcsb.exdb.egg-info/entry_points.txt
13
12
  rcsb.exdb.egg-info/not-zip-safe
14
13
  rcsb.exdb.egg-info/requires.txt
15
14
  rcsb.exdb.egg-info/top_level.txt
@@ -29,7 +28,6 @@ rcsb/exdb/citation/CitationAdapter.py
29
28
  rcsb/exdb/citation/CitationExtractor.py
30
29
  rcsb/exdb/citation/CitationUtils.py
31
30
  rcsb/exdb/citation/__init__.py
32
- rcsb/exdb/cli/ExDbExec.py
33
31
  rcsb/exdb/cli/__init__.py
34
32
  rcsb/exdb/entry/EntryInfoProvider.py
35
33
  rcsb/exdb/entry/__init__.py
@@ -58,7 +56,6 @@ rcsb/exdb/tests/testCitationExtractor.py
58
56
  rcsb/exdb/tests/testCitationUtils.py
59
57
  rcsb/exdb/tests/testEntryInfoEtlWorkflow.py
60
58
  rcsb/exdb/tests/testEntryInfoProvider.py
61
- rcsb/exdb/tests/testExDbWorkflow.py
62
59
  rcsb/exdb/tests/testGlycanEtlWorkflow.py
63
60
  rcsb/exdb/tests/testGlycanProvider.py
64
61
  rcsb/exdb/tests/testGlycanUtils.py
@@ -89,7 +86,6 @@ rcsb/exdb/utils/ObjectUpdater.py
89
86
  rcsb/exdb/utils/ObjectValidator.py
90
87
  rcsb/exdb/utils/__init__.py
91
88
  rcsb/exdb/wf/EntryInfoEtlWorkflow.py
92
- rcsb/exdb/wf/ExDbWorkflow.py
93
89
  rcsb/exdb/wf/GlycanEtlWorkflow.py
94
90
  rcsb/exdb/wf/PubChemEtlWorkflow.py
95
91
  rcsb/exdb/wf/__init__.py
@@ -1,24 +1,17 @@
1
- OpenEye-toolkits>=2024.1.1
2
1
  numpy
3
2
  jsonschema>=2.6.0
4
3
  rcsb.utils.io>=1.48
5
- rcsb.db>=1.725
6
- rcsb.utils.chem>=0.79
4
+ rcsb.db>=1.800
5
+ rcsb.utils.chem>=0.81
7
6
  rcsb.utils.chemref>=0.91
8
- rcsb.utils.citation>=0.22
9
7
  rcsb.utils.config>=0.40
10
8
  rcsb.utils.ec>=0.25
11
9
  rcsb.utils.go>=0.18
12
10
  rcsb.utils.seq>=0.82
13
- rcsb.utils.seqalign>=0.31
14
11
  rcsb.utils.targets>=0.82
15
12
  rcsb.utils.struct>=0.47
16
13
  rcsb.utils.taxonomy>=0.43
17
14
  rcsb.utils.dictionary>=1.27
18
- rcsb.workflow>=0.46
19
-
20
- [:python_version < "3.0"]
21
- statistics
22
15
 
23
16
  [dev]
24
17
  check-manifest
@@ -1,20 +1,17 @@
1
1
  --extra-index-url https://pypi.anaconda.org/OpenEye/simple
2
- OpenEye-toolkits >= 2024.1.1
2
+ # Above line may be needed despite the OpenEye package not being a direct requirement of this package (it's used by rcsb.utils.chem)
3
+ # OpenEye-toolkits >= 2024.1.1
3
4
  numpy
4
5
  jsonschema >= 2.6.0
5
6
  rcsb.utils.io >= 1.48
6
- rcsb.db >= 1.725
7
- rcsb.utils.chem >= 0.79
7
+ rcsb.db >= 1.800
8
+ rcsb.utils.chem >= 0.81
8
9
  rcsb.utils.chemref >= 0.91
9
- rcsb.utils.citation >= 0.22
10
10
  rcsb.utils.config >= 0.40
11
11
  rcsb.utils.ec >= 0.25
12
12
  rcsb.utils.go >= 0.18
13
13
  rcsb.utils.seq >= 0.82
14
- rcsb.utils.seqalign >= 0.31
15
14
  rcsb.utils.targets >= 0.82
16
15
  rcsb.utils.struct >= 0.47
17
16
  rcsb.utils.taxonomy >= 0.43
18
17
  rcsb.utils.dictionary >= 1.27
19
- rcsb.workflow >= 0.46
20
- statistics; python_version < "3.0"
@@ -47,7 +47,6 @@ setup(
47
47
  "Programming Language :: Python :: 3.9",
48
48
  "Programming Language :: Python :: 3.10",
49
49
  ],
50
- entry_points={"console_scripts": ["exdb_exec_cli=rcsb.exdb.cli.ExDbExec:main"]},
51
50
  #
52
51
  install_requires=packagesRequired[1:],
53
52
  packages=find_packages(exclude=["rcsb.mock-data", "rcsb.exdb.tests-anal", "rcsb.exdb.tests-*", "tests.*"]),
@@ -57,7 +56,7 @@ setup(
57
56
  },
58
57
  #
59
58
  test_suite="rcsb.exdb.tests",
60
- tests_require=["tox"],
59
+ tests_require=["tox", "rcsb.utils.citation >= 0.22"],
61
60
  #
62
61
  # Not configured ...
63
62
  extras_require={"dev": ["check-manifest"], "test": ["coverage"]},
@@ -1,239 +0,0 @@
1
- ##
2
- # File: ExDbExec.py
3
- # Date: 22-Apr-2019 jdw
4
- #
5
- # Execution wrapper -- for extract and load operations -
6
- #
7
- # Updates:
8
- # 4-Sep-2019 jdw add Tree and Drugbank loaders
9
- # 14-Feb-2020 jdw change over to ReferenceSequenceAnnotationProvider/Adapter
10
- # 9-Mar-2023 dwp Lower refChunkSize to 10 (UniProt API having trouble streaming XML responses)
11
- # 25-Apr-2024 dwp Add arguments and logic to support CLI usage from weekly-update workflow;
12
- # Add support for logging output to a specific file
13
- # 20-Aug-2024 dwp Add load_target_cofactors operation; change name of upd_targets_cofactors to upd_targets
14
- # 22-Oct-2024 dwp Add ccd_img_gen and ccd_file_gen operations
15
- # (latter will only be used briefly, as will stop generating SDF and Mol2 files in Dec 2024)
16
- ##
17
- __docformat__ = "google en"
18
- __author__ = "John Westbrook"
19
- __email__ = "jwest@rcsb.rutgers.edu"
20
- __license__ = "Apache 2.0"
21
-
22
- import os
23
- import sys
24
- import argparse
25
- import logging
26
-
27
- from rcsb.utils.config.ConfigUtil import ConfigUtil
28
- from rcsb.exdb.wf.ExDbWorkflow import ExDbWorkflow
29
-
30
- HERE = os.path.abspath(os.path.dirname(__file__))
31
- TOPDIR = os.path.dirname(os.path.dirname(os.path.dirname(HERE)))
32
-
33
- # logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s", stream=sys.stdout)
34
- logger = logging.getLogger()
35
-
36
-
37
- def main():
38
- parser = argparse.ArgumentParser()
39
- #
40
- parser.add_argument(
41
- "--op",
42
- default=None,
43
- required=True,
44
- help="Loading operation to perform",
45
- choices=[
46
- "etl_chemref", # ETL integrated chemical reference data
47
- "etl_uniprot_core", # ETL UniProt core reference data
48
- "etl_tree_node_lists", # ETL tree node lists
49
- "upd_ref_seq", # Update reference sequence assignments
50
- "upd_neighbor_interactions",
51
- "upd_uniprot_taxonomy",
52
- "upd_targets",
53
- "load_target_cofactors",
54
- "upd_pubchem",
55
- "upd_entry_info",
56
- "upd_glycan_idx",
57
- "upd_resource_stash",
58
- "ccd_img_gen",
59
- "ccd_file_gen",
60
- ]
61
- )
62
- parser.add_argument(
63
- "--load_type",
64
- default="full",
65
- help="Type of load ('full' for complete and fresh single-worker load, 'replace' for incremental and multi-worker load)",
66
- choices=["full", "replace"],
67
- )
68
- #
69
- parser.add_argument("--config_path", default=None, help="Path to configuration options file")
70
- parser.add_argument("--config_name", default="site_info_remote_configuration", help="Configuration section name")
71
- parser.add_argument("--cache_path", default=None, help="Cache path for resource files")
72
- parser.add_argument("--num_proc", default=2, help="Number of processes to execute (default=2)")
73
- parser.add_argument("--chunk_size", default=10, help="Number of files loaded per process")
74
- parser.add_argument("--max_step_length", default=500, help="Maximum subList size (default=500)")
75
- parser.add_argument("--db_type", default="mongo", help="Database server type (default=mongo)")
76
- parser.add_argument("--document_limit", default=None, help="Load document limit for testing")
77
- #
78
- parser.add_argument("--rebuild_cache", default=False, action="store_true", help="Rebuild cached resource files")
79
- parser.add_argument("--rebuild_sequence_cache", default=False, action="store_true", help="Rebuild cached resource files for reference sequence updates")
80
- parser.add_argument("--provider_types_exclude", default=None, help="Resource provider types to exclude")
81
- parser.add_argument("--use_filtered_tax_list", default=False, action="store_true", help="Use filtered list for taxonomy tree loading")
82
- parser.add_argument("--disable_read_back_check", default=False, action="store_true", help="Disable read back check on all documents")
83
- parser.add_argument("--debug", default=False, action="store_true", help="Turn on verbose logging")
84
- parser.add_argument("--mock", default=False, action="store_true", help="Use MOCK repository configuration for testing")
85
- parser.add_argument("--log_file_path", default=None, help="Path to runtime log file output.")
86
- #
87
- # Arguments specific for op == 'upd_ref_seq'
88
- parser.add_argument("--ref_chunk_size", default=10, help="Max chunk size for reference sequence updates (for op 'upd_ref_seq')")
89
- parser.add_argument("--min_missing", default=0, help="Minimum number of allowed missing reference sequences (for op 'upd_ref_seq')")
90
- parser.add_argument("--min_match_primary_percent", default=None, help="Minimum reference sequence match percentage (for op 'upd_ref_seq')")
91
- parser.add_argument("--test_mode", default=False, action="store_true", help="Test mode for reference sequence updates (for op 'upd_ref_seq')")
92
- #
93
- # Arguments specific for op == 'ccd_img_gen' or 'ccd_file_gen'
94
- parser.add_argument("--cc_output_path", default=None, help="The base local directory path where chemical component files (image, coordinates) are written (for op 'ccd_img_gen')")
95
- parser.add_argument("--cc_cache_path", default=None, help="The base local directory path where chemical component cache data are written (for op 'ccd_img_gen')")
96
- parser.add_argument("--oe_license_path", default=None, help="Path to OpenEye license file")
97
- #
98
- # Arguments buildExdbResources
99
- parser.add_argument("--rebuild_all_neighbor_interactions", default=False, action="store_true", help="Rebuild all neighbor interactions from scratch (default is incrementally)")
100
- parser.add_argument("--cc_file_prefix", default="cc-full", help="File name discriminator for index sets")
101
- parser.add_argument("--cc_url_target", default=None, help="target url for chemical component dictionary resource file (default: None=all public)")
102
- parser.add_argument("--bird_url_target", default=None, help="target url for bird dictionary resource file (cc format) (default: None=all public)")
103
- #
104
- args = parser.parse_args()
105
- #
106
- try:
107
- op, commonD, loadD = processArguments(args)
108
- except Exception as err:
109
- logger.exception("Argument processing problem %s", str(err))
110
- raise ValueError("Argument processing problem") from err
111
- #
112
- #
113
- # Log input arguments
114
- loadLogD = {k: v for d in [commonD, loadD] for k, v in d.items() if k != "inputIdCodeList"}
115
- logger.info("running load op %r on loadLogD %r:", op, loadLogD)
116
- #
117
- # Run the operation
118
- okR = False
119
- exWf = ExDbWorkflow(**commonD)
120
- if op in ["etl_chemref", "etl_uniprot_core", "etl_tree_node_lists", "upd_ref_seq"]:
121
- okR = exWf.load(op, **loadD)
122
- elif op in [
123
- "upd_neighbor_interactions",
124
- "upd_uniprot_taxonomy",
125
- "upd_targets",
126
- "load_target_cofactors",
127
- "upd_pubchem",
128
- "upd_entry_info",
129
- "upd_glycan_idx",
130
- "upd_resource_stash",
131
- ]:
132
- okR = exWf.buildExdbResource(op, **loadD)
133
- elif op in [
134
- "ccd_img_gen",
135
- "ccd_file_gen",
136
- ]:
137
- okR = exWf.generateCcdFiles(op, **loadD)
138
- else:
139
- logger.error("Unsupported op %r", op)
140
- #
141
- logger.info("Operation %r completed with status %r", op, okR)
142
- #
143
- if not okR:
144
- logger.error("Operation %r failed with status %r", op, okR)
145
- raise ValueError("Operation %r failed" % op)
146
-
147
-
148
- def processArguments(args):
149
- # Logging details
150
- logFilePath = args.log_file_path
151
- debugFlag = args.debug
152
- if debugFlag:
153
- logger.setLevel(logging.DEBUG)
154
- else:
155
- logger.setLevel(logging.INFO)
156
- if logFilePath:
157
- logDir = os.path.dirname(logFilePath)
158
- if not os.path.isdir(logDir):
159
- os.makedirs(logDir)
160
- handler = logging.FileHandler(logFilePath, mode="a")
161
- if debugFlag:
162
- handler.setLevel(logging.DEBUG)
163
- else:
164
- handler.setLevel(logging.INFO)
165
- formatter = logging.Formatter("%(asctime)s [%(levelname)s]-%(module)s.%(funcName)s: %(message)s")
166
- handler.setFormatter(formatter)
167
- logger.addHandler(handler)
168
- #
169
- # Configuration details
170
- configPath = args.config_path
171
- configName = args.config_name
172
- if not (configPath and configName):
173
- logger.error("Config path and/or name not provided: %r, %r", configPath, configName)
174
- raise ValueError("Config path and/or name not provided: %r, %r" % (configPath, configName))
175
- mockTopPath = os.path.join(TOPDIR, "rcsb", "mock-data") if args.mock else None
176
- logger.info("Using configuration file %r (section %r)", configPath, configName)
177
- cfgOb = ConfigUtil(configPath=configPath, defaultSectionName=configName, mockTopPath=mockTopPath)
178
- cfgObTmp = cfgOb.exportConfig()
179
- logger.info("Length of config object (%r)", len(cfgObTmp))
180
- if len(cfgObTmp) == 0:
181
- logger.error("Missing or access issue for config file %r", configPath)
182
- raise ValueError("Missing or access issue for config file %r" % configPath)
183
- else:
184
- del cfgObTmp
185
- #
186
- # Do any additional argument checking
187
- op = args.op
188
- if not op:
189
- raise ValueError("Must supply a value to '--op' argument")
190
- #
191
- cachePath = args.cache_path if args.cache_path else "."
192
- cachePath = os.path.abspath(cachePath)
193
-
194
- if args.db_type != "mongo":
195
- logger.error("Unsupported database type %r (must be 'mongo')", args.db_type)
196
- raise ValueError("Unsupported database type %r (must be 'mongo')" % args.db_type)
197
-
198
- # Now collect arguments into dictionaries
199
- commonD = {
200
- "configPath": configPath,
201
- "configName": configName,
202
- "cachePath": cachePath,
203
- "mockTopPath": mockTopPath,
204
- "debugFlag": debugFlag,
205
- "rebuildCache": args.rebuild_cache,
206
- "providerTypeExcludeL": args.provider_types_exclude,
207
- }
208
- loadD = {
209
- "loadType": args.load_type,
210
- "numProc": int(args.num_proc),
211
- "chunkSize": int(args.chunk_size),
212
- "maxStepLength": int(args.max_step_length),
213
- "dbType": args.db_type,
214
- "documentLimit": int(args.document_limit) if args.document_limit else None,
215
- "readBackCheck": not args.disable_read_back_check,
216
- "rebuildSequenceCache": args.rebuild_sequence_cache,
217
- "useFilteredLists": args.use_filtered_tax_list,
218
- "refChunkSize": int(args.ref_chunk_size),
219
- "minMissing": int(args.min_missing),
220
- "minMatchPrimaryPercent": float(args.min_match_primary_percent) if args.min_match_primary_percent else None,
221
- "testMode": args.test_mode,
222
- "rebuildAllNeighborInteractions": args.rebuild_all_neighbor_interactions,
223
- "ccFileNamePrefix": args.cc_file_prefix,
224
- "ccUrlTarget": args.cc_url_target,
225
- "birdUrlTarget": args.bird_url_target,
226
- "ccOutputPath": args.cc_output_path,
227
- "ccCachePath": args.cc_cache_path,
228
- "licenseFilePath": args.oe_license_path,
229
- }
230
-
231
- return op, commonD, loadD
232
-
233
-
234
- if __name__ == "__main__":
235
- try:
236
- main()
237
- except Exception as e:
238
- logger.exception("Run failed %s", str(e))
239
- sys.exit(1)