rcsb-api 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rcsb_api-1.5.0.dist-info/METADATA +216 -0
- rcsb_api-1.5.0.dist-info/RECORD +21 -0
- rcsb_api-1.5.0.dist-info/WHEEL +4 -0
- rcsb_api-1.5.0.dist-info/licenses/LICENSE +21 -0
- rcsb_api-1.5.0.dist-info/licenses/NOTICE.md +40 -0
- rcsbapi/__init__.py +0 -0
- rcsbapi/config.py +52 -0
- rcsbapi/const.py +122 -0
- rcsbapi/data/__init__.py +32 -0
- rcsbapi/data/data_query.py +406 -0
- rcsbapi/data/data_schema.py +204 -0
- rcsbapi/graphql_schema.py +1244 -0
- rcsbapi/model/__init__.py +3 -0
- rcsbapi/model/model_query.py +708 -0
- rcsbapi/model/model_schema.py +66 -0
- rcsbapi/search/__init__.py +38 -0
- rcsbapi/search/search_query.py +2165 -0
- rcsbapi/search/search_schema.py +409 -0
- rcsbapi/sequence/__init__.py +16 -0
- rcsbapi/sequence/seq_query.py +390 -0
- rcsbapi/sequence/seq_schema.py +116 -0
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rcsb-api
|
|
3
|
+
Version: 1.5.0
|
|
4
|
+
Summary: Python package interface for RCSB.org API services
|
|
5
|
+
Project-URL: Homepage, https://github.com/rcsb/py-rcsb-api
|
|
6
|
+
Project-URL: Repository, https://github.com/rcsb/py-rcsb-api
|
|
7
|
+
Project-URL: Issues, https://github.com/rcsb/py-rcsb-api/issues
|
|
8
|
+
Project-URL: Documentation, https://rcsbapi.readthedocs.io/
|
|
9
|
+
Project-URL: Changelog, https://github.com/rcsb/py-rcsb-api/blob/master/CHANGELOG.md
|
|
10
|
+
Author-email: Dennis Piehl <dennis.piehl@rcsb.org>
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
License-File: NOTICE.md
|
|
14
|
+
Keywords: API,PDB,RCSB,bioinformatics,protein,structure
|
|
15
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: Natural Language :: English
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
26
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
27
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
28
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
29
|
+
Classifier: Typing :: Typed
|
|
30
|
+
Requires-Python: <4.0,>=3.9
|
|
31
|
+
Requires-Dist: graphql-core
|
|
32
|
+
Requires-Dist: httpx>=0.26.0
|
|
33
|
+
Requires-Dist: nest-asyncio>=1.6.0
|
|
34
|
+
Requires-Dist: rustworkx
|
|
35
|
+
Requires-Dist: tqdm
|
|
36
|
+
Provides-Extra: docs
|
|
37
|
+
Requires-Dist: myst-parser; extra == 'docs'
|
|
38
|
+
Requires-Dist: sphinx; extra == 'docs'
|
|
39
|
+
Requires-Dist: sphinx-rtd-theme; extra == 'docs'
|
|
40
|
+
Provides-Extra: tests
|
|
41
|
+
Requires-Dist: black>=21.5b1; extra == 'tests'
|
|
42
|
+
Requires-Dist: coverage; extra == 'tests'
|
|
43
|
+
Requires-Dist: flake8; extra == 'tests'
|
|
44
|
+
Requires-Dist: pylint; extra == 'tests'
|
|
45
|
+
Requires-Dist: tox; extra == 'tests'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
[](https://pypi.org/project/rcsb-api/)
|
|
49
|
+
[](https://dev.azure.com/rcsb/RCSB%20PDB%20Python%20Projects/_build/latest?definitionId=40&branchName=master)
|
|
50
|
+
[](https://rcsbapi.readthedocs.io/en/latest/?badge=latest)
|
|
51
|
+
[](https://doi.org/10.5281/zenodo.14052470)
|
|
52
|
+
[](https://www.bestpractices.dev/projects/10424)
|
|
53
|
+
[](https://fairsoftwarechecklist.net/v0.2?f=31&a=30112&i=32111&r=133)
|
|
54
|
+
[](https://fair-software.eu)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# <img src="https://github.com/user-attachments/assets/248d3e32-7644-46b2-bf18-b5248c9e6305" height="160"/> *rcsb-api*: Python Toolkit for Accessing RCSB.org APIs
|
|
58
|
+
Python interface for RCSB Protein Data Bank API services at [RCSB.org](https://www.rcsb.org/).
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
This package requires Python 3.9 or later.
|
|
62
|
+
|
|
63
|
+
Install it from PyPI via `pip` or `uv`:
|
|
64
|
+
|
|
65
|
+
pip install rcsb-api
|
|
66
|
+
|
|
67
|
+
# or, if using uv:
|
|
68
|
+
uv pip install rcsb-api
|
|
69
|
+
|
|
70
|
+
Or, download from [GitHub](https://github.com/rcsb/py-rcsb-api/) and install locally:
|
|
71
|
+
|
|
72
|
+
git clone https://github.com/rcsb/py-rcsb-api.git
|
|
73
|
+
cd py-rcsb-api
|
|
74
|
+
pip install .
|
|
75
|
+
|
|
76
|
+
## Getting Started
|
|
77
|
+
Full documentation available at [readthedocs](https://rcsbapi.readthedocs.io/en/latest/).
|
|
78
|
+
|
|
79
|
+
The [RCSB PDB Search API](https://search.rcsb.org) supports RESTful requests according to a defined [schema](https://search.rcsb.org/redoc/index.html). This package provides an `rcsbapi.search` module that simplifies generating complex search queries.
|
|
80
|
+
|
|
81
|
+
The [RCSB PDB Data API](https://data.rcsb.org) supports requests using [GraphQL](https://graphql.org/), a language for API queries. This package provides an `rcsbapi.data` module that simplifies generating queries in GraphQL syntax.
|
|
82
|
+
|
|
83
|
+
### Search API
|
|
84
|
+
The `rcsbapi.search` module supports all available [Advanced Search](https://www.rcsb.org/search/advanced) services, as listed below. For more details on their usage, see [Search Service Types](https://rcsbapi.readthedocs.io/en/latest/search_api/query_construction.html#search-service-types).
|
|
85
|
+
|
|
86
|
+
|Search service |QueryType |
|
|
87
|
+
|----------------------------------|--------------------------|
|
|
88
|
+
|Full-text |`TextQuery()` |
|
|
89
|
+
|Attribute (structure or chemical) |`AttributeQuery()` |
|
|
90
|
+
|Sequence similarity |`SeqSimilarityQuery()` |
|
|
91
|
+
|Sequence motif |`SeqMotifQuery()` |
|
|
92
|
+
|Structure similarity |`StructSimilarityQuery()` |
|
|
93
|
+
|Structure motif |`StructMotifQuery()` |
|
|
94
|
+
|Chemical similarity |`ChemSimilarityQuery()` |
|
|
95
|
+
|
|
96
|
+
#### Search API Examples
|
|
97
|
+
To perform a search for all structures from humans associated with the term "Hemoglobin", you can combine a "full-text" query (`TextQuery`) with an "attribute" query (`AttributeQuery`):
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from rcsbapi.search import AttributeQuery, TextQuery
|
|
101
|
+
from rcsbapi.search import search_attributes as attrs
|
|
102
|
+
|
|
103
|
+
# Construct a "full-text" sub-query for structures associated with the term "Hemoglobin"
|
|
104
|
+
q1 = TextQuery(value="Hemoglobin")
|
|
105
|
+
|
|
106
|
+
# Construct an "attribute" sub-query to search for structures from humans
|
|
107
|
+
q2 = AttributeQuery(
|
|
108
|
+
attribute="rcsb_entity_source_organism.scientific_name",
|
|
109
|
+
operator="exact_match", # Other operators include "contains_phrase", "exists", and more
|
|
110
|
+
value="Homo sapiens"
|
|
111
|
+
)
|
|
112
|
+
# OR, do so by using Python comparison operators:
|
|
113
|
+
q2 = attrs.rcsb_entity_source_organism.scientific_name == "Homo sapiens"
|
|
114
|
+
|
|
115
|
+
# Combine the sub-queries (can sub-group using parentheses and standard operators, "&", "|", etc.)
|
|
116
|
+
query = q1 & q2
|
|
117
|
+
|
|
118
|
+
# Fetch the results by iterating over the query execution
|
|
119
|
+
for rId in query():
|
|
120
|
+
print(rId)
|
|
121
|
+
|
|
122
|
+
# OR, capture them into a variable
|
|
123
|
+
results = list(query())
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
These examples are in `operator` syntax. You can also make queries in `fluent` syntax. Learn more about both syntaxes and implementation details in [Query Syntax and Execution](https://rcsbapi.readthedocs.io/en/latest/search_api/query_construction.html#query-syntax-and-execution).
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
### Data API
|
|
130
|
+
The `rcsbapi.data` module allows you to easily construct GraphQL queries to the RCSB.org Data API.
|
|
131
|
+
|
|
132
|
+
This is done by specifying the following input:
|
|
133
|
+
- "input_type": the data hierarchy level you are starting from (e.g., "entry", "polymer_entity", etc.) (See full list [here](https://rcsbapi.readthedocs.io/en/latest/data_api/query_construction.html#input-type)).
|
|
134
|
+
- "input_ids": the list of IDs for which to fetch data (corresponding to the specified "input_type")
|
|
135
|
+
- "return_data_list": the list of data items ("fields") to retrieve. (Available fields can be explored [here](https://data.rcsb.org/data-attributes.html) or via the [GraphiQL editor's Documentation Explorer panel](https://data.rcsb.org/graphql/index.html).)
|
|
136
|
+
|
|
137
|
+
#### Data API Examples
|
|
138
|
+
This is a [simple query](https://data.rcsb.org/graphql/index.html?query=%7B%0A%20%20entry(entry_id%3A%20%224HHB%22)%20%7B%0A%20%20%20%20exptl%20%7B%0A%20%20%20%20%20%20method%0A%20%20%20%20%7D%0A%20%20%7D%0A%7D) requesting the experimental method of a structure with PDB ID 4HHB (Hemoglobin).
|
|
139
|
+
|
|
140
|
+
The query must be executed using the `.exec()` method, which will return the JSON response as well as store the response as an attribute of the `DataQuery` object. From the object, you can access the Data API response, get an interactive editor link, or access the arguments used to create the query.
|
|
141
|
+
The package is able to automatically build queries based on the "input_type" and path segment passed into "return_data_list". If using this package in code intended for long-term use, it's recommended to use fully qualified paths. When autocompletion is being used, a WARNING message will be printed out as a reminder.
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from rcsbapi.data import DataQuery as Query
|
|
145
|
+
query = Query(
|
|
146
|
+
input_type="entries",
|
|
147
|
+
input_ids=["4HHB"],
|
|
148
|
+
return_data_list=["exptl.method"]
|
|
149
|
+
)
|
|
150
|
+
print(query.exec())
|
|
151
|
+
```
|
|
152
|
+
Data is returned in JSON format:
|
|
153
|
+
```json
|
|
154
|
+
{
|
|
155
|
+
"data": {
|
|
156
|
+
"entries": [
|
|
157
|
+
{
|
|
158
|
+
"rcsb_id": "4HHB",
|
|
159
|
+
"exptl": [
|
|
160
|
+
{
|
|
161
|
+
"method": "X-RAY DIFFRACTION"
|
|
162
|
+
}
|
|
163
|
+
]
|
|
164
|
+
}
|
|
165
|
+
]
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Here is a [more complex query](https://data.rcsb.org/graphql/index.html?query=%7B%0A%20%20polymer_entities(entity_ids%3A%5B%222CPK_1%22%2C%223WHM_1%22%2C%222D5Z_1%22%5D)%20%7B%0A%20%20%20%20rcsb_id%0A%20%20%20%20rcsb_entity_source_organism%20%7B%0A%20%20%20%20%20%20ncbi_taxonomy_id%0A%20%20%20%20%20%20ncbi_scientific_name%0A%20%20%20%20%7D%0A%20%20%20%20rcsb_cluster_membership%20%7B%0A%20%20%20%20%20%20cluster_id%0A%20%20%20%20%20%20identity%0A%20%20%20%20%7D%0A%20%20%7D%0A%7D). Note that periods can be used to further specify requested data in return_data_list. Also note multiple return data items and ids can be requested in one query.
|
|
171
|
+
```python
|
|
172
|
+
from rcsbapi.data import DataQuery as Query
|
|
173
|
+
query = Query(
|
|
174
|
+
input_type="polymer_entities",
|
|
175
|
+
input_ids=["2CPK_1", "3WHM_1", "2D5Z_1"],
|
|
176
|
+
return_data_list=[
|
|
177
|
+
"polymer_entities.rcsb_id",
|
|
178
|
+
"rcsb_entity_source_organism.ncbi_taxonomy_id",
|
|
179
|
+
"rcsb_entity_source_organism.ncbi_scientific_name",
|
|
180
|
+
"cluster_id",
|
|
181
|
+
"identity"
|
|
182
|
+
]
|
|
183
|
+
)
|
|
184
|
+
print(query.exec())
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Jupyter Notebooks
|
|
188
|
+
Several Jupyter notebooks with example use cases and workflows for all package modules are provided under [notebooks](notebooks/).
|
|
189
|
+
|
|
190
|
+
For example, one notebook using both Search and Data API packages for a COVID-19 related example is available in [notebooks/search_data_workflow.ipynb](notebooks/search_data_workflow.ipynb) or online through Google Colab <a href="https://colab.research.google.com/github/rcsb/py-rcsb-api/blob/master/notebooks/search_data_workflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>.
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
## Citing
|
|
194
|
+
Please cite the ``rcsb-api`` package with the following reference:
|
|
195
|
+
|
|
196
|
+
> Dennis W. Piehl, Brinda Vallat, Ivana Truong, Habiba Morsy, Rusham Bhatt,
|
|
197
|
+
> Santiago Blaumann, Pratyoy Biswas, Yana Rose, Sebastian Bittrich, Jose M. Duarte,
|
|
198
|
+
> Joan Segura, Chunxiao Bi, Douglas Myers-Turnbull, Brian P. Hudson, Christine Zardecki,
|
|
199
|
+
> Stephen K. Burley. rcsb-api: Python Toolkit for Streamlining Access to RCSB Protein
|
|
200
|
+
> Data Bank APIs, Journal of Molecular Biology, 2025.
|
|
201
|
+
> DOI: [10.1016/j.jmb.2025.168970](https://doi.org/10.1016/j.jmb.2025.168970)
|
|
202
|
+
|
|
203
|
+
You should also cite the RCSB.org API services this package utilizes:
|
|
204
|
+
|
|
205
|
+
> Yana Rose, Jose M. Duarte, Robert Lowe, Joan Segura, Chunxiao Bi, Charmi
|
|
206
|
+
> Bhikadiya, Li Chen, Alexander S. Rose, Sebastian Bittrich, Stephen K. Burley,
|
|
207
|
+
> John D. Westbrook. RCSB Protein Data Bank: Architectural Advances Towards
|
|
208
|
+
> Integrated Searching and Efficient Access to Macromolecular Structure Data
|
|
209
|
+
> from the PDB Archive, Journal of Molecular Biology, 2020.
|
|
210
|
+
> DOI: [10.1016/j.jmb.2020.11.003](https://doi.org/10.1016/j.jmb.2020.11.003)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
## Documentation and Support
|
|
214
|
+
Please refer to the [readthedocs page](https://rcsbapi.readthedocs.io/en/latest/index.html) to learn more about package usage and other available features as well as to see more examples.
|
|
215
|
+
|
|
216
|
+
If you experience any issues installing or using the package, please submit an issue on [GitHub](https://github.com/rcsb/py-rcsb-api/issues) and we will try to respond in a timely manner.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
rcsbapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
rcsbapi/config.py,sha256=HipLB9Q25-Ci_OT5GP-gVtuuPK67OCtHUZgapPpEh-s,2645
|
|
3
|
+
rcsbapi/const.py,sha256=282v7RqZ0uNRqRwosTK-7-Ok2SxojEmMsu1-19hKTj8,5768
|
|
4
|
+
rcsbapi/graphql_schema.py,sha256=NoHS0_cmNC0HWj9PsYidQc1ep5ToHr5EZLmISMRiy0w,59888
|
|
5
|
+
rcsbapi/data/__init__.py,sha256=hJMUhUH-sBUjPYi-EP5A_L5HrY7PCrnnAEyCXCOtOYY,998
|
|
6
|
+
rcsbapi/data/data_query.py,sha256=eEn_5RlRiVCPCtGKmhphUoxKRWv3vWBfCBdtlY8Lf9U,17432
|
|
7
|
+
rcsbapi/data/data_schema.py,sha256=bKGlSTJ8Ch-ANe3KyInvdKxWYoQJoUWuIdaOs7YrytY,10525
|
|
8
|
+
rcsbapi/model/__init__.py,sha256=wocv8P9koYgWYkYaaVqTNFURQxhNCH-FmUFNmAt7sAE,75
|
|
9
|
+
rcsbapi/model/model_query.py,sha256=Y2prl4A9MKIloRYH1wRtkZGTCzHSKLB2AC_BDi8ZyUE,30437
|
|
10
|
+
rcsbapi/model/model_schema.py,sha256=isIoGkDPdIF2xepd8iMzRFKX7Kb12SnzGj-j9liGH6w,2582
|
|
11
|
+
rcsbapi/search/__init__.py,sha256=Zdgu9Dj6QVmASII_tjfh6uOF55WJBOdxsXd_EMiYtu4,1129
|
|
12
|
+
rcsbapi/search/search_query.py,sha256=cbyQIMmr2hn75kLfG-hoD6hRZjgnTVwsaI_3el-s47M,90171
|
|
13
|
+
rcsbapi/search/search_schema.py,sha256=iYo2NUtbhAk4477uh35h61iJy3DKPzIRsLPSWazM30M,18196
|
|
14
|
+
rcsbapi/sequence/__init__.py,sha256=pO3ruQg9ZIXW6BKl2_u0WRv0Vc1puIBmPXGjUOzrDd4,454
|
|
15
|
+
rcsbapi/sequence/seq_query.py,sha256=dl_CdjrPf_U5XMIw8jRZbWQZlpyoNuR174u8Mbzmn30,16298
|
|
16
|
+
rcsbapi/sequence/seq_schema.py,sha256=5qd1NqCiJaoHmk2tMw5_z1Qbvf0NvQCjOmIIY54UoEM,4647
|
|
17
|
+
rcsb_api-1.5.0.dist-info/METADATA,sha256=JLhG9yW6ElwVhEIVJ_xeioGFxrKWRxWYlcdgdcEb_sE,11701
|
|
18
|
+
rcsb_api-1.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
19
|
+
rcsb_api-1.5.0.dist-info/licenses/LICENSE,sha256=jFdfp4-j1dWjHcGjkMswp9D4sXxRsjz01ixAIQRo3OY,1065
|
|
20
|
+
rcsb_api-1.5.0.dist-info/licenses/NOTICE.md,sha256=QMhgFEQ3XI1jiP3mJFv-i-HWBvG5xrMGYG_XksktEEA,2005
|
|
21
|
+
rcsb_api-1.5.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 RCSB PDB
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Third-Party Copyright Notices
|
|
2
|
+
`rcsb-api` uses third-party libraries or other resources that may
|
|
3
|
+
be distributed under licenses different than the `rcsb-api` software.
|
|
4
|
+
|
|
5
|
+
In the event that we accidentally failed to list a required notice,
|
|
6
|
+
please bring it to our attention through the creation of a [GitHub issue](https://github.com/rcsb/py-rcsb-api/issues).
|
|
7
|
+
|
|
8
|
+
The attached notices are provided for information only.
|
|
9
|
+
|
|
10
|
+
## [rcsbsearchapi](https://github.com/rcsb/py-rcsbsearchapi)
|
|
11
|
+
|
|
12
|
+
BSD 3-Clause License
|
|
13
|
+
--------------------
|
|
14
|
+
|
|
15
|
+
Copyright 2024 rcsbsearchapi Contributors
|
|
16
|
+
|
|
17
|
+
Redistribution and use in source and binary forms, with or without
|
|
18
|
+
modification, are permitted provided that the following conditions are met:
|
|
19
|
+
|
|
20
|
+
1. Redistributions of source code must retain the above copyright notice,
|
|
21
|
+
this list of conditions and the following disclaimer.
|
|
22
|
+
|
|
23
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
24
|
+
this list of conditions and the following disclaimer in the documentation
|
|
25
|
+
and/or other materials provided with the distribution.
|
|
26
|
+
|
|
27
|
+
3. Neither the name of the copyright holder nor the names of its contributors
|
|
28
|
+
may be used to endorse or promote products derived from this software
|
|
29
|
+
without specific prior written permission.
|
|
30
|
+
|
|
31
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
32
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
33
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
34
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
35
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
36
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
37
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
38
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
39
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
40
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
rcsbapi/__init__.py
ADDED
|
File without changes
|
rcsbapi/config.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configurable settings for rcsb-api
|
|
3
|
+
|
|
4
|
+
These settings can be overridden at runtime.
|
|
5
|
+
|
|
6
|
+
For example, you can turn off autocompletion warning messages by
|
|
7
|
+
modifying the `SUPPRESS_AUTOCOMPLETE_WARNING` setting as follows:
|
|
8
|
+
|
|
9
|
+
Example:
|
|
10
|
+
from rcsbapi.config import config
|
|
11
|
+
|
|
12
|
+
# Override the default warning suppression flag
|
|
13
|
+
config.SUPPRESS_AUTOCOMPLETE_WARNING = True
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from pydoc import locate
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Config:
    """Runtime-configurable settings for rcsb-api.

    Every setting is a class attribute with a default value. Assigning to a
    setting on an instance is validated by ``__setattr__``: the attribute must
    already exist and the new value must match the annotated type.
    """

    API_TIMEOUT: int = 100  # Timeout in seconds for all API calls
    MAX_RETRIES: int = 5  # Maximum number of retries to perform per request upon failure
    RETRY_BACKOFF: int = 1  # Delay in seconds to wait between retries; increases exponentially between retries (e.g., 1s, 2s, 4s, 8s, ...)
    SEARCH_API_REQUESTS_PER_SECOND: int = 10  # Requests per second limit for the Search API
    DATA_API_REQUESTS_PER_SECOND: int = 20  # Requests per second limit for the Data API
    DATA_API_BATCH_ID_SIZE: int = 300  # Size of batches to use for batching input ID list to Data API (reduce this if encountering timeouts or errors)
    DATA_API_MAX_CONCURRENT_REQUESTS: int = 4  # Max number of Data API requests to run concurrently (e.g., when input ID list is split into many small batches)
    DATA_API_INPUT_ID_LIMIT: int = 50_000  # Threshold for warning user that input ID list for Data API query is very large and may hinder performance
    MODEL_API_REQUESTS_PER_SECOND: int = 10  # Requests per second limit for the Model API
    SUPPRESS_AUTOCOMPLETE_WARNING: bool = False  # Suppress the autocompletion warnings emitted for Data API queries

    def __setattr__(self, name: str, value: Any) -> None:
        """Set an existing configuration parameter after validating its name and type.

        Args:
            name (str): name of the configuration parameter to set
            value (Any): new value for the parameter

        Raises:
            AttributeError: if ``name`` is not an existing attribute (e.g., due to a typo or misspelling)
            TypeError: if ``value`` is not an instance of the type annotated for ``name``
                (a ``bool`` is rejected for settings annotated as ``int``)
        """
        # Verify attribute exists
        if not hasattr(self, name):
            raise AttributeError(f"'{name}' is not a valid attribute of Config class")

        # Find the annotation declared for this attribute, searching the class hierarchy
        annotation = None
        for klass in type(self).__mro__:
            declared = getattr(klass, "__annotations__", {})
            if name in declared:
                annotation = declared[name]
                break

        # Annotations are plain strings under `from __future__ import annotations`
        # (e.g. "int"); resolve those to the actual type. Attributes that exist but
        # carry no annotation (e.g. `__doc__`) are left unchecked.
        expected_type = locate(annotation) if isinstance(annotation, str) else annotation

        # Enforce consistent typing
        if isinstance(expected_type, type):
            # `bool` is a subclass of `int`, so a plain isinstance() check would
            # accept True/False for integer settings.
            is_stray_bool = isinstance(value, bool) and expected_type is not bool
            if is_stray_bool or not isinstance(value, expected_type):
                raise TypeError(f"Expected type '{expected_type.__name__}' for attribute '{name}', but got '{type(value).__name__}'")
        super().__setattr__(name, value)


config = Config()
|
rcsbapi/const.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Constants for rcsb-api (immutable and cannot be overridden)
|
|
3
|
+
|
|
4
|
+
These constants define fixed values used throughout the rcsb-api package,
|
|
5
|
+
including API endpoints, search services, and schema URLs. The values are
|
|
6
|
+
immutable and protected from modification during runtime.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations

from dataclasses import dataclass, field
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as get_package_version
from types import MappingProxyType
from typing import List
|
|
14
|
+
|
|
15
|
+
try:
    __version__ = get_package_version("rcsb-api")
except PackageNotFoundError:
    # No installed distribution metadata (e.g., the package is being imported from an
    # un-installed source tree). Fall back to a placeholder so the import still succeeds.
    __version__ = "0+unknown"


@dataclass(frozen=True)
class Const:
    """Fixed values used throughout rcsb-api (API endpoints, service names, schema locations).

    The dataclass is frozen, so attributes cannot be reassigned on an instance.
    Mapping-valued constants are wrapped in ``MappingProxyType`` so their keys cannot
    be added, removed, or reassigned. The ``List`` values held inside some mappings
    are ordinary lists and are not themselves protected from mutation.
    """

    # Search API constants
    STRUCTURE_INDEX: int = 0
    CHEMICAL_INDEX: int = 0
    SEARCH_API_REQUEST_SCHEMA_URL: str = "https://search.rcsb.org/schema/search/request/json-schema-rcsb_search_query.json"
    SEARCH_OPENAPI_SCHEMA_URL: str = "https://search.rcsb.org/openapi.json"
    STRUCTURE_ATTRIBUTE_SEARCH_SERVICE: str = "text"
    CHEMICAL_ATTRIBUTE_SEARCH_SERVICE: str = "text_chem"
    FULL_TEXT_SEARCH_SERVICE: str = "full_text"
    SEQUENCE_SEARCH_SERVICE: str = "sequence"
    SEQMOTIF_SEARCH_SERVICE: str = "seqmotif"
    STRUCT_SIM_SEARCH_SERVICE: str = "structure"
    STRUCTMOTIF_SEARCH_SERVICE: str = "strucmotif"
    CHEM_SIM_SEARCH_SERVICE: str = "chemical"
    SEQUENCE_SEARCH_MIN_NUM_OF_RESIDUES: int = 25
    SEQMOTIF_SEARCH_MIN_CHARACTERS: int = 2
    STRUCT_MOTIF_MIN_RESIDUES: int = 2
    STRUCT_MOTIF_MAX_RESIDUES: int = 10
    RCSB_SEARCH_API_QUERY_URL: str = "https://search.rcsb.org/rcsbsearch/v2/query"
    UPLOAD_URL: str = "https://user-upload.rcsb.org/v1/putMultipart"
    RETURN_UP_URL: str = "https://user-upload.rcsb.org/v1/download/"

    # Search API attribute schemas (remote URL and packaged file name)
    SEARCH_API_SCHEMA_DIR: str = "search/resources"
    SEARCH_API_STRUCTURE_ATTRIBUTE_SCHEMA_URL: str = "https://search.rcsb.org/rcsbsearch/v2/metadata/schema"
    SEARCH_API_STRUCTURE_ATTRIBUTE_SCHEMA_FILENAME: str = "structure_schema.json"
    SEARCH_API_CHEMICAL_ATTRIBUTE_SCHEMA_URL: str = "https://search.rcsb.org/rcsbsearch/v2/metadata/chemical/schema"
    SEARCH_API_CHEMICAL_ATTRIBUTE_SCHEMA_FILENAME: str = "chemical_schema.json"

    # Data API constants
    DATA_API_ENDPOINT: str = "https://data.rcsb.org/graphql"
    DATA_API_SCHEMA_DIR: str = "data/resources"
    DATA_API_SCHEMA_FILENAME: str = "data_api_schema.json"
    DATA_API_SCHEMA_BASE_URL: str = "https://data.rcsb.org/rest/v1/schema/"
    # A default_factory is required here: dataclasses reject unhashable default values,
    # and MappingProxyType instances are unhashable.
    DATA_API_SCHEMA_ENDPOINT_TO_FILE: MappingProxyType[str, str] = field(default_factory=lambda: MappingProxyType({
        "entry": "entry.json",
        "polymer_entity": "polymer_entity.json",
        "branched_entity": "branched_entity.json",
        "nonpolymer_entity": "nonpolymer_entity.json",
        "polymer_entity_instance": "polymer_entity_instance.json",
        "branched_entity_instance": "branched_entity_instance.json",
        "nonpolymer_entity_instance": "nonpolymer_entity_instance.json",
        "assembly": "assembly.json",
        "chem_comp": "chem_comp.json",
        "pubmed": "pubmed.json",
        "uniprot": "uniprot.json",
        "drugbank": "drugbank.json",
    }))

    # Sequence Coordinates API constants
    SEQUENCE_API_ENDPOINT: str = "https://sequence-coordinates.rcsb.org"
    SEQUENCE_API_GRAPHQL_ENDPOINT: str = "https://sequence-coordinates.rcsb.org/graphql"
    SEQUENCE_API_SCHEMA_DIR: str = "sequence/resources"
    SEQUENCE_API_SCHEMA_FILENAME: str = "seq_api_schema.json"

    # ModelServer API constants
    MODELSERVER_API_SCHEMA_FILEPATH: str = "model/resources/modelserver_api_schema.json"
    MODELSERVER_API_BASE_URL: str = "https://models.rcsb.org/v1"
    MODELSERVER_API_SCHEMA_URL: str = "https://models.rcsb.org/openapi.json"

    # Singular input type name -> plural name (empty string where no plural is defined here)
    SINGULAR_TO_PLURAL: MappingProxyType[str, str] = field(default_factory=lambda: MappingProxyType({
        "entry": "entries",
        "polymer_entity": "polymer_entities",
        "branched_entity": "branched_entities",
        "nonpolymer_entity": "nonpolymer_entities",
        "polymer_entity_instance": "polymer_entity_instances",
        "nonpolymer_entity_instance": "nonpolymer_entity_instances",
        "branched_entity_instance": "branched_entity_instances",
        "assembly": "assemblies",
        "interface": "interfaces",
        "uniprot": "",
        "pubmed": "",
        "chem_comp": "chem_comps",
        "entry_group": "entry_groups",
        "polymer_entity_group": "polymer_entity_groups",
        "group_provenance": ""
    }))

    # Separator character associated with each ID type (consistent with the ID regexes below)
    ID_TO_SEPARATOR: MappingProxyType[str, str] = field(default_factory=lambda: MappingProxyType({
        "entity_id": "_",
        "asym_id": ".",
        "assembly_id": "-",
        "interface_id": "."
    }))

    # Regex strings for IDs
    DATA_API_INPUT_TYPE_TO_REGEX: MappingProxyType[str, List[str]] = field(default_factory=lambda: MappingProxyType({
        "entry": [r"^(MA|AF|ma|af)_[A-Z0-9]*$", r"^[A-Za-z0-9]{4}$"],
        "entity": [r"^(MA|AF|ma|af)_[A-Z0-9]*_[0-9]+$", r"^[A-Z0-9]{4}_[0-9]+$"],
        "instance": [r"^(MA|AF|ma|af)_[A-Z0-9]*\.[A-Za-z]+$", r"^[A-Z0-9]{4}\.[A-Za-z]+$"],
        "assembly": [r"^(MA|AF|ma|af)_[A-Z0-9]*-[0-9]+$", r"^[A-Z0-9]{4}-[0-9]+$"],
        "interface": [r"^(MA|AF|ma|af)_[A-Z0-9]*-[0-9]+\.[0-9]+$", r"^[A-Z0-9]{4}-[0-9]+\.[0-9]+$"],
        # Regex for uniprot: https://www.uniprot.org/help/accession_numbers
        "uniprot": [r"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"]
    }))

    # Holdings endpoints, keyed by plural input type
    INPUT_TYPE_TO_ALL_STRUCTURES_ENDPOINT: MappingProxyType[str, List[str]] = field(default_factory=lambda: MappingProxyType({
        "entries": ["https://data.rcsb.org/rest/v1/holdings/current/entry_ids"],
        "chem_comps": ["https://data.rcsb.org/rest/v1/holdings/current/ccd_ids", "https://data.rcsb.org/rest/v1/holdings/current/prd_ids"]
    }))

    # Built from the package version resolved above
    USER_AGENT: str = (
        f"py-rcsb-api/{__version__} (+https://github.com/rcsb/py-rcsb-api)"
    )


const = Const()
|
rcsbapi/data/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""RCSB PDB Data API"""
|
|
2
|
+
from rcsbapi.data.data_schema import DataSchema
|
|
3
|
+
|
|
4
|
+
DATA_SCHEMA = DataSchema()
|
|
5
|
+
|
|
6
|
+
# This is needed because __getattr__ will be called twice on import,
|
|
7
|
+
# so ALL_STRUCTURES should be cached to avoid initializing twice
|
|
8
|
+
_import_cache: dict = {}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def __getattr__(name: str):
    """Module-level attribute hook that builds ``ALL_STRUCTURES`` on first access.

    The ``AllStructures`` object is created the first time ``ALL_STRUCTURES`` is
    requested and stored in ``_import_cache``; later requests return that same
    stored instance.

    Args:
        name (str): attribute name

    Raises:
        AttributeError: for any attribute name other than ``ALL_STRUCTURES``
    """
    if name != "ALL_STRUCTURES":
        # keep functionality of original __getattr__
        raise AttributeError(f"Module {__name__!r} has no attribute {name!r}")

    instance = _import_cache.get(name)
    if instance is None:
        # Imported here rather than at module top, so it only happens on first use
        from rcsbapi.data.data_query import AllStructures

        instance = _import_cache[name] = AllStructures()

    return instance  # Return cached instance
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
from rcsbapi.data.data_query import DataQuery # noqa:E402
|
|
31
|
+
|
|
32
|
+
__all__ = ["DataQuery", "DataSchema"]
|