openprotein-python 0.8.7__tar.gz → 0.8.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/PKG-INFO +1 -1
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/api.py +0 -6
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/poet.py +5 -2
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/poet2.py +4 -3
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/__init__.py +2 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/api.py +16 -32
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/boltz.py +38 -84
- openprotein_python-0.8.9/openprotein/fold/complex.py +60 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/fold.py +10 -1
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/future.py +60 -4
- openprotein_python-0.8.9/openprotein/fold/minifold.py +54 -0
- openprotein_python-0.8.9/openprotein/fold/rosettafold3.py +148 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/.gitignore +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/LICENSE.txt +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/README.md +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/_version.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/align.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/future.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/msa.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/base.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/chains.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/features.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/model_metadata.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/reduction.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/config.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/csv.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/assaydataset.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/data.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/design.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/future.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/embeddings.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/esm.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/future.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/openprotein.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/errors.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fasta.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/alphafold2.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/esmfold.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/futures.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/jobs.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/base.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/foundation/rfdiffusion.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/prediction.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/predictor.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/validate.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/prompt.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/protein.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/svd.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/__init__.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/api.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/models.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/schemas.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/umap.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/utils/uuid.py +0 -0
- {openprotein_python-0.8.7 → openprotein_python-0.8.9}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: openprotein-python
|
|
3
|
-
Version: 0.8.7
|
|
3
|
+
Version: 0.8.9
|
|
4
4
|
Summary: OpenProtein Python interface.
|
|
5
5
|
Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -400,12 +400,6 @@ def prompt_post(
|
|
|
400
400
|
"Either 'num_sequences' or 'num_residues' must be set, but not both."
|
|
401
401
|
)
|
|
402
402
|
|
|
403
|
-
if num_sequences is not None and not (0 <= num_sequences < 100):
|
|
404
|
-
raise InvalidParameterError("The 'num_sequences' must be between 0 and 100.")
|
|
405
|
-
|
|
406
|
-
if num_residues is not None and not (0 <= num_residues < 24577):
|
|
407
|
-
raise InvalidParameterError("The 'num_residues' must be between 0 and 24577.")
|
|
408
|
-
|
|
409
403
|
if random_seed is None:
|
|
410
404
|
random_seed = random.randrange(2**32)
|
|
411
405
|
|
|
@@ -295,7 +295,11 @@ class PoETModel(EmbeddingModel):
|
|
|
295
295
|
EmbeddingsGenerateFuture
|
|
296
296
|
Future object representing the status and information about the generation job.
|
|
297
297
|
"""
|
|
298
|
-
|
|
298
|
+
if prompt is not None:
|
|
299
|
+
kwargs["prompt_id"] = prompt if isinstance(prompt, str) else prompt.id
|
|
300
|
+
else:
|
|
301
|
+
# NB: this is for handling PoET-2
|
|
302
|
+
assert self.model_id != "poet"
|
|
299
303
|
return EmbeddingsGenerateFuture.create(
|
|
300
304
|
session=self.session,
|
|
301
305
|
job=api.request_generate_post(
|
|
@@ -307,7 +311,6 @@ class PoETModel(EmbeddingModel):
|
|
|
307
311
|
topp=topp,
|
|
308
312
|
max_length=max_length,
|
|
309
313
|
random_seed=seed,
|
|
310
|
-
prompt_id=prompt_id,
|
|
311
314
|
**kwargs,
|
|
312
315
|
),
|
|
313
316
|
)
|
|
@@ -287,7 +287,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
|
|
|
287
287
|
|
|
288
288
|
def generate(
|
|
289
289
|
self,
|
|
290
|
-
prompt: str | Prompt,
|
|
290
|
+
prompt: str | Prompt | None,
|
|
291
291
|
query: str | bytes | Protein | Query | None = None,
|
|
292
292
|
use_query_structure_in_decoder: bool = True,
|
|
293
293
|
num_samples: int = 100,
|
|
@@ -304,7 +304,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
|
|
|
304
304
|
|
|
305
305
|
Parameters
|
|
306
306
|
----------
|
|
307
|
-
prompt : str or Prompt
|
|
307
|
+
prompt : str or Prompt or None, optional
|
|
308
308
|
Prompt from an align workflow to condition PoET model.
|
|
309
309
|
query : str or bytes or Protein or Query or None, optional
|
|
310
310
|
Query to use with prompt.
|
|
@@ -351,7 +351,8 @@ class PoET2Model(PoETModel, EmbeddingModel):
|
|
|
351
351
|
f"equal to the number of prompts ({prompt.num_replicates})"
|
|
352
352
|
)
|
|
353
353
|
return super().generate(
|
|
354
|
-
prompt
|
|
354
|
+
# NB: poet(-1) cannot use null prompt, so we don't change its .generate's type signature
|
|
355
|
+
prompt=prompt, # type: ignore
|
|
355
356
|
num_samples=num_samples,
|
|
356
357
|
temperature=temperature,
|
|
357
358
|
topk=topk,
|
|
@@ -7,6 +7,7 @@ isort:skip_file
|
|
|
7
7
|
from .schemas import FoldJob, FoldMetadata
|
|
8
8
|
from .models import FoldModel
|
|
9
9
|
from .esmfold import ESMFoldModel
|
|
10
|
+
from .minifold import MiniFoldModel
|
|
10
11
|
from .alphafold2 import AlphaFold2Model
|
|
11
12
|
from .boltz import (
|
|
12
13
|
Boltz1Model,
|
|
@@ -17,5 +18,6 @@ from .boltz import (
|
|
|
17
18
|
BoltzConstraint,
|
|
18
19
|
BoltzProperty,
|
|
19
20
|
)
|
|
21
|
+
from .rosettafold3 import RosettaFold3Model
|
|
20
22
|
from .future import FoldResultFuture, FoldComplexResultFuture
|
|
21
23
|
from .fold import FoldAPI
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Fold REST API interface for making HTTP calls to our fold backend."""
|
|
2
2
|
|
|
3
3
|
import io
|
|
4
|
-
from typing import Literal
|
|
4
|
+
from typing import TYPE_CHECKING, Literal
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
7
|
from pydantic import TypeAdapter
|
|
@@ -12,6 +12,9 @@ from openprotein.errors import HTTPError
|
|
|
12
12
|
|
|
13
13
|
from .schemas import FoldJob, FoldMetadata
|
|
14
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
15
18
|
PATH_PREFIX = "v1/fold"
|
|
16
19
|
|
|
17
20
|
|
|
@@ -160,8 +163,8 @@ def fold_get_complex_result(
|
|
|
160
163
|
def fold_get_complex_extra_result(
|
|
161
164
|
session: APISession,
|
|
162
165
|
job_id: str,
|
|
163
|
-
key: Literal["pae", "pde", "plddt", "confidence", "affinity"],
|
|
164
|
-
) -> np.ndarray | list[dict]:
|
|
166
|
+
key: Literal["pae", "pde", "plddt", "confidence", "affinity", "score", "metrics"],
|
|
167
|
+
) -> "np.ndarray | list[dict] | pd.DataFrame":
|
|
165
168
|
"""
|
|
166
169
|
Get extra result for a complex from the request ID.
|
|
167
170
|
|
|
@@ -183,6 +186,10 @@ def fold_get_complex_extra_result(
|
|
|
183
186
|
formatter = lambda response: np.load(io.BytesIO(response.content))
|
|
184
187
|
elif key in {"confidence", "affinity"}:
|
|
185
188
|
formatter = lambda response: response.json()
|
|
189
|
+
elif key in {"score", "metrics"}:
|
|
190
|
+
import pandas as pd
|
|
191
|
+
|
|
192
|
+
formatter = lambda response: pd.read_csv(io.StringIO(response.content.decode()))
|
|
186
193
|
else:
|
|
187
194
|
raise ValueError(f"Unexpected key: {key}")
|
|
188
195
|
endpoint = PATH_PREFIX + f"/{job_id}/complex/{key}"
|
|
@@ -194,7 +201,7 @@ def fold_get_complex_extra_result(
|
|
|
194
201
|
if e.status_code == 400 and key == "affinity":
|
|
195
202
|
raise ValueError("affinity not found for request") from None
|
|
196
203
|
raise e
|
|
197
|
-
output
|
|
204
|
+
output = formatter(response)
|
|
198
205
|
return output
|
|
199
206
|
|
|
200
207
|
|
|
@@ -254,34 +261,11 @@ def fold_models_post(
|
|
|
254
261
|
sequences = kwargs["sequences"]
|
|
255
262
|
# NOTE we are handling the boltz form here too
|
|
256
263
|
sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
if kwargs.get("num_models"):
|
|
263
|
-
body["num_models"] = kwargs["num_models"]
|
|
264
|
-
if kwargs.get("num_relax"):
|
|
265
|
-
body["num_relax"] = kwargs["num_relax"]
|
|
266
|
-
if kwargs.get("use_potentials"):
|
|
267
|
-
body["use_potentials"] = kwargs["use_potentials"]
|
|
268
|
-
# boltz
|
|
269
|
-
if kwargs.get("diffusion_samples"):
|
|
270
|
-
body["diffusion_samples"] = kwargs["diffusion_samples"]
|
|
271
|
-
if kwargs.get("recycling_steps"):
|
|
272
|
-
body["recycling_steps"] = kwargs["recycling_steps"]
|
|
273
|
-
if kwargs.get("sampling_steps"):
|
|
274
|
-
body["sampling_steps"] = kwargs["sampling_steps"]
|
|
275
|
-
if kwargs.get("step_scale"):
|
|
276
|
-
body["step_scale"] = kwargs["step_scale"]
|
|
277
|
-
if kwargs.get("constraints"):
|
|
278
|
-
body["constraints"] = kwargs["constraints"]
|
|
279
|
-
if kwargs.get("templates"):
|
|
280
|
-
body["templates"] = kwargs["templates"]
|
|
281
|
-
if kwargs.get("properties"):
|
|
282
|
-
body["properties"] = kwargs["properties"]
|
|
283
|
-
if kwargs.get("method"):
|
|
284
|
-
body["method"] = kwargs["method"]
|
|
264
|
+
kwargs["sequences"] = sequences
|
|
265
|
+
# add non-None args - note this doesnt affect msa_id which is nested
|
|
266
|
+
for k, v in kwargs.items():
|
|
267
|
+
if v is not None:
|
|
268
|
+
body[k] = v
|
|
285
269
|
|
|
286
270
|
response = session.post(endpoint, json=body)
|
|
287
271
|
return FoldJob.model_validate(response.json())
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Community-based Boltz models for complex structure prediction with ligands/dna/rna."""
|
|
2
2
|
|
|
3
|
-
import re
|
|
4
|
-
import string
|
|
3
|
+
import warnings
|
|
4
|
+
from logging import warning
|
|
5
5
|
from typing import Any
|
|
6
6
|
|
|
7
7
|
from pydantic import BaseModel, Field, TypeAdapter, model_validator
|
|
@@ -13,67 +13,10 @@ from openprotein.common import ModelMetadata
|
|
|
13
13
|
from openprotein.protein import Protein
|
|
14
14
|
|
|
15
15
|
from . import api
|
|
16
|
+
from .complex import id_generator
|
|
16
17
|
from .future import FoldComplexResultFuture
|
|
17
18
|
from .models import FoldModel
|
|
18
19
|
|
|
19
|
-
valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def is_valid_id(id_str: str) -> bool:
|
|
23
|
-
"""
|
|
24
|
-
Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
|
|
25
|
-
"""
|
|
26
|
-
if not id_str or len(id_str) > 5:
|
|
27
|
-
return False
|
|
28
|
-
return bool(valid_id_pattern.fullmatch(id_str))
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
|
|
32
|
-
"""
|
|
33
|
-
Yields new chain IDs, skipping any in 'used_ids'.
|
|
34
|
-
First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
|
|
35
|
-
"""
|
|
36
|
-
used = set(tuple(used_ids or []))
|
|
37
|
-
letters = list(string.ascii_uppercase)
|
|
38
|
-
|
|
39
|
-
# --- Alphabetic IDs ---
|
|
40
|
-
curr_len = 1
|
|
41
|
-
curr_indices = [0] * curr_len # start at 'A'
|
|
42
|
-
|
|
43
|
-
def bump_indices():
|
|
44
|
-
# lexicographically increment curr_indices; return False on overflow
|
|
45
|
-
for i in reversed(range(len(curr_indices))):
|
|
46
|
-
if curr_indices[i] < len(letters) - 1:
|
|
47
|
-
curr_indices[i] += 1
|
|
48
|
-
for j in range(i + 1, len(curr_indices)):
|
|
49
|
-
curr_indices[j] = 0
|
|
50
|
-
return True
|
|
51
|
-
return False
|
|
52
|
-
|
|
53
|
-
while curr_len <= max_alpha_len:
|
|
54
|
-
candidate = "".join(letters[i] for i in curr_indices)
|
|
55
|
-
if candidate not in used:
|
|
56
|
-
used.add(candidate)
|
|
57
|
-
yield candidate
|
|
58
|
-
# bump
|
|
59
|
-
if not bump_indices():
|
|
60
|
-
curr_len += 1
|
|
61
|
-
if curr_len > max_alpha_len:
|
|
62
|
-
break
|
|
63
|
-
curr_indices = [0] * curr_len
|
|
64
|
-
|
|
65
|
-
# --- Numeric IDs ---
|
|
66
|
-
num = 1
|
|
67
|
-
while num <= max_numeric:
|
|
68
|
-
candidate = str(num)
|
|
69
|
-
num += 1
|
|
70
|
-
if candidate not in used:
|
|
71
|
-
used.add(candidate)
|
|
72
|
-
yield candidate
|
|
73
|
-
|
|
74
|
-
# exhausted
|
|
75
|
-
raise RuntimeError("exhausted all possible IDs")
|
|
76
|
-
|
|
77
20
|
|
|
78
21
|
class BoltzModel(FoldModel):
|
|
79
22
|
"""
|
|
@@ -97,8 +40,8 @@ class BoltzModel(FoldModel):
|
|
|
97
40
|
rnas: list[RNA] | None = None,
|
|
98
41
|
ligands: list[Ligand] | None = None,
|
|
99
42
|
diffusion_samples: int = 1,
|
|
100
|
-
|
|
101
|
-
|
|
43
|
+
num_recycles: int = 3,
|
|
44
|
+
num_steps: int = 200,
|
|
102
45
|
step_scale: float = 1.638,
|
|
103
46
|
use_potentials: bool = False,
|
|
104
47
|
constraints: list[dict] | None = None,
|
|
@@ -119,9 +62,9 @@ class BoltzModel(FoldModel):
|
|
|
119
62
|
List of ligands to include in folded output.
|
|
120
63
|
diffusion_samples: int
|
|
121
64
|
Number of diffusion samples to use
|
|
122
|
-
|
|
65
|
+
num_recycles : int
|
|
123
66
|
Number of recycling steps to use
|
|
124
|
-
|
|
67
|
+
num_steps : int
|
|
125
68
|
Number of sampling steps to use
|
|
126
69
|
step_scale : float
|
|
127
70
|
Scaling factor for diffusion steps.
|
|
@@ -133,6 +76,17 @@ class BoltzModel(FoldModel):
|
|
|
133
76
|
FoldComplexResultFuture
|
|
134
77
|
Future for the folding complex result.
|
|
135
78
|
"""
|
|
79
|
+
# migrate old parameter
|
|
80
|
+
if (recycling_steps := kwargs.get("recycling_steps")) is not None:
|
|
81
|
+
num_recycles = recycling_steps
|
|
82
|
+
warnings.warn(
|
|
83
|
+
"`recycling_steps` has been updated to `num_recycles`. The parameter will be auto-corrected for now but raise an exception in the future."
|
|
84
|
+
)
|
|
85
|
+
if (sampling_steps := kwargs.get("sampling_steps")) is not None:
|
|
86
|
+
num_steps = sampling_steps
|
|
87
|
+
warnings.warn(
|
|
88
|
+
"`sampling_steps` has been updated to `num_steps`. The parameter will be auto-corrected for now but raise an exception in the future."
|
|
89
|
+
)
|
|
136
90
|
# validate constraints
|
|
137
91
|
if constraints is not None:
|
|
138
92
|
TypeAdapter(list[BoltzConstraint]).validate_python(constraints)
|
|
@@ -247,8 +201,8 @@ class BoltzModel(FoldModel):
|
|
|
247
201
|
model_id=self.model_id,
|
|
248
202
|
sequences=sequences,
|
|
249
203
|
diffusion_samples=diffusion_samples,
|
|
250
|
-
|
|
251
|
-
|
|
204
|
+
num_recycles=num_recycles,
|
|
205
|
+
num_steps=num_steps,
|
|
252
206
|
step_scale=step_scale,
|
|
253
207
|
constraints=constraints,
|
|
254
208
|
use_potentials=use_potentials,
|
|
@@ -276,8 +230,8 @@ class Boltz2Model(BoltzModel, FoldModel):
|
|
|
276
230
|
rnas: list[RNA] | None = None,
|
|
277
231
|
ligands: list[Ligand] | None = None,
|
|
278
232
|
diffusion_samples: int = 1,
|
|
279
|
-
|
|
280
|
-
|
|
233
|
+
num_recycles: int = 3,
|
|
234
|
+
num_steps: int = 200,
|
|
281
235
|
step_scale: float = 1.638,
|
|
282
236
|
use_potentials: bool = False,
|
|
283
237
|
constraints: list[dict] | None = None,
|
|
@@ -300,9 +254,9 @@ class Boltz2Model(BoltzModel, FoldModel):
|
|
|
300
254
|
List of ligands to include in folded output.
|
|
301
255
|
diffusion_samples: int
|
|
302
256
|
Number of diffusion samples to use
|
|
303
|
-
|
|
257
|
+
num_recycles : int
|
|
304
258
|
Number of recycling steps to use
|
|
305
|
-
|
|
259
|
+
num_steps : int
|
|
306
260
|
Number of sampling steps to use
|
|
307
261
|
step_scale : float
|
|
308
262
|
Scaling factor for diffusion steps.
|
|
@@ -360,8 +314,8 @@ class Boltz2Model(BoltzModel, FoldModel):
|
|
|
360
314
|
rnas=rnas,
|
|
361
315
|
ligands=ligands,
|
|
362
316
|
diffusion_samples=diffusion_samples,
|
|
363
|
-
|
|
364
|
-
|
|
317
|
+
num_recycles=num_recycles,
|
|
318
|
+
num_steps=num_steps,
|
|
365
319
|
step_scale=step_scale,
|
|
366
320
|
use_potentials=use_potentials,
|
|
367
321
|
constraints=constraints,
|
|
@@ -385,8 +339,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
|
|
|
385
339
|
rnas: list[RNA] | None = None,
|
|
386
340
|
ligands: list[Ligand] | None = None,
|
|
387
341
|
diffusion_samples: int = 1,
|
|
388
|
-
|
|
389
|
-
|
|
342
|
+
num_recycles: int = 3,
|
|
343
|
+
num_steps: int = 200,
|
|
390
344
|
step_scale: float = 1.638,
|
|
391
345
|
constraints: list[dict] | None = None,
|
|
392
346
|
) -> FoldComplexResultFuture:
|
|
@@ -405,9 +359,9 @@ class Boltz1xModel(BoltzModel, FoldModel):
|
|
|
405
359
|
List of ligands to include in folded output.
|
|
406
360
|
diffusion_samples: int
|
|
407
361
|
Number of diffusion samples to use
|
|
408
|
-
|
|
362
|
+
num_recycles : int
|
|
409
363
|
Number of recycling steps to use
|
|
410
|
-
|
|
364
|
+
num_steps : int
|
|
411
365
|
Number of sampling steps to use
|
|
412
366
|
step_scale : float
|
|
413
367
|
Scaling factor for diffusion steps.
|
|
@@ -426,8 +380,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
|
|
|
426
380
|
rnas=rnas,
|
|
427
381
|
ligands=ligands,
|
|
428
382
|
diffusion_samples=diffusion_samples,
|
|
429
|
-
|
|
430
|
-
|
|
383
|
+
num_recycles=num_recycles,
|
|
384
|
+
num_steps=num_steps,
|
|
431
385
|
step_scale=step_scale,
|
|
432
386
|
use_potentials=True,
|
|
433
387
|
constraints=constraints,
|
|
@@ -448,8 +402,8 @@ class Boltz1Model(BoltzModel, FoldModel):
|
|
|
448
402
|
rnas: list[RNA] | None = None,
|
|
449
403
|
ligands: list[Ligand] | None = None,
|
|
450
404
|
diffusion_samples: int = 1,
|
|
451
|
-
|
|
452
|
-
|
|
405
|
+
num_recycles: int = 3,
|
|
406
|
+
num_steps: int = 200,
|
|
453
407
|
step_scale: float = 1.638,
|
|
454
408
|
use_potentials: bool = False,
|
|
455
409
|
constraints: list[dict] | None = None,
|
|
@@ -469,9 +423,9 @@ class Boltz1Model(BoltzModel, FoldModel):
|
|
|
469
423
|
List of ligands to include in folded output.
|
|
470
424
|
diffusion_samples: int
|
|
471
425
|
Number of diffusion samples to use
|
|
472
|
-
|
|
426
|
+
num_recycles : int
|
|
473
427
|
Number of recycling steps to use
|
|
474
|
-
|
|
428
|
+
num_steps : int
|
|
475
429
|
Number of sampling steps to use
|
|
476
430
|
step_scale : float
|
|
477
431
|
Scaling factor for diffusion steps.
|
|
@@ -492,8 +446,8 @@ class Boltz1Model(BoltzModel, FoldModel):
|
|
|
492
446
|
rnas=rnas,
|
|
493
447
|
ligands=ligands,
|
|
494
448
|
diffusion_samples=diffusion_samples,
|
|
495
|
-
|
|
496
|
-
|
|
449
|
+
num_recycles=num_recycles,
|
|
450
|
+
num_steps=num_steps,
|
|
497
451
|
step_scale=step_scale,
|
|
498
452
|
use_potentials=use_potentials,
|
|
499
453
|
constraints=constraints,
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import string
|
|
3
|
+
|
|
4
|
+
valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def is_valid_id(id_str: str) -> bool:
|
|
8
|
+
"""
|
|
9
|
+
Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
|
|
10
|
+
"""
|
|
11
|
+
if not id_str or len(id_str) > 5:
|
|
12
|
+
return False
|
|
13
|
+
return bool(valid_id_pattern.fullmatch(id_str))
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
|
|
17
|
+
"""
|
|
18
|
+
Yields new chain IDs, skipping any in 'used_ids'.
|
|
19
|
+
First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
|
|
20
|
+
"""
|
|
21
|
+
used = set(tuple(used_ids or []))
|
|
22
|
+
letters = list(string.ascii_uppercase)
|
|
23
|
+
|
|
24
|
+
# --- Alphabetic IDs ---
|
|
25
|
+
curr_len = 1
|
|
26
|
+
curr_indices = [0] * curr_len # start at 'A'
|
|
27
|
+
|
|
28
|
+
def bump_indices():
|
|
29
|
+
# lexicographically increment curr_indices; return False on overflow
|
|
30
|
+
for i in reversed(range(len(curr_indices))):
|
|
31
|
+
if curr_indices[i] < len(letters) - 1:
|
|
32
|
+
curr_indices[i] += 1
|
|
33
|
+
for j in range(i + 1, len(curr_indices)):
|
|
34
|
+
curr_indices[j] = 0
|
|
35
|
+
return True
|
|
36
|
+
return False
|
|
37
|
+
|
|
38
|
+
while curr_len <= max_alpha_len:
|
|
39
|
+
candidate = "".join(letters[i] for i in curr_indices)
|
|
40
|
+
if candidate not in used:
|
|
41
|
+
used.add(candidate)
|
|
42
|
+
yield candidate
|
|
43
|
+
# bump
|
|
44
|
+
if not bump_indices():
|
|
45
|
+
curr_len += 1
|
|
46
|
+
if curr_len > max_alpha_len:
|
|
47
|
+
break
|
|
48
|
+
curr_indices = [0] * curr_len
|
|
49
|
+
|
|
50
|
+
# --- Numeric IDs ---
|
|
51
|
+
num = 1
|
|
52
|
+
while num <= max_numeric:
|
|
53
|
+
candidate = str(num)
|
|
54
|
+
num += 1
|
|
55
|
+
if candidate not in used:
|
|
56
|
+
used.add(candidate)
|
|
57
|
+
yield candidate
|
|
58
|
+
|
|
59
|
+
# exhausted
|
|
60
|
+
raise RuntimeError("exhausted all possible IDs")
|
|
@@ -7,9 +7,11 @@ from .alphafold2 import AlphaFold2Model
|
|
|
7
7
|
from .boltz import Boltz1Model, Boltz1xModel, Boltz2Model
|
|
8
8
|
from .esmfold import ESMFoldModel
|
|
9
9
|
from .future import FoldComplexResultFuture, FoldResultFuture
|
|
10
|
+
from .minifold import MiniFoldModel
|
|
10
11
|
from .models import (
|
|
11
12
|
FoldModel,
|
|
12
13
|
)
|
|
14
|
+
from .rosettafold3 import RosettaFold3Model
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class FoldAPI:
|
|
@@ -26,11 +28,16 @@ class FoldAPI:
|
|
|
26
28
|
#: Boltz-1 model
|
|
27
29
|
boltz1: Boltz1Model
|
|
28
30
|
boltz_1: Boltz1Model
|
|
29
|
-
af2: AlphaFold2Model
|
|
30
31
|
#: AlphaFold-2 model
|
|
32
|
+
af2: AlphaFold2Model
|
|
31
33
|
alphafold2: AlphaFold2Model
|
|
34
|
+
#: RosettaFold-3 model
|
|
35
|
+
rf3: RosettaFold3Model
|
|
36
|
+
rosettafold_3: RosettaFold3Model
|
|
32
37
|
#: ESMFold model
|
|
33
38
|
esmfold: ESMFoldModel
|
|
39
|
+
#: MiniFold model
|
|
40
|
+
minifold: MiniFoldModel
|
|
34
41
|
|
|
35
42
|
def __init__(self, session: APISession):
|
|
36
43
|
self.session = session
|
|
@@ -45,6 +52,8 @@ class FoldAPI:
|
|
|
45
52
|
# Setup aliases safely
|
|
46
53
|
if getattr(self, "alphafold2", None):
|
|
47
54
|
self.af2 = self.alphafold2
|
|
55
|
+
if getattr(self, "rosettafold_3", None):
|
|
56
|
+
self.rf3 = self.rosettafold_3
|
|
48
57
|
if getattr(self, "boltz_1", None):
|
|
49
58
|
self.boltz1 = self.boltz_1
|
|
50
59
|
if getattr(self, "boltz_1x", None):
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from typing import TYPE_CHECKING, Literal
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
|
+
import pandas as pd
|
|
6
7
|
from pydantic.type_adapter import TypeAdapter
|
|
7
8
|
from typing_extensions import Self
|
|
8
9
|
|
|
@@ -50,14 +51,14 @@ class FoldResultFuture(MappedFuture, Future):
|
|
|
50
51
|
if metadata is None:
|
|
51
52
|
if job is None or job.job_id is None:
|
|
52
53
|
raise ValueError("Expected fold metadata or job")
|
|
53
|
-
metadata = api.fold_get(session, job.job_id)
|
|
54
|
+
metadata = api.fold_get(session=session, job_id=job.job_id)
|
|
54
55
|
self._metadata = metadata
|
|
55
56
|
if job is None:
|
|
56
57
|
jobs_api = getattr(session, "jobs", None)
|
|
57
58
|
assert isinstance(jobs_api, JobsAPI)
|
|
58
59
|
job = FoldJob.create(jobs_api.get_job(job_id=metadata.job_id))
|
|
59
60
|
if sequences is None:
|
|
60
|
-
sequences = api.fold_get_sequences(
|
|
61
|
+
sequences = api.fold_get_sequences(session=session, job_id=job.job_id)
|
|
61
62
|
self._sequences = sequences
|
|
62
63
|
super().__init__(session, job, max_workers)
|
|
63
64
|
|
|
@@ -93,7 +94,11 @@ class FoldResultFuture(MappedFuture, Future):
|
|
|
93
94
|
else:
|
|
94
95
|
raise ValueError("Expected fold metadata or job")
|
|
95
96
|
model_id = api.fold_get(session=session, job_id=job_id).model_id
|
|
96
|
-
if
|
|
97
|
+
if (
|
|
98
|
+
model_id.startswith("boltz")
|
|
99
|
+
or model_id.startswith("alphafold")
|
|
100
|
+
or model_id.startswith("rosettafold")
|
|
101
|
+
):
|
|
97
102
|
return FoldComplexResultFuture(session=session, job=job, **kwargs)
|
|
98
103
|
else:
|
|
99
104
|
return cls(session=session, job=job, **kwargs)
|
|
@@ -124,7 +129,6 @@ class FoldResultFuture(MappedFuture, Future):
|
|
|
124
129
|
"""
|
|
125
130
|
return self.job.job_id
|
|
126
131
|
|
|
127
|
-
|
|
128
132
|
@property
|
|
129
133
|
def metadata(self) -> FoldMetadata:
|
|
130
134
|
"""The fold metadata."""
|
|
@@ -243,6 +247,8 @@ class FoldComplexResultFuture(Future):
|
|
|
243
247
|
self._pae: np.ndarray | None = None
|
|
244
248
|
self._pde: np.ndarray | None = None
|
|
245
249
|
self._plddt: np.ndarray | None = None
|
|
250
|
+
self._score: pd.DataFrame | None = None
|
|
251
|
+
self._metrics: pd.DataFrame | None = None
|
|
246
252
|
self._confidence: list["BoltzConfidence"] | None = None
|
|
247
253
|
self._affinity: "BoltzAffinity | None" = None
|
|
248
254
|
|
|
@@ -436,6 +442,56 @@ class FoldComplexResultFuture(Future):
|
|
|
436
442
|
self._plddt = plddt
|
|
437
443
|
return self._plddt
|
|
438
444
|
|
|
445
|
+
@property
|
|
446
|
+
def score(self) -> pd.DataFrame:
|
|
447
|
+
"""
|
|
448
|
+
Get the predicted scores.
|
|
449
|
+
|
|
450
|
+
Returns
|
|
451
|
+
-------
|
|
452
|
+
pd.DataFrame
|
|
453
|
+
Structure prediction scores.
|
|
454
|
+
|
|
455
|
+
Raises
|
|
456
|
+
------
|
|
457
|
+
AttributeError
|
|
458
|
+
If score is not supported for the model.
|
|
459
|
+
"""
|
|
460
|
+
if self.model_id not in {"rosettafold-3"}:
|
|
461
|
+
raise AttributeError("score not supported for non-RosettaFold model")
|
|
462
|
+
if self._score is None:
|
|
463
|
+
score = api.fold_get_complex_extra_result(
|
|
464
|
+
session=self.session, job_id=self.job.job_id, key="score"
|
|
465
|
+
)
|
|
466
|
+
assert isinstance(score, pd.DataFrame)
|
|
467
|
+
self._score = score
|
|
468
|
+
return self._score
|
|
469
|
+
|
|
470
|
+
@property
|
|
471
|
+
def metrics(self) -> pd.DataFrame:
|
|
472
|
+
"""
|
|
473
|
+
Get the predicted metrics.
|
|
474
|
+
|
|
475
|
+
Returns
|
|
476
|
+
-------
|
|
477
|
+
pd.DataFrame
|
|
478
|
+
Structure prediction metrics.
|
|
479
|
+
|
|
480
|
+
Raises
|
|
481
|
+
------
|
|
482
|
+
AttributeError
|
|
483
|
+
If metrics is not supported for the model.
|
|
484
|
+
"""
|
|
485
|
+
if self.model_id not in {"rosettafold-3"}:
|
|
486
|
+
raise AttributeError("metrics not supported for non-RosettaFold model")
|
|
487
|
+
if self._metrics is None:
|
|
488
|
+
metrics = api.fold_get_complex_extra_result(
|
|
489
|
+
session=self.session, job_id=self.job.job_id, key="metrics"
|
|
490
|
+
)
|
|
491
|
+
assert isinstance(metrics, pd.DataFrame)
|
|
492
|
+
self._metrics = metrics
|
|
493
|
+
return self._metrics
|
|
494
|
+
|
|
439
495
|
@property
|
|
440
496
|
def confidence(self) -> list["BoltzConfidence"]:
|
|
441
497
|
"""
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
|
|
3
|
+
from openprotein.base import APISession
|
|
4
|
+
from openprotein.common import ModelMetadata
|
|
5
|
+
|
|
6
|
+
from . import api
|
|
7
|
+
from .future import FoldResultFuture
|
|
8
|
+
from .models import FoldModel
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MiniFoldModel(FoldModel):
    """
    Class providing inference endpoints for MiniFold.
    """

    model_id: str = "minifold"

    def __init__(
        self,
        session: APISession,
        model_id: str,
        metadata: ModelMetadata | None = None,
    ):
        """
        Initialize the model wrapper.

        Parameters
        ----------
        session : APISession
            Session used for all API calls made by this model.
        model_id : str
            Identifier of the model to run.
        metadata : ModelMetadata | None
            Optional model metadata, forwarded unchanged to the base class.
        """
        super().__init__(session=session, model_id=model_id, metadata=metadata)

    def fold(
        self, sequences: Sequence[bytes | str], num_recycles: int | None = None
    ) -> FoldResultFuture:
        """
        Fold sequences using this model.

        Parameters
        ----------
        sequences : Sequence[bytes | str]
            Sequences to fold. ``bytes`` entries are decoded to ``str``
            before submission.
        num_recycles : int | None
            Number of times to recycle models.

        Returns
        -------
        FoldResultFuture
            Future for the submitted fold job.

        Raises
        ------
        ValueError
            If any sequence contains ``':'``, which minifold does not support.
        """
        decoded = [s.decode() if isinstance(s, bytes) else s for s in sequences]
        # Validate with an explicit raise rather than `assert`: assertions are
        # stripped under `python -O`, which would let unsupported input
        # through to the API.
        if any(":" in s for s in decoded):
            raise ValueError("minifold does not support ':'")
        result = FoldResultFuture.create(
            session=self.session,
            job=api.fold_models_post(
                session=self.session,
                model_id=self.model_id,
                sequences=decoded,
                num_recycles=num_recycles,
            ),
        )
        # Narrow the type returned by the generic `create` factory.
        assert isinstance(result, FoldResultFuture)
        return result
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Community-based RosettaFold3 models for complex structure prediction with ligands/dna/rna."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field, TypeAdapter, model_validator
|
|
6
|
+
|
|
7
|
+
from openprotein.align import AlignAPI, MSAFuture
|
|
8
|
+
from openprotein.base import APISession
|
|
9
|
+
from openprotein.chains import Ligand
|
|
10
|
+
from openprotein.common import ModelMetadata
|
|
11
|
+
from openprotein.protein import Protein
|
|
12
|
+
|
|
13
|
+
from . import api
|
|
14
|
+
from .complex import id_generator
|
|
15
|
+
from .future import FoldComplexResultFuture
|
|
16
|
+
from .models import FoldModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RosettaFold3Model(FoldModel):
    """
    Inference client for the RosettaFold-3 structure prediction model.
    """

    model_id: str = "rosettafold-3"

    def __init__(
        self,
        session: APISession,
        model_id: str,
        metadata: ModelMetadata | None = None,
    ):
        """Forward the session, model id and optional metadata to the base class."""
        super().__init__(session, model_id, metadata)

    def fold(
        self,
        proteins: list[Protein] | MSAFuture | None = None,
        ligands: list[Ligand] | None = None,
        diffusion_samples: int = 1,
        num_recycles: int = 10,
        num_steps: int = 50,
        **kwargs,
    ) -> FoldComplexResultFuture:
        """
        Submit a RosettaFold-3 structure prediction job.

        Parameters
        ----------
        proteins : list[Protein] | MSAFuture | None
            Proteins to include in the folded output. Every `Protein` must
            have `.msa` set, either to an `MSAFuture` or to
            `Protein.single_sequence_mode` for single sequence mode.
            Alternatively, pass an `MSAFuture`: its seed is split on ``':'``
            and each distinct query sequence becomes one protein, with one
            chain id per occurrence of that sequence.
        ligands : list[Ligand] | None
            Ligands to include in the folded output.
        diffusion_samples : int
            Number of diffusion samples to use.
        num_recycles : int
            Number of recycling steps to use.
        num_steps : int
            Number of sampling steps to use.
        **kwargs
            Extra keyword arguments forwarded to ``api.fold_models_post``.

        Returns
        -------
        FoldComplexResultFuture
            Future for the folding complex result.

        Raises
        ------
        ValueError
            If a protein has no `.msa` set, or if neither proteins nor
            ligands produce any entry to fold.
        """
        # Chain ids the caller already assigned; these seed the id generator.
        taken = []
        if isinstance(proteins, list):
            for entry in proteins:
                if not isinstance(entry, Protein) or entry.chain_id is None:
                    continue
                if isinstance(entry.chain_id, str):
                    taken.append(entry.chain_id)
                elif isinstance(entry.chain_id, list):
                    taken.extend(entry.chain_id)
        for lig in ligands or []:
            if isinstance(lig.chain_id, str):
                taken.append(lig.chain_id)
            elif isinstance(lig.chain_id, list):
                taken.extend(lig.chain_id)
        id_gen = id_generator(taken)

        # An MSAFuture stands in for the protein list: rebuild the proteins
        # from its seed, one Protein per distinct query sequence.
        if isinstance(proteins, MSAFuture):
            align_api = getattr(self.session, "align", None)
            assert isinstance(align_api, AlignAPI)
            source_msa = proteins
            seed = align_api.get_seed(job_id=source_msa.job.job_id)
            copies: dict[str, int] = {}
            for chain_seq in seed.split(":"):
                copies[chain_seq] = copies.get(chain_seq, 0) + 1
            proteins = []
            for chain_seq, count in copies.items():
                built = Protein(sequence=chain_seq)
                # A single copy gets one id; repeated sequences get a list.
                if count == 1:
                    assigned = next(id_gen)
                else:
                    assigned = [next(id_gen) for _ in range(count)]
                built.chain_id = assigned
                built.msa = source_msa
                proteins.append(built)

        # Assemble the request payload, proteins first and then ligands.
        sequences: list[dict[str, Any]] = []
        for entry in proteins or []:
            entry_msa = entry.msa
            if entry_msa is None:
                raise ValueError(
                    "Expected all protein sequences to have `.msa` set with an `MSAFuture` or `Protein.single_sequence_mode` for single sequence mode."
                )
            # Resolve to an msa id, or None (null) for single sequence mode.
            if isinstance(entry_msa, str):
                msa_id = entry_msa
            elif isinstance(entry_msa, MSAFuture):
                msa_id = entry_msa.id
            else:
                msa_id = None
            protein_payload = {
                "id": entry.chain_id or next(id_gen),
                "msa_id": msa_id,
                "sequence": entry.sequence.decode(),
            }
            if entry.cyclic:
                protein_payload["cyclic"] = entry.cyclic
            sequences.append({"protein": protein_payload})
        for lig in ligands or []:
            ligand_payload: dict = {"id": lig.chain_id or next(id_gen)}
            if lig.ccd:
                ligand_payload["ccd"] = lig.ccd
            if lig.smiles:
                ligand_payload["smiles"] = lig.smiles
            sequences.append({"ligand": ligand_payload})

        if not sequences:
            raise ValueError("Expected proteins or ligands")

        job = api.fold_models_post(
            session=self.session,
            model_id=self.model_id,
            sequences=sequences,
            diffusion_samples=diffusion_samples,
            num_recycles=num_recycles,
            num_steps=num_steps,
            **kwargs,
        )
        return FoldComplexResultFuture.create(
            session=self.session,
            job=job,
            model_id=self.model_id,
            proteins=proteins,
            ligands=ligands,
        )
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/foundation/rfdiffusion.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|