openprotein-python 0.8.7__tar.gz → 0.8.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/PKG-INFO +1 -1
  2. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/api.py +0 -6
  3. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/poet.py +5 -2
  4. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/poet2.py +4 -3
  5. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/__init__.py +2 -0
  6. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/api.py +16 -32
  7. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/boltz.py +38 -84
  8. openprotein_python-0.8.9/openprotein/fold/complex.py +60 -0
  9. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/fold.py +10 -1
  10. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/future.py +60 -4
  11. openprotein_python-0.8.9/openprotein/fold/minifold.py +54 -0
  12. openprotein_python-0.8.9/openprotein/fold/rosettafold3.py +148 -0
  13. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/.gitignore +0 -0
  14. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/LICENSE.txt +0 -0
  15. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/README.md +0 -0
  16. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/__init__.py +0 -0
  17. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/_version.py +0 -0
  18. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/__init__.py +0 -0
  19. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/align.py +0 -0
  20. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/future.py +0 -0
  21. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/msa.py +0 -0
  22. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/align/schemas.py +0 -0
  23. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/base.py +0 -0
  24. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/chains.py +0 -0
  25. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/__init__.py +0 -0
  26. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/features.py +0 -0
  27. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/model_metadata.py +0 -0
  28. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/common/reduction.py +0 -0
  29. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/config.py +0 -0
  30. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/csv.py +0 -0
  31. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/__init__.py +0 -0
  32. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/api.py +0 -0
  33. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/assaydataset.py +0 -0
  34. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/data.py +0 -0
  35. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/data/schemas.py +0 -0
  36. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/__init__.py +0 -0
  37. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/api.py +0 -0
  38. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/design.py +0 -0
  39. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/future.py +0 -0
  40. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/design/schemas.py +0 -0
  41. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/__init__.py +0 -0
  42. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/api.py +0 -0
  43. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/embeddings.py +0 -0
  44. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/esm.py +0 -0
  45. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/future.py +0 -0
  46. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/models.py +0 -0
  47. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/openprotein.py +0 -0
  48. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/embeddings/schemas.py +0 -0
  49. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/errors.py +0 -0
  50. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fasta.py +0 -0
  51. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/alphafold2.py +0 -0
  52. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/esmfold.py +0 -0
  53. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/models.py +0 -0
  54. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/fold/schemas.py +0 -0
  55. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/__init__.py +0 -0
  56. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/api.py +0 -0
  57. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/futures.py +0 -0
  58. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/jobs.py +0 -0
  59. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/jobs/schemas.py +0 -0
  60. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/__init__.py +0 -0
  61. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/base.py +0 -0
  62. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/foundation/rfdiffusion.py +0 -0
  63. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/models/models.py +0 -0
  64. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/__init__.py +0 -0
  65. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/api.py +0 -0
  66. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/models.py +0 -0
  67. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/prediction.py +0 -0
  68. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/predictor.py +0 -0
  69. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/schemas.py +0 -0
  70. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/predictor/validate.py +0 -0
  71. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/__init__.py +0 -0
  72. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/api.py +0 -0
  73. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/models.py +0 -0
  74. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/prompt.py +0 -0
  75. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/prompt/schemas.py +0 -0
  76. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/protein.py +0 -0
  77. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/__init__.py +0 -0
  78. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/api.py +0 -0
  79. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/models.py +0 -0
  80. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/schemas.py +0 -0
  81. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/svd/svd.py +0 -0
  82. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/__init__.py +0 -0
  83. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/api.py +0 -0
  84. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/models.py +0 -0
  85. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/schemas.py +0 -0
  86. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/umap/umap.py +0 -0
  87. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/openprotein/utils/uuid.py +0 -0
  88. {openprotein_python-0.8.7 → openprotein_python-0.8.9}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openprotein-python
3
- Version: 0.8.7
3
+ Version: 0.8.9
4
4
  Summary: OpenProtein Python interface.
5
5
  Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
6
6
  License-Expression: MIT
@@ -400,12 +400,6 @@ def prompt_post(
400
400
  "Either 'num_sequences' or 'num_residues' must be set, but not both."
401
401
  )
402
402
 
403
- if num_sequences is not None and not (0 <= num_sequences < 100):
404
- raise InvalidParameterError("The 'num_sequences' must be between 0 and 100.")
405
-
406
- if num_residues is not None and not (0 <= num_residues < 24577):
407
- raise InvalidParameterError("The 'num_residues' must be between 0 and 24577.")
408
-
409
403
  if random_seed is None:
410
404
  random_seed = random.randrange(2**32)
411
405
 
@@ -295,7 +295,11 @@ class PoETModel(EmbeddingModel):
295
295
  EmbeddingsGenerateFuture
296
296
  Future object representing the status and information about the generation job.
297
297
  """
298
- prompt_id = prompt if isinstance(prompt, str) else prompt.id
298
+ if prompt is not None:
299
+ kwargs["prompt_id"] = prompt if isinstance(prompt, str) else prompt.id
300
+ else:
301
+ # NB: this is for handling PoET-2
302
+ assert self.model_id != "poet"
299
303
  return EmbeddingsGenerateFuture.create(
300
304
  session=self.session,
301
305
  job=api.request_generate_post(
@@ -307,7 +311,6 @@ class PoETModel(EmbeddingModel):
307
311
  topp=topp,
308
312
  max_length=max_length,
309
313
  random_seed=seed,
310
- prompt_id=prompt_id,
311
314
  **kwargs,
312
315
  ),
313
316
  )
@@ -287,7 +287,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
287
287
 
288
288
  def generate(
289
289
  self,
290
- prompt: str | Prompt,
290
+ prompt: str | Prompt | None,
291
291
  query: str | bytes | Protein | Query | None = None,
292
292
  use_query_structure_in_decoder: bool = True,
293
293
  num_samples: int = 100,
@@ -304,7 +304,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
304
304
 
305
305
  Parameters
306
306
  ----------
307
- prompt : str or Prompt
307
+ prompt : str or Prompt or None, optional
308
308
  Prompt from an align workflow to condition PoET model.
309
309
  query : str or bytes or Protein or Query or None, optional
310
310
  Query to use with prompt.
@@ -351,7 +351,8 @@ class PoET2Model(PoETModel, EmbeddingModel):
351
351
  f"equal to the number of prompts ({prompt.num_replicates})"
352
352
  )
353
353
  return super().generate(
354
- prompt=prompt,
354
+ # NB: poet(-1) cannot use null prompt, so we don't change its .generate's type signature
355
+ prompt=prompt, # type: ignore
355
356
  num_samples=num_samples,
356
357
  temperature=temperature,
357
358
  topk=topk,
@@ -7,6 +7,7 @@ isort:skip_file
7
7
  from .schemas import FoldJob, FoldMetadata
8
8
  from .models import FoldModel
9
9
  from .esmfold import ESMFoldModel
10
+ from .minifold import MiniFoldModel
10
11
  from .alphafold2 import AlphaFold2Model
11
12
  from .boltz import (
12
13
  Boltz1Model,
@@ -17,5 +18,6 @@ from .boltz import (
17
18
  BoltzConstraint,
18
19
  BoltzProperty,
19
20
  )
21
+ from .rosettafold3 import RosettaFold3Model
20
22
  from .future import FoldResultFuture, FoldComplexResultFuture
21
23
  from .fold import FoldAPI
@@ -1,7 +1,7 @@
1
1
  """Fold REST API interface for making HTTP calls to our fold backend."""
2
2
 
3
3
  import io
4
- from typing import Literal
4
+ from typing import TYPE_CHECKING, Literal
5
5
 
6
6
  import numpy as np
7
7
  from pydantic import TypeAdapter
@@ -12,6 +12,9 @@ from openprotein.errors import HTTPError
12
12
 
13
13
  from .schemas import FoldJob, FoldMetadata
14
14
 
15
+ if TYPE_CHECKING:
16
+ import pandas as pd
17
+
15
18
  PATH_PREFIX = "v1/fold"
16
19
 
17
20
 
@@ -160,8 +163,8 @@ def fold_get_complex_result(
160
163
  def fold_get_complex_extra_result(
161
164
  session: APISession,
162
165
  job_id: str,
163
- key: Literal["pae", "pde", "plddt", "confidence", "affinity"],
164
- ) -> np.ndarray | list[dict]:
166
+ key: Literal["pae", "pde", "plddt", "confidence", "affinity", "score", "metrics"],
167
+ ) -> "np.ndarray | list[dict] | pd.DataFrame":
165
168
  """
166
169
  Get extra result for a complex from the request ID.
167
170
 
@@ -183,6 +186,10 @@ def fold_get_complex_extra_result(
183
186
  formatter = lambda response: np.load(io.BytesIO(response.content))
184
187
  elif key in {"confidence", "affinity"}:
185
188
  formatter = lambda response: response.json()
189
+ elif key in {"score", "metrics"}:
190
+ import pandas as pd
191
+
192
+ formatter = lambda response: pd.read_csv(io.StringIO(response.content.decode()))
186
193
  else:
187
194
  raise ValueError(f"Unexpected key: {key}")
188
195
  endpoint = PATH_PREFIX + f"/{job_id}/complex/{key}"
@@ -194,7 +201,7 @@ def fold_get_complex_extra_result(
194
201
  if e.status_code == 400 and key == "affinity":
195
202
  raise ValueError("affinity not found for request") from None
196
203
  raise e
197
- output: np.ndarray | list[dict] = formatter(response)
204
+ output = formatter(response)
198
205
  return output
199
206
 
200
207
 
@@ -254,34 +261,11 @@ def fold_models_post(
254
261
  sequences = kwargs["sequences"]
255
262
  # NOTE we are handling the boltz form here too
256
263
  sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
257
- body["sequences"] = sequences
258
- if kwargs.get("msa_id"):
259
- body["msa_id"] = kwargs["msa_id"]
260
- if kwargs.get("num_recycles"):
261
- body["num_recycles"] = kwargs["num_recycles"]
262
- if kwargs.get("num_models"):
263
- body["num_models"] = kwargs["num_models"]
264
- if kwargs.get("num_relax"):
265
- body["num_relax"] = kwargs["num_relax"]
266
- if kwargs.get("use_potentials"):
267
- body["use_potentials"] = kwargs["use_potentials"]
268
- # boltz
269
- if kwargs.get("diffusion_samples"):
270
- body["diffusion_samples"] = kwargs["diffusion_samples"]
271
- if kwargs.get("recycling_steps"):
272
- body["recycling_steps"] = kwargs["recycling_steps"]
273
- if kwargs.get("sampling_steps"):
274
- body["sampling_steps"] = kwargs["sampling_steps"]
275
- if kwargs.get("step_scale"):
276
- body["step_scale"] = kwargs["step_scale"]
277
- if kwargs.get("constraints"):
278
- body["constraints"] = kwargs["constraints"]
279
- if kwargs.get("templates"):
280
- body["templates"] = kwargs["templates"]
281
- if kwargs.get("properties"):
282
- body["properties"] = kwargs["properties"]
283
- if kwargs.get("method"):
284
- body["method"] = kwargs["method"]
264
+ kwargs["sequences"] = sequences
265
+ # add non-None args - note this doesnt affect msa_id which is nested
266
+ for k, v in kwargs.items():
267
+ if v is not None:
268
+ body[k] = v
285
269
 
286
270
  response = session.post(endpoint, json=body)
287
271
  return FoldJob.model_validate(response.json())
@@ -1,7 +1,7 @@
1
1
  """Community-based Boltz models for complex structure prediction with ligands/dna/rna."""
2
2
 
3
- import re
4
- import string
3
+ import warnings
4
+ from logging import warning
5
5
  from typing import Any
6
6
 
7
7
  from pydantic import BaseModel, Field, TypeAdapter, model_validator
@@ -13,67 +13,10 @@ from openprotein.common import ModelMetadata
13
13
  from openprotein.protein import Protein
14
14
 
15
15
  from . import api
16
+ from .complex import id_generator
16
17
  from .future import FoldComplexResultFuture
17
18
  from .models import FoldModel
18
19
 
19
- valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
20
-
21
-
22
- def is_valid_id(id_str: str) -> bool:
23
- """
24
- Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
25
- """
26
- if not id_str or len(id_str) > 5:
27
- return False
28
- return bool(valid_id_pattern.fullmatch(id_str))
29
-
30
-
31
- def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
32
- """
33
- Yields new chain IDs, skipping any in 'used_ids'.
34
- First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
35
- """
36
- used = set(tuple(used_ids or []))
37
- letters = list(string.ascii_uppercase)
38
-
39
- # --- Alphabetic IDs ---
40
- curr_len = 1
41
- curr_indices = [0] * curr_len # start at 'A'
42
-
43
- def bump_indices():
44
- # lexicographically increment curr_indices; return False on overflow
45
- for i in reversed(range(len(curr_indices))):
46
- if curr_indices[i] < len(letters) - 1:
47
- curr_indices[i] += 1
48
- for j in range(i + 1, len(curr_indices)):
49
- curr_indices[j] = 0
50
- return True
51
- return False
52
-
53
- while curr_len <= max_alpha_len:
54
- candidate = "".join(letters[i] for i in curr_indices)
55
- if candidate not in used:
56
- used.add(candidate)
57
- yield candidate
58
- # bump
59
- if not bump_indices():
60
- curr_len += 1
61
- if curr_len > max_alpha_len:
62
- break
63
- curr_indices = [0] * curr_len
64
-
65
- # --- Numeric IDs ---
66
- num = 1
67
- while num <= max_numeric:
68
- candidate = str(num)
69
- num += 1
70
- if candidate not in used:
71
- used.add(candidate)
72
- yield candidate
73
-
74
- # exhausted
75
- raise RuntimeError("exhausted all possible IDs")
76
-
77
20
 
78
21
  class BoltzModel(FoldModel):
79
22
  """
@@ -97,8 +40,8 @@ class BoltzModel(FoldModel):
97
40
  rnas: list[RNA] | None = None,
98
41
  ligands: list[Ligand] | None = None,
99
42
  diffusion_samples: int = 1,
100
- recycling_steps: int = 3,
101
- sampling_steps: int = 200,
43
+ num_recycles: int = 3,
44
+ num_steps: int = 200,
102
45
  step_scale: float = 1.638,
103
46
  use_potentials: bool = False,
104
47
  constraints: list[dict] | None = None,
@@ -119,9 +62,9 @@ class BoltzModel(FoldModel):
119
62
  List of ligands to include in folded output.
120
63
  diffusion_samples: int
121
64
  Number of diffusion samples to use
122
- recycling_steps : int
65
+ num_recycles : int
123
66
  Number of recycling steps to use
124
- sampling_steps : int
67
+ num_steps : int
125
68
  Number of sampling steps to use
126
69
  step_scale : float
127
70
  Scaling factor for diffusion steps.
@@ -133,6 +76,17 @@ class BoltzModel(FoldModel):
133
76
  FoldComplexResultFuture
134
77
  Future for the folding complex result.
135
78
  """
79
+ # migrate old parameter
80
+ if (recycling_steps := kwargs.get("recycling_steps")) is not None:
81
+ num_recycles = recycling_steps
82
+ warnings.warn(
83
+ "`recycling_steps` has been updated to `num_recycles`. The parameter will be auto-corrected for now but raise an exception in the future."
84
+ )
85
+ if (sampling_steps := kwargs.get("sampling_steps")) is not None:
86
+ num_steps = sampling_steps
87
+ warnings.warn(
88
+ "`sampling_steps` has been updated to `num_steps`. The parameter will be auto-corrected for now but raise an exception in the future."
89
+ )
136
90
  # validate constraints
137
91
  if constraints is not None:
138
92
  TypeAdapter(list[BoltzConstraint]).validate_python(constraints)
@@ -247,8 +201,8 @@ class BoltzModel(FoldModel):
247
201
  model_id=self.model_id,
248
202
  sequences=sequences,
249
203
  diffusion_samples=diffusion_samples,
250
- recycling_steps=recycling_steps,
251
- sampling_steps=sampling_steps,
204
+ num_recycles=num_recycles,
205
+ num_steps=num_steps,
252
206
  step_scale=step_scale,
253
207
  constraints=constraints,
254
208
  use_potentials=use_potentials,
@@ -276,8 +230,8 @@ class Boltz2Model(BoltzModel, FoldModel):
276
230
  rnas: list[RNA] | None = None,
277
231
  ligands: list[Ligand] | None = None,
278
232
  diffusion_samples: int = 1,
279
- recycling_steps: int = 3,
280
- sampling_steps: int = 200,
233
+ num_recycles: int = 3,
234
+ num_steps: int = 200,
281
235
  step_scale: float = 1.638,
282
236
  use_potentials: bool = False,
283
237
  constraints: list[dict] | None = None,
@@ -300,9 +254,9 @@ class Boltz2Model(BoltzModel, FoldModel):
300
254
  List of ligands to include in folded output.
301
255
  diffusion_samples: int
302
256
  Number of diffusion samples to use
303
- recycling_steps : int
257
+ num_recycles : int
304
258
  Number of recycling steps to use
305
- sampling_steps : int
259
+ num_steps : int
306
260
  Number of sampling steps to use
307
261
  step_scale : float
308
262
  Scaling factor for diffusion steps.
@@ -360,8 +314,8 @@ class Boltz2Model(BoltzModel, FoldModel):
360
314
  rnas=rnas,
361
315
  ligands=ligands,
362
316
  diffusion_samples=diffusion_samples,
363
- recycling_steps=recycling_steps,
364
- sampling_steps=sampling_steps,
317
+ num_recycles=num_recycles,
318
+ num_steps=num_steps,
365
319
  step_scale=step_scale,
366
320
  use_potentials=use_potentials,
367
321
  constraints=constraints,
@@ -385,8 +339,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
385
339
  rnas: list[RNA] | None = None,
386
340
  ligands: list[Ligand] | None = None,
387
341
  diffusion_samples: int = 1,
388
- recycling_steps: int = 3,
389
- sampling_steps: int = 200,
342
+ num_recycles: int = 3,
343
+ num_steps: int = 200,
390
344
  step_scale: float = 1.638,
391
345
  constraints: list[dict] | None = None,
392
346
  ) -> FoldComplexResultFuture:
@@ -405,9 +359,9 @@ class Boltz1xModel(BoltzModel, FoldModel):
405
359
  List of ligands to include in folded output.
406
360
  diffusion_samples: int
407
361
  Number of diffusion samples to use
408
- recycling_steps : int
362
+ num_recycles : int
409
363
  Number of recycling steps to use
410
- sampling_steps : int
364
+ num_steps : int
411
365
  Number of sampling steps to use
412
366
  step_scale : float
413
367
  Scaling factor for diffusion steps.
@@ -426,8 +380,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
426
380
  rnas=rnas,
427
381
  ligands=ligands,
428
382
  diffusion_samples=diffusion_samples,
429
- recycling_steps=recycling_steps,
430
- sampling_steps=sampling_steps,
383
+ num_recycles=num_recycles,
384
+ num_steps=num_steps,
431
385
  step_scale=step_scale,
432
386
  use_potentials=True,
433
387
  constraints=constraints,
@@ -448,8 +402,8 @@ class Boltz1Model(BoltzModel, FoldModel):
448
402
  rnas: list[RNA] | None = None,
449
403
  ligands: list[Ligand] | None = None,
450
404
  diffusion_samples: int = 1,
451
- recycling_steps: int = 3,
452
- sampling_steps: int = 200,
405
+ num_recycles: int = 3,
406
+ num_steps: int = 200,
453
407
  step_scale: float = 1.638,
454
408
  use_potentials: bool = False,
455
409
  constraints: list[dict] | None = None,
@@ -469,9 +423,9 @@ class Boltz1Model(BoltzModel, FoldModel):
469
423
  List of ligands to include in folded output.
470
424
  diffusion_samples: int
471
425
  Number of diffusion samples to use
472
- recycling_steps : int
426
+ num_recycles : int
473
427
  Number of recycling steps to use
474
- sampling_steps : int
428
+ num_steps : int
475
429
  Number of sampling steps to use
476
430
  step_scale : float
477
431
  Scaling factor for diffusion steps.
@@ -492,8 +446,8 @@ class Boltz1Model(BoltzModel, FoldModel):
492
446
  rnas=rnas,
493
447
  ligands=ligands,
494
448
  diffusion_samples=diffusion_samples,
495
- recycling_steps=recycling_steps,
496
- sampling_steps=sampling_steps,
449
+ num_recycles=num_recycles,
450
+ num_steps=num_steps,
497
451
  step_scale=step_scale,
498
452
  use_potentials=use_potentials,
499
453
  constraints=constraints,
@@ -0,0 +1,60 @@
1
+ import re
2
+ import string
3
+
4
+ valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
5
+
6
+
7
+ def is_valid_id(id_str: str) -> bool:
8
+ """
9
+ Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
10
+ """
11
+ if not id_str or len(id_str) > 5:
12
+ return False
13
+ return bool(valid_id_pattern.fullmatch(id_str))
14
+
15
+
16
+ def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
17
+ """
18
+ Yields new chain IDs, skipping any in 'used_ids'.
19
+ First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
20
+ """
21
+ used = set(tuple(used_ids or []))
22
+ letters = list(string.ascii_uppercase)
23
+
24
+ # --- Alphabetic IDs ---
25
+ curr_len = 1
26
+ curr_indices = [0] * curr_len # start at 'A'
27
+
28
+ def bump_indices():
29
+ # lexicographically increment curr_indices; return False on overflow
30
+ for i in reversed(range(len(curr_indices))):
31
+ if curr_indices[i] < len(letters) - 1:
32
+ curr_indices[i] += 1
33
+ for j in range(i + 1, len(curr_indices)):
34
+ curr_indices[j] = 0
35
+ return True
36
+ return False
37
+
38
+ while curr_len <= max_alpha_len:
39
+ candidate = "".join(letters[i] for i in curr_indices)
40
+ if candidate not in used:
41
+ used.add(candidate)
42
+ yield candidate
43
+ # bump
44
+ if not bump_indices():
45
+ curr_len += 1
46
+ if curr_len > max_alpha_len:
47
+ break
48
+ curr_indices = [0] * curr_len
49
+
50
+ # --- Numeric IDs ---
51
+ num = 1
52
+ while num <= max_numeric:
53
+ candidate = str(num)
54
+ num += 1
55
+ if candidate not in used:
56
+ used.add(candidate)
57
+ yield candidate
58
+
59
+ # exhausted
60
+ raise RuntimeError("exhausted all possible IDs")
@@ -7,9 +7,11 @@ from .alphafold2 import AlphaFold2Model
7
7
  from .boltz import Boltz1Model, Boltz1xModel, Boltz2Model
8
8
  from .esmfold import ESMFoldModel
9
9
  from .future import FoldComplexResultFuture, FoldResultFuture
10
+ from .minifold import MiniFoldModel
10
11
  from .models import (
11
12
  FoldModel,
12
13
  )
14
+ from .rosettafold3 import RosettaFold3Model
13
15
 
14
16
 
15
17
  class FoldAPI:
@@ -26,11 +28,16 @@ class FoldAPI:
26
28
  #: Boltz-1 model
27
29
  boltz1: Boltz1Model
28
30
  boltz_1: Boltz1Model
29
- af2: AlphaFold2Model
30
31
  #: AlphaFold-2 model
32
+ af2: AlphaFold2Model
31
33
  alphafold2: AlphaFold2Model
34
+ #: RosettaFold-3 model
35
+ rf3: RosettaFold3Model
36
+ rosettafold_3: RosettaFold3Model
32
37
  #: ESMFold model
33
38
  esmfold: ESMFoldModel
39
+ #: MiniFold model
40
+ minifold: MiniFoldModel
34
41
 
35
42
  def __init__(self, session: APISession):
36
43
  self.session = session
@@ -45,6 +52,8 @@ class FoldAPI:
45
52
  # Setup aliases safely
46
53
  if getattr(self, "alphafold2", None):
47
54
  self.af2 = self.alphafold2
55
+ if getattr(self, "rosettafold_3", None):
56
+ self.rf3 = self.rosettafold_3
48
57
  if getattr(self, "boltz_1", None):
49
58
  self.boltz1 = self.boltz_1
50
59
  if getattr(self, "boltz_1x", None):
@@ -3,6 +3,7 @@
3
3
  from typing import TYPE_CHECKING, Literal
4
4
 
5
5
  import numpy as np
6
+ import pandas as pd
6
7
  from pydantic.type_adapter import TypeAdapter
7
8
  from typing_extensions import Self
8
9
 
@@ -50,14 +51,14 @@ class FoldResultFuture(MappedFuture, Future):
50
51
  if metadata is None:
51
52
  if job is None or job.job_id is None:
52
53
  raise ValueError("Expected fold metadata or job")
53
- metadata = api.fold_get(session, job.job_id)
54
+ metadata = api.fold_get(session=session, job_id=job.job_id)
54
55
  self._metadata = metadata
55
56
  if job is None:
56
57
  jobs_api = getattr(session, "jobs", None)
57
58
  assert isinstance(jobs_api, JobsAPI)
58
59
  job = FoldJob.create(jobs_api.get_job(job_id=metadata.job_id))
59
60
  if sequences is None:
60
- sequences = api.fold_get_sequences(self.session, job_id=job.job_id)
61
+ sequences = api.fold_get_sequences(session=session, job_id=job.job_id)
61
62
  self._sequences = sequences
62
63
  super().__init__(session, job, max_workers)
63
64
 
@@ -93,7 +94,11 @@ class FoldResultFuture(MappedFuture, Future):
93
94
  else:
94
95
  raise ValueError("Expected fold metadata or job")
95
96
  model_id = api.fold_get(session=session, job_id=job_id).model_id
96
- if model_id.startswith("boltz") or model_id.startswith("alphafold"):
97
+ if (
98
+ model_id.startswith("boltz")
99
+ or model_id.startswith("alphafold")
100
+ or model_id.startswith("rosettafold")
101
+ ):
97
102
  return FoldComplexResultFuture(session=session, job=job, **kwargs)
98
103
  else:
99
104
  return cls(session=session, job=job, **kwargs)
@@ -124,7 +129,6 @@ class FoldResultFuture(MappedFuture, Future):
124
129
  """
125
130
  return self.job.job_id
126
131
 
127
-
128
132
  @property
129
133
  def metadata(self) -> FoldMetadata:
130
134
  """The fold metadata."""
@@ -243,6 +247,8 @@ class FoldComplexResultFuture(Future):
243
247
  self._pae: np.ndarray | None = None
244
248
  self._pde: np.ndarray | None = None
245
249
  self._plddt: np.ndarray | None = None
250
+ self._score: pd.DataFrame | None = None
251
+ self._metrics: pd.DataFrame | None = None
246
252
  self._confidence: list["BoltzConfidence"] | None = None
247
253
  self._affinity: "BoltzAffinity | None" = None
248
254
 
@@ -436,6 +442,56 @@ class FoldComplexResultFuture(Future):
436
442
  self._plddt = plddt
437
443
  return self._plddt
438
444
 
445
+ @property
446
+ def score(self) -> pd.DataFrame:
447
+ """
448
+ Get the predicted scores.
449
+
450
+ Returns
451
+ -------
452
+ pd.DataFrame
453
+ Structure prediction scores.
454
+
455
+ Raises
456
+ ------
457
+ AttributeError
458
+ If score is not supported for the model.
459
+ """
460
+ if self.model_id not in {"rosettafold-3"}:
461
+ raise AttributeError("score not supported for non-RosettaFold model")
462
+ if self._score is None:
463
+ score = api.fold_get_complex_extra_result(
464
+ session=self.session, job_id=self.job.job_id, key="score"
465
+ )
466
+ assert isinstance(score, pd.DataFrame)
467
+ self._score = score
468
+ return self._score
469
+
470
+ @property
471
+ def metrics(self) -> pd.DataFrame:
472
+ """
473
+ Get the predicted metrics.
474
+
475
+ Returns
476
+ -------
477
+ pd.DataFrame
478
+ Structure prediction metrics.
479
+
480
+ Raises
481
+ ------
482
+ AttributeError
483
+ If metrics is not supported for the model.
484
+ """
485
+ if self.model_id not in {"rosettafold-3"}:
486
+ raise AttributeError("metrics not supported for non-RosettaFold model")
487
+ if self._metrics is None:
488
+ metrics = api.fold_get_complex_extra_result(
489
+ session=self.session, job_id=self.job.job_id, key="metrics"
490
+ )
491
+ assert isinstance(metrics, pd.DataFrame)
492
+ self._metrics = metrics
493
+ return self._metrics
494
+
439
495
  @property
440
496
  def confidence(self) -> list["BoltzConfidence"]:
441
497
  """
@@ -0,0 +1,54 @@
1
+ from collections.abc import Sequence
2
+
3
+ from openprotein.base import APISession
4
+ from openprotein.common import ModelMetadata
5
+
6
+ from . import api
7
+ from .future import FoldResultFuture
8
+ from .models import FoldModel
9
+
10
+
11
+ class MiniFoldModel(FoldModel):
12
+ """
13
+ Class providing inference endpoints for MiniFold.
14
+ """
15
+
16
+ model_id: str = "minifold"
17
+
18
+ def __init__(
19
+ self,
20
+ session: APISession,
21
+ model_id: str,
22
+ metadata: ModelMetadata | None = None,
23
+ ):
24
+ super().__init__(session=session, model_id=model_id, metadata=metadata)
25
+
26
+ def fold(
27
+ self, sequences: Sequence[bytes | str], num_recycles: int | None = None
28
+ ) -> FoldResultFuture:
29
+ """
30
+ Fold sequences using this model.
31
+
32
+ Parameters
33
+ ----------
34
+ sequences : Sequence[bytes | str]
35
+ sequences to fold
36
+ num_recycles : int | None
37
+ number of times to recycle models
38
+ Returns
39
+ -------
40
+ FoldResultFuture
41
+ """
42
+ sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
43
+ assert all(":" not in s for s in sequences), "minifold does not support ':'"
44
+ result = FoldResultFuture.create(
45
+ session=self.session,
46
+ job=api.fold_models_post(
47
+ session=self.session,
48
+ model_id=self.model_id,
49
+ sequences=sequences,
50
+ num_recycles=num_recycles,
51
+ ),
52
+ )
53
+ assert isinstance(result, FoldResultFuture)
54
+ return result
@@ -0,0 +1,148 @@
1
+ """Community-based RosettaFold3 models for complex structure prediction with ligands/dna/rna."""
2
+
3
+ from typing import Any
4
+
5
+ from pydantic import BaseModel, Field, TypeAdapter, model_validator
6
+
7
+ from openprotein.align import AlignAPI, MSAFuture
8
+ from openprotein.base import APISession
9
+ from openprotein.chains import Ligand
10
+ from openprotein.common import ModelMetadata
11
+ from openprotein.protein import Protein
12
+
13
+ from . import api
14
+ from .complex import id_generator
15
+ from .future import FoldComplexResultFuture
16
+ from .models import FoldModel
17
+
18
+
19
def _assigned_chain_ids(entity) -> list:
    """Return the chain ids already set on a protein or ligand as a flat list."""
    chain_id = entity.chain_id
    if isinstance(chain_id, str):
        return [chain_id]
    if isinstance(chain_id, list):
        return list(chain_id)
    # None (or any other type) contributes no reserved ids.
    return []


class RosettaFold3Model(FoldModel):
    """
    Class providing inference endpoints for RosettaFold-3 structure prediction model.
    """

    model_id: str = "rosettafold-3"

    def __init__(
        self,
        session: APISession,
        model_id: str,
        metadata: ModelMetadata | None = None,
    ):
        """
        Initialize the RosettaFold-3 model wrapper.

        Parameters
        ----------
        session : APISession
            Session forwarded to the base ``FoldModel``.
        model_id : str
            Model identifier forwarded to the base ``FoldModel``.
        metadata : ModelMetadata | None
            Optional model metadata forwarded to the base ``FoldModel``.
        """
        super().__init__(session, model_id, metadata)

    def fold(
        self,
        proteins: list[Protein] | MSAFuture | None = None,
        ligands: list[Ligand] | None = None,
        diffusion_samples: int = 1,
        num_recycles: int = 10,
        num_steps: int = 50,
        **kwargs,
    ) -> FoldComplexResultFuture:
        """
        Request structure prediction with RosettaFold-3 model.

        Parameters
        ----------
        proteins : List[Protein] | MSAFuture | None
            List of protein sequences to include in folded output. `Protein`
            objects must be tagged with an `msa`, which can be a
            `Protein.single_sequence_mode` for single sequence mode.
            Alternatively, supply an `MSAFuture` to use all query sequences
            as a multimer.
        ligands : List[Ligand] | None
            List of ligands to include in folded output.
        diffusion_samples : int
            Number of diffusion samples to use
        num_recycles : int
            Number of recycling steps to use
        num_steps : int
            Number of sampling steps to use
        **kwargs
            Additional keyword arguments forwarded unchanged to the fold
            request.

        Returns
        -------
        FoldComplexResultFuture
            Future for the folding complex result.

        Raises
        ------
        ValueError
            If a protein has no `.msa` set, or if neither proteins nor
            ligands were supplied.
        AssertionError
            If an `MSAFuture` is supplied but the session does not expose an
            `AlignAPI` as its `align` attribute.
        """
        # Collate the chain ids already in use so generated ids do not clash.
        used_ids: list = []
        if isinstance(proteins, list):
            for protein in proteins:
                if isinstance(protein, Protein):
                    used_ids.extend(_assigned_chain_ids(protein))
        for ligand in ligands or []:
            used_ids.extend(_assigned_chain_ids(ligand))
        id_gen = id_generator(used_ids)

        # Build the proteins from the msa: the seed is split on ':' and each
        # distinct query sequence becomes one Protein with one chain id per
        # occurrence.
        if isinstance(proteins, MSAFuture):
            from collections import Counter

            align_api = getattr(self.session, "align", None)
            assert isinstance(align_api, AlignAPI)
            msa = proteins  # rename
            proteins = []  # convert back to list of proteins
            seed = align_api.get_seed(job_id=msa.job.job_id)
            # Counter preserves first-seen order, matching the seed order.
            for seq, count in Counter(seed.split(":")).items():
                protein = Protein(sequence=seq)
                if count == 1:
                    chain_id = next(id_gen)
                else:
                    chain_id = [next(id_gen) for _ in range(count)]
                protein.chain_id = chain_id
                protein.msa = msa
                proteins.append(protein)

        # Build the sequences input.
        sequences: list[dict[str, Any]] = []
        for protein in proteins or []:
            msa = protein.msa
            if msa is None:
                raise ValueError(
                    "Expected all protein sequences to have `.msa` set with an `MSAFuture` or `Protein.single_sequence_mode` for single sequence mode."
                )
            # Convert to msa id, or null for single sequence mode.
            if isinstance(msa, str):
                msa_id = msa
            elif isinstance(msa, MSAFuture):
                msa_id = msa.id
            else:
                msa_id = None
            # Add the protein in the expected format.
            protein_entry = {
                "id": protein.chain_id or next(id_gen),
                "msa_id": msa_id,
                "sequence": protein.sequence.decode(),
            }
            if protein.cyclic:
                protein_entry["cyclic"] = protein.cyclic
            sequences.append({"protein": protein_entry})
        for ligand in ligands or []:
            ligand_entry: dict = {"id": ligand.chain_id or next(id_gen)}
            if ligand.ccd:
                ligand_entry["ccd"] = ligand.ccd
            if ligand.smiles:
                ligand_entry["smiles"] = ligand.smiles
            sequences.append({"ligand": ligand_entry})

        if not sequences:
            raise ValueError("Expected proteins or ligands")

        return FoldComplexResultFuture.create(
            session=self.session,
            job=api.fold_models_post(
                session=self.session,
                model_id=self.model_id,
                sequences=sequences,
                diffusion_samples=diffusion_samples,
                num_recycles=num_recycles,
                num_steps=num_steps,
                **kwargs,
            ),
            model_id=self.model_id,
            proteins=proteins,
            ligands=ligands,
        )