openprotein-python 0.8.6__tar.gz → 0.8.8__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (88)
  1. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/PKG-INFO +2 -2
  2. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/align/api.py +0 -6
  3. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/poet.py +5 -2
  4. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/poet2.py +4 -3
  5. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/__init__.py +2 -0
  6. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/boltz.py +38 -84
  7. openprotein_python-0.8.8/openprotein/fold/complex.py +60 -0
  8. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/fold.py +10 -1
  9. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/future.py +7 -4
  10. openprotein_python-0.8.8/openprotein/fold/minifold.py +54 -0
  11. openprotein_python-0.8.8/openprotein/fold/rosettafold3.py +148 -0
  12. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/protein.py +1 -7
  13. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/pyproject.toml +7 -2
  14. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/.gitignore +0 -0
  15. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/LICENSE.txt +0 -0
  16. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/README.md +0 -0
  17. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/__init__.py +0 -0
  18. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/_version.py +0 -0
  19. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/align/__init__.py +0 -0
  20. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/align/align.py +0 -0
  21. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/align/future.py +0 -0
  22. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/align/msa.py +0 -0
  23. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/align/schemas.py +0 -0
  24. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/base.py +0 -0
  25. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/chains.py +0 -0
  26. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/common/__init__.py +0 -0
  27. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/common/features.py +0 -0
  28. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/common/model_metadata.py +0 -0
  29. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/common/reduction.py +0 -0
  30. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/config.py +0 -0
  31. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/csv.py +0 -0
  32. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/data/__init__.py +0 -0
  33. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/data/api.py +0 -0
  34. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/data/assaydataset.py +0 -0
  35. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/data/data.py +0 -0
  36. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/data/schemas.py +0 -0
  37. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/design/__init__.py +0 -0
  38. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/design/api.py +0 -0
  39. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/design/design.py +0 -0
  40. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/design/future.py +0 -0
  41. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/design/schemas.py +0 -0
  42. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/__init__.py +0 -0
  43. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/api.py +0 -0
  44. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/embeddings.py +0 -0
  45. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/esm.py +0 -0
  46. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/future.py +0 -0
  47. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/models.py +0 -0
  48. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/openprotein.py +0 -0
  49. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/embeddings/schemas.py +0 -0
  50. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/errors.py +0 -0
  51. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fasta.py +0 -0
  52. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/alphafold2.py +0 -0
  53. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/api.py +0 -0
  54. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/esmfold.py +0 -0
  55. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/models.py +0 -0
  56. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/fold/schemas.py +0 -0
  57. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/jobs/__init__.py +0 -0
  58. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/jobs/api.py +0 -0
  59. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/jobs/futures.py +0 -0
  60. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/jobs/jobs.py +0 -0
  61. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/jobs/schemas.py +0 -0
  62. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/models/__init__.py +0 -0
  63. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/models/base.py +0 -0
  64. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/models/foundation/rfdiffusion.py +0 -0
  65. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/models/models.py +0 -0
  66. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/__init__.py +0 -0
  67. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/api.py +0 -0
  68. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/models.py +0 -0
  69. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/prediction.py +0 -0
  70. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/predictor.py +0 -0
  71. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/schemas.py +0 -0
  72. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/predictor/validate.py +0 -0
  73. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/prompt/__init__.py +0 -0
  74. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/prompt/api.py +0 -0
  75. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/prompt/models.py +0 -0
  76. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/prompt/prompt.py +0 -0
  77. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/prompt/schemas.py +0 -0
  78. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/svd/__init__.py +0 -0
  79. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/svd/api.py +0 -0
  80. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/svd/models.py +0 -0
  81. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/svd/schemas.py +0 -0
  82. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/svd/svd.py +0 -0
  83. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/umap/__init__.py +0 -0
  84. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/umap/api.py +0 -0
  85. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/umap/models.py +0 -0
  86. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/umap/schemas.py +0 -0
  87. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/umap/umap.py +0 -0
  88. {openprotein_python-0.8.6 → openprotein_python-0.8.8}/openprotein/utils/uuid.py +0 -0
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: openprotein-python
3
- Version: 0.8.6
3
+ Version: 0.8.8
4
4
  Summary: OpenProtein Python interface.
5
5
  Author-email: Mark Gee <markgee@ne47.bio>, "Timothy Truong Jr." <ttruong@ne47.bio>, Tristan Bepler <tbepler@ne47.bio>
6
6
  License-Expression: MIT
7
7
  License-File: LICENSE.txt
8
8
  Classifier: Development Status :: 4 - Beta
9
9
  Classifier: Programming Language :: Python :: 3
10
- Requires-Python: >=3.10
10
+ Requires-Python: <3.13,>=3.10
11
11
  Requires-Dist: gemmi<0.8,>=0.7.0
12
12
  Requires-Dist: numpy<3,>=1.9
13
13
  Requires-Dist: pandas<3,>=2.2.2
@@ -400,12 +400,6 @@ def prompt_post(
400
400
  "Either 'num_sequences' or 'num_residues' must be set, but not both."
401
401
  )
402
402
 
403
- if num_sequences is not None and not (0 <= num_sequences < 100):
404
- raise InvalidParameterError("The 'num_sequences' must be between 0 and 100.")
405
-
406
- if num_residues is not None and not (0 <= num_residues < 24577):
407
- raise InvalidParameterError("The 'num_residues' must be between 0 and 24577.")
408
-
409
403
  if random_seed is None:
410
404
  random_seed = random.randrange(2**32)
411
405
 
@@ -295,7 +295,11 @@ class PoETModel(EmbeddingModel):
295
295
  EmbeddingsGenerateFuture
296
296
  Future object representing the status and information about the generation job.
297
297
  """
298
- prompt_id = prompt if isinstance(prompt, str) else prompt.id
298
+ if prompt is not None:
299
+ kwargs["prompt_id"] = prompt if isinstance(prompt, str) else prompt.id
300
+ else:
301
+ # NB: this is for handling PoET-2
302
+ assert self.model_id != "poet"
299
303
  return EmbeddingsGenerateFuture.create(
300
304
  session=self.session,
301
305
  job=api.request_generate_post(
@@ -307,7 +311,6 @@ class PoETModel(EmbeddingModel):
307
311
  topp=topp,
308
312
  max_length=max_length,
309
313
  random_seed=seed,
310
- prompt_id=prompt_id,
311
314
  **kwargs,
312
315
  ),
313
316
  )
@@ -287,7 +287,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
287
287
 
288
288
  def generate(
289
289
  self,
290
- prompt: str | Prompt,
290
+ prompt: str | Prompt | None,
291
291
  query: str | bytes | Protein | Query | None = None,
292
292
  use_query_structure_in_decoder: bool = True,
293
293
  num_samples: int = 100,
@@ -304,7 +304,7 @@ class PoET2Model(PoETModel, EmbeddingModel):
304
304
 
305
305
  Parameters
306
306
  ----------
307
- prompt : str or Prompt
307
+ prompt : str or Prompt or None, optional
308
308
  Prompt from an align workflow to condition PoET model.
309
309
  query : str or bytes or Protein or Query or None, optional
310
310
  Query to use with prompt.
@@ -351,7 +351,8 @@ class PoET2Model(PoETModel, EmbeddingModel):
351
351
  f"equal to the number of prompts ({prompt.num_replicates})"
352
352
  )
353
353
  return super().generate(
354
- prompt=prompt,
354
+ # NB: poet(-1) cannot use null prompt, so we don't change its .generate's type signature
355
+ prompt=prompt, # type: ignore
355
356
  num_samples=num_samples,
356
357
  temperature=temperature,
357
358
  topk=topk,
@@ -7,6 +7,7 @@ isort:skip_file
7
7
  from .schemas import FoldJob, FoldMetadata
8
8
  from .models import FoldModel
9
9
  from .esmfold import ESMFoldModel
10
+ from .minifold import MiniFoldModel
10
11
  from .alphafold2 import AlphaFold2Model
11
12
  from .boltz import (
12
13
  Boltz1Model,
@@ -17,5 +18,6 @@ from .boltz import (
17
18
  BoltzConstraint,
18
19
  BoltzProperty,
19
20
  )
21
+ from .rosettafold3 import RosettaFold3Model
20
22
  from .future import FoldResultFuture, FoldComplexResultFuture
21
23
  from .fold import FoldAPI
@@ -1,7 +1,7 @@
1
1
  """Community-based Boltz models for complex structure prediction with ligands/dna/rna."""
2
2
 
3
- import re
4
- import string
3
+ import warnings
4
+ from logging import warning
5
5
  from typing import Any
6
6
 
7
7
  from pydantic import BaseModel, Field, TypeAdapter, model_validator
@@ -13,67 +13,10 @@ from openprotein.common import ModelMetadata
13
13
  from openprotein.protein import Protein
14
14
 
15
15
  from . import api
16
+ from .complex import id_generator
16
17
  from .future import FoldComplexResultFuture
17
18
  from .models import FoldModel
18
19
 
19
- valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
20
-
21
-
22
- def is_valid_id(id_str: str) -> bool:
23
- """
24
- Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
25
- """
26
- if not id_str or len(id_str) > 5:
27
- return False
28
- return bool(valid_id_pattern.fullmatch(id_str))
29
-
30
-
31
- def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
32
- """
33
- Yields new chain IDs, skipping any in 'used_ids'.
34
- First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
35
- """
36
- used = set(tuple(used_ids or []))
37
- letters = list(string.ascii_uppercase)
38
-
39
- # --- Alphabetic IDs ---
40
- curr_len = 1
41
- curr_indices = [0] * curr_len # start at 'A'
42
-
43
- def bump_indices():
44
- # lexicographically increment curr_indices; return False on overflow
45
- for i in reversed(range(len(curr_indices))):
46
- if curr_indices[i] < len(letters) - 1:
47
- curr_indices[i] += 1
48
- for j in range(i + 1, len(curr_indices)):
49
- curr_indices[j] = 0
50
- return True
51
- return False
52
-
53
- while curr_len <= max_alpha_len:
54
- candidate = "".join(letters[i] for i in curr_indices)
55
- if candidate not in used:
56
- used.add(candidate)
57
- yield candidate
58
- # bump
59
- if not bump_indices():
60
- curr_len += 1
61
- if curr_len > max_alpha_len:
62
- break
63
- curr_indices = [0] * curr_len
64
-
65
- # --- Numeric IDs ---
66
- num = 1
67
- while num <= max_numeric:
68
- candidate = str(num)
69
- num += 1
70
- if candidate not in used:
71
- used.add(candidate)
72
- yield candidate
73
-
74
- # exhausted
75
- raise RuntimeError("exhausted all possible IDs")
76
-
77
20
 
78
21
  class BoltzModel(FoldModel):
79
22
  """
@@ -97,8 +40,8 @@ class BoltzModel(FoldModel):
97
40
  rnas: list[RNA] | None = None,
98
41
  ligands: list[Ligand] | None = None,
99
42
  diffusion_samples: int = 1,
100
- recycling_steps: int = 3,
101
- sampling_steps: int = 200,
43
+ num_recycles: int = 3,
44
+ num_steps: int = 200,
102
45
  step_scale: float = 1.638,
103
46
  use_potentials: bool = False,
104
47
  constraints: list[dict] | None = None,
@@ -119,9 +62,9 @@ class BoltzModel(FoldModel):
119
62
  List of ligands to include in folded output.
120
63
  diffusion_samples: int
121
64
  Number of diffusion samples to use
122
- recycling_steps : int
65
+ num_recycles : int
123
66
  Number of recycling steps to use
124
- sampling_steps : int
67
+ num_steps : int
125
68
  Number of sampling steps to use
126
69
  step_scale : float
127
70
  Scaling factor for diffusion steps.
@@ -133,6 +76,17 @@ class BoltzModel(FoldModel):
133
76
  FoldComplexResultFuture
134
77
  Future for the folding complex result.
135
78
  """
79
+ # migrate old parameter
80
+ if (recycling_steps := kwargs.get("recycling_steps")) is not None:
81
+ num_recycles = recycling_steps
82
+ warnings.warn(
83
+ "`recycling_steps` has been updated to `num_recycles`. The parameter will be auto-corrected for now but raise an exception in the future."
84
+ )
85
+ if (sampling_steps := kwargs.get("sampling_steps")) is not None:
86
+ num_steps = sampling_steps
87
+ warnings.warn(
88
+ "`sampling_steps` has been updated to `num_steps`. The parameter will be auto-corrected for now but raise an exception in the future."
89
+ )
136
90
  # validate constraints
137
91
  if constraints is not None:
138
92
  TypeAdapter(list[BoltzConstraint]).validate_python(constraints)
@@ -247,8 +201,8 @@ class BoltzModel(FoldModel):
247
201
  model_id=self.model_id,
248
202
  sequences=sequences,
249
203
  diffusion_samples=diffusion_samples,
250
- recycling_steps=recycling_steps,
251
- sampling_steps=sampling_steps,
204
+ num_recycles=num_recycles,
205
+ num_steps=num_steps,
252
206
  step_scale=step_scale,
253
207
  constraints=constraints,
254
208
  use_potentials=use_potentials,
@@ -276,8 +230,8 @@ class Boltz2Model(BoltzModel, FoldModel):
276
230
  rnas: list[RNA] | None = None,
277
231
  ligands: list[Ligand] | None = None,
278
232
  diffusion_samples: int = 1,
279
- recycling_steps: int = 3,
280
- sampling_steps: int = 200,
233
+ num_recycles: int = 3,
234
+ num_steps: int = 200,
281
235
  step_scale: float = 1.638,
282
236
  use_potentials: bool = False,
283
237
  constraints: list[dict] | None = None,
@@ -300,9 +254,9 @@ class Boltz2Model(BoltzModel, FoldModel):
300
254
  List of ligands to include in folded output.
301
255
  diffusion_samples: int
302
256
  Number of diffusion samples to use
303
- recycling_steps : int
257
+ num_recycles : int
304
258
  Number of recycling steps to use
305
- sampling_steps : int
259
+ num_steps : int
306
260
  Number of sampling steps to use
307
261
  step_scale : float
308
262
  Scaling factor for diffusion steps.
@@ -360,8 +314,8 @@ class Boltz2Model(BoltzModel, FoldModel):
360
314
  rnas=rnas,
361
315
  ligands=ligands,
362
316
  diffusion_samples=diffusion_samples,
363
- recycling_steps=recycling_steps,
364
- sampling_steps=sampling_steps,
317
+ num_recycles=num_recycles,
318
+ num_steps=num_steps,
365
319
  step_scale=step_scale,
366
320
  use_potentials=use_potentials,
367
321
  constraints=constraints,
@@ -385,8 +339,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
385
339
  rnas: list[RNA] | None = None,
386
340
  ligands: list[Ligand] | None = None,
387
341
  diffusion_samples: int = 1,
388
- recycling_steps: int = 3,
389
- sampling_steps: int = 200,
342
+ num_recycles: int = 3,
343
+ num_steps: int = 200,
390
344
  step_scale: float = 1.638,
391
345
  constraints: list[dict] | None = None,
392
346
  ) -> FoldComplexResultFuture:
@@ -405,9 +359,9 @@ class Boltz1xModel(BoltzModel, FoldModel):
405
359
  List of ligands to include in folded output.
406
360
  diffusion_samples: int
407
361
  Number of diffusion samples to use
408
- recycling_steps : int
362
+ num_recycles : int
409
363
  Number of recycling steps to use
410
- sampling_steps : int
364
+ num_steps : int
411
365
  Number of sampling steps to use
412
366
  step_scale : float
413
367
  Scaling factor for diffusion steps.
@@ -426,8 +380,8 @@ class Boltz1xModel(BoltzModel, FoldModel):
426
380
  rnas=rnas,
427
381
  ligands=ligands,
428
382
  diffusion_samples=diffusion_samples,
429
- recycling_steps=recycling_steps,
430
- sampling_steps=sampling_steps,
383
+ num_recycles=num_recycles,
384
+ num_steps=num_steps,
431
385
  step_scale=step_scale,
432
386
  use_potentials=True,
433
387
  constraints=constraints,
@@ -448,8 +402,8 @@ class Boltz1Model(BoltzModel, FoldModel):
448
402
  rnas: list[RNA] | None = None,
449
403
  ligands: list[Ligand] | None = None,
450
404
  diffusion_samples: int = 1,
451
- recycling_steps: int = 3,
452
- sampling_steps: int = 200,
405
+ num_recycles: int = 3,
406
+ num_steps: int = 200,
453
407
  step_scale: float = 1.638,
454
408
  use_potentials: bool = False,
455
409
  constraints: list[dict] | None = None,
@@ -469,9 +423,9 @@ class Boltz1Model(BoltzModel, FoldModel):
469
423
  List of ligands to include in folded output.
470
424
  diffusion_samples: int
471
425
  Number of diffusion samples to use
472
- recycling_steps : int
426
+ num_recycles : int
473
427
  Number of recycling steps to use
474
- sampling_steps : int
428
+ num_steps : int
475
429
  Number of sampling steps to use
476
430
  step_scale : float
477
431
  Scaling factor for diffusion steps.
@@ -492,8 +446,8 @@ class Boltz1Model(BoltzModel, FoldModel):
492
446
  rnas=rnas,
493
447
  ligands=ligands,
494
448
  diffusion_samples=diffusion_samples,
495
- recycling_steps=recycling_steps,
496
- sampling_steps=sampling_steps,
449
+ num_recycles=num_recycles,
450
+ num_steps=num_steps,
497
451
  step_scale=step_scale,
498
452
  use_potentials=use_potentials,
499
453
  constraints=constraints,
@@ -0,0 +1,60 @@
1
+ import re
2
+ import string
3
+
4
+ valid_id_pattern = re.compile(r"^[A-Z]{1,5}$|^\d{1,5}$")
5
+
6
+
7
+ def is_valid_id(id_str: str) -> bool:
8
+ """
9
+ Check if the id_str matches the valid pattern for IDs (1-5 uppercase or 1-5 digits).
10
+ """
11
+ if not id_str or len(id_str) > 5:
12
+ return False
13
+ return bool(valid_id_pattern.fullmatch(id_str))
14
+
15
+
16
+ def id_generator(used_ids: list[str] | None = None, max_alpha_len=5, max_numeric=99999):
17
+ """
18
+ Yields new chain IDs, skipping any in 'used_ids'.
19
+ First A..Z, AA..ZZ, … up to max_alpha_len, then '1','2',… up to max_numeric.
20
+ """
21
+ used = set(tuple(used_ids or []))
22
+ letters = list(string.ascii_uppercase)
23
+
24
+ # --- Alphabetic IDs ---
25
+ curr_len = 1
26
+ curr_indices = [0] * curr_len # start at 'A'
27
+
28
+ def bump_indices():
29
+ # lexicographically increment curr_indices; return False on overflow
30
+ for i in reversed(range(len(curr_indices))):
31
+ if curr_indices[i] < len(letters) - 1:
32
+ curr_indices[i] += 1
33
+ for j in range(i + 1, len(curr_indices)):
34
+ curr_indices[j] = 0
35
+ return True
36
+ return False
37
+
38
+ while curr_len <= max_alpha_len:
39
+ candidate = "".join(letters[i] for i in curr_indices)
40
+ if candidate not in used:
41
+ used.add(candidate)
42
+ yield candidate
43
+ # bump
44
+ if not bump_indices():
45
+ curr_len += 1
46
+ if curr_len > max_alpha_len:
47
+ break
48
+ curr_indices = [0] * curr_len
49
+
50
+ # --- Numeric IDs ---
51
+ num = 1
52
+ while num <= max_numeric:
53
+ candidate = str(num)
54
+ num += 1
55
+ if candidate not in used:
56
+ used.add(candidate)
57
+ yield candidate
58
+
59
+ # exhausted
60
+ raise RuntimeError("exhausted all possible IDs")
@@ -7,9 +7,11 @@ from .alphafold2 import AlphaFold2Model
7
7
  from .boltz import Boltz1Model, Boltz1xModel, Boltz2Model
8
8
  from .esmfold import ESMFoldModel
9
9
  from .future import FoldComplexResultFuture, FoldResultFuture
10
+ from .minifold import MiniFoldModel
10
11
  from .models import (
11
12
  FoldModel,
12
13
  )
14
+ from .rosettafold3 import RosettaFold3Model
13
15
 
14
16
 
15
17
  class FoldAPI:
@@ -26,11 +28,16 @@ class FoldAPI:
26
28
  #: Boltz-1 model
27
29
  boltz1: Boltz1Model
28
30
  boltz_1: Boltz1Model
29
- af2: AlphaFold2Model
30
31
  #: AlphaFold-2 model
32
+ af2: AlphaFold2Model
31
33
  alphafold2: AlphaFold2Model
34
+ #: RosettaFold-3 model
35
+ rf3: RosettaFold3Model
36
+ rosettafold_3: RosettaFold3Model
32
37
  #: ESMFold model
33
38
  esmfold: ESMFoldModel
39
+ #: MiniFold model
40
+ minifold: MiniFoldModel
34
41
 
35
42
  def __init__(self, session: APISession):
36
43
  self.session = session
@@ -45,6 +52,8 @@ class FoldAPI:
45
52
  # Setup aliases safely
46
53
  if getattr(self, "alphafold2", None):
47
54
  self.af2 = self.alphafold2
55
+ if getattr(self, "rosettafold_3", None):
56
+ self.rf3 = self.rosettafold_3
48
57
  if getattr(self, "boltz_1", None):
49
58
  self.boltz1 = self.boltz_1
50
59
  if getattr(self, "boltz_1x", None):
@@ -50,14 +50,14 @@ class FoldResultFuture(MappedFuture, Future):
50
50
  if metadata is None:
51
51
  if job is None or job.job_id is None:
52
52
  raise ValueError("Expected fold metadata or job")
53
- metadata = api.fold_get(session, job.job_id)
53
+ metadata = api.fold_get(session=session, job_id=job.job_id)
54
54
  self._metadata = metadata
55
55
  if job is None:
56
56
  jobs_api = getattr(session, "jobs", None)
57
57
  assert isinstance(jobs_api, JobsAPI)
58
58
  job = FoldJob.create(jobs_api.get_job(job_id=metadata.job_id))
59
59
  if sequences is None:
60
- sequences = api.fold_get_sequences(self.session, job_id=job.job_id)
60
+ sequences = api.fold_get_sequences(session=session, job_id=job.job_id)
61
61
  self._sequences = sequences
62
62
  super().__init__(session, job, max_workers)
63
63
 
@@ -93,7 +93,11 @@ class FoldResultFuture(MappedFuture, Future):
93
93
  else:
94
94
  raise ValueError("Expected fold metadata or job")
95
95
  model_id = api.fold_get(session=session, job_id=job_id).model_id
96
- if model_id.startswith("boltz") or model_id.startswith("alphafold"):
96
+ if (
97
+ model_id.startswith("boltz")
98
+ or model_id.startswith("alphafold")
99
+ or model_id.startswith("rosettafold")
100
+ ):
97
101
  return FoldComplexResultFuture(session=session, job=job, **kwargs)
98
102
  else:
99
103
  return cls(session=session, job=job, **kwargs)
@@ -124,7 +128,6 @@ class FoldResultFuture(MappedFuture, Future):
124
128
  """
125
129
  return self.job.job_id
126
130
 
127
-
128
131
  @property
129
132
  def metadata(self) -> FoldMetadata:
130
133
  """The fold metadata."""
@@ -0,0 +1,54 @@
1
+ from collections.abc import Sequence
2
+
3
+ from openprotein.base import APISession
4
+ from openprotein.common import ModelMetadata
5
+
6
+ from . import api
7
+ from .future import FoldResultFuture
8
+ from .models import FoldModel
9
+
10
+
11
+ class MiniFoldModel(FoldModel):
12
+ """
13
+ Class providing inference endpoints for MiniFold.
14
+ """
15
+
16
+ model_id: str = "minifold"
17
+
18
+ def __init__(
19
+ self,
20
+ session: APISession,
21
+ model_id: str,
22
+ metadata: ModelMetadata | None = None,
23
+ ):
24
+ super().__init__(session=session, model_id=model_id, metadata=metadata)
25
+
26
+ def fold(
27
+ self, sequences: Sequence[bytes | str], num_recycles: int | None = None
28
+ ) -> FoldResultFuture:
29
+ """
30
+ Fold sequences using this model.
31
+
32
+ Parameters
33
+ ----------
34
+ sequences : Sequence[bytes | str]
35
+ sequences to fold
36
+ num_recycles : int | None
37
+ number of times to recycle models
38
+ Returns
39
+ -------
40
+ FoldResultFuture
41
+ """
42
+ sequences = [s.decode() if isinstance(s, bytes) else s for s in sequences]
43
+ assert all(":" not in s for s in sequences), "minifold does not support ':'"
44
+ result = FoldResultFuture.create(
45
+ session=self.session,
46
+ job=api.fold_models_post(
47
+ session=self.session,
48
+ model_id=self.model_id,
49
+ sequences=sequences,
50
+ num_recycles=num_recycles,
51
+ ),
52
+ )
53
+ assert isinstance(result, FoldResultFuture)
54
+ return result
@@ -0,0 +1,148 @@
1
+ """Community-based RosettaFold3 models for complex structure prediction with ligands/dna/rna."""
2
+
3
+ from typing import Any
4
+
5
+ from pydantic import BaseModel, Field, TypeAdapter, model_validator
6
+
7
+ from openprotein.align import AlignAPI, MSAFuture
8
+ from openprotein.base import APISession
9
+ from openprotein.chains import Ligand
10
+ from openprotein.common import ModelMetadata
11
+ from openprotein.protein import Protein
12
+
13
+ from . import api
14
+ from .complex import id_generator
15
+ from .future import FoldComplexResultFuture
16
+ from .models import FoldModel
17
+
18
+
19
+ class RosettaFold3Model(FoldModel):
20
+ """
21
+ Class providing inference endpoints for RosettaFold-3 structure prediction model.
22
+ """
23
+
24
+ model_id: str = "rosettafold-3"
25
+
26
+ def __init__(
27
+ self,
28
+ session: APISession,
29
+ model_id: str,
30
+ metadata: ModelMetadata | None = None,
31
+ ):
32
+ super().__init__(session, model_id, metadata)
33
+
34
+ def fold(
35
+ self,
36
+ proteins: list[Protein] | MSAFuture | None = None,
37
+ ligands: list[Ligand] | None = None,
38
+ diffusion_samples: int = 1,
39
+ num_recycles: int = 10,
40
+ num_steps: int = 50,
41
+ **kwargs,
42
+ ) -> FoldComplexResultFuture:
43
+ """
44
+ Request structure prediction with RosettaFold-3 model.
45
+
46
+ Parameters
47
+ ----------
48
+ proteins : List[Protein] | MSAFuture | None
49
+ List of protein sequences to include in folded output. `Protein` objects must be tagged with an `msa`, which can be a `Protein.single_sequence_mode` for single sequence mode. Alternatively, supply an `MSAFuture` to use all query sequences as a multimer.
50
+ ligands : List[Ligand] | None
51
+ List of ligands to include in folded output.
52
+ diffusion_samples: int
53
+ Number of diffusion samples to use
54
+ num_recycles : int
55
+ Number of recycling steps to use
56
+ num_steps : int
57
+ Number of sampling steps to use
58
+
59
+ Returns
60
+ -------
61
+ FoldComplexResultFuture
62
+ Future for the folding complex result.
63
+ """
64
+ # collate the id's used
65
+ used_ids = []
66
+ if isinstance(proteins, list):
67
+ for protein in proteins:
68
+ if isinstance(protein, Protein) and protein.chain_id is not None:
69
+ if isinstance(protein.chain_id, str):
70
+ used_ids.append(protein.chain_id)
71
+ elif isinstance(protein.chain_id, list):
72
+ used_ids.extend(protein.chain_id)
73
+ for ligand in ligands or []:
74
+ if isinstance(ligand.chain_id, str):
75
+ used_ids.append(ligand.chain_id)
76
+ elif isinstance(ligand.chain_id, list):
77
+ used_ids.extend(ligand.chain_id)
78
+ id_gen = id_generator(used_ids)
79
+ # build the proteins from msa
80
+ if isinstance(proteins, MSAFuture):
81
+ align_api = getattr(self.session, "align", None)
82
+ assert isinstance(align_api, AlignAPI)
83
+ msa = proteins # rename
84
+ proteins = [] # convert back to list of proteins
85
+ seed = align_api.get_seed(job_id=msa.job.job_id)
86
+ query_seqs_cardinality: dict[str, int] = dict()
87
+ for seq in seed.split(":"):
88
+ query_seqs_cardinality[seq] = query_seqs_cardinality.get(seq, 0) + 1
89
+ for seq, card in query_seqs_cardinality.items():
90
+ protein = Protein(sequence=seq)
91
+ if card == 1:
92
+ id = next(id_gen)
93
+ else:
94
+ id = [next(id_gen) for _ in range(card)]
95
+ protein.chain_id = id
96
+ protein.msa = msa
97
+ proteins.append(protein)
98
+
99
+ # build the sequences input
100
+ sequences: list[dict[str, Any]] = []
101
+ for protein in proteins or []:
102
+ # check the msa
103
+ msa = protein.msa
104
+ if msa is None:
105
+ raise ValueError(
106
+ "Expected all protein sequences to have `.msa` set with an `MSAFuture` or `Protein.single_sequence_mode` for single sequence mode."
107
+ )
108
+ # convert to msa id or null for single sequence mode
109
+ msa_id = (
110
+ msa
111
+ if isinstance(msa, str)
112
+ else msa.id if isinstance(msa, MSAFuture) else None
113
+ )
114
+ # add the protein in the expected format
115
+ p = {
116
+ "id": protein.chain_id or next(id_gen),
117
+ "msa_id": msa_id,
118
+ "sequence": protein.sequence.decode(),
119
+ }
120
+ if protein.cyclic:
121
+ p["cyclic"] = protein.cyclic
122
+ sequences.append({"protein": p})
123
+ for ligand in ligands or []:
124
+ ligand_: dict = {"id": ligand.chain_id or next(id_gen)}
125
+ if ligand.ccd:
126
+ ligand_["ccd"] = ligand.ccd
127
+ if ligand.smiles:
128
+ ligand_["smiles"] = ligand.smiles
129
+ sequences.append({"ligand": ligand_})
130
+
131
+ if len(sequences) == 0:
132
+ raise ValueError("Expected proteins or ligands")
133
+
134
+ return FoldComplexResultFuture.create(
135
+ session=self.session,
136
+ job=api.fold_models_post(
137
+ session=self.session,
138
+ model_id=self.model_id,
139
+ sequences=sequences,
140
+ diffusion_samples=diffusion_samples,
141
+ num_recycles=num_recycles,
142
+ num_steps=num_steps,
143
+ **kwargs,
144
+ ),
145
+ model_id=self.model_id,
146
+ proteins=proteins,
147
+ ligands=ligands,
148
+ )
@@ -561,15 +561,9 @@ def _use_bfactor_as_plddt(structure: gemmi.Structure) -> bool:
561
561
  This heuristic decides whether to use B-factor as pLDDT.
562
562
  It uses B-factor as pLDDT when all of the following fields are *not* set:
563
563
  - structure resolution
564
- - _pdbx_database_status.recvd_initial_deposition_date
565
564
  This heuristic may be changed in the future.
566
565
  """
567
- return (structure.resolution == 0.0) and (
568
- structure.make_mmcif_block(
569
- groups=gemmi.MmcifOutputGroups(False, database_status=True)
570
- ).find_value("_pdbx_database_status.recvd_initial_deposition_date")
571
- is None
572
- )
566
+ return structure.resolution == 0.0
573
567
 
574
568
 
575
569
  def calc_rmsd(
@@ -24,7 +24,7 @@ dependencies = [
24
24
  "numpy>=1.9,<3",
25
25
  "gemmi>=0.7.0,<0.8",
26
26
  ]
27
- requires-python = ">=3.10"
27
+ requires-python = ">=3.10,<3.13"
28
28
 
29
29
  [dependency-groups]
30
30
  dev = [
@@ -52,12 +52,17 @@ python = ">=3.10,<3.13"
52
52
  [tool.pixi.feature.dev.tasks]
53
53
  postinstall = "pip install --no-build-isolation --no-deps --disable-pip-version-check -e ."
54
54
  jupyterinstall = "python -m ipykernel install --user --name=openprotein-python"
55
+ build = """
56
+ pixi global install -q --no-progress -e conda-build --expose conda --expose conda-verify --expose conda-build --expose anaconda anaconda-client conda-build conda-verify > /dev/null;
57
+ pixi global install -q --no-progress --expose hatch hatch > /dev/null;
58
+ hatch build; hatch publish; hatch build -t conda
59
+ """
55
60
 
56
61
  [tool.pixi.environments]
57
62
  dev = ["dev"]
58
63
 
59
64
  [build-system]
60
- requires = ["hatchling>=1.26.1", "hatch-vcs>=0.5.0"]
65
+ requires = ["hatchling>=1.26.1", "hatch-vcs>=0.5.0", "hatch-conda-build>=0.1.2"]
61
66
  build-backend = "hatchling.build"
62
67
 
63
68
  [tool.hatch.version]