openprotein-python 0.8.2__1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. openprotein/__init__.py +164 -0
  2. openprotein/_version.py +48 -0
  3. openprotein/align/__init__.py +8 -0
  4. openprotein/align/align.py +395 -0
  5. openprotein/align/api.py +428 -0
  6. openprotein/align/future.py +55 -0
  7. openprotein/align/msa.py +129 -0
  8. openprotein/align/schemas.py +165 -0
  9. openprotein/base.py +181 -0
  10. openprotein/chains.py +88 -0
  11. openprotein/common/__init__.py +5 -0
  12. openprotein/common/features.py +7 -0
  13. openprotein/common/model_metadata.py +33 -0
  14. openprotein/common/reduction.py +8 -0
  15. openprotein/config.py +9 -0
  16. openprotein/csv.py +31 -0
  17. openprotein/data/__init__.py +9 -0
  18. openprotein/data/api.py +218 -0
  19. openprotein/data/assaydataset.py +178 -0
  20. openprotein/data/data.py +93 -0
  21. openprotein/data/schemas.py +27 -0
  22. openprotein/design/__init__.py +16 -0
  23. openprotein/design/api.py +259 -0
  24. openprotein/design/design.py +125 -0
  25. openprotein/design/future.py +146 -0
  26. openprotein/design/schemas.py +607 -0
  27. openprotein/embeddings/__init__.py +27 -0
  28. openprotein/embeddings/api.py +619 -0
  29. openprotein/embeddings/embeddings.py +151 -0
  30. openprotein/embeddings/esm.py +33 -0
  31. openprotein/embeddings/future.py +146 -0
  32. openprotein/embeddings/models.py +421 -0
  33. openprotein/embeddings/openprotein.py +21 -0
  34. openprotein/embeddings/poet.py +446 -0
  35. openprotein/embeddings/poet2.py +505 -0
  36. openprotein/embeddings/schemas.py +78 -0
  37. openprotein/errors.py +76 -0
  38. openprotein/fasta.py +92 -0
  39. openprotein/fold/__init__.py +21 -0
  40. openprotein/fold/alphafold2.py +131 -0
  41. openprotein/fold/api.py +287 -0
  42. openprotein/fold/boltz.py +691 -0
  43. openprotein/fold/esmfold.py +54 -0
  44. openprotein/fold/fold.py +107 -0
  45. openprotein/fold/future.py +509 -0
  46. openprotein/fold/models.py +139 -0
  47. openprotein/fold/schemas.py +39 -0
  48. openprotein/jobs/__init__.py +9 -0
  49. openprotein/jobs/api.py +71 -0
  50. openprotein/jobs/futures.py +746 -0
  51. openprotein/jobs/jobs.py +69 -0
  52. openprotein/jobs/schemas.py +135 -0
  53. openprotein/models/__init__.py +4 -0
  54. openprotein/models/base.py +63 -0
  55. openprotein/models/foundation/rfdiffusion.py +283 -0
  56. openprotein/models/models.py +33 -0
  57. openprotein/predictor/__init__.py +25 -0
  58. openprotein/predictor/api.py +384 -0
  59. openprotein/predictor/models.py +374 -0
  60. openprotein/predictor/prediction.py +79 -0
  61. openprotein/predictor/predictor.py +242 -0
  62. openprotein/predictor/schemas.py +113 -0
  63. openprotein/predictor/validate.py +40 -0
  64. openprotein/prompt/__init__.py +9 -0
  65. openprotein/prompt/api.py +505 -0
  66. openprotein/prompt/models.py +142 -0
  67. openprotein/prompt/prompt.py +130 -0
  68. openprotein/prompt/schemas.py +49 -0
  69. openprotein/protein.py +587 -0
  70. openprotein/svd/__init__.py +9 -0
  71. openprotein/svd/api.py +206 -0
  72. openprotein/svd/models.py +288 -0
  73. openprotein/svd/schemas.py +31 -0
  74. openprotein/svd/svd.py +134 -0
  75. openprotein/umap/__init__.py +9 -0
  76. openprotein/umap/api.py +259 -0
  77. openprotein/umap/models.py +211 -0
  78. openprotein/umap/schemas.py +35 -0
  79. openprotein/umap/umap.py +175 -0
  80. openprotein/utils/uuid.py +29 -0
  81. openprotein_python-0.8.2.dist-info/METADATA +176 -0
  82. openprotein_python-0.8.2.dist-info/RECORD +84 -0
  83. openprotein_python-0.8.2.dist-info/WHEEL +4 -0
  84. openprotein_python-0.8.2.dist-info/licenses/LICENSE.txt +30 -0
@@ -0,0 +1,218 @@
1
+ from pydantic import TypeAdapter
2
+
3
+ from openprotein.base import APISession
4
+ from openprotein.errors import APIError
5
+
6
+ from .schemas import AssayDataPage, AssayMetadata
7
+
8
+
9
def list_models(session: APISession, assay_id: str) -> list:
    """
    List models associated with an assay.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    assay_id : str
        ID of the assay whose models should be listed.

    Returns
    -------
    list
        List of models (the decoded JSON body of the response).

    Raises
    ------
    APIError
        If the server does not answer with HTTP 200.
    """
    endpoint = "v1/models"
    response = session.get(endpoint, params={"assay_id": assay_id})
    # Check the status like every other endpoint wrapper in this module, so an
    # error payload is never returned to the caller as if it were the model list.
    if response.status_code != 200:
        raise APIError(f"Unable to list models: {response.text}")
    return response.json()
28
+
29
+
30
def assaydata_post(
    session: APISession,
    assay_file,
    assay_name: str,
    assay_description: str | None = "",
) -> AssayMetadata:
    """
    Upload a new assay dataset.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    assay_file
        Payload for the ``assay_data`` upload field. It is handed to
        ``session.post(files=...)`` unchanged.
    assay_name : str
        Name of the assay.
    assay_description : str, optional
        Description of the assay, by default ''.

    Returns
    -------
    AssayMetadata
        Metadata of the posted assay data.

    Raises
    ------
    APIError
        If the server does not answer with HTTP 200.
    """
    form_fields = {"assay_name": assay_name, "assay_description": assay_description}
    response = session.post(
        "v1/assaydata",
        files={"assay_data": assay_file},
        data=form_fields,
    )
    # Guard clause: bail out first, then parse the successful payload.
    if response.status_code != 200:
        raise APIError(f"Unable to post assay data: {response.text}")
    return TypeAdapter(AssayMetadata).validate_python(response.json())
65
+
66
+
67
def assaydata_list(session: APISession) -> list[AssayMetadata]:
    """
    Fetch the metadata of every assay dataset.

    Parameters
    ----------
    session : APISession
        Session object for API communication.

    Returns
    -------
    list[AssayMetadata]
        Metadata records, one per assay dataset returned by the server.

    Raises
    ------
    APIError
        If the server does not answer with HTTP 200.
    """
    response = session.get("v1/assaydata")
    if response.status_code != 200:
        raise APIError(f"Unable to list assay data: {response.text}")
    adapter = TypeAdapter(list[AssayMetadata])
    return adapter.validate_python(response.json())
92
+
93
+
94
def get_assay_metadata(session: APISession, assay_id: str) -> AssayMetadata:
    """
    Retrieve metadata for a specified assay.

    Parameters
    ----------
    session : APISession
        The current API session for communication with the server.
    assay_id : str
        The identifier of the assay for which metadata is to be retrieved.

    Returns
    -------
    AssayMetadata
        The metadata for the specified assay.

    Raises
    ------
    APIError
        If the server does not answer with HTTP 200, or if it answers with an
        empty body (no assay with the given ``assay_id``).
    """
    endpoint = "v1/assaydata/metadata"
    response = session.get(endpoint, params={"assay_id": assay_id})
    if response.status_code != 200:
        # The previous message ("Unable to list assay data") was copied from the
        # list endpoint and pointed at the wrong operation.
        raise APIError(
            f"Unable to get assay metadata for id={assay_id}: {response.text}"
        )
    data = response.json()
    # An empty payload on a 200 response is treated as "no such assay".
    if not data:
        raise APIError(f"No assay with id={assay_id} found")
    return TypeAdapter(AssayMetadata).validate_python(data)
126
+
127
+
128
def assaydata_put(
    session: APISession,
    assay_id: str,
    assay_name: str | None = None,
    assay_description: str | None = None,
) -> AssayMetadata:
    """
    Update the name and/or description of an assay.

    Only the fields that are not ``None`` are sent to the server.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    assay_id : str
        Id of the assay.
    assay_name : str, optional
        New name of the assay, by default None.
    assay_description : str, optional
        New description of the assay, by default None.

    Returns
    -------
    AssayMetadata
        Updated metadata of the assay.

    Raises
    ------
    APIError
        If the server does not answer with HTTP 200.
    """
    candidates = {
        "assay_name": assay_name,
        "assay_description": assay_description,
    }
    # Drop the fields the caller left unset so they are not sent at all.
    payload = {key: value for key, value in candidates.items() if value is not None}

    response = session.put(f"v1/assaydata/{assay_id}", data=payload)
    if response.status_code != 200:
        raise APIError(f"Unable to update assay data: {response.text}")
    return TypeAdapter(AssayMetadata).validate_python(response.json())
170
+
171
+
172
def assaydata_page_get(
    session: APISession,
    assay_id: str,
    measurement_name: str | None = None,
    page_offset: int = 0,
    page_size: int = 1000,
    data_format: str = "wide",
) -> AssayDataPage:
    """
    Fetch one page of rows from an assay dataset.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    assay_id : str
        Id of the assay.
    measurement_name : str, optional
        Name of the measurement, by default None. Only sent as a query
        parameter when it is not None.
    page_offset : int, optional
        Offset of the page, by default 0.
    page_size : int, optional
        Size of the page, by default 1000.
    data_format : str, optional
        Value sent as the ``format`` query parameter, by default 'wide'.

    Returns
    -------
    AssayDataPage
        Page of assay data.

    Raises
    ------
    APIError
        If the server does not answer with HTTP 200.
    """
    query = {
        "page_offset": page_offset,
        "page_size": page_size,
        "format": data_format,
    }
    if measurement_name is not None:
        query["measurement_name"] = measurement_name

    response = session.get(f"v1/assaydata/{assay_id}", params=query)
    if response.status_code != 200:
        raise APIError(f"Unable to get assay data page: {response.text}")
    return TypeAdapter(AssayDataPage).validate_python(response.json())
@@ -0,0 +1,178 @@
1
+ import pandas as pd
2
+
3
+ from openprotein import config
4
+ from openprotein.base import APISession
5
+ from openprotein.errors import APIError
6
+
7
+ from . import api
8
+ from .schemas import AssayDataPage, AssayMetadata
9
+
10
+
11
class AssayDataset:
    """Assay dataset which contains your sequences and measurements which can be used for training predictors."""

    def __init__(self, session: APISession, metadata: AssayMetadata):
        """
        Initialize the AssayDataset.

        Parameters
        ----------
        session : APISession
            Session object for API communication.
        metadata : AssayMetadata
            Metadata object of the assay data.
        """
        self.session = session
        self.metadata = metadata
        # Page requests are capped at 1000 rows, whatever the configured size is.
        self.page_size = min(config.BASE_PAGE_SIZE, 1000)

    def __str__(self) -> str:
        return str(self.metadata)

    def __repr__(self) -> str:
        return repr(self.metadata)

    @property
    def id(self):
        """Assay ID taken from the metadata."""
        return self.metadata.assay_id

    @property
    def name(self):
        """Assay name taken from the metadata."""
        return self.metadata.assay_name

    @property
    def description(self):
        """Assay description taken from the metadata."""
        return self.metadata.assay_description

    @property
    def measurement_names(self):
        """Names of the measurement columns taken from the metadata."""
        return self.metadata.measurement_names

    @property
    def sequence_length(self):
        """Sequence length recorded in the metadata (may be None)."""
        return self.metadata.sequence_length

    def __len__(self):
        """Number of rows reported by the metadata."""
        return self.metadata.num_rows

    @property
    def shape(self):
        """Tuple ``(rows, columns)``: one sequence column plus one per measurement."""
        return (len(self), len(self.measurement_names) + 1)

    def list_models(self):
        """
        List models associated with the assay.

        Returns
        -------
        List
            List of models
        """
        return api.list_models(self.session, self.id)

    def update(
        self, assay_name: str | None = None, assay_description: str | None = None
    ) -> None:
        """
        Update the assay metadata.

        The metadata returned by the server replaces ``self.metadata``.

        Parameters
        ----------
        assay_name : str, optional
            New name of the assay, by default None.
        assay_description : str, optional
            New description of the assay, by default None.

        Returns
        -------
        None
        """
        self.metadata = api.assaydata_put(
            self.session,
            self.id,
            assay_name=assay_name,
            assay_description=assay_description,
        )

    @staticmethod
    def _rows_from_page(page) -> list:
        """Turn each entry of ``page.assaydata`` into ``[sequence, *measurements]``."""
        return [[row.mut_sequence] + row.measurement_values for row in page.assaydata]

    def _frame_from_rows(self, rows) -> pd.DataFrame:
        """Build a DataFrame with a ``sequence`` column plus one per measurement."""
        return pd.DataFrame(rows, columns=["sequence"] + self.measurement_names)  # type: ignore

    def _get_all(self, verbose: bool = False) -> pd.DataFrame:
        """
        Get all assay data.

        Pages are fetched until one comes back shorter than the page size. If a
        page request fails with ``APIError``, the rows fetched so far are
        returned (best effort); when nothing was fetched at all, an empty
        DataFrame with the expected columns is returned.

        Parameters
        ----------
        verbose : bool, optional
            If True, print the error that interrupted the download.

        Returns
        -------
        pd.DataFrame
            Dataframe containing all assay data that could be retrieved.
        """
        step = self.page_size

        frames = []
        num_returned = step
        offset = 0

        while num_returned >= step:
            try:
                frame = self.get_slice(offset, offset + step)
            except APIError as exc:
                if verbose:
                    print(f"Failed to get results: {exc}")
                break
            frames.append(frame)
            num_returned = len(frame)
            offset += num_returned

        if not frames:
            # pd.concat([]) raises ValueError; keep the best-effort contract instead.
            return self._frame_from_rows([])
        return pd.concat(frames)

    def get_first(self) -> pd.DataFrame:
        """
        Get head slice of assay data.

        Returns
        -------
        pd.DataFrame
            Dataframe built from a single-row page requested at offset 0.
        """
        entries = api.assaydata_page_get(
            self.session, self.id, page_offset=0, page_size=1
        )
        return self._frame_from_rows(self._rows_from_page(entries))

    def get_slice(self, start: int, end: int) -> pd.DataFrame:
        """
        Get a slice of assay data.

        Parameters
        ----------
        start : int
            Start index of the slice.
        end : int
            End index of the slice (exclusive).

        Returns
        -------
        pd.DataFrame
            Dataframe containing the slice of assay data.
        """
        rows = []
        page_size = self.page_size
        for offset in range(start, end, page_size):
            # The last page might be smaller than the page size.
            current_page_size = min(page_size, end - offset)
            entries = api.assaydata_page_get(
                self.session, self.id, page_offset=offset, page_size=current_page_size
            )
            rows.extend(self._rows_from_page(entries))
        return self._frame_from_rows(rows)
@@ -0,0 +1,93 @@
1
+ import io
2
+
3
+ import pandas as pd
4
+
5
+ from openprotein.base import APISession
6
+
7
+ from . import api
8
+ from .assaydataset import AssayDataset
9
+
10
+
11
class DataAPI:
    """API interface for calling AssayData endpoints"""

    def __init__(self, session: APISession):
        """Store the session used for all subsequent requests."""
        self.session = session

    def list(self) -> list[AssayDataset]:
        """
        List all assay datasets.

        Returns
        -------
        List[AssayDataset]
            List of all assay datasets.
        """
        return [
            AssayDataset(self.session, metadata)
            for metadata in api.assaydata_list(self.session)
        ]

    def create(
        self, table: pd.DataFrame, name: str, description: str | None = None
    ) -> AssayDataset:
        """
        Create a new assay dataset.

        The table is serialized to CSV in memory (without the index) and
        uploaded. ``sequence_length`` on the returned metadata is then set
        from the first entry of the ``sequence`` column.

        Parameters
        ----------
        table : pd.DataFrame
            DataFrame containing the assay data. Must have a ``sequence`` column.
        name : str
            Name of the assay dataset.
        description : str, optional
            Description of the assay dataset, by default None.

        Returns
        -------
        AssayDataset
            Created assay dataset.

        Raises
        ------
        KeyError
            If ``table`` has no ``sequence`` column. Raised before anything
            is uploaded.
        """
        # Look the column up first so a malformed table fails before the upload
        # rather than after the dataset has already been created remotely.
        sequences = table["sequence"]

        stream = io.BytesIO()
        table.to_csv(stream, index=False)
        stream.seek(0)
        metadata = api.assaydata_post(
            self.session, stream, name, assay_description=description
        )
        # With an empty table there is no first sequence to measure; leave the
        # server-provided value untouched instead of raising IndexError.
        if len(sequences) > 0:
            metadata.sequence_length = len(sequences.values[0])
        return AssayDataset(self.session, metadata)

    def get(self, assay_id: str, verbose: bool = False) -> AssayDataset:
        """
        Get an assay dataset by its ID.

        Parameters
        ----------
        assay_id : str
            ID of the assay dataset.
        verbose : bool, optional
            Not used by this method; kept so existing callers keep working.

        Returns
        -------
        AssayDataset
            Assay dataset with the specified ID.

        Raises
        ------
        APIError
            Propagated from ``api.get_assay_metadata`` when the metadata
            cannot be retrieved.
        """
        return AssayDataset(
            session=self.session,
            metadata=api.get_assay_metadata(self.session, assay_id),
        )

    # Alias for ``get``.
    load_assay = get

    def __len__(self) -> int:
        """
        Get the number of assay datasets.

        Returns
        -------
        int
            Number of assay datasets.
        """
        return len(self.list())
@@ -0,0 +1,27 @@
1
+ from datetime import datetime
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
class AssayMetadata(BaseModel):
    """Pydantic model holding the metadata of one assay dataset."""

    assay_name: str
    assay_description: str
    assay_id: str
    original_filename: str
    created_date: datetime
    # NOTE(review): num_rows vs. num_entries is not explained in this file;
    # presumably rows of the table vs. individual measurement entries - confirm.
    num_rows: int
    num_entries: int
    # Names of the measurement columns.
    measurement_names: list[str]
    # Optional; defaults to None when not provided.
    sequence_length: int | None = None
16
+
17
+
18
class AssayDataRow(BaseModel):
    """Pydantic model for a single row of assay data."""

    # Sequence string for this row.
    mut_sequence: str
    # Measurement values for this row; individual entries may be None.
    # Presumably ordered like AssayMetadata.measurement_names - confirm.
    measurement_values: list[float | None]
21
+
22
+
23
class AssayDataPage(BaseModel):
    """Pydantic model for one page of assay data together with its metadata."""

    # Metadata of the assay the page belongs to.
    assaymetadata: AssayMetadata
    # NOTE(review): presumably echo the paging parameters of the request - confirm.
    page_size: int
    page_offset: int
    # Rows contained in this page.
    assaydata: list[AssayDataRow]
@@ -0,0 +1,16 @@
1
+ """
2
+ Design module for designing protein sequences on OpenProtein.
3
+
4
+ isort:skip_file
5
+ """
6
+
7
+ from .schemas import (
8
+ Criteria,
9
+ Criterion,
10
+ ModelCriterion,
11
+ DesignConstraint,
12
+ Subcriterion,
13
+ n_mutations,
14
+ )
15
+ from .future import DesignFuture
16
+ from .design import DesignAPI