rcsb-embedding-model 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rcsb-embedding-model might be problematic. Click here for more details.
- rcsb_embedding_model/rcsb_structure_embedding.py +27 -4
- {rcsb_embedding_model-0.0.2.dist-info → rcsb_embedding_model-0.0.4.dist-info}/METADATA +3 -3
- rcsb_embedding_model-0.0.4.dist-info/RECORD +8 -0
- rcsb_embedding_model-0.0.2.dist-info/RECORD +0 -8
- {rcsb_embedding_model-0.0.2.dist-info → rcsb_embedding_model-0.0.4.dist-info}/WHEEL +0 -0
- {rcsb_embedding_model-0.0.2.dist-info → rcsb_embedding_model-0.0.4.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -29,10 +29,7 @@ class RcsbStructureEmbedding:
|
|
|
29
29
|
def load_residue_embedding(self, device=None):
|
|
30
30
|
if not device:
|
|
31
31
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
32
|
-
self.__residue_embedding = ESM3.from_pretrained(
|
|
33
|
-
ESM3_OPEN_SMALL,
|
|
34
|
-
device
|
|
35
|
-
)
|
|
32
|
+
self.__residue_embedding = _load_res_model(device)
|
|
36
33
|
|
|
37
34
|
def load_aggregator_embedding(self, device=None):
|
|
38
35
|
if not device:
|
|
@@ -69,6 +66,25 @@ class RcsbStructureEmbedding:
|
|
|
69
66
|
dim=0
|
|
70
67
|
)
|
|
71
68
|
|
|
69
|
+
def sequence_embedding(self, sequence):
|
|
70
|
+
self.__check_residue_embedding()
|
|
71
|
+
|
|
72
|
+
if sequence.startswith(">"):
|
|
73
|
+
sequence = "".join(line.strip() for line in sequence.splitlines() if not line.startswith(">"))
|
|
74
|
+
|
|
75
|
+
if len(sequence) < RcsbStructureEmbedding.MIN_RES:
|
|
76
|
+
raise ValueError(f"Sequence too short for embedding (min {RcsbStructureEmbedding.MIN_RES} residues)")
|
|
77
|
+
|
|
78
|
+
protein = ESMProtein(sequence=sequence)
|
|
79
|
+
protein_tensor = self.__residue_embedding.encode(protein)
|
|
80
|
+
|
|
81
|
+
result = self.__residue_embedding.forward_and_sample(
|
|
82
|
+
protein_tensor,
|
|
83
|
+
SamplingConfig(return_per_residue_embeddings=True)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
return result.per_residue_embedding
|
|
87
|
+
|
|
72
88
|
def aggregator_embedding(self, residue_embedding):
|
|
73
89
|
self.__check_aggregator_embedding()
|
|
74
90
|
return self.__aggregator_embedding(residue_embedding)
|
|
@@ -146,3 +162,10 @@ def _load_model(model_path, device=None):
|
|
|
146
162
|
aggregator_model.to(device)
|
|
147
163
|
aggregator_model.eval()
|
|
148
164
|
return aggregator_model
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _load_res_model(device=None):
|
|
168
|
+
return ESM3.from_pretrained(
|
|
169
|
+
ESM3_OPEN_SMALL,
|
|
170
|
+
device
|
|
171
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb-embedding-model
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: Protein Embedding Model for Structure Search
|
|
5
5
|
Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
|
|
6
6
|
Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
|
|
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Requires-Dist: esm>=3.2.0
|
|
14
|
-
Requires-Dist: torch>=2.
|
|
14
|
+
Requires-Dist: torch>=2.2.0
|
|
15
15
|
Description-Content-Type: text/markdown
|
|
16
16
|
|
|
17
17
|
# RCSB Embedding Model: A Deep Learning Approach for 3D Structure Embeddings
|
|
@@ -70,7 +70,7 @@ res_embedding = model.residue_embedding(
|
|
|
70
70
|
)
|
|
71
71
|
```
|
|
72
72
|
|
|
73
|
-
### **Generating Protein Structure**
|
|
73
|
+
### **Generating Protein Structure Embeddings**
|
|
74
74
|
Protein 3D structure embedding can be calculated as:
|
|
75
75
|
|
|
76
76
|
```python
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
rcsb_embedding_model/__init__.py,sha256=r3gLdeBIXkQEQA_K6QcRPO-TtYuAQSutk6pXRUE_nas,120
|
|
2
|
+
rcsb_embedding_model/rcsb_structure_embedding.py,sha256=W_Os_xa_ZkYXR_KPjvwfzW8raMIzjsMCdhfKktbVY3s,5983
|
|
3
|
+
rcsb_embedding_model/model/layers.py,sha256=lhKaWC4gTS_T5lHOP0mgnnP8nKTPEOm4MrjhESA4hE8,743
|
|
4
|
+
rcsb_embedding_model/model/residue_embedding_aggregator.py,sha256=k3UW63Ax8DtjCMdD3O5xNxtyAu28l2n3-Ab6nS0atm0,1967
|
|
5
|
+
rcsb_embedding_model-0.0.4.dist-info/METADATA,sha256=4wBBZfkyq5FY6PgMeAzBB4Aka2k5eheJpL6uXVn9DXA,5381
|
|
6
|
+
rcsb_embedding_model-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
7
|
+
rcsb_embedding_model-0.0.4.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
|
|
8
|
+
rcsb_embedding_model-0.0.4.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
rcsb_embedding_model/__init__.py,sha256=r3gLdeBIXkQEQA_K6QcRPO-TtYuAQSutk6pXRUE_nas,120
|
|
2
|
-
rcsb_embedding_model/rcsb_structure_embedding.py,sha256=quxl9SnLPIYJE7yn7kXgJvCKR-W9kXIJ5fBFTTLV1YQ,5195
|
|
3
|
-
rcsb_embedding_model/model/layers.py,sha256=lhKaWC4gTS_T5lHOP0mgnnP8nKTPEOm4MrjhESA4hE8,743
|
|
4
|
-
rcsb_embedding_model/model/residue_embedding_aggregator.py,sha256=k3UW63Ax8DtjCMdD3O5xNxtyAu28l2n3-Ab6nS0atm0,1967
|
|
5
|
-
rcsb_embedding_model-0.0.2.dist-info/METADATA,sha256=O5JNOufpbfTUJznAWaWMwwicerku0ih7TjWfzAc3OJo,5370
|
|
6
|
-
rcsb_embedding_model-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
7
|
-
rcsb_embedding_model-0.0.2.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
|
|
8
|
-
rcsb_embedding_model-0.0.2.dist-info/RECORD,,
|
|
File without changes
|
{rcsb_embedding_model-0.0.2.dist-info → rcsb_embedding_model-0.0.4.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|