rcsb-embedding-model 0.0.2__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rcsb-embedding-model might be problematic. Click here for more details.

@@ -29,10 +29,7 @@ class RcsbStructureEmbedding:
29
29
  def load_residue_embedding(self, device=None):
30
30
  if not device:
31
31
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32
- self.__residue_embedding = ESM3.from_pretrained(
33
- ESM3_OPEN_SMALL,
34
- device
35
- )
32
+ self.__residue_embedding = _load_res_model(device)
36
33
 
37
34
  def load_aggregator_embedding(self, device=None):
38
35
  if not device:
@@ -69,6 +66,25 @@ class RcsbStructureEmbedding:
69
66
  dim=0
70
67
  )
71
68
 
69
+ def sequence_embedding(self, sequence):
70
+ self.__check_residue_embedding()
71
+
72
+ if sequence.startswith(">"):
73
+ sequence = "".join(line.strip() for line in sequence.splitlines() if not line.startswith(">"))
74
+
75
+ if len(sequence) < RcsbStructureEmbedding.MIN_RES:
76
+ raise ValueError(f"Sequence too short for embedding (min {RcsbStructureEmbedding.MIN_RES} residues)")
77
+
78
+ protein = ESMProtein(sequence=sequence)
79
+ protein_tensor = self.__residue_embedding.encode(protein)
80
+
81
+ result = self.__residue_embedding.forward_and_sample(
82
+ protein_tensor,
83
+ SamplingConfig(return_per_residue_embeddings=True)
84
+ )
85
+
86
+ return result.per_residue_embedding
87
+
72
88
  def aggregator_embedding(self, residue_embedding):
73
89
  self.__check_aggregator_embedding()
74
90
  return self.__aggregator_embedding(residue_embedding)
@@ -146,3 +162,10 @@ def _load_model(model_path, device=None):
146
162
  aggregator_model.to(device)
147
163
  aggregator_model.eval()
148
164
  return aggregator_model
165
+
166
+
167
+ def _load_res_model(device=None):
168
+ return ESM3.from_pretrained(
169
+ ESM3_OPEN_SMALL,
170
+ device
171
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rcsb-embedding-model
3
- Version: 0.0.2
3
+ Version: 0.0.4
4
4
  Summary: Protein Embedding Model for Structure Search
5
5
  Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
6
6
  Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
@@ -11,7 +11,7 @@ Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.10
13
13
  Requires-Dist: esm>=3.2.0
14
- Requires-Dist: torch>=2.6.0
14
+ Requires-Dist: torch>=2.2.0
15
15
  Description-Content-Type: text/markdown
16
16
 
17
17
  # RCSB Embedding Model: A Deep Learning Approach for 3D Structure Embeddings
@@ -70,7 +70,7 @@ res_embedding = model.residue_embedding(
70
70
  )
71
71
  ```
72
72
 
73
- ### **Generating Protein Structure**
73
+ ### **Generating Protein Structure Embeddings**
74
74
  Protein 3D structure embedding can be calculated as:
75
75
 
76
76
  ```python
@@ -0,0 +1,8 @@
1
+ rcsb_embedding_model/__init__.py,sha256=r3gLdeBIXkQEQA_K6QcRPO-TtYuAQSutk6pXRUE_nas,120
2
+ rcsb_embedding_model/rcsb_structure_embedding.py,sha256=W_Os_xa_ZkYXR_KPjvwfzW8raMIzjsMCdhfKktbVY3s,5983
3
+ rcsb_embedding_model/model/layers.py,sha256=lhKaWC4gTS_T5lHOP0mgnnP8nKTPEOm4MrjhESA4hE8,743
4
+ rcsb_embedding_model/model/residue_embedding_aggregator.py,sha256=k3UW63Ax8DtjCMdD3O5xNxtyAu28l2n3-Ab6nS0atm0,1967
5
+ rcsb_embedding_model-0.0.4.dist-info/METADATA,sha256=4wBBZfkyq5FY6PgMeAzBB4Aka2k5eheJpL6uXVn9DXA,5381
6
+ rcsb_embedding_model-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
7
+ rcsb_embedding_model-0.0.4.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
8
+ rcsb_embedding_model-0.0.4.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- rcsb_embedding_model/__init__.py,sha256=r3gLdeBIXkQEQA_K6QcRPO-TtYuAQSutk6pXRUE_nas,120
2
- rcsb_embedding_model/rcsb_structure_embedding.py,sha256=quxl9SnLPIYJE7yn7kXgJvCKR-W9kXIJ5fBFTTLV1YQ,5195
3
- rcsb_embedding_model/model/layers.py,sha256=lhKaWC4gTS_T5lHOP0mgnnP8nKTPEOm4MrjhESA4hE8,743
4
- rcsb_embedding_model/model/residue_embedding_aggregator.py,sha256=k3UW63Ax8DtjCMdD3O5xNxtyAu28l2n3-Ab6nS0atm0,1967
5
- rcsb_embedding_model-0.0.2.dist-info/METADATA,sha256=O5JNOufpbfTUJznAWaWMwwicerku0ih7TjWfzAc3OJo,5370
6
- rcsb_embedding_model-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
7
- rcsb_embedding_model-0.0.2.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
8
- rcsb_embedding_model-0.0.2.dist-info/RECORD,,