rcsb-embedding-model 0.0.27__tar.gz → 0.0.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rcsb-embedding-model might be problematic. Click here for more details.
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/PKG-INFO +1 -1
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/pyproject.toml +1 -1
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/dataset/esm_prot_from_chain.py +5 -5
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/dataset/esm_prot_from_structure.py +4 -2
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py +7 -6
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py +4 -5
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/dataset/residue_embedding_from_structure.py +7 -5
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/dataset/residue_embedding_from_tensor_file.py +2 -2
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/utils/data.py +18 -3
- rcsb_embedding_model-0.0.29/tests/resources/src_stream/instance.csv +2 -0
- rcsb_embedding_model-0.0.29/tests/test_remote_inference.py +108 -0
- rcsb_embedding_model-0.0.27/tests/test_remote_inference.py +0 -48
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/.dockerignore +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/.github/workflows/_workflow-docker.yaml +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/.github/workflows/publish.yaml +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/.gitignore +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/Dockerfile +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/LICENSE.md +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/README.md +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/assets/embedding-model-architecture.png +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/examples/esm_embeddings.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/__init__.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/cli/args_utils.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/cli/inference.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/inference/assembly_inferece.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/inference/chain_inference.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/inference/esm_inference.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/inference/structure_inference.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/model/layers.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/model/residue_embedding_aggregator.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/modules/chain_module.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/modules/esm_module.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/modules/structure_module.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/rcsb_structure_embedding.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/types/api_types.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/utils/model.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/utils/structure_parser.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/utils/structure_provider.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/writer/batch_writer.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/1acb.A.pt +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/1acb.B.pt +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/2uzi.A.pt +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/2uzi.B.pt +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/2uzi.C.pt +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/pdb/1acb.cif +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/pdb/2uzi.cif +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/test_embedding_model.py +0 -0
- {rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/test_inference.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb-embedding-model
|
|
3
|
-
Version: 0.0.27
|
|
3
|
+
Version: 0.0.29
|
|
4
4
|
Summary: Protein Embedding Model for Structure Search
|
|
5
5
|
Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
|
|
6
6
|
Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
|
|
@@ -59,10 +59,10 @@ class EsmProtFromChain(Dataset):
|
|
|
59
59
|
return len(self.data)
|
|
60
60
|
|
|
61
61
|
def __getitem__(self, idx):
|
|
62
|
-
src_name = self.data.
|
|
63
|
-
src_structure = self.data.
|
|
64
|
-
chain_id = self.data.
|
|
65
|
-
item_name = self.data.
|
|
62
|
+
src_name = self.data.iloc[idx][EsmProtFromChain.STREAM_NAME_ATTR]
|
|
63
|
+
src_structure = self.data.iloc[idx][EsmProtFromChain.STREAM_ATTR]
|
|
64
|
+
chain_id = self.data.iloc[idx][EsmProtFromChain.CH_ATTR]
|
|
65
|
+
item_name = self.data.iloc[idx][EsmProtFromChain.ITEM_NAME_ATTR]
|
|
66
66
|
structure = self.__structure_provider.get_structure(
|
|
67
67
|
src_name=src_name,
|
|
68
68
|
src_structure=stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure,
|
|
@@ -87,7 +87,7 @@ if __name__ == '__main__':
|
|
|
87
87
|
src_stream=args.file_list,
|
|
88
88
|
src_location=SrcLocation.file,
|
|
89
89
|
structure_location=StructureLocation.remote,
|
|
90
|
-
structure_format=StructureFormat.
|
|
90
|
+
structure_format=StructureFormat.bciff,
|
|
91
91
|
)
|
|
92
92
|
|
|
93
93
|
esm3 = ESM3.from_pretrained(
|
|
@@ -40,7 +40,7 @@ class EsmProtFromStructure(EsmProtFromChain):
|
|
|
40
40
|
|
|
41
41
|
def __get_chains(self, src_stream):
|
|
42
42
|
chains = []
|
|
43
|
-
|
|
43
|
+
data = pd.DataFrame(
|
|
44
44
|
src_stream,
|
|
45
45
|
dtype=str,
|
|
46
46
|
columns=EsmProtFromStructure.COLUMNS
|
|
@@ -50,7 +50,9 @@ class EsmProtFromStructure(EsmProtFromChain):
|
|
|
50
50
|
index_col=None,
|
|
51
51
|
dtype=str,
|
|
52
52
|
names=EsmProtFromStructure.COLUMNS
|
|
53
|
-
)
|
|
53
|
+
)
|
|
54
|
+
data = data.sort_values(by=data.columns[0])
|
|
55
|
+
for idx, row in data.iterrows():
|
|
54
56
|
src_name = row[EsmProtFromStructure.STREAM_NAME_ATTR]
|
|
55
57
|
src_structure = row[EsmProtFromStructure.STREAM_ATTR]
|
|
56
58
|
item_name = row[EsmProtFromStructure.ITEM_NAME_ATTR]
|
|
@@ -33,7 +33,6 @@ class ResidueAssemblyDatasetFromStructure(ResidueAssemblyEmbeddingFromTensorFile
|
|
|
33
33
|
self.structure_format = structure_format
|
|
34
34
|
self.min_res_n = min_res_n
|
|
35
35
|
self.max_res_n = max_res_n
|
|
36
|
-
self.__structure_provider = structure_provider
|
|
37
36
|
super().__init__(
|
|
38
37
|
src_stream=self.__get_assemblies(src_stream),
|
|
39
38
|
res_embedding_location=res_embedding_location,
|
|
@@ -47,17 +46,19 @@ class ResidueAssemblyDatasetFromStructure(ResidueAssemblyEmbeddingFromTensorFile
|
|
|
47
46
|
|
|
48
47
|
def __get_assemblies(self, src_stream):
|
|
49
48
|
assemblies = []
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
data = pd.DataFrame(
|
|
50
|
+
src_stream,
|
|
51
|
+
dtype=str,
|
|
52
|
+
columns=ResidueAssemblyDatasetFromStructure.COLUMNS
|
|
54
53
|
) if self.src_location == SrcLocation.stream else pd.read_csv(
|
|
55
54
|
src_stream,
|
|
56
55
|
header=None,
|
|
57
56
|
index_col=None,
|
|
58
57
|
dtype=str,
|
|
59
58
|
names=ResidueAssemblyDatasetFromStructure.COLUMNS
|
|
60
|
-
)
|
|
59
|
+
)
|
|
60
|
+
data = data.sort_values(by=data.columns[0])
|
|
61
|
+
for idx, row in data.iterrows():
|
|
61
62
|
src_name = row[ResidueAssemblyDatasetFromStructure.STREAM_NAME_ATTR]
|
|
62
63
|
src_structure = row[ResidueAssemblyDatasetFromStructure.STREAM_ATTR]
|
|
63
64
|
structure = stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure
|
|
@@ -58,11 +58,10 @@ class ResidueAssemblyEmbeddingFromTensorFile(Dataset):
|
|
|
58
58
|
return len(self.data)
|
|
59
59
|
|
|
60
60
|
def __getitem__(self, idx):
|
|
61
|
-
src_name = self.data.
|
|
62
|
-
src_structure = self.data.
|
|
63
|
-
assembly_id = self.data.
|
|
64
|
-
item_name = self.data.
|
|
65
|
-
|
|
61
|
+
src_name = self.data.iloc[idx][ResidueAssemblyEmbeddingFromTensorFile.STREAM_NAME_ATTR]
|
|
62
|
+
src_structure = self.data.iloc[idx][ResidueAssemblyEmbeddingFromTensorFile.STREAM_ATTR]
|
|
63
|
+
assembly_id = self.data.iloc[idx][ResidueAssemblyEmbeddingFromTensorFile.ASSEMBLY_ATTR]
|
|
64
|
+
item_name = self.data.iloc[idx][ResidueAssemblyEmbeddingFromTensorFile.ITEM_NAME_ATTR]
|
|
66
65
|
structure = self.__structure_provider.get_structure(
|
|
67
66
|
src_name=src_name,
|
|
68
67
|
src_structure=stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure,
|
|
@@ -42,17 +42,19 @@ class ResidueEmbeddingFromStructure(ResidueEmbeddingFromTensorFile):
|
|
|
42
42
|
|
|
43
43
|
def __get_chains(self, src_stream):
|
|
44
44
|
chains = []
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
data = pd.DataFrame(
|
|
46
|
+
src_stream,
|
|
47
|
+
dtype=str,
|
|
48
|
+
columns=ResidueEmbeddingFromStructure.COLUMNS
|
|
49
49
|
) if self.src_location == SrcLocation.stream else pd.read_csv(
|
|
50
50
|
src_stream,
|
|
51
51
|
header=None,
|
|
52
52
|
index_col=None,
|
|
53
53
|
dtype=str,
|
|
54
54
|
names=ResidueEmbeddingFromStructure.COLUMNS
|
|
55
|
-
)
|
|
55
|
+
)
|
|
56
|
+
data = data.sort_values(by=data.columns[0])
|
|
57
|
+
for idx, row in data.iterrows():
|
|
56
58
|
src_name = row[ResidueEmbeddingFromStructure.STREAM_NAME_ATTR]
|
|
57
59
|
src_structure = row[ResidueEmbeddingFromStructure.STREAM_ATTR]
|
|
58
60
|
item_name = row[ResidueEmbeddingFromStructure.ITEM_NAME_ATTR]
|
|
@@ -39,6 +39,6 @@ class ResidueEmbeddingFromTensorFile(Dataset):
|
|
|
39
39
|
return len(self.data)
|
|
40
40
|
|
|
41
41
|
def __getitem__(self, idx):
|
|
42
|
-
embedding_src = self.data.
|
|
43
|
-
item_name = self.data.
|
|
42
|
+
embedding_src = self.data.iloc[idx][ResidueEmbeddingFromTensorFile.FILE_ATTR]
|
|
43
|
+
item_name = self.data.iloc[idx][ResidueEmbeddingFromTensorFile.ITEM_NAME_ATTR]
|
|
44
44
|
return torch.load(embedding_src, map_location=torch.device('cpu')), item_name
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/utils/data.py
RENAMED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from io import StringIO
|
|
3
|
-
|
|
4
2
|
import requests
|
|
3
|
+
import gzip
|
|
4
|
+
from io import StringIO, BytesIO
|
|
5
|
+
|
|
5
6
|
import torch
|
|
6
7
|
|
|
7
8
|
|
|
@@ -40,10 +41,24 @@ def stringio_from_url(url):
|
|
|
40
41
|
try:
|
|
41
42
|
response = requests.get(url)
|
|
42
43
|
response.raise_for_status()
|
|
43
|
-
|
|
44
|
+
data = response.content
|
|
45
|
+
if url.endswith('.bcif.gz'):
|
|
46
|
+
with gzip.GzipFile(fileobj=BytesIO(data), mode='rb') as gz:
|
|
47
|
+
decompressed_data = gz.read()
|
|
48
|
+
return BytesIO(decompressed_data)
|
|
49
|
+
if url.endswith('.gz'):
|
|
50
|
+
compressed = BytesIO(data)
|
|
51
|
+
with gzip.open(compressed, 'rt') as f:
|
|
52
|
+
return StringIO(f.read())
|
|
53
|
+
else:
|
|
54
|
+
return StringIO(response.text)
|
|
44
55
|
except requests.exceptions.RequestException as e:
|
|
45
56
|
print(f"Error fetching URL: {e}")
|
|
46
57
|
return None
|
|
58
|
+
except (OSError, gzip.BadGzipFile) as e:
|
|
59
|
+
print(f"Error decompressing gzip file: {e}")
|
|
60
|
+
return None
|
|
61
|
+
|
|
47
62
|
|
|
48
63
|
|
|
49
64
|
def concatenate_tensors(file_list, max_residues, dim=0):
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import os.path
|
|
2
|
+
import unittest
|
|
3
|
+
|
|
4
|
+
from rcsb_embedding_model.types.api_types import SrcLocation, SrcProteinFrom, StructureLocation, StructureFormat, \
|
|
5
|
+
Accelerator, SrcAssemblyFrom
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestRemoteInference(unittest.TestCase):
|
|
9
|
+
|
|
10
|
+
__test_path = os.path.dirname(__file__)
|
|
11
|
+
|
|
12
|
+
def test_esm_inference_from_structure(self):
|
|
13
|
+
from rcsb_embedding_model.inference.esm_inference import predict
|
|
14
|
+
|
|
15
|
+
esm_embeddings = predict(
|
|
16
|
+
src_stream=[
|
|
17
|
+
("1acb", "https://files.rcsb.org/download/1acb.cif", "1acb"),
|
|
18
|
+
("2uzi", "https://files.rcsb.org/download/2uzi.cif", "2uzi")
|
|
19
|
+
],
|
|
20
|
+
src_location=SrcLocation.stream,
|
|
21
|
+
src_from=SrcProteinFrom.structure,
|
|
22
|
+
structure_location=StructureLocation.remote,
|
|
23
|
+
structure_format=StructureFormat.mmcif,
|
|
24
|
+
accelerator=Accelerator.cpu
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
self.assertEqual(len(esm_embeddings), 5)
|
|
28
|
+
shapes = ((243, 1536), (65, 1536), (116, 1536), (106, 1536), (168, 1536))
|
|
29
|
+
for idx, shape in enumerate(shapes):
|
|
30
|
+
self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
|
|
31
|
+
|
|
32
|
+
def test_esm_inference_from_bcif_gz(self):
|
|
33
|
+
from rcsb_embedding_model.inference.esm_inference import predict
|
|
34
|
+
|
|
35
|
+
esm_embeddings = predict(
|
|
36
|
+
src_stream=[
|
|
37
|
+
("1acb", "https://models.rcsb.org/1acb.bcif.gz", "1acb"),
|
|
38
|
+
("2uzi", "https://models.rcsb.org/2uzi.bcif.gz", "2uzi")
|
|
39
|
+
],
|
|
40
|
+
src_location=SrcLocation.stream,
|
|
41
|
+
src_from=SrcProteinFrom.structure,
|
|
42
|
+
structure_location=StructureLocation.remote,
|
|
43
|
+
structure_format=StructureFormat.bciff,
|
|
44
|
+
accelerator=Accelerator.cpu
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
self.assertEqual(len(esm_embeddings), 5)
|
|
48
|
+
shapes = ((243, 1536), (65, 1536), (116, 1536), (106, 1536), (168, 1536))
|
|
49
|
+
for idx, shape in enumerate(shapes):
|
|
50
|
+
self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_esm_inference_from_csv_bcif_gz(self):
|
|
54
|
+
from rcsb_embedding_model.inference.esm_inference import predict
|
|
55
|
+
|
|
56
|
+
esm_embeddings = predict(
|
|
57
|
+
src_stream=f"{self.__test_path}/resources/src_stream/instance.csv",
|
|
58
|
+
src_location=SrcLocation.file,
|
|
59
|
+
src_from=SrcProteinFrom.chain,
|
|
60
|
+
structure_location=StructureLocation.remote,
|
|
61
|
+
structure_format=StructureFormat.bciff,
|
|
62
|
+
accelerator=Accelerator.cpu
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
self.assertEqual(len(esm_embeddings), 2)
|
|
66
|
+
shapes = ((243, 1536), (116, 1536))
|
|
67
|
+
for idx, shape in enumerate(shapes):
|
|
68
|
+
self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_esm_inference_from_cif_gz(self):
|
|
72
|
+
from rcsb_embedding_model.inference.esm_inference import predict
|
|
73
|
+
|
|
74
|
+
esm_embeddings = predict(
|
|
75
|
+
src_stream=[
|
|
76
|
+
("1acb", "https://files.rcsb.org/download/1acb.cif.gz", "1acb"),
|
|
77
|
+
("2uzi", "https://files.rcsb.org/download/2uzi.cif.gz", "2uzi")
|
|
78
|
+
],
|
|
79
|
+
src_location=SrcLocation.stream,
|
|
80
|
+
src_from=SrcProteinFrom.structure,
|
|
81
|
+
structure_location=StructureLocation.remote,
|
|
82
|
+
structure_format=StructureFormat.mmcif,
|
|
83
|
+
accelerator=Accelerator.cpu
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
self.assertEqual(len(esm_embeddings), 5)
|
|
87
|
+
shapes = ((243, 1536), (65, 1536), (116, 1536), (106, 1536), (168, 1536))
|
|
88
|
+
for idx, shape in enumerate(shapes):
|
|
89
|
+
self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_assembly_inference_from_structure(self):
|
|
93
|
+
from rcsb_embedding_model.inference.assembly_inferece import predict
|
|
94
|
+
|
|
95
|
+
assembly_embeddings = predict(
|
|
96
|
+
src_stream=[
|
|
97
|
+
("1acb", "https://files.rcsb.org/download/1acb.cif", "1acb"),
|
|
98
|
+
("2uzi", "https://files.rcsb.org/download/2uzi.cif", "2uzi")
|
|
99
|
+
],
|
|
100
|
+
res_embedding_location=f"{self.__test_path}/resources/embeddings",
|
|
101
|
+
src_location=SrcLocation.stream,
|
|
102
|
+
src_from=SrcAssemblyFrom.structure,
|
|
103
|
+
structure_location=StructureLocation.remote,
|
|
104
|
+
structure_format=StructureFormat.mmcif,
|
|
105
|
+
accelerator=Accelerator.cpu
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
self.assertEqual(len(assembly_embeddings), 2)
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
import os.path
|
|
2
|
-
import unittest
|
|
3
|
-
|
|
4
|
-
from rcsb_embedding_model.types.api_types import SrcLocation, SrcProteinFrom, StructureLocation, StructureFormat, \
|
|
5
|
-
Accelerator, SrcAssemblyFrom
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class TestRemoteInference(unittest.TestCase):
|
|
9
|
-
|
|
10
|
-
__test_path = os.path.dirname(__file__)
|
|
11
|
-
|
|
12
|
-
def test_esm_inference_from_structure(self):
|
|
13
|
-
from rcsb_embedding_model.inference.esm_inference import predict
|
|
14
|
-
|
|
15
|
-
esm_embeddings = predict(
|
|
16
|
-
src_stream=[
|
|
17
|
-
("1acb", "https://files.rcsb.org/download/1acb.cif", "1acb"),
|
|
18
|
-
("2uzi", "https://files.rcsb.org/download/2uzi.cif", "2uzi")
|
|
19
|
-
],
|
|
20
|
-
src_location=SrcLocation.stream,
|
|
21
|
-
src_from=SrcProteinFrom.structure,
|
|
22
|
-
structure_location=StructureLocation.remote,
|
|
23
|
-
structure_format=StructureFormat.mmcif,
|
|
24
|
-
accelerator=Accelerator.cpu
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
self.assertEqual(len(esm_embeddings), 5)
|
|
28
|
-
shapes = ((243, 1536), (65, 1536), (116, 1536), (106, 1536), (168, 1536))
|
|
29
|
-
for idx, shape in enumerate(shapes):
|
|
30
|
-
self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
|
|
31
|
-
|
|
32
|
-
def test_assembly_inference_from_structure(self):
|
|
33
|
-
from rcsb_embedding_model.inference.assembly_inferece import predict
|
|
34
|
-
|
|
35
|
-
assembly_embeddings = predict(
|
|
36
|
-
src_stream=[
|
|
37
|
-
("1acb", "https://files.rcsb.org/download/1acb.cif", "1acb"),
|
|
38
|
-
("2uzi", "https://files.rcsb.org/download/2uzi.cif", "2uzi")
|
|
39
|
-
],
|
|
40
|
-
res_embedding_location=f"{self.__test_path}/resources/embeddings",
|
|
41
|
-
src_location=SrcLocation.stream,
|
|
42
|
-
src_from=SrcAssemblyFrom.structure,
|
|
43
|
-
structure_location=StructureLocation.remote,
|
|
44
|
-
structure_format=StructureFormat.mmcif,
|
|
45
|
-
accelerator=Accelerator.cpu
|
|
46
|
-
)
|
|
47
|
-
|
|
48
|
-
self.assertEqual(len(assembly_embeddings), 2)
|
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/.github/workflows/_workflow-docker.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/assets/embedding-model-architecture.png
RENAMED
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/model/layers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/src/rcsb_embedding_model/utils/model.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/1acb.A.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/1acb.B.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/2uzi.A.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/2uzi.B.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.27 → rcsb_embedding_model-0.0.29}/tests/resources/embeddings/2uzi.C.pt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|