rcsb-embedding-model 0.0.26__tar.gz → 0.0.28__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rcsb-embedding-model might be problematic. Click here for more details.

Files changed (46)
  1. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/PKG-INFO +1 -1
  2. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/pyproject.toml +1 -1
  3. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/dataset/esm_prot_from_chain.py +1 -0
  4. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py +1 -0
  5. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/dataset/residue_embedding_from_tensor_file.py +1 -0
  6. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/utils/data.py +18 -3
  7. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/test_remote_inference.py +42 -0
  8. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/.dockerignore +0 -0
  9. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/.github/workflows/_workflow-docker.yaml +0 -0
  10. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/.github/workflows/publish.yaml +0 -0
  11. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/.gitignore +0 -0
  12. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/Dockerfile +0 -0
  13. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/LICENSE.md +0 -0
  14. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/README.md +0 -0
  15. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/assets/embedding-model-architecture.png +0 -0
  16. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/examples/esm_embeddings.py +0 -0
  17. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/__init__.py +0 -0
  18. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/cli/args_utils.py +0 -0
  19. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/cli/inference.py +0 -0
  20. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/dataset/esm_prot_from_structure.py +0 -0
  21. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py +0 -0
  22. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/dataset/residue_embedding_from_structure.py +0 -0
  23. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/inference/assembly_inferece.py +0 -0
  24. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/inference/chain_inference.py +0 -0
  25. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/inference/esm_inference.py +0 -0
  26. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/inference/structure_inference.py +0 -0
  27. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/model/layers.py +0 -0
  28. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/model/residue_embedding_aggregator.py +0 -0
  29. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/modules/chain_module.py +0 -0
  30. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/modules/esm_module.py +0 -0
  31. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/modules/structure_module.py +0 -0
  32. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/rcsb_structure_embedding.py +0 -0
  33. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/types/api_types.py +0 -0
  34. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/utils/model.py +0 -0
  35. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/utils/structure_parser.py +0 -0
  36. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/utils/structure_provider.py +0 -0
  37. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/src/rcsb_embedding_model/writer/batch_writer.py +0 -0
  38. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/embeddings/1acb.A.pt +0 -0
  39. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/embeddings/1acb.B.pt +0 -0
  40. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/embeddings/2uzi.A.pt +0 -0
  41. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/embeddings/2uzi.B.pt +0 -0
  42. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/embeddings/2uzi.C.pt +0 -0
  43. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/pdb/1acb.cif +0 -0
  44. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/resources/pdb/2uzi.cif +0 -0
  45. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/test_embedding_model.py +0 -0
  46. {rcsb_embedding_model-0.0.26 → rcsb_embedding_model-0.0.28}/tests/test_inference.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rcsb-embedding-model
3
- Version: 0.0.26
3
+ Version: 0.0.28
4
4
  Summary: Protein Embedding Model for Structure Search
5
5
  Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
6
6
  Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rcsb-embedding-model"
3
- version = "0.0.26"
3
+ version = "0.0.28"
4
4
  authors = [
5
5
  { name="Joan Segura", email="joan.segura@rcsb.org" },
6
6
  ]
@@ -53,6 +53,7 @@ class EsmProtFromChain(Dataset):
53
53
  dtype=str,
54
54
  names=EsmProtFromChain.COLUMNS
55
55
  )
56
+ self.data = self.data.sort_values(by=self.data.columns[0])
56
57
 
57
58
  def __len__(self):
58
59
  return len(self.data)
@@ -52,6 +52,7 @@ class ResidueAssemblyEmbeddingFromTensorFile(Dataset):
52
52
  dtype=str,
53
53
  names=ResidueAssemblyEmbeddingFromTensorFile.COLUMNS
54
54
  )
55
+ self.data = self.data.sort_values(by=self.data.columns[0])
55
56
 
56
57
  def __len__(self):
57
58
  return len(self.data)
@@ -33,6 +33,7 @@ class ResidueEmbeddingFromTensorFile(Dataset):
33
33
  index_col=None,
34
34
  names=ResidueEmbeddingFromTensorFile.COLUMNS
35
35
  )
36
+ self.data = self.data.sort_values(by=self.data.columns[0])
36
37
 
37
38
  def __len__(self):
38
39
  return len(self.data)
@@ -1,7 +1,8 @@
1
1
  import os
2
- from io import StringIO
3
-
4
2
  import requests
3
+ import gzip
4
+ from io import StringIO, BytesIO
5
+
5
6
  import torch
6
7
 
7
8
 
@@ -40,10 +41,24 @@ def stringio_from_url(url):
40
41
  try:
41
42
  response = requests.get(url)
42
43
  response.raise_for_status()
43
- return StringIO(response.text)
44
+ data = response.content
45
+ if url.endswith('.bcif.gz'):
46
+ with gzip.GzipFile(fileobj=BytesIO(data), mode='rb') as gz:
47
+ decompressed_data = gz.read()
48
+ return BytesIO(decompressed_data)
49
+ if url.endswith('.gz'):
50
+ compressed = BytesIO(data)
51
+ with gzip.open(compressed, 'rt') as f:
52
+ return StringIO(f.read())
53
+ else:
54
+ return StringIO(response.text)
44
55
  except requests.exceptions.RequestException as e:
45
56
  print(f"Error fetching URL: {e}")
46
57
  return None
58
+ except (OSError, gzip.BadGzipFile) as e:
59
+ print(f"Error decompressing gzip file: {e}")
60
+ return None
61
+
47
62
 
48
63
 
49
64
  def concatenate_tensors(file_list, max_residues, dim=0):
@@ -29,6 +29,48 @@ class TestRemoteInference(unittest.TestCase):
29
29
  for idx, shape in enumerate(shapes):
30
30
  self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
31
31
 
32
+ def test_esm_inference_from_bcif_gz(self):
33
+ from rcsb_embedding_model.inference.esm_inference import predict
34
+
35
+ esm_embeddings = predict(
36
+ src_stream=[
37
+ ("1acb", "https://models.rcsb.org/1acb.bcif.gz", "1acb"),
38
+ ("2uzi", "https://models.rcsb.org/2uzi.bcif.gz", "2uzi")
39
+ ],
40
+ src_location=SrcLocation.stream,
41
+ src_from=SrcProteinFrom.structure,
42
+ structure_location=StructureLocation.remote,
43
+ structure_format=StructureFormat.bciff,
44
+ accelerator=Accelerator.cpu
45
+ )
46
+
47
+ self.assertEqual(len(esm_embeddings), 5)
48
+ shapes = ((243, 1536), (65, 1536), (116, 1536), (106, 1536), (168, 1536))
49
+ for idx, shape in enumerate(shapes):
50
+ self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
51
+
52
+
53
+ def test_esm_inference_from_cif_gz(self):
54
+ from rcsb_embedding_model.inference.esm_inference import predict
55
+
56
+ esm_embeddings = predict(
57
+ src_stream=[
58
+ ("1acb", "https://files.rcsb.org/download/1acb.cif.gz", "1acb"),
59
+ ("2uzi", "https://files.rcsb.org/download/2uzi.cif.gz", "2uzi")
60
+ ],
61
+ src_location=SrcLocation.stream,
62
+ src_from=SrcProteinFrom.structure,
63
+ structure_location=StructureLocation.remote,
64
+ structure_format=StructureFormat.mmcif,
65
+ accelerator=Accelerator.cpu
66
+ )
67
+
68
+ self.assertEqual(len(esm_embeddings), 5)
69
+ shapes = ((243, 1536), (65, 1536), (116, 1536), (106, 1536), (168, 1536))
70
+ for idx, shape in enumerate(shapes):
71
+ self.assertEqual(tuple(esm_embeddings[idx][0][0].shape), shape)
72
+
73
+
32
74
  def test_assembly_inference_from_structure(self):
33
75
  from rcsb_embedding_model.inference.assembly_inferece import predict
34
76