rcsb-embedding-model 0.0.33__tar.gz → 0.0.35__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rcsb-embedding-model might be problematic. Click here for more details.

Files changed (50)
  1. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/PKG-INFO +1 -1
  2. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/pyproject.toml +1 -1
  3. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/dataset/esm_prot_from_chain.py +10 -6
  4. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/utils/structure_parser.py +17 -15
  5. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/.dockerignore +0 -0
  6. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/.github/workflows/_workflow-docker.yaml +0 -0
  7. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/.github/workflows/publish.yaml +0 -0
  8. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/.gitignore +0 -0
  9. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/Dockerfile +0 -0
  10. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/LICENSE.md +0 -0
  11. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/README.md +0 -0
  12. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/assets/embedding-model-architecture.png +0 -0
  13. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/examples/esm_embeddings.py +0 -0
  14. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/__init__.py +0 -0
  15. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/cli/args_utils.py +0 -0
  16. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/cli/inference.py +0 -0
  17. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/dataset/esm_prot_from_structure.py +0 -0
  18. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py +0 -0
  19. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py +0 -0
  20. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/dataset/residue_embedding_from_structure.py +0 -0
  21. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/dataset/residue_embedding_from_tensor_file.py +0 -0
  22. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/inference/assembly_inferece.py +0 -0
  23. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/inference/chain_inference.py +0 -0
  24. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/inference/esm_inference.py +0 -0
  25. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/inference/structure_inference.py +0 -0
  26. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/model/layers.py +0 -0
  27. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/model/residue_embedding_aggregator.py +0 -0
  28. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/modules/chain_module.py +0 -0
  29. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/modules/esm_module.py +0 -0
  30. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/modules/structure_module.py +0 -0
  31. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/rcsb_structure_embedding.py +0 -0
  32. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/types/api_types.py +0 -0
  33. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/utils/data.py +0 -0
  34. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/utils/model.py +0 -0
  35. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/utils/structure_provider.py +0 -0
  36. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/src/rcsb_embedding_model/writer/batch_writer.py +0 -0
  37. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/embeddings/1acb.A.pt +0 -0
  38. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/embeddings/1acb.B.pt +0 -0
  39. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/embeddings/2uzi.A.pt +0 -0
  40. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/embeddings/2uzi.B.pt +0 -0
  41. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/embeddings/2uzi.C.pt +0 -0
  42. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/pdb/1acb.cif +0 -0
  43. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/pdb/2uzi.cif +0 -0
  44. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/src_stream/assembly-complete-test.csv +0 -0
  45. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/src_stream/instance-complete-test.csv +0 -0
  46. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/resources/src_stream/instance.csv +0 -0
  47. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/test_cli_inference.py +0 -0
  48. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/test_embedding_model.py +0 -0
  49. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/test_inference.py +0 -0
  50. {rcsb_embedding_model-0.0.33 → rcsb_embedding_model-0.0.35}/tests/test_remote_inference.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rcsb-embedding-model
3
- Version: 0.0.33
3
+ Version: 0.0.35
4
4
  Summary: Protein Embedding Model for Structure Search
5
5
  Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
6
6
  Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "rcsb-embedding-model"
3
- version = "0.0.33"
3
+ version = "0.0.35"
4
4
  authors = [
5
5
  { name="Joan Segura", email="joan.segura@rcsb.org" },
6
6
  ]
@@ -11,7 +11,7 @@ import pandas as pd
11
11
 
12
12
  from rcsb_embedding_model.types.api_types import StructureFormat, StructureLocation, SrcLocation
13
13
  from rcsb_embedding_model.utils.data import stringio_from_url
14
- from rcsb_embedding_model.utils.structure_parser import rename_atom_ch, check_all_hetero, remove_hetero
14
+ from rcsb_embedding_model.utils.structure_parser import rename_atom_attr,filter_residues
15
15
  from rcsb_embedding_model.utils.structure_provider import StructureProvider
16
16
 
17
17
 
@@ -72,13 +72,17 @@ class EsmProtFromChain(Dataset):
72
72
  for atom_ch in chain_iter(structure):
73
73
  if len(atom_ch) == 0:
74
74
  raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
75
- if check_all_hetero(atom_ch):
76
- atom_ch = remove_hetero(atom_ch)
77
- atom_ch = rename_atom_ch(atom_ch)
78
- protein_chain = ProteinChain.from_atomarray(atom_ch)
79
- protein_chain = ESMProtein.from_protein_chain(protein_chain)
75
+ try:
76
+ atom_ch = filter_residues(atom_ch)
77
+ atom_ch = rename_atom_attr(atom_ch)
78
+ protein_chain = ProteinChain.from_atomarray(atom_ch)
79
+ protein_chain = ESMProtein.from_protein_chain(protein_chain)
80
+ except Exception as e:
81
+ raise IOError(f"Error while creating ESMProtein from structure chain {src_name}.{chain_id}: {e}")
82
+
80
83
  if len(protein_chain) == 0:
81
84
  raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
85
+
82
86
  return protein_chain, item_name
83
87
  raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
84
88
 
@@ -1,4 +1,6 @@
1
- from biotite.structure import filter_amino_acids, filter_polymer, chain_iter, get_chains, get_residues, AtomArray
1
+ import numpy as np
2
+ from biotite.structure import filter_amino_acids, filter_polymer, chain_iter, get_chains, get_residues, \
3
+ filter_peptide_backbone, residue_iter, array
2
4
  from biotite.structure.io.pdb import PDBFile, get_structure as get_pdb_structure, get_assembly as get_pdb_assembly, list_assemblies as list_pdb_assemblies
3
5
  from biotite.structure.io.pdbx import CIFFile, get_structure, get_assembly, BinaryCIFFile, list_assemblies
4
6
 
@@ -54,24 +56,16 @@ def get_assemblies(structure, structure_format="mmcif"):
54
56
  raise RuntimeError(f"Error reading assemblies from {structure}: {e}")
55
57
 
56
58
 
57
- def rename_atom_ch(atom_ch, ch="A"):
58
- renamed_atom_ch = AtomArray(len(atom_ch))
59
- for idx, atom in enumerate(atom_ch):
60
- atom.chain_id = ch
61
- renamed_atom_ch[idx] = atom
62
- return renamed_atom_ch
59
+ def rename_atom_attr(atom_ch):
60
+ return array([__rename_atom(a) for a in atom_ch])
63
61
 
64
62
 
65
- def remove_hetero(atom_ch):
66
- renamed_atom_ch = AtomArray(len(atom_ch))
67
- for idx, atom in enumerate(atom_ch):
68
- atom.hetero = False
69
- renamed_atom_ch[idx] = atom
70
- return renamed_atom_ch
63
+ def filter_residues(atom_ch):
64
+ return atom_ch[filter_amino_acids(atom_ch)]
71
65
 
72
66
 
73
- def check_all_hetero(atom_ch):
74
- return sum(atom_ch.hetero) == len(atom_ch)
67
+ def get_backbone_atoms(atom_ch):
68
+ return np.array([(lambda x: [a.coord for a in x])(r) for r in residue_iter(atom_ch[filter_peptide_backbone(atom_ch)])])
75
69
 
76
70
 
77
71
  def __get_pdb_structure(pdb_file, assembly_id=None):
@@ -85,6 +79,14 @@ def __get_pdb_structure(pdb_file, assembly_id=None):
85
79
  )
86
80
 
87
81
 
82
+ def __rename_atom(atom):
83
+ atom.chain_id = "A"
84
+ atom.hetero = False
85
+ if len(atom.res_name) > 3:
86
+ atom.res_name = 'UNK'
87
+ return atom
88
+
89
+
88
90
  def __get_structure(cif_file, assembly_id=None):
89
91
  return get_structure(
90
92
  cif_file,