rcsb-embedding-model 0.0.29__tar.gz → 0.0.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rcsb-embedding-model might be problematic. Click here for more details.
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/.gitignore +2 -1
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/PKG-INFO +2 -1
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/pyproject.toml +2 -1
- rcsb_embedding_model-0.0.31/src/rcsb_embedding_model/__init__.py +9 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/cli/inference.py +18 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py +10 -4
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/utils/structure_parser.py +1 -1
- rcsb_embedding_model-0.0.31/tests/resources/src_stream/assembly-complete-test.csv +7 -0
- rcsb_embedding_model-0.0.31/tests/resources/src_stream/instance-complete-test.csv +10 -0
- rcsb_embedding_model-0.0.31/tests/test_cli_inference.py +53 -0
- rcsb_embedding_model-0.0.29/src/rcsb_embedding_model/__init__.py +0 -4
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/.dockerignore +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/.github/workflows/_workflow-docker.yaml +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/.github/workflows/publish.yaml +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/Dockerfile +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/LICENSE.md +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/README.md +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/assets/embedding-model-architecture.png +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/examples/esm_embeddings.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/cli/args_utils.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/dataset/esm_prot_from_chain.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/dataset/esm_prot_from_structure.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/dataset/residue_embedding_from_structure.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/dataset/residue_embedding_from_tensor_file.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/inference/assembly_inferece.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/inference/chain_inference.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/inference/esm_inference.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/inference/structure_inference.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/model/layers.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/model/residue_embedding_aggregator.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/modules/chain_module.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/modules/esm_module.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/modules/structure_module.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/rcsb_structure_embedding.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/types/api_types.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/utils/data.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/utils/model.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/utils/structure_provider.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/writer/batch_writer.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/1acb.A.pt +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/1acb.B.pt +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/2uzi.A.pt +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/2uzi.B.pt +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/2uzi.C.pt +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/pdb/1acb.cif +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/pdb/2uzi.cif +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/src_stream/instance.csv +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/test_embedding_model.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/test_inference.py +0 -0
- {rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/test_remote_inference.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb-embedding-model
|
|
3
|
-
Version: 0.0.29
|
|
3
|
+
Version: 0.0.31
|
|
4
4
|
Summary: Protein Embedding Model for Structure Search
|
|
5
5
|
Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
|
|
6
6
|
Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
|
|
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Requires-Dist: esm>=3.2.0
|
|
14
|
+
Requires-Dist: importlib-metadata>=8.7.0
|
|
14
15
|
Requires-Dist: lightning>=2.5.0
|
|
15
16
|
Requires-Dist: typer>=0.15.0
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "rcsb-embedding-model"
|
|
3
|
-
version = "0.0.29"
|
|
3
|
+
version = "0.0.31"
|
|
4
4
|
authors = [
|
|
5
5
|
{ name="Joan Segura", email="joan.segura@rcsb.org" },
|
|
6
6
|
]
|
|
@@ -14,6 +14,7 @@ classifiers = [
|
|
|
14
14
|
license = "BSD-3-Clause"
|
|
15
15
|
license-files = ["LICEN[CS]E*"]
|
|
16
16
|
dependencies=[
|
|
17
|
+
"importlib-metadata >= 8.7.0",
|
|
17
18
|
"esm >= 3.2.0",
|
|
18
19
|
"lightning >= 2.5.0",
|
|
19
20
|
"typer >= 0.15.0"
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from importlib_metadata import version, PackageNotFoundError
|
|
2
|
+
from rcsb_embedding_model.rcsb_structure_embedding import RcsbStructureEmbedding
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
__version__ = version("rcsb-embedding-model")
|
|
6
|
+
except PackageNotFoundError:
|
|
7
|
+
__version__ = "0.0.0"
|
|
8
|
+
|
|
9
|
+
__all__ = ["RcsbStructureEmbedding", "__version__"]
|
|
@@ -3,6 +3,7 @@ from typing import Annotated, List
|
|
|
3
3
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
6
|
+
from rcsb_embedding_model import __version__
|
|
6
7
|
from rcsb_embedding_model.cli.args_utils import arg_devices
|
|
7
8
|
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, SrcLocation, SrcProteinFrom, \
|
|
8
9
|
StructureLocation, SrcAssemblyFrom, SrcTensorFrom, OutFormat
|
|
@@ -437,5 +438,22 @@ def complete_embedding(
|
|
|
437
438
|
)
|
|
438
439
|
|
|
439
440
|
|
|
441
|
+
def version_callback(value: bool):
|
|
442
|
+
if value:
|
|
443
|
+
typer.echo(f"{__version__}")
|
|
444
|
+
raise typer.Exit()
|
|
445
|
+
|
|
446
|
+
@app.callback()
|
|
447
|
+
def main(
|
|
448
|
+
version: bool = typer.Option(
|
|
449
|
+
None,
|
|
450
|
+
"--version",
|
|
451
|
+
callback=version_callback,
|
|
452
|
+
is_eager=True,
|
|
453
|
+
help="Show the version and exit",
|
|
454
|
+
)
|
|
455
|
+
):
|
|
456
|
+
pass
|
|
457
|
+
|
|
440
458
|
if __name__ == "__main__":
|
|
441
459
|
app()
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import argparse
|
|
1
2
|
import sys
|
|
2
3
|
|
|
3
4
|
import pandas as pd
|
|
@@ -76,12 +77,17 @@ class ResidueAssemblyEmbeddingFromTensorFile(Dataset):
|
|
|
76
77
|
|
|
77
78
|
if __name__ == "__main__":
|
|
78
79
|
|
|
80
|
+
parser = argparse.ArgumentParser()
|
|
81
|
+
parser.add_argument('--file_list', type=argparse.FileType('r'), required=True)
|
|
82
|
+
parser.add_argument('--res_embeddings_path', required=True)
|
|
83
|
+
args = parser.parse_args()
|
|
84
|
+
|
|
79
85
|
dataset = ResidueAssemblyEmbeddingFromTensorFile(
|
|
80
|
-
src_stream=
|
|
81
|
-
res_embedding_location=
|
|
86
|
+
src_stream=args.file_list,
|
|
87
|
+
res_embedding_location=args.res_embeddings_path,
|
|
82
88
|
src_location=SrcLocation.file,
|
|
83
|
-
structure_location=StructureLocation.
|
|
84
|
-
structure_format=StructureFormat.
|
|
89
|
+
structure_location=StructureLocation.remote,
|
|
90
|
+
structure_format=StructureFormat.bciff
|
|
85
91
|
)
|
|
86
92
|
|
|
87
93
|
dataloader = DataLoader(
|
|
@@ -32,7 +32,7 @@ def get_protein_chains(structure, min_res_n=0):
|
|
|
32
32
|
for atom_ch in chain_iter(structure):
|
|
33
33
|
atom_res = atom_ch[filter_polymer(atom_ch)]
|
|
34
34
|
atom_res = atom_res[filter_amino_acids(atom_res)]
|
|
35
|
-
if len(atom_res) > 0 and len(get_residues(atom_res)) > min_res_n:
|
|
35
|
+
if len(atom_res) > 0 and len(get_residues(atom_res)[0]) > min_res_n:
|
|
36
36
|
chain_ids.append(str(get_chains(atom_res)[0]))
|
|
37
37
|
return tuple(chain_ids)
|
|
38
38
|
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
1A21,https://models.rcsb.org/1A21.bcif.gz,1,1A21-1
|
|
2
|
+
1A21,https://models.rcsb.org/1A21.bcif.gz,2,1A21-2
|
|
3
|
+
1A3J,https://models.rcsb.org/1A3J.bcif.gz,1,1A3J-1
|
|
4
|
+
1A3X,https://models.rcsb.org/1A3X.bcif.gz,1,1A3X-1
|
|
5
|
+
1A3X,https://models.rcsb.org/1A3X.bcif.gz,2,1A3X-2
|
|
6
|
+
1AIV,https://models.rcsb.org/1AIV.bcif.gz,1,1AIV-1
|
|
7
|
+
1AU1,https://models.rcsb.org/1AU1.bcif.gz,1,1AU1-1
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
1A21,https://models.rcsb.org/1A21.bcif.gz,A,1A21.A
|
|
2
|
+
1A21,https://models.rcsb.org/1A21.bcif.gz,B,1A21.B
|
|
3
|
+
1A3J,https://models.rcsb.org/1A3J.bcif.gz,A,1A3J.A
|
|
4
|
+
1A3J,https://models.rcsb.org/1A3J.bcif.gz,B,1A3J.B
|
|
5
|
+
1A3J,https://models.rcsb.org/1A3J.bcif.gz,C,1A3J.C
|
|
6
|
+
1A3X,https://models.rcsb.org/1A3X.bcif.gz,A,1A3X.A
|
|
7
|
+
1A3X,https://models.rcsb.org/1A3X.bcif.gz,B,1A3X.B
|
|
8
|
+
1AIV,https://models.rcsb.org/1AIV.bcif.gz,A,1AIV.A
|
|
9
|
+
1AU1,https://models.rcsb.org/1AU1.bcif.gz,A,1AU1.A
|
|
10
|
+
1AU1,https://models.rcsb.org/1AU1.bcif.gz,B,1AU1.B
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import unittest
|
|
5
|
+
|
|
6
|
+
from rcsb_embedding_model.types.api_types import OutFormat, StructureLocation, StructureFormat, Accelerator
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class TestCliInference(unittest.TestCase):
|
|
13
|
+
__test_path = os.path.dirname(__file__)
|
|
14
|
+
|
|
15
|
+
def test_complete_inference(self):
|
|
16
|
+
_remove_files_in_directory(f"{self.__test_path}/resources/tmp")
|
|
17
|
+
from rcsb_embedding_model.cli.inference import complete_embedding
|
|
18
|
+
complete_embedding(
|
|
19
|
+
src_chain_file=f"{self.__test_path}/resources/src_stream/instance-complete-test.csv",
|
|
20
|
+
src_assembly_file=f"{self.__test_path}/resources/src_stream/assembly-complete-test.csv",
|
|
21
|
+
output_res_path=f"{self.__test_path}/resources/tmp",
|
|
22
|
+
output_chain_path=f"{self.__test_path}/resources/tmp",
|
|
23
|
+
output_assembly_path=f"{self.__test_path}/resources/tmp",
|
|
24
|
+
output_format=OutFormat.grouped,
|
|
25
|
+
output_chain_name="instance-inference",
|
|
26
|
+
output_assembly_name="assembly-inference",
|
|
27
|
+
structure_location=StructureLocation.remote,
|
|
28
|
+
structure_format=StructureFormat.bciff,
|
|
29
|
+
min_res_n=0,
|
|
30
|
+
batch_size_res=1,
|
|
31
|
+
num_workers_res=0,
|
|
32
|
+
batch_size_chain=1,
|
|
33
|
+
num_workers_chain=0,
|
|
34
|
+
batch_size_assembly=1,
|
|
35
|
+
num_workers_assembly=0,
|
|
36
|
+
num_nodes=1,
|
|
37
|
+
accelerator=Accelerator.cpu
|
|
38
|
+
)
|
|
39
|
+
self.assertTrue(os.path.exists(f"{self.__test_path}/resources/tmp/instance-inference.json.gz"))
|
|
40
|
+
self.assertTrue(os.path.exists(f"{self.__test_path}/resources/tmp/assembly-inference.json.gz"))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _remove_files_in_directory(directory_path):
|
|
44
|
+
os.makedirs(directory_path, exist_ok=True)
|
|
45
|
+
for filename in os.listdir(directory_path):
|
|
46
|
+
file_path = os.path.join(directory_path, filename)
|
|
47
|
+
try:
|
|
48
|
+
if os.path.isfile(file_path):
|
|
49
|
+
os.unlink(file_path)
|
|
50
|
+
elif os.path.isdir(file_path):
|
|
51
|
+
shutil.rmtree(file_path)
|
|
52
|
+
except Exception as e:
|
|
53
|
+
print(f"Failed to delete {file_path}. Reason: {e}")
|
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/.github/workflows/_workflow-docker.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/assets/embedding-model-architecture.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/model/layers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/utils/data.py
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/src/rcsb_embedding_model/utils/model.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/1acb.A.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/1acb.B.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/2uzi.A.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/2uzi.B.pt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/embeddings/2uzi.C.pt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rcsb_embedding_model-0.0.29 → rcsb_embedding_model-0.0.31}/tests/resources/src_stream/instance.csv
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|