rcsb-embedding-model 0.0.30__py3-none-any.whl → 0.0.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rcsb-embedding-model might be problematic. Click here for more details.
- rcsb_embedding_model/__init__.py +7 -2
- rcsb_embedding_model/cli/inference.py +18 -0
- rcsb_embedding_model/dataset/esm_prot_from_chain.py +9 -3
- rcsb_embedding_model/utils/structure_parser.py +14 -4
- {rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/METADATA +2 -1
- {rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/RECORD +9 -9
- {rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/WHEEL +0 -0
- {rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/entry_points.txt +0 -0
- {rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/licenses/LICENSE.md +0 -0
rcsb_embedding_model/__init__.py
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
from importlib_metadata import version, PackageNotFoundError
|
|
2
2
|
from rcsb_embedding_model.rcsb_structure_embedding import RcsbStructureEmbedding
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
try:
|
|
5
|
+
__version__ = version("rcsb-embedding-model")
|
|
6
|
+
except PackageNotFoundError:
|
|
7
|
+
__version__ = "0.0.0"
|
|
8
|
+
|
|
9
|
+
__all__ = ["RcsbStructureEmbedding", "__version__"]
|
|
rcsb_embedding_model/cli/inference.py
CHANGED
|
@@ -3,6 +3,7 @@ from typing import Annotated, List
|
|
|
3
3
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
6
|
+
from rcsb_embedding_model import __version__
|
|
6
7
|
from rcsb_embedding_model.cli.args_utils import arg_devices
|
|
7
8
|
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, SrcLocation, SrcProteinFrom, \
|
|
8
9
|
StructureLocation, SrcAssemblyFrom, SrcTensorFrom, OutFormat
|
|
@@ -437,5 +438,22 @@ def complete_embedding(
|
|
|
437
438
|
)
|
|
438
439
|
|
|
439
440
|
|
|
441
|
+
def version_callback(value: bool):
|
|
442
|
+
if value:
|
|
443
|
+
typer.echo(f"{__version__}")
|
|
444
|
+
raise typer.Exit()
|
|
445
|
+
|
|
446
|
+
@app.callback()
|
|
447
|
+
def main(
|
|
448
|
+
version: bool = typer.Option(
|
|
449
|
+
None,
|
|
450
|
+
"--version",
|
|
451
|
+
callback=version_callback,
|
|
452
|
+
is_eager=True,
|
|
453
|
+
help="Show the version and exit",
|
|
454
|
+
)
|
|
455
|
+
):
|
|
456
|
+
pass
|
|
457
|
+
|
|
440
458
|
if __name__ == "__main__":
|
|
441
459
|
app()
|
|
rcsb_embedding_model/dataset/esm_prot_from_chain.py
CHANGED
|
@@ -11,7 +11,7 @@ import pandas as pd
|
|
|
11
11
|
|
|
12
12
|
from rcsb_embedding_model.types.api_types import StructureFormat, StructureLocation, SrcLocation
|
|
13
13
|
from rcsb_embedding_model.utils.data import stringio_from_url
|
|
14
|
-
from rcsb_embedding_model.utils.structure_parser import rename_atom_ch
|
|
14
|
+
from rcsb_embedding_model.utils.structure_parser import rename_atom_ch, check_all_hetero, remove_hetero
|
|
15
15
|
from rcsb_embedding_model.utils.structure_provider import StructureProvider
|
|
16
16
|
|
|
17
17
|
|
|
@@ -72,8 +72,14 @@ class EsmProtFromChain(Dataset):
|
|
|
72
72
|
for atom_ch in chain_iter(structure):
|
|
73
73
|
if len(atom_ch) == 0:
|
|
74
74
|
raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
if check_all_hetero(atom_ch):
|
|
76
|
+
atom_ch = remove_hetero(atom_ch)
|
|
77
|
+
atom_ch = rename_atom_ch(atom_ch)
|
|
78
|
+
protein_chain = ProteinChain.from_atomarray(atom_ch)
|
|
79
|
+
protein_chain = ESMProtein.from_protein_chain(protein_chain)
|
|
80
|
+
if len(protein_chain) == 0:
|
|
81
|
+
raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
|
|
82
|
+
return protein_chain, item_name
|
|
77
83
|
raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
|
|
78
84
|
|
|
79
85
|
|
|
rcsb_embedding_model/utils/structure_parser.py
CHANGED
|
@@ -50,14 +50,24 @@ def get_assemblies(structure, structure_format="mmcif"):
|
|
|
50
50
|
|
|
51
51
|
def rename_atom_ch(atom_ch, ch="A"):
|
|
52
52
|
renamed_atom_ch = AtomArray(len(atom_ch))
|
|
53
|
-
|
|
54
|
-
for atom in atom_ch:
|
|
53
|
+
for idx, atom in enumerate(atom_ch):
|
|
55
54
|
atom.chain_id = ch
|
|
56
|
-
renamed_atom_ch[n] = atom
|
|
57
|
-
n += 1
|
|
55
|
+
renamed_atom_ch[idx] = atom
|
|
58
56
|
return renamed_atom_ch
|
|
59
57
|
|
|
60
58
|
|
|
59
|
+
def remove_hetero(atom_ch):
|
|
60
|
+
renamed_atom_ch = AtomArray(len(atom_ch))
|
|
61
|
+
for idx, atom in enumerate(atom_ch):
|
|
62
|
+
atom.hetero = False
|
|
63
|
+
renamed_atom_ch[idx] = atom
|
|
64
|
+
return renamed_atom_ch
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def check_all_hetero(atom_ch):
|
|
68
|
+
return sum(atom_ch.hetero) == len(atom_ch)
|
|
69
|
+
|
|
70
|
+
|
|
61
71
|
def __get_pdb_structure(pdb_file, assembly_id=None):
|
|
62
72
|
return get_pdb_structure(
|
|
63
73
|
pdb_file,
|
|
{rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb-embedding-model
|
|
3
|
-
Version: 0.0.30
|
|
3
|
+
Version: 0.0.32
|
|
4
4
|
Summary: Protein Embedding Model for Structure Search
|
|
5
5
|
Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
|
|
6
6
|
Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
|
|
@@ -11,6 +11,7 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.10
|
|
13
13
|
Requires-Dist: esm>=3.2.0
|
|
14
|
+
Requires-Dist: importlib-metadata>=8.7.0
|
|
14
15
|
Requires-Dist: lightning>=2.5.0
|
|
15
16
|
Requires-Dist: typer>=0.15.0
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
{rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/RECORD
RENAMED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
rcsb_embedding_model/__init__.py,sha256=
|
|
1
|
+
rcsb_embedding_model/__init__.py,sha256=7YfYO-V-u__19eAZfQ3t5Gf2qrhd_gwQB8rHO0J0puw,306
|
|
2
2
|
rcsb_embedding_model/rcsb_structure_embedding.py,sha256=dKp9hXQO0JAnO4SEfjJ_mG_jHu3UxAPguv6jkOjp-BI,4487
|
|
3
3
|
rcsb_embedding_model/cli/args_utils.py,sha256=7nP2q8pL5dWK_U7opxtWmoFcYVwasky6elHk-dASFaI,165
|
|
4
|
-
rcsb_embedding_model/cli/inference.py,sha256=
|
|
5
|
-
rcsb_embedding_model/dataset/esm_prot_from_chain.py,sha256=
|
|
4
|
+
rcsb_embedding_model/cli/inference.py,sha256=67_Tr3LWeA3T4KS5mkjq6tw77Ypy0R8IwMxEG2FwVqQ,19901
|
|
5
|
+
rcsb_embedding_model/dataset/esm_prot_from_chain.py,sha256=HuKII1zOnO0JF6wZXyqBykiGunSFXmhzBimsSpbEC1I,4291
|
|
6
6
|
rcsb_embedding_model/dataset/esm_prot_from_structure.py,sha256=3HzXCCc-UqmZNbJaeXHyUsSIZZxMc2erbxAPGIxSmfE,2621
|
|
7
7
|
rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py,sha256=69h1VkrIXesHZi1cG3BOMMytSDeRzcBBP0_Z3Xz3dM8,2869
|
|
8
8
|
rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py,sha256=Hd9oH-IVgY6d7Dxy5VfiwHvSaK-Wwhk6ccUBgOwl0TU,3740
|
|
@@ -20,11 +20,11 @@ rcsb_embedding_model/modules/structure_module.py,sha256=4js02XzKvhc_G26ELsGhJ9SC
|
|
|
20
20
|
rcsb_embedding_model/types/api_types.py,sha256=SCwALwvEb0KRKaoWKbuN7JyfOH-1whsI0Z4ki41dht8,1235
|
|
21
21
|
rcsb_embedding_model/utils/data.py,sha256=BOjYdIRHrFqk8qFuKGrgCtVyfDupzgOVmH_0C-ecMvg,3813
|
|
22
22
|
rcsb_embedding_model/utils/model.py,sha256=xr3p02ohOgJ5UInwdIupN68Oq4yvNFhxobZRacS1adg,953
|
|
23
|
-
rcsb_embedding_model/utils/structure_parser.py,sha256=
|
|
23
|
+
rcsb_embedding_model/utils/structure_parser.py,sha256=z_Aid5QgGUAJkLL-fMzG2Uh2wPrHXmBdjhLn_YkPl-k,3035
|
|
24
24
|
rcsb_embedding_model/utils/structure_provider.py,sha256=eWtxjkPpmRfmil_DKR1J6miaXR3lQ28DF5O0qrqSgGA,786
|
|
25
25
|
rcsb_embedding_model/writer/batch_writer.py,sha256=rTFNasB0Xp4-XCNTXKeEWZxSrb7lvZytoRldJUWn9Jg,3312
|
|
26
|
-
rcsb_embedding_model-0.0.30.dist-info/METADATA,sha256=
|
|
27
|
-
rcsb_embedding_model-0.0.30.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
28
|
-
rcsb_embedding_model-0.0.30.dist-info/entry_points.txt,sha256=MK11jTIEmaV-x4CkPX5IymDaVs7Ky_f2xxU8BJVZ_9Q,69
|
|
29
|
-
rcsb_embedding_model-0.0.30.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
|
|
30
|
-
rcsb_embedding_model-0.0.30.dist-info/RECORD,,
|
|
26
|
+
rcsb_embedding_model-0.0.32.dist-info/METADATA,sha256=JWSb2a_wZ2Y8YWzyM3vk_QG--ukrIGsI5Q1-R3bApjs,5351
|
|
27
|
+
rcsb_embedding_model-0.0.32.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
28
|
+
rcsb_embedding_model-0.0.32.dist-info/entry_points.txt,sha256=MK11jTIEmaV-x4CkPX5IymDaVs7Ky_f2xxU8BJVZ_9Q,69
|
|
29
|
+
rcsb_embedding_model-0.0.32.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
|
|
30
|
+
rcsb_embedding_model-0.0.32.dist-info/RECORD,,
|
|
{rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/WHEEL
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.30.dist-info → rcsb_embedding_model-0.0.32.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|