rcsb-embedding-model 0.0.34__py3-none-any.whl → 0.0.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rcsb-embedding-model might be problematic. Click here for more details.
- rcsb_embedding_model/cli/inference.py +1 -23
- rcsb_embedding_model/dataset/esm_prot_from_chain.py +4 -6
- rcsb_embedding_model/dataset/esm_prot_from_structure.py +2 -4
- rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py +2 -4
- rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py +2 -4
- rcsb_embedding_model/dataset/residue_embedding_from_structure.py +2 -3
- rcsb_embedding_model/dataset/untils/__init__.py +4 -0
- rcsb_embedding_model/dataset/untils/utils.py +17 -0
- rcsb_embedding_model/inference/assembly_inferece.py +1 -4
- rcsb_embedding_model/inference/chain_inference.py +1 -3
- rcsb_embedding_model/inference/esm_inference.py +1 -4
- rcsb_embedding_model/inference/structure_inference.py +1 -4
- {rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/METADATA +1 -1
- {rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/RECORD +17 -15
- {rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/WHEEL +0 -0
- {rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/entry_points.txt +0 -0
- {rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -6,7 +6,7 @@ import typer
|
|
|
6
6
|
from rcsb_embedding_model import __version__
|
|
7
7
|
from rcsb_embedding_model.cli.args_utils import arg_devices
|
|
8
8
|
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, SrcLocation, SrcProteinFrom, \
|
|
9
|
-
|
|
9
|
+
SrcAssemblyFrom, SrcTensorFrom, OutFormat
|
|
10
10
|
from rcsb_embedding_model.utils.data import adapt_csv_to_embedding_chain_stream
|
|
11
11
|
|
|
12
12
|
import os
|
|
@@ -42,9 +42,6 @@ def residue_embedding(
|
|
|
42
42
|
output_name: Annotated[str, typer.Option(
|
|
43
43
|
help='File name for storing embeddings as a single JSON file. Used when output-format=grouped.'
|
|
44
44
|
)] = 'inference',
|
|
45
|
-
structure_location: Annotated[StructureLocation, typer.Option(
|
|
46
|
-
help='Structure file location.'
|
|
47
|
-
)] = StructureLocation.local,
|
|
48
45
|
structure_format: Annotated[StructureFormat, typer.Option(
|
|
49
46
|
help='Structure file format.'
|
|
50
47
|
)] = StructureFormat.mmcif,
|
|
@@ -72,7 +69,6 @@ def residue_embedding(
|
|
|
72
69
|
src_stream=src_file,
|
|
73
70
|
src_location=SrcLocation.file,
|
|
74
71
|
src_from=SrcProteinFrom.chain,
|
|
75
|
-
structure_location=structure_location,
|
|
76
72
|
structure_format=structure_format,
|
|
77
73
|
min_res_n=min_res_n,
|
|
78
74
|
batch_size=batch_size,
|
|
@@ -108,9 +104,6 @@ def structure_embedding(
|
|
|
108
104
|
output_name: Annotated[str, typer.Option(
|
|
109
105
|
help='File name for storing embeddings as a single JSON file.'
|
|
110
106
|
)] = 'inference',
|
|
111
|
-
structure_location: Annotated[StructureLocation, typer.Option(
|
|
112
|
-
help='Structure file location.'
|
|
113
|
-
)] = StructureLocation.local,
|
|
114
107
|
structure_format: Annotated[StructureFormat, typer.Option(
|
|
115
108
|
help='Structure file format.'
|
|
116
109
|
)] = StructureFormat.mmcif,
|
|
@@ -138,7 +131,6 @@ def structure_embedding(
|
|
|
138
131
|
src_stream=src_file,
|
|
139
132
|
src_location=SrcLocation.file,
|
|
140
133
|
src_from=SrcProteinFrom.chain,
|
|
141
|
-
structure_location=structure_location,
|
|
142
134
|
structure_format=structure_format,
|
|
143
135
|
min_res_n=min_res_n,
|
|
144
136
|
batch_size=batch_size,
|
|
@@ -183,9 +175,6 @@ def chain_embedding(
|
|
|
183
175
|
output_name: Annotated[str, typer.Option(
|
|
184
176
|
help='File name for storing embeddings as a single JSON file. Used when output-format=grouped.'
|
|
185
177
|
)] = 'inference',
|
|
186
|
-
structure_location: Annotated[StructureLocation, typer.Option(
|
|
187
|
-
help='Structure file location.'
|
|
188
|
-
)] = StructureLocation.local,
|
|
189
178
|
structure_format: Annotated[StructureFormat, typer.Option(
|
|
190
179
|
help='Structure file format.'
|
|
191
180
|
)] = StructureFormat.mmcif,
|
|
@@ -214,7 +203,6 @@ def chain_embedding(
|
|
|
214
203
|
res_embedding_location=res_embedding_location,
|
|
215
204
|
src_location=SrcLocation.stream,
|
|
216
205
|
src_from=SrcTensorFrom.file,
|
|
217
|
-
structure_location=structure_location,
|
|
218
206
|
structure_format=structure_format,
|
|
219
207
|
min_res_n=min_res_n,
|
|
220
208
|
batch_size=batch_size,
|
|
@@ -259,9 +247,6 @@ def assembly_embedding(
|
|
|
259
247
|
output_name: Annotated[str, typer.Option(
|
|
260
248
|
help='File name for storing embeddings as a single JSON file. Used when output-format=grouped.'
|
|
261
249
|
)] = 'inference',
|
|
262
|
-
structure_location: Annotated[StructureLocation, typer.Option(
|
|
263
|
-
help='Structure file location.'
|
|
264
|
-
)] = StructureLocation.local,
|
|
265
250
|
structure_format: Annotated[StructureFormat, typer.Option(
|
|
266
251
|
help='Structure file format.'
|
|
267
252
|
)] = StructureFormat.mmcif,
|
|
@@ -293,7 +278,6 @@ def assembly_embedding(
|
|
|
293
278
|
res_embedding_location=res_embedding_location,
|
|
294
279
|
src_location=SrcLocation.file,
|
|
295
280
|
src_from=SrcAssemblyFrom.assembly,
|
|
296
|
-
structure_location=structure_location,
|
|
297
281
|
structure_format=structure_format,
|
|
298
282
|
min_res_n=min_res_n,
|
|
299
283
|
max_res_n=max_res_n,
|
|
@@ -356,9 +340,6 @@ def complete_embedding(
|
|
|
356
340
|
output_assembly_name: Annotated[str, typer.Option(
|
|
357
341
|
help='File name for storing chain embeddings as a single JSON file. Used when output-format=grouped.'
|
|
358
342
|
)] = 'chain-inference',
|
|
359
|
-
structure_location: Annotated[StructureLocation, typer.Option(
|
|
360
|
-
help='Structure file location.'
|
|
361
|
-
)] = StructureLocation.local,
|
|
362
343
|
structure_format: Annotated[StructureFormat, typer.Option(
|
|
363
344
|
help='Structure file format.'
|
|
364
345
|
)] = StructureFormat.mmcif,
|
|
@@ -397,7 +378,6 @@ def complete_embedding(
|
|
|
397
378
|
src_file=src_chain_file,
|
|
398
379
|
output_path=output_res_path,
|
|
399
380
|
output_format=OutFormat.separated,
|
|
400
|
-
structure_location=structure_location,
|
|
401
381
|
structure_format=structure_format,
|
|
402
382
|
min_res_n=min_res_n,
|
|
403
383
|
batch_size=batch_size_res,
|
|
@@ -412,7 +392,6 @@ def complete_embedding(
|
|
|
412
392
|
output_format=output_format,
|
|
413
393
|
output_name=output_chain_name,
|
|
414
394
|
res_embedding_location=output_res_path,
|
|
415
|
-
structure_location=structure_location,
|
|
416
395
|
structure_format=structure_format,
|
|
417
396
|
min_res_n=min_res_n,
|
|
418
397
|
batch_size=batch_size_chain,
|
|
@@ -427,7 +406,6 @@ def complete_embedding(
|
|
|
427
406
|
output_format=output_format,
|
|
428
407
|
output_name=output_assembly_name,
|
|
429
408
|
res_embedding_location=output_res_path,
|
|
430
|
-
structure_location=structure_location,
|
|
431
409
|
structure_format=structure_format,
|
|
432
410
|
min_res_n=min_res_n,
|
|
433
411
|
batch_size=batch_size_assembly,
|
|
@@ -9,6 +9,7 @@ from esm.utils.structure.protein_chain import ProteinChain
|
|
|
9
9
|
from torch.utils.data import Dataset, DataLoader
|
|
10
10
|
import pandas as pd
|
|
11
11
|
|
|
12
|
+
from rcsb_embedding_model.dataset.untils import get_structure_location
|
|
12
13
|
from rcsb_embedding_model.types.api_types import StructureFormat, StructureLocation, SrcLocation
|
|
13
14
|
from rcsb_embedding_model.utils.data import stringio_from_url
|
|
14
15
|
from rcsb_embedding_model.utils.structure_parser import rename_atom_attr,filter_residues
|
|
@@ -28,14 +29,12 @@ class EsmProtFromChain(Dataset):
|
|
|
28
29
|
self,
|
|
29
30
|
src_stream,
|
|
30
31
|
src_location=SrcLocation.file,
|
|
31
|
-
structure_location=StructureLocation.local,
|
|
32
32
|
structure_format=StructureFormat.mmcif,
|
|
33
33
|
structure_provider=StructureProvider()
|
|
34
34
|
):
|
|
35
35
|
super().__init__()
|
|
36
36
|
self.__structure_provider = structure_provider
|
|
37
37
|
self.src_location = src_location
|
|
38
|
-
self.structure_location = structure_location
|
|
39
38
|
self.structure_format = structure_format
|
|
40
39
|
self.data = pd.DataFrame()
|
|
41
40
|
self.__load_stream(src_stream)
|
|
@@ -65,16 +64,16 @@ class EsmProtFromChain(Dataset):
|
|
|
65
64
|
item_name = self.data.iloc[idx][EsmProtFromChain.ITEM_NAME_ATTR]
|
|
66
65
|
structure = self.__structure_provider.get_structure(
|
|
67
66
|
src_name=src_name,
|
|
68
|
-
src_structure=stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure,
|
|
67
|
+
src_structure=stringio_from_url(src_structure) if get_structure_location(src_structure) == StructureLocation.remote else src_structure,
|
|
69
68
|
structure_format=self.structure_format,
|
|
70
69
|
chain_id=chain_id
|
|
71
70
|
)
|
|
72
71
|
for atom_ch in chain_iter(structure):
|
|
73
72
|
if len(atom_ch) == 0:
|
|
74
73
|
raise IOError(f"No atoms were found in structure chain {src_name}.{chain_id}")
|
|
75
|
-
atom_ch = filter_residues(atom_ch)
|
|
76
|
-
atom_ch = rename_atom_attr(atom_ch)
|
|
77
74
|
try:
|
|
75
|
+
atom_ch = filter_residues(atom_ch)
|
|
76
|
+
atom_ch = rename_atom_attr(atom_ch)
|
|
78
77
|
protein_chain = ProteinChain.from_atomarray(atom_ch)
|
|
79
78
|
protein_chain = ESMProtein.from_protein_chain(protein_chain)
|
|
80
79
|
except Exception as e:
|
|
@@ -96,7 +95,6 @@ if __name__ == '__main__':
|
|
|
96
95
|
dataset = EsmProtFromChain(
|
|
97
96
|
src_stream=args.file_list,
|
|
98
97
|
src_location=SrcLocation.file,
|
|
99
|
-
structure_location=StructureLocation.remote,
|
|
100
98
|
structure_format=StructureFormat.bciff,
|
|
101
99
|
)
|
|
102
100
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
|
|
4
4
|
from rcsb_embedding_model.dataset.esm_prot_from_chain import EsmProtFromChain
|
|
5
|
+
from rcsb_embedding_model.dataset.untils import get_structure_location
|
|
5
6
|
from rcsb_embedding_model.types.api_types import StructureLocation, StructureFormat, SrcLocation
|
|
6
7
|
from rcsb_embedding_model.utils.data import stringio_from_url
|
|
7
8
|
from rcsb_embedding_model.utils.structure_parser import get_protein_chains
|
|
@@ -20,20 +21,17 @@ class EsmProtFromStructure(EsmProtFromChain):
|
|
|
20
21
|
self,
|
|
21
22
|
src_stream,
|
|
22
23
|
src_location=SrcLocation.file,
|
|
23
|
-
structure_location=StructureLocation.local,
|
|
24
24
|
structure_format=StructureFormat.mmcif,
|
|
25
25
|
min_res_n=0,
|
|
26
26
|
structure_provider=StructureProvider()
|
|
27
27
|
):
|
|
28
28
|
self.min_res_n = min_res_n
|
|
29
29
|
self.src_location = src_location
|
|
30
|
-
self.structure_location = structure_location
|
|
31
30
|
self.structure_format = structure_format
|
|
32
31
|
self.__structure_provider = structure_provider
|
|
33
32
|
super().__init__(
|
|
34
33
|
src_stream=self.__get_chains(src_stream),
|
|
35
34
|
src_location=SrcLocation.stream,
|
|
36
|
-
structure_location=structure_location,
|
|
37
35
|
structure_format=structure_format,
|
|
38
36
|
structure_provider=structure_provider
|
|
39
37
|
)
|
|
@@ -58,7 +56,7 @@ class EsmProtFromStructure(EsmProtFromChain):
|
|
|
58
56
|
item_name = row[EsmProtFromStructure.ITEM_NAME_ATTR]
|
|
59
57
|
structure = self.__structure_provider.get_structure(
|
|
60
58
|
src_name=src_name,
|
|
61
|
-
src_structure=stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure,
|
|
59
|
+
src_structure=stringio_from_url(src_structure) if get_structure_location(src_structure) == StructureLocation.remote else src_structure,
|
|
62
60
|
structure_format=self.structure_format
|
|
63
61
|
)
|
|
64
62
|
for ch in get_protein_chains(structure, self.min_res_n):
|
|
@@ -3,6 +3,7 @@ import sys
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
5
|
from rcsb_embedding_model.dataset.residue_assembly_embedding_from_tensor_file import ResidueAssemblyEmbeddingFromTensorFile
|
|
6
|
+
from rcsb_embedding_model.dataset.untils import get_structure_location
|
|
6
7
|
from rcsb_embedding_model.types.api_types import SrcLocation, StructureLocation, StructureFormat
|
|
7
8
|
from rcsb_embedding_model.utils.data import stringio_from_url
|
|
8
9
|
from rcsb_embedding_model.utils.structure_parser import get_assemblies
|
|
@@ -22,14 +23,12 @@ class ResidueAssemblyDatasetFromStructure(ResidueAssemblyEmbeddingFromTensorFile
|
|
|
22
23
|
src_stream,
|
|
23
24
|
res_embedding_location,
|
|
24
25
|
src_location=SrcLocation.file,
|
|
25
|
-
structure_location=StructureLocation.local,
|
|
26
26
|
structure_format=StructureFormat.mmcif,
|
|
27
27
|
min_res_n=0,
|
|
28
28
|
max_res_n=sys.maxsize,
|
|
29
29
|
structure_provider=StructureProvider()
|
|
30
30
|
):
|
|
31
31
|
self.src_location = src_location
|
|
32
|
-
self.structure_location = structure_location
|
|
33
32
|
self.structure_format = structure_format
|
|
34
33
|
self.min_res_n = min_res_n
|
|
35
34
|
self.max_res_n = max_res_n
|
|
@@ -37,7 +36,6 @@ class ResidueAssemblyDatasetFromStructure(ResidueAssemblyEmbeddingFromTensorFile
|
|
|
37
36
|
src_stream=self.__get_assemblies(src_stream),
|
|
38
37
|
res_embedding_location=res_embedding_location,
|
|
39
38
|
src_location=SrcLocation.stream,
|
|
40
|
-
structure_location=structure_location,
|
|
41
39
|
structure_format=structure_format,
|
|
42
40
|
min_res_n=min_res_n,
|
|
43
41
|
max_res_n=max_res_n,
|
|
@@ -61,7 +59,7 @@ class ResidueAssemblyDatasetFromStructure(ResidueAssemblyEmbeddingFromTensorFile
|
|
|
61
59
|
for idx, row in data.iterrows():
|
|
62
60
|
src_name = row[ResidueAssemblyDatasetFromStructure.STREAM_NAME_ATTR]
|
|
63
61
|
src_structure = row[ResidueAssemblyDatasetFromStructure.STREAM_ATTR]
|
|
64
|
-
structure = stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure
|
|
62
|
+
structure = stringio_from_url(src_structure) if get_structure_location(src_structure) == StructureLocation.remote else src_structure
|
|
65
63
|
item_name = row[ResidueAssemblyDatasetFromStructure.ITEM_NAME_ATTR]
|
|
66
64
|
for assembly_id in get_assemblies(structure=structure, structure_format=self.structure_format):
|
|
67
65
|
assemblies.append((src_name, src_structure, str(assembly_id), f"{item_name}-{assembly_id}"))
|
|
@@ -4,6 +4,7 @@ import sys
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from torch.utils.data import Dataset, DataLoader
|
|
6
6
|
|
|
7
|
+
from rcsb_embedding_model.dataset.untils import get_structure_location
|
|
7
8
|
from rcsb_embedding_model.types.api_types import StructureLocation, StructureFormat, SrcLocation
|
|
8
9
|
from rcsb_embedding_model.utils.data import stringio_from_url, concatenate_tensors
|
|
9
10
|
from rcsb_embedding_model.utils.structure_parser import get_protein_chains
|
|
@@ -24,7 +25,6 @@ class ResidueAssemblyEmbeddingFromTensorFile(Dataset):
|
|
|
24
25
|
src_stream,
|
|
25
26
|
res_embedding_location,
|
|
26
27
|
src_location=SrcLocation.file,
|
|
27
|
-
structure_location=StructureLocation.local,
|
|
28
28
|
structure_format=StructureFormat.mmcif,
|
|
29
29
|
min_res_n=0,
|
|
30
30
|
max_res_n=sys.maxsize,
|
|
@@ -33,7 +33,6 @@ class ResidueAssemblyEmbeddingFromTensorFile(Dataset):
|
|
|
33
33
|
super().__init__()
|
|
34
34
|
self.res_embedding_location = res_embedding_location
|
|
35
35
|
self.src_location = src_location
|
|
36
|
-
self.structure_location = structure_location
|
|
37
36
|
self.structure_format = structure_format
|
|
38
37
|
self.min_res_n = min_res_n
|
|
39
38
|
self.max_res_n = max_res_n
|
|
@@ -65,7 +64,7 @@ class ResidueAssemblyEmbeddingFromTensorFile(Dataset):
|
|
|
65
64
|
item_name = self.data.iloc[idx][ResidueAssemblyEmbeddingFromTensorFile.ITEM_NAME_ATTR]
|
|
66
65
|
structure = self.__structure_provider.get_structure(
|
|
67
66
|
src_name=src_name,
|
|
68
|
-
src_structure=stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure,
|
|
67
|
+
src_structure=stringio_from_url(src_structure) if get_structure_location(src_structure) == StructureLocation.remote else src_structure,
|
|
69
68
|
structure_format=self.structure_format,
|
|
70
69
|
assembly_id=assembly_id
|
|
71
70
|
)
|
|
@@ -86,7 +85,6 @@ if __name__ == "__main__":
|
|
|
86
85
|
src_stream=args.file_list,
|
|
87
86
|
res_embedding_location=args.res_embeddings_path,
|
|
88
87
|
src_location=SrcLocation.file,
|
|
89
|
-
structure_location=StructureLocation.remote,
|
|
90
88
|
structure_format=StructureFormat.bciff
|
|
91
89
|
)
|
|
92
90
|
|
|
@@ -3,6 +3,7 @@ import os
|
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
5
|
from rcsb_embedding_model.dataset.residue_embedding_from_tensor_file import ResidueEmbeddingFromTensorFile
|
|
6
|
+
from rcsb_embedding_model.dataset.untils import get_structure_location
|
|
6
7
|
from rcsb_embedding_model.types.api_types import SrcLocation, StructureLocation, StructureFormat
|
|
7
8
|
from rcsb_embedding_model.utils.data import stringio_from_url
|
|
8
9
|
from rcsb_embedding_model.utils.structure_parser import get_protein_chains
|
|
@@ -22,7 +23,6 @@ class ResidueEmbeddingFromStructure(ResidueEmbeddingFromTensorFile):
|
|
|
22
23
|
src_stream,
|
|
23
24
|
res_embedding_location,
|
|
24
25
|
src_location=SrcLocation.file,
|
|
25
|
-
structure_location=StructureLocation.local,
|
|
26
26
|
structure_format=StructureFormat.mmcif,
|
|
27
27
|
min_res_n=0,
|
|
28
28
|
structure_provider=StructureProvider()
|
|
@@ -31,7 +31,6 @@ class ResidueEmbeddingFromStructure(ResidueEmbeddingFromTensorFile):
|
|
|
31
31
|
raise FileNotFoundError(f"Folder {res_embedding_location} does not exist")
|
|
32
32
|
self.res_embedding_location = res_embedding_location
|
|
33
33
|
self.src_location = src_location
|
|
34
|
-
self.structure_location = structure_location
|
|
35
34
|
self.structure_format = structure_format
|
|
36
35
|
self.min_res_n = min_res_n
|
|
37
36
|
self.__structure_provider = structure_provider
|
|
@@ -60,7 +59,7 @@ class ResidueEmbeddingFromStructure(ResidueEmbeddingFromTensorFile):
|
|
|
60
59
|
item_name = row[ResidueEmbeddingFromStructure.ITEM_NAME_ATTR]
|
|
61
60
|
structure = self.__structure_provider.get_structure(
|
|
62
61
|
src_name=src_name,
|
|
63
|
-
src_structure=stringio_from_url(src_structure) if self.structure_location == StructureLocation.remote else src_structure,
|
|
62
|
+
src_structure=stringio_from_url(src_structure) if get_structure_location(src_structure) == StructureLocation.remote else src_structure,
|
|
64
63
|
structure_format=self.structure_format
|
|
65
64
|
)
|
|
66
65
|
for ch in get_protein_chains(structure, self.min_res_n):
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from urllib.parse import urlparse
|
|
3
|
+
from rcsb_embedding_model.types.api_types import StructureLocation
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_structure_location(s: "str | os.PathLike") -> "StructureLocation":
    """Classify a structure source as a remote URL or a local path.

    The check is done in two steps, in this order:

    1. If ``s`` parses as a URL whose scheme is ``http``, ``https`` or
       ``ftp`` (case-insensitive) and that has a network location, it is
       considered remote.
    2. Otherwise, if ``s`` names an existing file or directory, it is
       considered local.

    Args:
        s: URL or filesystem path of the structure file. Path-like objects
            (e.g. ``pathlib.Path``) are accepted and converted with
            ``os.fspath`` before being inspected.

    Returns:
        ``StructureLocation.remote`` for a recognized URL,
        ``StructureLocation.local`` for an existing path.

    Raises:
        ValueError: If ``s`` is neither a recognized URL nor an existing
            file or directory.
        TypeError: If ``s`` is not a string or path-like object.
    """
    # urlparse() only understands str/bytes; normalise path-like objects
    # first so that a pathlib.Path is classified instead of crashing.
    source = os.fspath(s)

    # First, attempt to parse as URL. The scheme whitelist also keeps
    # Windows drive letters ("c:\\...") from being mistaken for URLs.
    parsed = urlparse(source)
    if parsed.scheme.lower() in {'http', 'https', 'ftp'} and parsed.netloc:
        return StructureLocation.remote

    # Next, test for an existing file or directory
    if os.path.exists(source):
        return StructureLocation.local

    # Neither URL nor existing file
    raise ValueError(f"Structure file source is neither a recognized URL nor file: {s!r}")
|
|
@@ -3,7 +3,7 @@ import sys
|
|
|
3
3
|
from rcsb_embedding_model.dataset.resdiue_assembly_embedding_from_structure import ResidueAssemblyDatasetFromStructure
|
|
4
4
|
from rcsb_embedding_model.dataset.residue_assembly_embedding_from_tensor_file import ResidueAssemblyEmbeddingFromTensorFile
|
|
5
5
|
from rcsb_embedding_model.types.api_types import FileOrStreamTuple, SrcLocation, Accelerator, Devices, OptionalPath, \
|
|
6
|
-
EmbeddingPath, StructureLocation, StructureFormat, SrcAssemblyFrom, OutFormat
|
|
6
|
+
EmbeddingPath, StructureFormat, SrcAssemblyFrom, OutFormat
|
|
7
7
|
from rcsb_embedding_model.inference.chain_inference import predict as chain_predict
|
|
8
8
|
|
|
9
9
|
|
|
@@ -12,7 +12,6 @@ def predict(
|
|
|
12
12
|
res_embedding_location: EmbeddingPath,
|
|
13
13
|
src_location: SrcLocation = SrcLocation.file,
|
|
14
14
|
src_from: SrcAssemblyFrom = SrcAssemblyFrom.assembly,
|
|
15
|
-
structure_location: StructureLocation = StructureLocation.local,
|
|
16
15
|
structure_format: StructureFormat = StructureFormat.mmcif,
|
|
17
16
|
min_res_n: int = 0,
|
|
18
17
|
max_res_n: int = sys.maxsize,
|
|
@@ -29,7 +28,6 @@ def predict(
|
|
|
29
28
|
src_stream=src_stream,
|
|
30
29
|
res_embedding_location=res_embedding_location,
|
|
31
30
|
src_location=src_location,
|
|
32
|
-
structure_location=structure_location,
|
|
33
31
|
structure_format=structure_format,
|
|
34
32
|
min_res_n=min_res_n,
|
|
35
33
|
max_res_n=max_res_n
|
|
@@ -37,7 +35,6 @@ def predict(
|
|
|
37
35
|
src_stream=src_stream,
|
|
38
36
|
res_embedding_location=res_embedding_location,
|
|
39
37
|
src_location=src_location,
|
|
40
|
-
structure_location=structure_location,
|
|
41
38
|
structure_format=structure_format,
|
|
42
39
|
min_res_n=min_res_n,
|
|
43
40
|
max_res_n=max_res_n
|
|
@@ -6,7 +6,7 @@ from rcsb_embedding_model.dataset.residue_embedding_from_structure import Residu
|
|
|
6
6
|
from rcsb_embedding_model.dataset.residue_embedding_from_tensor_file import ResidueEmbeddingFromTensorFile
|
|
7
7
|
from rcsb_embedding_model.modules.chain_module import ChainModule
|
|
8
8
|
from rcsb_embedding_model.types.api_types import Accelerator, Devices, OptionalPath, FileOrStreamTuple, SrcLocation, \
|
|
9
|
-
SrcTensorFrom, StructureLocation, StructureFormat, OutFormat
|
|
9
|
+
SrcTensorFrom, StructureFormat, OutFormat
|
|
10
10
|
from rcsb_embedding_model.utils.data import collate_seq_embeddings
|
|
11
11
|
from rcsb_embedding_model.utils.model import get_aggregator_model
|
|
12
12
|
from rcsb_embedding_model.writer.batch_writer import CsvBatchWriter, JsonStorage
|
|
@@ -17,7 +17,6 @@ def predict(
|
|
|
17
17
|
res_embedding_location: OptionalPath = None,
|
|
18
18
|
src_location: SrcLocation = SrcLocation.file,
|
|
19
19
|
src_from: SrcTensorFrom = SrcTensorFrom.file,
|
|
20
|
-
structure_location: StructureLocation = StructureLocation.local,
|
|
21
20
|
structure_format: StructureFormat = StructureFormat.mmcif,
|
|
22
21
|
min_res_n: int = 0,
|
|
23
22
|
batch_size: int = 1,
|
|
@@ -39,7 +38,6 @@ def predict(
|
|
|
39
38
|
src_stream=src_stream,
|
|
40
39
|
res_embedding_location=res_embedding_location,
|
|
41
40
|
src_location=src_location,
|
|
42
|
-
structure_location=structure_location,
|
|
43
41
|
structure_format=structure_format,
|
|
44
42
|
min_res_n=min_res_n
|
|
45
43
|
)
|
|
@@ -5,7 +5,7 @@ from lightning import Trainer
|
|
|
5
5
|
from rcsb_embedding_model.dataset.esm_prot_from_structure import EsmProtFromStructure
|
|
6
6
|
from rcsb_embedding_model.dataset.esm_prot_from_chain import EsmProtFromChain
|
|
7
7
|
from rcsb_embedding_model.modules.esm_module import EsmModule
|
|
8
|
-
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, Devices, OptionalPath, StructureLocation, \
|
|
8
|
+
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, Devices, OptionalPath, \
|
|
9
9
|
SrcProteinFrom, FileOrStreamTuple, SrcLocation, OutFormat
|
|
10
10
|
from rcsb_embedding_model.utils.model import get_residue_model
|
|
11
11
|
from rcsb_embedding_model.writer.batch_writer import TensorBatchWriter, JsonStorage
|
|
@@ -15,7 +15,6 @@ def predict(
|
|
|
15
15
|
src_stream: FileOrStreamTuple,
|
|
16
16
|
src_location: SrcLocation = SrcLocation.file,
|
|
17
17
|
src_from: SrcProteinFrom = SrcProteinFrom.chain,
|
|
18
|
-
structure_location: StructureLocation = StructureLocation.local,
|
|
19
18
|
structure_format: StructureFormat = StructureFormat.mmcif,
|
|
20
19
|
min_res_n: int = 0,
|
|
21
20
|
batch_size: int = 1,
|
|
@@ -31,12 +30,10 @@ def predict(
|
|
|
31
30
|
inference_set = EsmProtFromChain(
|
|
32
31
|
src_stream=src_stream,
|
|
33
32
|
src_location=src_location,
|
|
34
|
-
structure_location=structure_location,
|
|
35
33
|
structure_format=structure_format
|
|
36
34
|
) if src_from == SrcProteinFrom.chain else EsmProtFromStructure(
|
|
37
35
|
src_stream=src_stream,
|
|
38
36
|
src_location=src_location,
|
|
39
|
-
structure_location=structure_location,
|
|
40
37
|
structure_format=structure_format,
|
|
41
38
|
min_res_n=min_res_n
|
|
42
39
|
)
|
|
@@ -5,7 +5,7 @@ from lightning import Trainer
|
|
|
5
5
|
from rcsb_embedding_model.dataset.esm_prot_from_structure import EsmProtFromStructure
|
|
6
6
|
from rcsb_embedding_model.dataset.esm_prot_from_chain import EsmProtFromChain
|
|
7
7
|
from rcsb_embedding_model.modules.structure_module import StructureModule
|
|
8
|
-
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, Devices, OptionalPath, StructureLocation, \
|
|
8
|
+
from rcsb_embedding_model.types.api_types import StructureFormat, Accelerator, Devices, OptionalPath, \
|
|
9
9
|
SrcProteinFrom, FileOrStreamTuple, SrcLocation
|
|
10
10
|
from rcsb_embedding_model.utils.model import get_residue_model, get_aggregator_model
|
|
11
11
|
from rcsb_embedding_model.writer.batch_writer import JsonStorage
|
|
@@ -15,7 +15,6 @@ def predict(
|
|
|
15
15
|
src_stream: FileOrStreamTuple,
|
|
16
16
|
src_location: SrcLocation = SrcLocation.file,
|
|
17
17
|
src_from: SrcProteinFrom = SrcProteinFrom.chain,
|
|
18
|
-
structure_location: StructureLocation = StructureLocation.local,
|
|
19
18
|
structure_format: StructureFormat = StructureFormat.mmcif,
|
|
20
19
|
min_res_n: int = 0,
|
|
21
20
|
batch_size: int = 1,
|
|
@@ -30,12 +29,10 @@ def predict(
|
|
|
30
29
|
inference_set = EsmProtFromChain(
|
|
31
30
|
src_stream=src_stream,
|
|
32
31
|
src_location=src_location,
|
|
33
|
-
structure_location=structure_location,
|
|
34
32
|
structure_format=structure_format
|
|
35
33
|
) if src_from == SrcProteinFrom.chain else EsmProtFromStructure(
|
|
36
34
|
src_stream=src_stream,
|
|
37
35
|
src_location=src_location,
|
|
38
|
-
structure_location=structure_location,
|
|
39
36
|
structure_format=structure_format,
|
|
40
37
|
min_res_n=min_res_n
|
|
41
38
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rcsb-embedding-model
|
|
3
|
-
Version: 0.0.34
|
|
3
|
+
Version: 0.0.36
|
|
4
4
|
Summary: Protein Embedding Model for Structure Search
|
|
5
5
|
Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
|
|
6
6
|
Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
|
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
rcsb_embedding_model/__init__.py,sha256=7YfYO-V-u__19eAZfQ3t5Gf2qrhd_gwQB8rHO0J0puw,306
|
|
2
2
|
rcsb_embedding_model/rcsb_structure_embedding.py,sha256=dKp9hXQO0JAnO4SEfjJ_mG_jHu3UxAPguv6jkOjp-BI,4487
|
|
3
3
|
rcsb_embedding_model/cli/args_utils.py,sha256=7nP2q8pL5dWK_U7opxtWmoFcYVwasky6elHk-dASFaI,165
|
|
4
|
-
rcsb_embedding_model/cli/inference.py,sha256=
|
|
5
|
-
rcsb_embedding_model/dataset/esm_prot_from_chain.py,sha256=
|
|
6
|
-
rcsb_embedding_model/dataset/esm_prot_from_structure.py,sha256=
|
|
7
|
-
rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py,sha256=
|
|
8
|
-
rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py,sha256=
|
|
9
|
-
rcsb_embedding_model/dataset/residue_embedding_from_structure.py,sha256=
|
|
4
|
+
rcsb_embedding_model/cli/inference.py,sha256=cXYaais4A3rVAkiucMdJxrYVxezKti8hL3DogBU0_2c,18788
|
|
5
|
+
rcsb_embedding_model/dataset/esm_prot_from_chain.py,sha256=_DYWLDEc492nhUdFRAQjwh0romF9iMwydFNi43-r0TY,4345
|
|
6
|
+
rcsb_embedding_model/dataset/esm_prot_from_structure.py,sha256=VU9BxNUApZ-pus_vmFGEU4eplcCH0fO7KBdic6X_NOM,2546
|
|
7
|
+
rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py,sha256=9iO7ZUcxl0TIBiwNieqjZFfnM7-7V3pl5abYiLzIY0I,2794
|
|
8
|
+
rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py,sha256=6bMjb0hfNbrTOqstnUVHbegw0xeUo7s6INnRsvP7V3I,3663
|
|
9
|
+
rcsb_embedding_model/dataset/residue_embedding_from_structure.py,sha256=tFHiXqGceZjAoYfVkeXG3sa2mz0gd5XBfm9EpJswcWI,2830
|
|
10
10
|
rcsb_embedding_model/dataset/residue_embedding_from_tensor_file.py,sha256=4OPaw55yGKHjY2iPpCnemcfwfmTZ4j5VrGQ2oIMQw6A,1343
|
|
11
|
-
rcsb_embedding_model/
|
|
12
|
-
rcsb_embedding_model/
|
|
13
|
-
rcsb_embedding_model/inference/
|
|
14
|
-
rcsb_embedding_model/inference/
|
|
11
|
+
rcsb_embedding_model/dataset/untils/__init__.py,sha256=O3WOukwvaKJvHUTALD3eYNHRacJo8o5BW7-ZulLZ65g,116
|
|
12
|
+
rcsb_embedding_model/dataset/untils/utils.py,sha256=SPiQ9aO2WLictO4R2JiNlo2ChhlANNMeIhbN0kq11kQ,578
|
|
13
|
+
rcsb_embedding_model/inference/assembly_inferece.py,sha256=b-mAfOJOO-s6gilOedZpaM90OTbhm_RQVqh2zKFG4dQ,2143
|
|
14
|
+
rcsb_embedding_model/inference/chain_inference.py,sha256=0HkV4EnLwg4ttQhf-xwOuSksZwEYDEChnHU4_A0xUXM,2782
|
|
15
|
+
rcsb_embedding_model/inference/esm_inference.py,sha256=nmHJYfSGjEqRPgb3l9s5fqtlyzdbAsiPz-OxHXBTgcI,2360
|
|
16
|
+
rcsb_embedding_model/inference/structure_inference.py,sha256=b44mY7VcCbjbtB35Mi9EhZoM18yyMaF579MKmzwB564,2405
|
|
15
17
|
rcsb_embedding_model/model/layers.py,sha256=lhKaWC4gTS_T5lHOP0mgnnP8nKTPEOm4MrjhESA4hE8,743
|
|
16
18
|
rcsb_embedding_model/model/residue_embedding_aggregator.py,sha256=k3UW63Ax8DtjCMdD3O5xNxtyAu28l2n3-Ab6nS0atm0,1967
|
|
17
19
|
rcsb_embedding_model/modules/chain_module.py,sha256=KsZw2uagO4rpAKWv6ivqEMxIEzgtfQFliHV_vX8kqtc,435
|
|
@@ -23,8 +25,8 @@ rcsb_embedding_model/utils/model.py,sha256=xr3p02ohOgJ5UInwdIupN68Oq4yvNFhxobZRa
|
|
|
23
25
|
rcsb_embedding_model/utils/structure_parser.py,sha256=fSIbq_a_aEigCWY_1dUcW9d9Law0ZDOcZAxJlZL0Rt8,3377
|
|
24
26
|
rcsb_embedding_model/utils/structure_provider.py,sha256=eWtxjkPpmRfmil_DKR1J6miaXR3lQ28DF5O0qrqSgGA,786
|
|
25
27
|
rcsb_embedding_model/writer/batch_writer.py,sha256=rTFNasB0Xp4-XCNTXKeEWZxSrb7lvZytoRldJUWn9Jg,3312
|
|
26
|
-
rcsb_embedding_model-0.0.
|
|
27
|
-
rcsb_embedding_model-0.0.
|
|
28
|
-
rcsb_embedding_model-0.0.
|
|
29
|
-
rcsb_embedding_model-0.0.
|
|
30
|
-
rcsb_embedding_model-0.0.
|
|
28
|
+
rcsb_embedding_model-0.0.36.dist-info/METADATA,sha256=spFNxlrrwMORe5Su0559-997by2cgkuk9-yEQlhew60,5351
|
|
29
|
+
rcsb_embedding_model-0.0.36.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
30
|
+
rcsb_embedding_model-0.0.36.dist-info/entry_points.txt,sha256=MK11jTIEmaV-x4CkPX5IymDaVs7Ky_f2xxU8BJVZ_9Q,69
|
|
31
|
+
rcsb_embedding_model-0.0.36.dist-info/licenses/LICENSE.md,sha256=oUaHiKgfBkChth_Sm67WemEvatO1U0Go8LHjaskXY0w,1522
|
|
32
|
+
rcsb_embedding_model-0.0.36.dist-info/RECORD,,
|
|
File without changes
|
{rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{rcsb_embedding_model-0.0.34.dist-info → rcsb_embedding_model-0.0.36.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|