rcsb-embedding-model 0.0.44__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. rcsb_embedding_model-0.0.44/.dockerignore +36 -0
  2. rcsb_embedding_model-0.0.44/.github/workflows/_workflow-docker.yaml +17 -0
  3. rcsb_embedding_model-0.0.44/.github/workflows/publish.yaml +91 -0
  4. rcsb_embedding_model-0.0.44/.gitignore +6 -0
  5. rcsb_embedding_model-0.0.44/Dockerfile +8 -0
  6. rcsb_embedding_model-0.0.44/LICENSE.md +4 -0
  7. rcsb_embedding_model-0.0.44/PKG-INFO +136 -0
  8. rcsb_embedding_model-0.0.44/README.md +111 -0
  9. rcsb_embedding_model-0.0.44/assets/embedding-model-architecture.png +0 -0
  10. rcsb_embedding_model-0.0.44/examples/esm_embeddings.py +23 -0
  11. rcsb_embedding_model-0.0.44/pyproject.toml +39 -0
  12. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/__init__.py +9 -0
  13. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/cli/args_utils.py +9 -0
  14. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/cli/inference.py +449 -0
  15. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/esm_prot_from_chain.py +118 -0
  16. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/esm_prot_from_structure.py +64 -0
  17. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/resdiue_assembly_embedding_from_structure.py +67 -0
  18. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/residue_assembly_embedding_from_tensor_file.py +100 -0
  19. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/residue_embedding_from_structure.py +67 -0
  20. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/residue_embedding_from_tensor_file.py +44 -0
  21. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/untils/__init__.py +4 -0
  22. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/dataset/untils/utils.py +17 -0
  23. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/inference/assembly_inferece.py +60 -0
  24. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/inference/chain_inference.py +83 -0
  25. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/inference/esm_inference.py +76 -0
  26. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/inference/structure_inference.py +79 -0
  27. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/model/layers.py +28 -0
  28. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/model/residue_embedding_aggregator.py +53 -0
  29. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/modules/chain_module.py +19 -0
  30. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/modules/esm_module.py +24 -0
  31. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/modules/structure_module.py +32 -0
  32. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/rcsb_structure_embedding.py +127 -0
  33. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/types/api_types.py +60 -0
  34. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/utils/data.py +171 -0
  35. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/utils/esm/loaders.py +65 -0
  36. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/utils/model.py +28 -0
  37. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/utils/structure_parser.py +100 -0
  38. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/utils/structure_provider.py +27 -0
  39. rcsb_embedding_model-0.0.44/src/rcsb_embedding_model/writer/batch_writer.py +131 -0
  40. rcsb_embedding_model-0.0.44/tests/resources/embeddings/1acb.A.pt +0 -0
  41. rcsb_embedding_model-0.0.44/tests/resources/embeddings/1acb.B.pt +0 -0
  42. rcsb_embedding_model-0.0.44/tests/resources/embeddings/2uzi.A.pt +0 -0
  43. rcsb_embedding_model-0.0.44/tests/resources/embeddings/2uzi.B.pt +0 -0
  44. rcsb_embedding_model-0.0.44/tests/resources/embeddings/2uzi.C.pt +0 -0
  45. rcsb_embedding_model-0.0.44/tests/resources/pdb/1acb.cif +5068 -0
  46. rcsb_embedding_model-0.0.44/tests/resources/pdb/2uzi.cif +6685 -0
  47. rcsb_embedding_model-0.0.44/tests/resources/src_stream/assembly-complete-test.csv +7 -0
  48. rcsb_embedding_model-0.0.44/tests/resources/src_stream/instance-complete-test.csv +10 -0
  49. rcsb_embedding_model-0.0.44/tests/resources/src_stream/instance.csv +2 -0
  50. rcsb_embedding_model-0.0.44/tests/test_cli_inference.py +48 -0
  51. rcsb_embedding_model-0.0.44/tests/test_embedding_model.py +43 -0
  52. rcsb_embedding_model-0.0.44/tests/test_inference.py +172 -0
  53. rcsb_embedding_model-0.0.44/tests/test_remote_inference.py +103 -0
@@ -0,0 +1,36 @@
1
+ # Only list files that could be under version control or could be created in CI/CD.
2
+
3
+ # Note: Patterns are absolute, like `.prettierignore` but unlike `.gitignore`.
4
+
5
+ # Gitignored files
6
+ **/.*
7
+ **/[~#$]*
8
+ **/*[~#$]
9
+
10
+ # Gitignored directories
11
+ node_modules/
12
+
13
+ # Directories
14
+ /dist/
15
+ /tests/
16
+ /assets/
17
+ /examples/
18
+
19
+ # Files in the root directory
20
+ /*.md
21
+ /*.txt
22
+ /CITATION.cff
23
+ /compose.yaml
24
+ /justfile
25
+ /mkdocs.yaml
26
+
27
+ # Keep README.md (needed for build)
28
+ !/README.md
29
+
30
+ # Keep .dockerignore and .gitignore
31
+ !/.dockerignore
32
+ !/.gitignore
33
+
34
+ # Keep legal files
35
+ !/LICENSE.*
36
+ !/NOTICE.txt
@@ -0,0 +1,17 @@
1
+ # Docker build and push workflow
2
+
3
+ name: Run CI/CD Docker Workflow
4
+
5
+ on:
6
+ workflow_call:
7
+
8
+ jobs:
9
+ run-workflow:
10
+ if: github.event_name == 'release'
11
+ name: "Run automated docker workflow"
12
+ uses: rcsb/devops-cicd-github-actions/.github/workflows/workflow-docker.yaml@master
13
+ with:
14
+ dockerfile_location: "Dockerfile" # The location of the Dockerfile relative to the root of the repository. Defaults to "Dockerfile".
15
+ repo_project: "rcsb" # REQUIRED. The name of the project or organization in the remote Docker image repository.
16
+ docker_image_name: "rcsb-embedding-model" # REQUIRED. The name of the Docker image to create.
17
+ docker_build_context: "." # The path location of the docker build context, relative to the project root. Defaults to the project root.
@@ -0,0 +1,91 @@
1
+ name: CI Pipeline
2
+
3
+ on:
4
+ push:
5
+ branches: [master]
6
+ pull_request:
7
+ branches: [master]
8
+ release:
9
+ types: [published]
10
+
11
+ jobs:
12
+ hatch-test:
13
+ name: Test on Python ${{ matrix.python-version }}
14
+ runs-on: ["self-hosted", "buildchain"]
15
+ timeout-minutes: 20
16
+ strategy:
17
+ matrix:
18
+ python-version: ["3.11"]
19
+ steps:
20
+ - name: Checkout code
21
+ uses: actions/checkout@v4
22
+
23
+ - name: Install build dependencies
24
+ run: |
25
+ sudo apt-get update
26
+ sudo apt-get install -y build-essential pkg-config libzstd-dev
27
+
28
+ - name: Set up Python ${{ matrix.python-version }}
29
+ uses: actions/setup-python@v4
30
+ with:
31
+ python-version: ${{ matrix.python-version }}
32
+
33
+ - name: Install Hatch and Hugging Face Hub CLI
34
+ run: pip install hatch huggingface_hub[cli]
35
+
36
+ - name: Run tests
37
+ run: hatch test
38
+
39
+ hatch-build:
40
+ name: Build to PyPI
41
+ needs: hatch-test
42
+ runs-on: ubuntu-latest
43
+ if: github.event_name == 'release'
44
+ steps:
45
+ - name: Checkout code
46
+ uses: actions/checkout@v4
47
+
48
+ - name: Set up Python 3.10
49
+ uses: actions/setup-python@v4
50
+ with:
51
+ python-version: "3.10"
52
+
53
+ - name: Install Hatch
54
+ run: pip install hatch
55
+
56
+ - name: Build distribution
57
+ run: hatch build
58
+
59
+ - name: Store the distribution packages
60
+ uses: actions/upload-artifact@v4
61
+ with:
62
+ name: python-package-distributions
63
+ path: dist/
64
+
65
+ publish-to-pypi:
66
+ name: >-
67
+ Publish Python 🐍 distribution 📦 to PyPI
68
+ if: github.event_name == 'release'
69
+ needs:
70
+ - hatch-build
71
+ runs-on: ubuntu-latest
72
+ environment:
73
+ name: pypi
74
+ url: https://pypi.org/p/rcsb-embedding-model
75
+ permissions:
76
+ id-token: write
77
+
78
+ steps:
79
+ - name: Download all the dists
80
+ uses: actions/download-artifact@v4
81
+ with:
82
+ name: python-package-distributions
83
+ path: dist/
84
+ - name: Publish distribution 📦 to PyPI
85
+ uses: pypa/gh-action-pypi-publish@release/v1
86
+
87
+ push-image:
88
+ needs:
89
+ - hatch-test
90
+ name: Push image to harbor
91
+ uses: ./.github/workflows/_workflow-docker.yaml
@@ -0,0 +1,6 @@
1
+ /.idea
2
+ /rcsb-embedding-model.iml
3
+ /dist/
4
+ /.pypi.rc
5
+ __pycache__
6
+ /tests/resources/tmp
@@ -0,0 +1,8 @@
1
+ FROM python:3.11
2
+
3
+ WORKDIR /app
4
+ COPY . /app/
5
+
6
+ RUN pip install --no-cache-dir -e .
7
+
8
+ ENTRYPOINT ["inference"]
@@ -0,0 +1,4 @@
1
+ # Cambrian Non-Commercial License Agreement
2
+
3
+ This project is licensed under the EvolutionaryScale Cambrian Non-Commercial License Agreement.
4
+ See: https://www.evolutionaryscale.ai/policies/cambrian-non-commercial-license-agreement
@@ -0,0 +1,136 @@
1
+ Metadata-Version: 2.4
2
+ Name: rcsb-embedding-model
3
+ Version: 0.0.44
4
+ Summary: Protein Embedding Model for Structure Search
5
+ Project-URL: Homepage, https://github.com/rcsb/rcsb-embedding-model
6
+ Project-URL: Issues, https://github.com/rcsb/rcsb-embedding-model/issues
7
+ Author-email: Joan Segura <joan.segura@rcsb.org>
8
+ License: # Cambrian Non-Commercial License Agreement
9
+
10
+ This project is licensed under the EvolutionaryScale Cambrian Non-Commercial License Agreement.
11
+ See: https://www.evolutionaryscale.ai/policies/cambrian-non-commercial-license-agreement
12
+ License-File: LICENSE.md
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Requires-Python: >=3.11
16
+ Requires-Dist: biotite>=1.5.0
17
+ Requires-Dist: esm>=3.2.0
18
+ Requires-Dist: hf-xet>=1.1.10
19
+ Requires-Dist: httpx>=0.28.1
20
+ Requires-Dist: huggingface-hub>=0.30.2
21
+ Requires-Dist: importlib-metadata>=8.7.0
22
+ Requires-Dist: lightning>=2.5.0
23
+ Requires-Dist: typer>=0.15.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # RCSB Embedding Model
27
+
28
+ **Version** 0.0.44
29
+
30
+
31
+ ## Overview
32
+
33
+ RCSB Embedding Model is a neural network architecture designed to encode macromolecular 3D structures into fixed-length vector embeddings for efficient large-scale structure similarity search.
34
+
35
+ Preprint: [Multi-scale structural similarity embedding search across entire proteomes](https://www.biorxiv.org/content/10.1101/2025.02.28.640875v1).
36
+
37
+ A web-based implementation using this model for structure similarity search is available at [rcsb-embedding-search](http://embedding-search.rcsb.org).
38
+
39
+ If you are interested in training the model with a new dataset, visit the [rcsb-embedding-search repository](https://github.com/bioinsilico/rcsb-embedding-search), which provides scripts and documentation for training.
40
+
41
+
42
+ ## Features
43
+
44
+ - **Residue-level embeddings** computed using the ESM3 protein language model
45
+ - **Structure-level embeddings** aggregated via a transformer-based aggregator network
46
+ - **Command-line interface** implemented with Typer for high-throughput inference workflows
47
+ - **Python API** for interactive embedding computation and integration into analysis pipelines
48
+ - **High-performance inference** leveraging PyTorch Lightning, with multi-node and multi-GPU support
49
+
50
+ ---
51
+
52
+ ## Installation
53
+
54
+ pip install rcsb-embedding-model
55
+
56
+ **Requirements:**
57
+
58
+ - Python ≥ 3.11
59
+ - ESM >= 3.2.0
60
+ - Lightning ≥ 2.5.0
61
+ - Typer ≥ 0.15.0
62
+
63
+ ---
64
+
65
+ ## Quick Start
66
+
67
+ ### CLI
68
+
69
+ # 1. Compute residue embeddings: Calculate residue level embeddings of protein structures using ESM3. Predictions are stored as torch tensor files.
70
+ inference residue-embedding --src-file data/structures.csv --output-path results/residue_embeddings --structure-format mmcif --batch-size 8 --devices auto
71
+
72
+ # 2. Compute structure embeddings: Calculate single-chain protein embeddings from structural files. Predictions are stored in a single pandas DataFrame file.
73
+ inference structure-embedding --src-file data/structures.csv --output-path results/residue_embeddings --out-df-name df-res-embeddings --batch-size 4 --devices 0 --devices 1
74
+
75
+ # 3. Compute chain embeddings: Calculate single-chain protein embeddings from residue level embeddings stored as torch tensor files. Predictions are stored as csv files.
76
+ inference chain-embedding --src-file data/structures.csv --output-path results/chain_embeddings --batch-size 4
77
+
78
+ # 4. Compute assembly embeddings: Calculate assembly embeddings from residue level embeddings stored as torch tensor files. Predictions are stored as csv files.
79
+ inference assembly-embedding --src-file data/structures.csv --res-embedding-location results/residue_embeddings --output-path results/assembly_embeddings
80
+
81
+ ### Python API
82
+
83
+ from rcsb_embedding_model import RcsbStructureEmbedding
84
+
85
+ model = RcsbStructureEmbedding()
86
+
87
+ # Compute per-residue embeddings
88
+ res_emb = model.residue_embedding(
89
+ src_structure="examples/1abc.cif",
90
+ src_format="mmcif",
91
+ chain_id="A"
92
+ )
93
+
94
+ # Aggregate to structure-level embedding
95
+ struct_emb = model.aggregator_embedding(res_emb)
96
+
97
+ See the examples and tests directories for more use cases.
98
+
99
+ ---
100
+
101
+ ## Model Architecture
102
+
103
+ The embedding model is trained to predict structural similarity by approximating TM-scores using cosine distances between embeddings. It consists of two main components:
104
+
105
+ - **Protein Language Model (PLM)**: Computes residue-level embeddings from a given 3D structure.
106
+ - **Residue Embedding Aggregator**: A transformer-based neural network that aggregates these residue-level embeddings into a single vector.
107
+
108
+ ![Embedding model architecture](assets/embedding-model-architecture.png)
109
+
110
+ ### **Protein Language Model (PLM)**
111
+ Residue-wise embeddings of protein structures are computed using the [ESM3](https://www.evolutionaryscale.ai/) generative protein language model.
112
+
113
+ ### **Residue Embedding Aggregator**
114
+ The aggregation component consists of six transformer encoder layers, each with a 3,072-neuron feedforward layer and ReLU activations. After processing through these layers, a summation pooling operation is applied, followed by 12 fully connected residual layers that refine the embeddings into a single 1,536-dimensional vector.
115
+
116
+ ---
117
+
118
+ ## Development
119
+
120
+ git clone https://github.com/rcsb/rcsb-embedding-model.git
121
+ cd rcsb-embedding-model
122
+ pip install -e .
123
+ pytest
124
+
125
+ ---
126
+
127
+ ## Citation
128
+
129
+ Segura, J., Bittrich, S., et al. (2025). *Multi-scale structural similarity embedding search across entire proteomes*. bioRxiv. (Preprint: https://www.biorxiv.org/content/10.1101/2025.02.28.640875v1)
130
+
131
+ ---
132
+
133
+ ## License
134
+
135
+ This project uses the EvolutionaryScale ESM-3 model and is distributed under the
136
+ [Cambrian Non-Commercial License Agreement](https://www.evolutionaryscale.ai/policies/cambrian-non-commercial-license-agreement).
@@ -0,0 +1,111 @@
1
+ # RCSB Embedding Model
2
+
3
+ **Version** 0.0.44
4
+
5
+
6
+ ## Overview
7
+
8
+ RCSB Embedding Model is a neural network architecture designed to encode macromolecular 3D structures into fixed-length vector embeddings for efficient large-scale structure similarity search.
9
+
10
+ Preprint: [Multi-scale structural similarity embedding search across entire proteomes](https://www.biorxiv.org/content/10.1101/2025.02.28.640875v1).
11
+
12
+ A web-based implementation using this model for structure similarity search is available at [rcsb-embedding-search](http://embedding-search.rcsb.org).
13
+
14
+ If you are interested in training the model with a new dataset, visit the [rcsb-embedding-search repository](https://github.com/bioinsilico/rcsb-embedding-search), which provides scripts and documentation for training.
15
+
16
+
17
+ ## Features
18
+
19
+ - **Residue-level embeddings** computed using the ESM3 protein language model
20
+ - **Structure-level embeddings** aggregated via a transformer-based aggregator network
21
+ - **Command-line interface** implemented with Typer for high-throughput inference workflows
22
+ - **Python API** for interactive embedding computation and integration into analysis pipelines
23
+ - **High-performance inference** leveraging PyTorch Lightning, with multi-node and multi-GPU support
24
+
25
+ ---
26
+
27
+ ## Installation
28
+
29
+ pip install rcsb-embedding-model
30
+
31
+ **Requirements:**
32
+
33
+ - Python ≥ 3.11
34
+ - ESM >= 3.2.0
35
+ - Lightning ≥ 2.5.0
36
+ - Typer ≥ 0.15.0
37
+
38
+ ---
39
+
40
+ ## Quick Start
41
+
42
+ ### CLI
43
+
44
+ # 1. Compute residue embeddings: Calculate residue level embeddings of protein structures using ESM3. Predictions are stored as torch tensor files.
45
+ inference residue-embedding --src-file data/structures.csv --output-path results/residue_embeddings --structure-format mmcif --batch-size 8 --devices auto
46
+
47
+ # 2. Compute structure embeddings: Calculate single-chain protein embeddings from structural files. Predictions are stored in a single pandas DataFrame file.
48
+ inference structure-embedding --src-file data/structures.csv --output-path results/residue_embeddings --out-df-name df-res-embeddings --batch-size 4 --devices 0 --devices 1
49
+
50
+ # 3. Compute chain embeddings: Calculate single-chain protein embeddings from residue level embeddings stored as torch tensor files. Predictions are stored as csv files.
51
+ inference chain-embedding --src-file data/structures.csv --output-path results/chain_embeddings --batch-size 4
52
+
53
+ # 4. Compute assembly embeddings: Calculate assembly embeddings from residue level embeddings stored as torch tensor files. Predictions are stored as csv files.
54
+ inference assembly-embedding --src-file data/structures.csv --res-embedding-location results/residue_embeddings --output-path results/assembly_embeddings
55
+
56
+ ### Python API
57
+
58
+ from rcsb_embedding_model import RcsbStructureEmbedding
59
+
60
+ model = RcsbStructureEmbedding()
61
+
62
+ # Compute per-residue embeddings
63
+ res_emb = model.residue_embedding(
64
+ src_structure="examples/1abc.cif",
65
+ src_format="mmcif",
66
+ chain_id="A"
67
+ )
68
+
69
+ # Aggregate to structure-level embedding
70
+ struct_emb = model.aggregator_embedding(res_emb)
71
+
72
+ See the examples and tests directories for more use cases.
73
+
74
+ ---
75
+
76
+ ## Model Architecture
77
+
78
+ The embedding model is trained to predict structural similarity by approximating TM-scores using cosine distances between embeddings. It consists of two main components:
79
+
80
+ - **Protein Language Model (PLM)**: Computes residue-level embeddings from a given 3D structure.
81
+ - **Residue Embedding Aggregator**: A transformer-based neural network that aggregates these residue-level embeddings into a single vector.
82
+
83
+ ![Embedding model architecture](assets/embedding-model-architecture.png)
84
+
85
+ ### **Protein Language Model (PLM)**
86
+ Residue-wise embeddings of protein structures are computed using the [ESM3](https://www.evolutionaryscale.ai/) generative protein language model.
87
+
88
+ ### **Residue Embedding Aggregator**
89
+ The aggregation component consists of six transformer encoder layers, each with a 3,072-neuron feedforward layer and ReLU activations. After processing through these layers, a summation pooling operation is applied, followed by 12 fully connected residual layers that refine the embeddings into a single 1,536-dimensional vector.
90
+
91
+ ---
92
+
93
+ ## Development
94
+
95
+ git clone https://github.com/rcsb/rcsb-embedding-model.git
96
+ cd rcsb-embedding-model
97
+ pip install -e .
98
+ pytest
99
+
100
+ ---
101
+
102
+ ## Citation
103
+
104
+ Segura, J., Bittrich, S., et al. (2025). *Multi-scale structural similarity embedding search across entire proteomes*. bioRxiv. (Preprint: https://www.biorxiv.org/content/10.1101/2025.02.28.640875v1)
105
+
106
+ ---
107
+
108
+ ## License
109
+
110
+ This project uses the EvolutionaryScale ESM-3 model and is distributed under the
111
+ [Cambrian Non-Commercial License Agreement](https://www.evolutionaryscale.ai/policies/cambrian-non-commercial-license-agreement).
@@ -0,0 +1,23 @@
1
+ import argparse
2
+
3
+ from rcsb_embedding_model import RcsbStructureEmbedding
4
+
5
if __name__ == "__main__":
    # Example: compute residue-level embeddings for a structure file and
    # aggregate them into one structure-level embedding, then print both shapes.

    # Only --file is mandatory; --file_format and --chain are passed through
    # as None when omitted.
    cli = argparse.ArgumentParser()
    cli.add_argument('--file', type=str, required=True)
    cli.add_argument('--file_format', type=str)
    cli.add_argument('--chain', type=str)
    options = cli.parse_args()

    embedder = RcsbStructureEmbedding()

    # Step 1: per-residue embeddings for the requested structure / chain.
    residue_level = embedder.residue_embedding(
        src_structure=options.file,
        src_format=options.file_format,
        chain_id=options.chain,
    )

    # Step 2: feed the residue embeddings to the aggregator.
    structure_level = embedder.aggregator_embedding(residue_level)

    print(residue_level.shape, structure_level.shape)
@@ -0,0 +1,39 @@
1
+ [project]
2
+ name = "rcsb-embedding-model"
3
+ version = "0.0.44"
4
+ authors = [
5
+ { name="Joan Segura", email="joan.segura@rcsb.org" },
6
+ ]
7
+ description = "Protein Embedding Model for Structure Search"
8
+ readme = "README.md"
9
+ requires-python = ">=3.11"
10
+ classifiers = [
11
+ "Programming Language :: Python :: 3",
12
+ "Operating System :: OS Independent",
13
+ ]
14
+ license = {file = "LICENSE.md"}
15
+ license-files = ["LICEN[CS]E*"]
16
+ dependencies=[
17
+ "importlib-metadata >= 8.7.0",
18
+ "esm >= 3.2.0",
19
+ "biotite >= 1.5.0",
20
+ "lightning >= 2.5.0",
21
+ "typer >= 0.15.0",
22
+ "hf-xet >= 1.1.10",
23
+ "huggingface-hub >= 0.30.2",
24
+ "httpx >= 0.28.1"
25
+ ]
26
+
27
+ [project.urls]
28
+ Homepage = "https://github.com/rcsb/rcsb-embedding-model"
29
+ Issues = "https://github.com/rcsb/rcsb-embedding-model/issues"
30
+
31
+ [build-system]
32
+ requires = [
33
+ "hatchling >= 1.14.1"
34
+ ]
35
+
36
+ build-backend = "hatchling.build"
37
+
38
+ [project.scripts]
39
+ inference = "rcsb_embedding_model.cli.inference:app"
@@ -0,0 +1,9 @@
1
+ from importlib_metadata import version, PackageNotFoundError
2
+ from rcsb_embedding_model.rcsb_structure_embedding import RcsbStructureEmbedding
3
+
4
# Public names exported by the package.
__all__ = ["RcsbStructureEmbedding", "__version__"]

# Resolve the package version from the installed distribution metadata.
# If no distribution named "rcsb-embedding-model" is found, the lookup raises
# PackageNotFoundError and a placeholder version is used instead.
try:
    _resolved_version = version("rcsb-embedding-model")
except PackageNotFoundError:
    _resolved_version = "0.0.0"

__version__ = _resolved_version
@@ -0,0 +1,9 @@
1
+
2
+
3
def arg_devices(devices):
    """Normalize a sequence of device option values.

    :param devices: indexable, sized sequence of raw values (for example the
        strings collected from a repeated command-line option).
    :return: when exactly one value is given, that value unchanged if it
        equals ``"auto"``, otherwise the value converted with ``int``; for any
        other number of values (including none), a list with every value
        converted with ``int``.
    :raises ValueError: propagated from ``int`` when a value that has to be
        converted is not a valid integer literal.
    """
    # Zero or several values: always an explicit list of integers.
    if len(devices) != 1:
        return [int(item) for item in devices]

    # Exactly one value: keep the "auto" keyword, otherwise a bare integer.
    sole = devices[0]
    if sole == "auto":
        return sole
    return int(sole)
7
+
8
+
9
+