atomworks 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomworks/__init__.py +48 -0
- atomworks/biotite_patch.py +421 -0
- atomworks/cli/__init__.py +17 -0
- atomworks/cli/__main__.py +13 -0
- atomworks/cli/ccd.py +262 -0
- atomworks/cli/pdb.py +228 -0
- atomworks/cli/setup.py +134 -0
- atomworks/enums.py +280 -0
- atomworks/io/__init__.py +21 -0
- atomworks/io/common.py +97 -0
- atomworks/io/constants.py +690 -0
- atomworks/io/parser.py +765 -0
- atomworks/io/template.py +487 -0
- atomworks/io/tools/chembl_transformations.smirks +53 -0
- atomworks/io/tools/fasta.py +83 -0
- atomworks/io/tools/inference.py +756 -0
- atomworks/io/tools/rdkit.py +816 -0
- atomworks/io/transforms/atom_array.py +797 -0
- atomworks/io/transforms/categories.py +407 -0
- atomworks/io/utils/__init__.py +0 -0
- atomworks/io/utils/assembly.py +199 -0
- atomworks/io/utils/atom_array.py +43 -0
- atomworks/io/utils/atom_array_plus.py +902 -0
- atomworks/io/utils/bonds.py +1000 -0
- atomworks/io/utils/ccd.py +614 -0
- atomworks/io/utils/chain.py +38 -0
- atomworks/io/utils/error.py +111 -0
- atomworks/io/utils/io_utils.py +884 -0
- atomworks/io/utils/non_rcsb.py +221 -0
- atomworks/io/utils/query.py +394 -0
- atomworks/io/utils/scatter.py +217 -0
- atomworks/io/utils/selection.py +464 -0
- atomworks/io/utils/sequence.py +184 -0
- atomworks/io/utils/testing.py +367 -0
- atomworks/io/utils/visualize.py +366 -0
- atomworks/ml/__init__.py +16 -0
- atomworks/ml/common.py +73 -0
- atomworks/ml/datasets/README.md +186 -0
- atomworks/ml/datasets/__init__.py +3 -0
- atomworks/ml/datasets/datasets.py +720 -0
- atomworks/ml/datasets/parsers/__init__.py +3 -0
- atomworks/ml/datasets/parsers/base.py +144 -0
- atomworks/ml/datasets/parsers/custom_metadata_row_parsers.py +146 -0
- atomworks/ml/datasets/parsers/default_metadata_row_parsers.py +299 -0
- atomworks/ml/encoding_definitions.py +752 -0
- atomworks/ml/executables/__init__.py +335 -0
- atomworks/ml/executables/x3dna.py +62 -0
- atomworks/ml/loaders/worksteal.py +652 -0
- atomworks/ml/pipelines/__init__.py +0 -0
- atomworks/ml/pipelines/af3.py +403 -0
- atomworks/ml/pipelines/rf2aa.py +475 -0
- atomworks/ml/preprocessing/__init__.py +6 -0
- atomworks/ml/preprocessing/constants.py +130 -0
- atomworks/ml/preprocessing/get_pn_unit_data_from_structure.py +471 -0
- atomworks/ml/preprocessing/utils/clustering.py +269 -0
- atomworks/ml/preprocessing/utils/fasta.py +62 -0
- atomworks/ml/preprocessing/utils/structure_utils.py +521 -0
- atomworks/ml/samplers.py +535 -0
- atomworks/ml/transforms/_checks.py +62 -0
- atomworks/ml/transforms/af3_reference_molecule.py +638 -0
- atomworks/ml/transforms/atom_array.py +763 -0
- atomworks/ml/transforms/atom_frames.py +428 -0
- atomworks/ml/transforms/atom_level_embeddings.py +218 -0
- atomworks/ml/transforms/atomize.py +211 -0
- atomworks/ml/transforms/base.py +851 -0
- atomworks/ml/transforms/bfactor_conditioned_transforms.py +57 -0
- atomworks/ml/transforms/bonds.py +361 -0
- atomworks/ml/transforms/cached_residue_data.py +283 -0
- atomworks/ml/transforms/center_random_augmentation.py +44 -0
- atomworks/ml/transforms/chirals.py +446 -0
- atomworks/ml/transforms/covalent_modifications.py +112 -0
- atomworks/ml/transforms/crop.py +690 -0
- atomworks/ml/transforms/diffusion/batch_structures.py +50 -0
- atomworks/ml/transforms/diffusion/edm.py +72 -0
- atomworks/ml/transforms/dna/pad_dna.py +712 -0
- atomworks/ml/transforms/encoding.py +522 -0
- atomworks/ml/transforms/feature_aggregation/af3.py +172 -0
- atomworks/ml/transforms/feature_aggregation/confidence.py +63 -0
- atomworks/ml/transforms/feature_aggregation/rf2aa.py +396 -0
- atomworks/ml/transforms/featurize_unresolved_residues.py +396 -0
- atomworks/ml/transforms/filters.py +588 -0
- atomworks/ml/transforms/masks.py +122 -0
- atomworks/ml/transforms/mirror_transform.py +86 -0
- atomworks/ml/transforms/msa/_msa_constants.py +153 -0
- atomworks/ml/transforms/msa/_msa_featurizing_utils.py +360 -0
- atomworks/ml/transforms/msa/_msa_loading_utils.py +276 -0
- atomworks/ml/transforms/msa/_msa_pairing_utils.py +446 -0
- atomworks/ml/transforms/msa/msa.py +1124 -0
- atomworks/ml/transforms/openbabel_utils.py +628 -0
- atomworks/ml/transforms/random_atomize_residues.py +172 -0
- atomworks/ml/transforms/rdkit_utils.py +737 -0
- atomworks/ml/transforms/rf2aa_assumptions.py +249 -0
- atomworks/ml/transforms/sasa.py +156 -0
- atomworks/ml/transforms/symmetry.py +1039 -0
- atomworks/ml/transforms/template.py +1096 -0
- atomworks/ml/utils/__init__.py +0 -0
- atomworks/ml/utils/debug.py +70 -0
- atomworks/ml/utils/error.py +111 -0
- atomworks/ml/utils/geometry.py +333 -0
- atomworks/ml/utils/io.py +475 -0
- atomworks/ml/utils/misc.py +425 -0
- atomworks/ml/utils/nested_dict.py +157 -0
- atomworks/ml/utils/numpy.py +211 -0
- atomworks/ml/utils/rng.py +228 -0
- atomworks/ml/utils/testing.py +46 -0
- atomworks/ml/utils/timer.py +229 -0
- atomworks/ml/utils/token.py +395 -0
- atomworks/py.typed +0 -0
- atomworks-1.0.0.dist-info/METADATA +192 -0
- atomworks-1.0.0.dist-info/RECORD +113 -0
- atomworks-1.0.0.dist-info/WHEEL +4 -0
- atomworks-1.0.0.dist-info/entry_points.txt +2 -0
- atomworks-1.0.0.dist-info/licenses/LICENSE.md +28 -0
atomworks/__init__.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""
|
|
2
|
+
atomworks - Unified package for biological data I/O and machine learning.
|
|
3
|
+
|
|
4
|
+
This package combines functionality from atomworks.io (I/O operations) and atomworks.ml (ML utilities)
|
|
5
|
+
into a unified interface for biological data processing and machine learning.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib
|
|
9
|
+
import importlib.metadata
|
|
10
|
+
import logging
|
|
11
|
+
import os
|
|
12
|
+
import warnings
|
|
13
|
+
|
|
14
|
+
# Resolve the installed distribution version from package metadata.
try:
    __version__ = importlib.metadata.version("atomworks")
except ImportError:
    # `importlib.metadata.PackageNotFoundError` (raised when no installed
    # distribution named "atomworks" is found, e.g. when running from a plain
    # source checkout) is a subclass of `ImportError`, so it is handled here.
    __version__ = "unknown"
|
|
18
|
+
|
|
19
|
+
# Global logging configuration
# The level of the package logger is taken from the ATOMWORKS_LOG_LEVEL
# environment variable (case-insensitive level name, default: WARNING).
logger = logging.getLogger("atomworks")
_log_level = os.environ.get("ATOMWORKS_LOG_LEVEL", "WARNING").strip().upper()
try:
    logger.setLevel(_log_level)
except (TypeError, ValueError):
    # `Logger.setLevel` raises for unknown level names. A typo in an
    # environment variable must not make `import atomworks` fail, so fall
    # back to the default level and report the problem instead.
    logger.setLevel(logging.WARNING)
    logger.warning(
        "Ignoring invalid ATOMWORKS_LOG_LEVEL=%r; falling back to WARNING.",
        _log_level,
    )
    _log_level = "WARNING"
|
|
23
|
+
|
|
24
|
+
# Ensure that deprecation warnings are not repeated
# (this installs a process-wide filter: each distinct DeprecationWarning is shown once)
warnings.filterwarnings("once", category=DeprecationWarning)

# Apply monkey patching to extend AtomArray functionality
# NOTE: this runs as a side effect of `import atomworks`, before the subpackages
# below are imported.
from atomworks.biotite_patch import monkey_patch_biotite  # noqa: E402

monkey_patch_biotite()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# (`__version__` is resolved near the top of this module; nothing version-related is imported here.)
# Import subpackages
from . import io, ml  # noqa: E402

# Re-export key functionality from subpackages for convenience
# This maintains backward compatibility and provides a clean top-level API
# Key I/O functionality
from .io.parser import parse  # noqa: E402
|
|
41
|
+
|
|
42
|
+
# Public API of the top-level package. Every entry must be a name that is
# actually bound in this module, otherwise `from atomworks import *` raises
# AttributeError. The patch entry point is imported as `monkey_patch_biotite`.
__all__ = [
    "__version__",
    "io",
    "ml",
    "monkey_patch_biotite",
    "parse",
]
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
"""Collection of monkey patches for biotite."""
|
|
2
|
+
|
|
3
|
+
from typing import Callable
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import biotite
|
|
7
|
+
from biotite.structure import AtomArray, AtomArrayStack, Atom
|
|
8
|
+
import numpy as np
|
|
9
|
+
import biotite.structure as struc
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"monkey_patch_biotite",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
_HAS_BEEN_PATCHED = False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def apply_if_version_lt(version: str, min_version: str) -> Callable:
    """
    Decorator to apply a function only if the given version is less than the given minimal version.

    Only the numeric release components are compared. Pre/post/dev-release tags are
    ignored, so "0.38.5rc1" is treated as "0.38.5". Versions with a different number
    of components are padded with zeros before comparison, so "1.0" equals "1.0.0".

    Args:
        version: Version to check.
        min_version: Minimal semantic version (e.g. "0.38.0"). If the given version is lower, the
            decorated function is called; otherwise, it is a no-op.

    Example:
        @apply_if_version_lt(biotite.__version__, "0.38.0")
        def patch_bug():
            # Patch code here
            ...

    Returns:
        Decorator that conditionally applies the function. When the condition is not
        met, calling the decorated function does nothing and returns None.
    """
    from functools import wraps

    def version_tuple(version: str) -> tuple[int, ...]:
        # Collect the leading numeric release components and stop at the first
        # component that carries a tag ("0rc1", "dev3", "0+local", ...).
        release = []
        for part in version.split("."):
            digits = part[: len(part) - len(part.lstrip("0123456789"))]
            if not digits:
                break
            release.append(int(digits))
            if digits != part:
                # The numeric prefix was kept; everything after it is a tag.
                break
        return tuple(release)

    # Both versions are fixed at decoration time, so compare them only once.
    current = version_tuple(version)
    minimum = version_tuple(min_version)
    # Pad to equal length: otherwise (1, 0) < (1, 0, 0) would count as "older".
    width = max(len(current), len(minimum))
    current += (0,) * (width - len(current))
    minimum += (0,) * (width - len(minimum))
    should_apply = current < minimum

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if should_apply:
                return func(*args, **kwargs)
            return None

        return wrapper

    return decorator
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _add_query_mask_idxs_methods() -> None:
    """Attach `query`, `mask` and `idxs` as methods on `AtomArray` and `AtomArrayStack`.

    Each method is a thin wrapper that forwards to the function of the same name
    in `atomworks.io.utils.query`, passing the array (stack) as first argument.
    """
    from atomworks.io.utils.query import query, mask, idxs

    def query_method(self: AtomArray | AtomArrayStack, expr: str) -> AtomArray | AtomArrayStack:
        """
        Select atoms with a pandas-like query expression.

        Examples
        --------
        >>> # Using function calls
        >>> array.query("~has_nan_coord() & has_bonds()")

        >>> # Combining with regular attributes
        >>> array.query("has_bonds() & (chain_id == 'A') & (atom_name == 'CA')")
        """
        return query(self, expr)  # type: ignore

    def mask_method(self: AtomArray | AtomArrayStack, expr: str) -> np.ndarray:
        """
        Evaluate a pandas-like query expression and return the resulting boolean mask.
        """
        return mask(self, expr)  # type: ignore

    def idxs_method(self: AtomArray | AtomArrayStack, expr: str) -> np.ndarray:
        """
        Evaluate a pandas-like query expression and return the indices of the matching atoms.
        """
        return idxs(self, expr)  # type: ignore

    # The same function objects are installed on both classes.
    patched_methods = {
        "query": query_method,
        "mask": mask_method,
        "idxs": idxs_method,
    }
    for target_cls in (struc.AtomArray, struc.AtomArrayStack):
        for method_name, method in patched_methods.items():
            setattr(target_cls, method_name, method)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _enable_lean_atom_array_repr() -> None:
    """Improve the AtomArray representation to be leaner (only shows at most 20 atoms), for debugging.

    The patch is installed at most once: `AtomArray._repr_lean` marks it as applied
    and the previous `__repr__` stays reachable as `AtomArray._repr_original`.
    """
    if getattr(struc.AtomArray, "_repr_lean", False):
        # Already patched; never wrap the lean repr a second time.
        return

    original_repr = struc.AtomArray.__repr__
    n_edge = 10  # number of atoms shown at the start and at the end

    def lean_atom_array_repr(self: struc.AtomArray) -> str:
        """Lean AtomArray representation that only shows at most 20 atoms (first 10 and last 10)."""
        n_atoms = self.array_length()
        # Atoms with index in [n_edge, n_atoms - n_edge) are elided, i.e. exactly
        # n_atoms - 2 * n_edge of them (the previous message under-counted by one).
        n_hidden = n_atoms - 2 * n_edge
        if n_hidden > 0:
            head = range(n_edge)
            tail = range(n_atoms - n_edge, n_atoms)
        else:
            head = range(n_atoms)
            tail = range(0)

        atoms = ",\n\t".join(repr(self.get_atom(i)) for i in head)
        if atoms:
            atoms = "\n\t" + atoms
        if n_hidden > 0:
            atoms += "\n\t... (" + str(n_hidden) + " not shown) ..."
            atoms += "".join(",\n\t" + repr(self.get_atom(i)) for i in tail)
        return f"AtomArray([{atoms}\n])"

    setattr(struc.AtomArray, "__repr__", lean_atom_array_repr)
    setattr(struc.AtomArray, "_repr_original", original_repr)
    setattr(struc.AtomArray, "_repr_lean", True)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _enable_segment_slices_in_atom_arrays() -> None:
    """Let `AtomArray.__getitem__` accept `SegmentSlice` objects as index.

    A `SegmentSlice` index is first called with the array itself; whatever it
    returns is then handed to the original `__getitem__`. Any other index is
    passed through untouched. `AtomArray._getitem_new` marks the patch as
    installed and `AtomArray._getitem_original` keeps the previous implementation.
    """
    from atomworks.io.utils.selection import SegmentSlice

    if getattr(struc.AtomArray, "_getitem_new", False):
        # Patch is already in place.
        return

    original_getitem = struc.AtomArray.__getitem__

    def getitem_with_segment_slices(self, item):
        index = item(self) if isinstance(item, SegmentSlice) else item
        return original_getitem(self, index)

    struc.AtomArray.__getitem__ = getitem_with_segment_slices
    struc.AtomArray._getitem_original = original_getitem
    struc.AtomArray._getitem_new = True
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _update_get_residue_starts() -> None:
    """Rebind `biotite.structure.get_residue_starts` to the AtomWorks implementation.

    The replacement comes from `atomworks.io.utils.selection` and is meant to
    disambiguate symmetry copies.
    """
    from atomworks.io.utils.selection import (  # noqa: E402
        get_residue_starts as atomworks_get_residue_starts,
    )

    setattr(struc, "get_residue_starts", atomworks_get_residue_starts)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _update_array() -> None:
    """Improve the `array` function to not truncate the datatype of annotations.

    Replaces `biotite.structure.array` with the nested `array` function below.
    """

    def array(atoms: list[Atom]) -> AtomArray:
        """Patch of Biotite's `array` function to not truncate the datatype of annotations.

        The dtype of each annotation is taken from the first atom. For string and
        bytes annotations only the *kind* is kept and the width is inferred from all
        atoms, because a numpy string scalar reports a dtype that is exactly as wide
        as itself (e.g. ``np.str_("N").dtype`` is ``<U1``) and would truncate longer
        values of later atoms.

        Args:
            atoms: The atoms to be combined in an array. All atoms must share the same
                annotation categories.

        Returns:
            The listed atoms as array.

        Raises:
            ValueError: If atoms do not share the same annotation categories.
            IndexError: If `atoms` is empty (the first atom is used as reference).

        Examples:
            Creating an atom array from atoms:

            >>> atom1 = Atom([1, 2, 3], chain_id="A")
            >>> atom2 = Atom([2, 3, 4], chain_id="A")
            >>> atom3 = Atom([3, 4, 5], chain_id="B")
            >>> atom_array = array([atom1, atom2, atom3])
            >>> print(atom_array)
                A       0                       1.000    2.000    3.000
                A       0                       2.000    3.000    4.000
                B       0                       3.000    4.000    5.000
        """
        # Check if all atoms have the same annotation names
        # Equality check requires sorting
        names = sorted(atoms[0]._annot.keys())
        for i, atom in enumerate(atoms):
            if sorted(atom._annot.keys()) != names:
                raise ValueError(
                    f"The atom at index {i} does not share the same " f"annotation categories as the atom at index 0"
                )
        atom_array = AtomArray(len(atoms))

        for name in names:
            first_value = atoms[0]._annot[name]
            if hasattr(first_value, "dtype"):
                # (Preserve dtype if possible)
                dtype = first_value.dtype
                if getattr(dtype, "kind", None) in ("U", "S"):
                    # Fixed-width text dtype of a single scalar: fall back to the
                    # flexible scalar type so numpy sizes the array to the longest value.
                    dtype = dtype.type
            else:
                dtype = type(first_value)
            annotation_values = np.array([atom._annot[name] for atom in atoms], dtype=dtype)  # maintain dtype
            atom_array.set_annotation(name, annotation_values)
        atom_array._coord = np.stack([atom.coord for atom in atoms])
        return atom_array

    struc.array = array
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _update_pdbx_set_structure() -> None:
    """Improve the `set_structure` function to handle altloc atoms.

    Replaces `biotite.structure.io.pdbx.set_structure` with the nested
    `set_structure` below. Compared to what the nested docstring describes as
    standard behavior, the visible additions are: an `altloc_id` annotation is
    written to ``label_alt_id``, a `chain_entity` annotation is written to
    ``label_entity_id``, and arbitrary annotations can be written via
    `extra_fields`.

    NOTE(review): this relies on private helpers of `biotite.structure.io.pdbx.convert`
    (underscore-prefixed names); presumably tied to specific biotite versions - confirm.
    """

    # fmt: off
    # ruff: noqa
    import biotite.structure.io.pdbx as pdbx
    from biotite.structure.io.pdbx.convert import (
        MaskValue,
        _check_non_empty,
        _determine_entity_id,
        _get_or_create_block,
        _repeat,
        _set_inter_residue_bonds,
        _set_intra_residue_bonds,
        unitcell_from_vectors,
    )

    # NOTE(review): `extra_fields=[]` is a mutable default; it is only read in this
    # function, never mutated.
    def set_structure(
        pdbx_file,
        array,
        data_block=None,
        include_bonds=False,
        extra_fields=[],
    ):
        """
        Set the ``atom_site`` category with atom information from an
        :class:`AtomArray` or :class:`AtomArrayStack`.

        This will save the coordinates, the mandatory annotation categories
        and the optional annotation categories
        ``atom_id``, ``b_factor``, ``occupancy`` and ``charge``.
        If the atom array (stack) contains the annotation ``'atom_id'``,
        these values will be used for atom numbering instead of continuous
        numbering.
        If the annotation ``'altloc_id'`` is present, it is written to
        ``label_alt_id``; if ``'chain_entity'`` is present, it is written to
        ``label_entity_id``.
        Furthermore, inter-residue bonds will be written into the
        ``struct_conn`` category.

        Parameters
        ----------
        pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
            The file object.
        array : AtomArray or AtomArrayStack
            The structure to be written. If a stack is given, each array in
            the stack will be in a separate model.
        data_block : str, optional
            The name of the data block.
            Default is the first (and most times only) data block of the
            file.
            If the data block object is passed directly to `pdbx_file`,
            this parameter is ignored.
            If the file is empty, a new data block will be created.
        include_bonds : bool, optional
            If set to true and `array` has associated ``bonds`` , the
            intra-residue bonds will be written into the ``chem_comp_bond``
            category.
            Inter-residue bonds will be written into the ``struct_conn``
            independent of this parameter.
        extra_fields : list of str, optional
            List of additional annotation names of `array` that should be
            written into the ``atom_site`` category, each as a column of the
            same name.
            Default is an empty list.

        Raises
        ------
        ValueError
            If a name in `extra_fields` is a standard annotation or clashes
            with an ``atom_site`` column that has already been set.

        Notes
        -----
        In some cases, the written inter-residue bonds cannot be read again
        due to ambiguity to which atoms the bond refers.
        This is the case, when two equal residues in the same chain have
        the same (or a masked) `res_id`.

        Examples
        --------

        >>> import os.path
        >>> file = CIFFile()
        >>> set_structure(file, atom_array)
        >>> file.write(os.path.join(path_to_directory, "structure.cif"))

        """
        _check_non_empty(array)

        block = _get_or_create_block(pdbx_file, data_block)
        # Category/column classes are obtained from the block itself rather than
        # imported (presumably so the text and binary CIF variants both work - confirm).
        Category = block.subcomponent_class()
        Column = Category.subcomponent_class()

        # Fill PDBx columns from information
        # in structures' attribute arrays as good as possible
        atom_site = Category()
        atom_site["group_PDB"] = np.where(array.hetero, "HETATM", "ATOM")
        atom_site["type_symbol"] = np.copy(array.element)
        atom_site["label_atom_id"] = np.copy(array.atom_name)
        if "altloc_id" in array.get_annotation_categories():
            # Write the altloc identifiers when the array carries them
            atom_site["label_alt_id"] = np.copy(array.altloc_id)
        else:
            atom_site["label_alt_id"] = Column(
                # AtomArrays do not store altloc atoms
                np.full(array.array_length(), "."),
                np.full(array.array_length(), MaskValue.INAPPLICABLE),
            )
        atom_site["label_comp_id"] = np.copy(array.res_name)
        atom_site["label_asym_id"] = np.copy(array.chain_id)
        if "chain_entity" in array.get_annotation_categories():
            # Prefer an explicit entity annotation over deriving one from chain IDs
            atom_site["label_entity_id"] = np.copy(array.chain_entity)
        else:
            atom_site["label_entity_id"] = _determine_entity_id(array.chain_id)
        atom_site["label_seq_id"] = np.copy(array.res_id)
        atom_site["pdbx_PDB_ins_code"] = Column(
            np.copy(array.ins_code),
            # Empty insertion codes are masked as inapplicable
            np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT),
        )
        # The `auth_*` columns reuse the corresponding `label_*` columns
        atom_site["auth_seq_id"] = atom_site["label_seq_id"]
        atom_site["auth_comp_id"] = atom_site["label_comp_id"]
        atom_site["auth_asym_id"] = atom_site["label_asym_id"]
        atom_site["auth_atom_id"] = atom_site["label_atom_id"]

        # Optional annotations are only written when present on the array
        annot_categories = array.get_annotation_categories()
        if "atom_id" in annot_categories:
            atom_site["id"] = np.copy(array.atom_id)
        if "b_factor" in annot_categories:
            atom_site["B_iso_or_equiv"] = np.copy(array.b_factor)
        if "occupancy" in annot_categories:
            atom_site["occupancy"] = np.copy(array.occupancy)
        if "charge" in annot_categories:
            # Non-zero charges are written with an explicit sign; a zero charge is
            # written as "?" and masked as missing.
            atom_site["pdbx_formal_charge"] = Column(
                np.array([f"{int(c):+d}" if c != 0 else "?" for c in array.charge]),
                np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
            )

        # Handle all remaining custom fields
        if len(extra_fields) > 0:
            # ... check to avoid clashes with standard annotations
            _standard_annotations = [
                "hetero",
                "element",
                "atom_name",
                "res_name",
                "chain_id",
                "res_id",
                "ins_code",
                "atom_id",
                "b_factor",
                "occupancy",
                "charge",
            ]
            # Reserved = columns already set above + the standard annotation names
            _reserved_annotation_names = list(atom_site.keys()) + _standard_annotations

            for annot in extra_fields:
                if annot in _reserved_annotation_names:
                    raise ValueError(
                        f"Annotation name '{annot}' is reserved and cannot be written to as extra field. "
                        "Please choose another name."
                    )
                # The column is named exactly like the annotation
                atom_site[annot] = np.copy(array.get_annotation(annot))

        if array.bonds is not None:
            struct_conn = _set_inter_residue_bonds(array, atom_site)
            if struct_conn is not None:
                block["struct_conn"] = struct_conn
            if include_bonds:
                chem_comp_bond = _set_intra_residue_bonds(array, atom_site)
                if chem_comp_bond is not None:
                    block["chem_comp_bond"] = chem_comp_bond

        # In case of a single model handle each coordinate
        # simply like a flattened array
        if isinstance(array, AtomArray) or (isinstance(array, AtomArrayStack) and array.stack_depth() == 1):
            # 'ravel' flattens coord without copy
            # in case of stack with stack_depth = 1
            atom_site["Cartn_x"] = np.copy(np.ravel(array.coord[..., 0]))
            atom_site["Cartn_y"] = np.copy(np.ravel(array.coord[..., 1]))
            atom_site["Cartn_z"] = np.copy(np.ravel(array.coord[..., 2]))
            atom_site["pdbx_PDB_model_num"] = np.ones(array.array_length(), dtype=np.int32)
        # In case of multiple models repeat annotations
        # and use model-specific coordinates
        else:
            atom_site = _repeat(atom_site, array.stack_depth())
            coord = np.reshape(array.coord, (array.stack_depth() * array.array_length(), 3))
            atom_site["Cartn_x"] = np.copy(coord[:, 0])
            atom_site["Cartn_y"] = np.copy(coord[:, 1])
            atom_site["Cartn_z"] = np.copy(coord[:, 2])
            # Model numbers start at 1; each is repeated once per atom
            atom_site["pdbx_PDB_model_num"] = np.repeat(
                np.arange(1, array.stack_depth() + 1, dtype=np.int32),
                repeats=array.array_length(),
            )
        if "atom_id" not in annot_categories:
            # Count from 1
            atom_site["id"] = np.arange(1, len(atom_site["group_PDB"]) + 1)
        block["atom_site"] = atom_site

        # Write box into file
        if array.box is not None:
            # PDBx files can only store one box for all models
            # -> Use first box
            if array.box.ndim == 3:
                box = array.box[0]
            else:
                box = array.box
            len_a, len_b, len_c, alpha, beta, gamma = unitcell_from_vectors(box)
            cell = Category()
            cell["length_a"] = len_a
            cell["length_b"] = len_b
            cell["length_c"] = len_c
            # Angles are converted from radians to degrees before writing
            cell["angle_alpha"] = np.rad2deg(alpha)
            cell["angle_beta"] = np.rad2deg(beta)
            cell["angle_gamma"] = np.rad2deg(gamma)
            block["cell"] = cell

    # Install the replacement on the `pdbx` module
    pdbx.set_structure = set_structure
    # fmt: on
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def monkey_patch_biotite() -> None:
    """Apply all AtomWorks monkey patches to biotite.

    Runs, in order: the `query`/`mask`/`idxs` methods, the lean `AtomArray` repr,
    `SegmentSlice` indexing, and the replacements for `get_residue_starts`,
    `array` and `pdbx.set_structure`. Once every patch has been applied, the
    module-level flag `_HAS_BEEN_PATCHED` is set and later calls return immediately.
    """
    global _HAS_BEEN_PATCHED

    if not _HAS_BEEN_PATCHED:
        patch_steps = (
            _add_query_mask_idxs_methods,
            _enable_lean_atom_array_repr,
            _enable_segment_slices_in_atom_arrays,
            _update_get_residue_starts,
            _update_array,
            _update_pdbx_set_structure,
        )
        for apply_patch in patch_steps:
            apply_patch()

        # Only reached when every step succeeded.
        _HAS_BEEN_PATCHED = True
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""AtomWorks command-line interface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
# Root Typer application; the command groups below are attached to it.
app = typer.Typer(help="AtomWorks command-line interface")

# Import commands to register them with the root app and expose sub-apps
# NOTE(review): these imports come after `app` is created, presumably because the
# submodules refer back to it - confirm before reordering.
from . import ccd as _ccd  # noqa: E402
from . import pdb as _pdb  # noqa: E402, T100
from . import setup as _setup  # noqa: E402

# Expose namespaced groups: `atomworks ccd ...`, ...
# Each submodule provides its own Typer instance as `app`.
app.add_typer(_ccd.app, name="ccd")
app.add_typer(_pdb.app, name="pdb")
app.add_typer(_setup.app, name="setup")
|