pyjess 0.7.0__cp38-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjess might be problematic. Click here for more details.
- pyjess/.gitignore +2 -0
- pyjess/CMakeLists.txt +1 -0
- pyjess/__init__.py +21 -0
- pyjess/__main__.py +4 -0
- pyjess/_jess.abi3.so +0 -0
- pyjess/_jess.pyi +268 -0
- pyjess/_jess.pyx +2371 -0
- pyjess/cli.py +281 -0
- pyjess/py.typed +0 -0
- pyjess/tests/__init__.py +20 -0
- pyjess/tests/data/1.3.3.tpl +23 -0
- pyjess/tests/data/1AMY+1.3.3.txt +1872 -0
- pyjess/tests/data/1AMY.cif +6259 -0
- pyjess/tests/data/1AMY.pdb +3941 -0
- pyjess/tests/data/1sur.qry +26 -0
- pyjess/tests/data/4.1.2.tpl +23 -0
- pyjess/tests/data/5ayx.EF.pdb +63 -0
- pyjess/tests/data/__init__.py +0 -0
- pyjess/tests/data/pdb1lnb.pdb +3334 -0
- pyjess/tests/data/template_01.qry +11 -0
- pyjess/tests/data/template_02.qry +11 -0
- pyjess/tests/test_atom.py +111 -0
- pyjess/tests/test_doctest.py +78 -0
- pyjess/tests/test_hit.py +57 -0
- pyjess/tests/test_jess.py +374 -0
- pyjess/tests/test_molecule.py +287 -0
- pyjess/tests/test_template.py +126 -0
- pyjess/tests/test_template_atom.py +92 -0
- pyjess/tests/utils.py +7 -0
- pyjess-0.7.0.dist-info/METADATA +282 -0
- pyjess-0.7.0.dist-info/RECORD +34 -0
- pyjess-0.7.0.dist-info/WHEEL +5 -0
- pyjess-0.7.0.dist-info/entry_points.txt +3 -0
- pyjess-0.7.0.dist-info/licenses/COPYING +21 -0
pyjess/_jess.pyx
ADDED
|
@@ -0,0 +1,2371 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
# cython: language_level=3, linetrace=True, binding=True
|
|
3
|
+
"""Bindings to Jess, a 3D template matching software.
|
|
4
|
+
|
|
5
|
+
Jess is an algorithm for constraint-based structural template matching
|
|
6
|
+
proposed by Jonathan Barker *et al.*. It can be used to identify
|
|
7
|
+
catalytic residues from a known template inside a protein structure.
|
|
8
|
+
Jess is an evolution of TESS, a geometric hashing algorithm developed by
|
|
9
|
+
Andrew Wallace *et al.*, removing some pre-computation and
|
|
10
|
+
structural requirements from the original algorithm.
|
|
11
|
+
|
|
12
|
+
PyJess is a Python module that provides bindings to Jess using
|
|
13
|
+
`Cython <https://cython.org/>`_. It allows creating templates, querying
|
|
14
|
+
them with protein structures, and retrieving the hits using a Python API
|
|
15
|
+
without performing any external I/O. It's also more than 10x faster than
|
|
16
|
+
Jess thanks to algorithmic optimizations added to improve the original Jess
|
|
17
|
+
code while producing consistent results.
|
|
18
|
+
|
|
19
|
+
Example:
|
|
20
|
+
Load templates from a file, either as a file-like object or
|
|
21
|
+
given a filename::
|
|
22
|
+
|
|
23
|
+
>>> t1 = pyjess.Template.load("1.3.3.tpl") # load from filename
|
|
24
|
+
>>> with open("4.1.2.tpl") as f: # load from a file object
|
|
25
|
+
... t2 = pyjess.Template.load(f)
|
|
26
|
+
|
|
27
|
+
Load molecules from a file, either as a file-like object or given
|
|
28
|
+
a filename::
|
|
29
|
+
|
|
30
|
+
>>> mol = pyjess.Molecule.load("1AMY.pdb")
|
|
31
|
+
>>> mol[0]
|
|
32
|
+
Atom(serial=1, name='N', altloc=' ', residue_name='GLN', ...)
|
|
33
|
+
|
|
34
|
+
Create a `Jess` object storing the templates to support running
|
|
35
|
+
queries on them. The individual templates can still be accessed by
|
|
36
|
+
index::
|
|
37
|
+
|
|
38
|
+
>>> jess = pyjess.Jess([t1, t2])
|
|
39
|
+
>>> jess[0].id
|
|
40
|
+
'3r6v'
|
|
41
|
+
|
|
42
|
+
Run a query on the Jess object to retrieve all templates matching
|
|
43
|
+
a `Molecule`, *in no particular order*::
|
|
44
|
+
|
|
45
|
+
>>> hits = jess.query(mol, 2, 2, 2)
|
|
46
|
+
>>> for hit in hits:
|
|
47
|
+
... print(hit.template.id, hit.rmsd)
|
|
48
|
+
2om2 1.4386...
|
|
49
|
+
2om2 1.4877...
|
|
50
|
+
2om2 1.4376...
|
|
51
|
+
2om2 1.5284...
|
|
52
|
+
2om2 1.4863...
|
|
53
|
+
2om2 1.4369...
|
|
54
|
+
2om2 1.4790...
|
|
55
|
+
2om2 1.1414...
|
|
56
|
+
2om2 1.0755...
|
|
57
|
+
2om2 1.1973...
|
|
58
|
+
2om2 1.1353...
|
|
59
|
+
2om2 1.0711...
|
|
60
|
+
2om2 1.1494...
|
|
61
|
+
|
|
62
|
+
By default, a template can match a molecule in more than one way,
|
|
63
|
+
if several sets of atoms match the geometric constraints. Use the
|
|
64
|
+
``best_match`` argument of `~Jess.query` to only retrieve the
|
|
65
|
+
best match per template::
|
|
66
|
+
|
|
67
|
+
>>> hits = jess.query(mol, 2, 2, 2, best_match=True)
|
|
68
|
+
>>> for hit in hits:
|
|
69
|
+
... print(hit.template.id, hit.rmsd)
|
|
70
|
+
2om2 1.071...
|
|
71
|
+
|
|
72
|
+
References:
|
|
73
|
+
- Barker, J. A., & Thornton, J. M. (2003). *An algorithm for
|
|
74
|
+
constraint-based structural template matching: application to
|
|
75
|
+
3D templates with statistical analysis*. Bioinformatics (Oxford,
|
|
76
|
+
England), 19(13), 1644–1649. :doi:`10.1093/bioinformatics/btg226`.
|
|
77
|
+
- Wallace, A. C., Borkakoti, N., & Thornton, J. M. (1997).
|
|
78
|
+
*TESS: a geometric hashing algorithm for deriving 3D coordinate
|
|
79
|
+
templates for searching structural databases. Application to enzyme
|
|
80
|
+
active sites*. Protein science : a publication of the Protein
|
|
81
|
+
Society, 6(11), 2308–2323. :doi:`10.1002/pro.5560061104`.
|
|
82
|
+
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
# --- C imports --------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
cimport cython
|
|
88
|
+
from cpython.exc cimport PyErr_WarnEx
|
|
89
|
+
from cpython.unicode cimport (
|
|
90
|
+
PyUnicode_FromStringAndSize,
|
|
91
|
+
PyUnicode_FromFormat,
|
|
92
|
+
PyUnicode_AsASCIIString,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
from libc.math cimport isnan, exp, INFINITY, NAN
|
|
96
|
+
from libc.stdio cimport FILE, fclose, fdopen, printf, sprintf
|
|
97
|
+
from libc.stdint cimport uintptr_t
|
|
98
|
+
from libc.stdlib cimport calloc, realloc, free, malloc
|
|
99
|
+
from libc.string cimport memcpy, memset, strncpy, strdup
|
|
100
|
+
|
|
101
|
+
cimport jess.atom
|
|
102
|
+
cimport jess.jess
|
|
103
|
+
cimport jess.molecule
|
|
104
|
+
cimport jess.super
|
|
105
|
+
cimport jess.tess_template
|
|
106
|
+
cimport jess.tess_atom
|
|
107
|
+
cimport jess.res_index
|
|
108
|
+
from jess.atom cimport Atom as _Atom
|
|
109
|
+
from jess.jess cimport Jess as _Jess
|
|
110
|
+
from jess.jess cimport JessQuery as _JessQuery
|
|
111
|
+
from jess.molecule cimport Molecule as _Molecule
|
|
112
|
+
from jess.super cimport Superposition as _Superposition
|
|
113
|
+
from jess.template cimport Template as _Template, IgnoreType as _IgnoreType
|
|
114
|
+
from jess.tess_template cimport TessTemplate as _TessTemplate
|
|
115
|
+
from jess.tess_atom cimport TessAtom as _TessAtom
|
|
116
|
+
|
|
117
|
+
# --- Python imports ---------------------------------------------------------
|
|
118
|
+
|
|
119
|
+
import functools
|
|
120
|
+
import io
|
|
121
|
+
|
|
122
|
+
__version__ = PROJECT_VERSION
|
|
123
|
+
|
|
124
|
+
# --- Utils ------------------------------------------------------------------
|
|
125
|
+
|
|
126
|
+
cdef inline void encode_token(char* dst, const char* src, size_t n) noexcept nogil:
    # Copy the first `n` bytes of `src` into `dst`, writing an underscore
    # wherever the source holds a space or a NUL byte, then NUL-terminate
    # the output. `dst` must therefore have room for `n + 1` bytes.
    cdef size_t pos
    cdef char   ch
    for pos in range(n):
        ch = src[pos]
        if ch != ord(' ') and ch != 0:
            dst[pos] = ch
        else:
            dst[pos] = ord('_')
    dst[n] = 0
|
|
134
|
+
|
|
135
|
+
cdef inline void decode_token(char* dst, const char* src, size_t n) noexcept nogil:
    # Copy the first `n` bytes of `src` into `dst`, writing a space wherever
    # the source holds an underscore or a NUL byte, then NUL-terminate the
    # output. `dst` must therefore have room for `n + 1` bytes.
    cdef size_t pos
    cdef char   ch
    for pos in range(n):
        ch = src[pos]
        if ch != ord('_') and ch != 0:
            dst[pos] = ch
        else:
            dst[pos] = ord(' ')
    dst[n] = 0
|
|
143
|
+
|
|
144
|
+
class nullcontext:
    """Context manager that manages nothing.

    Entering the context yields the value given at construction time;
    leaving it performs no cleanup and never suppresses an exception.
    """
    def __init__(self, return_value=None):
        # kept so that `__enter__` can hand the very same object back
        self.retval = return_value
    def __enter__(self):
        return self.retval
    def __exit__(self, exc_type, exc_value, traceback):
        # a false result lets any in-flight exception propagate
        return False
|
|
151
|
+
|
|
152
|
+
# --- Classes ----------------------------------------------------------------
|
|
153
|
+
|
|
154
|
+
cdef class _MoleculeParser:
    """Base class of the molecule parsers, storing the requested identifier."""
    # identifier supplied by the caller, or `None` when none was given
    cdef str id

    def __init__(self, str id = None):
        self.id = id
|
|
159
|
+
|
|
160
|
+
cdef class _PDBMoleculeParser(_MoleculeParser):
    """Line-oriented parser building a molecule from PDB-formatted text.

    ``ATOM`` records are always collected, ``HETATM`` records are collected
    unless ``skip_hetatm`` is set, and reading stops at the first ``ENDMDL``
    record unless ``ignore_endmdl`` is set.
    """
    cdef bint ignore_endmdl
    cdef bint skip_hetatm

    def __init__(self, str id = None, bint ignore_endmdl = False, bint skip_hetatm = False):
        super().__init__(id=id)
        self.skip_hetatm = skip_hetatm
        self.ignore_endmdl = ignore_endmdl

    def loads(self, text, molecule_type):
        """Parse the PDB records contained in the string ``text``."""
        buffer = io.StringIO(text)
        return self.load(buffer, molecule_type)

    def load(self, file, molecule_type):
        """Parse PDB records from ``file`` and build a ``molecule_type``.

        Arguments:
            file: Something accepted by `open`, or an iterable of text
                lines (used as-is when `open` rejects it with `TypeError`).
            molecule_type: Callable invoked as ``molecule_type(atoms, id=id)``
                to build the result.

        Raises:
            `ValueError`: When a line starts with an mmCIF ``data_`` or
                ``loop_`` tag (case-insensitive).

        """
        cdef str  record
        cdef str  identifier = self.id
        cdef list parsed     = []

        try:
            source = open(file)
        except TypeError:
            # not something `open` understands: treat it as an open stream
            source = nullcontext(file)

        with source as stream:
            for record in stream:
                if record.startswith("HEADER"):
                    # only fill in the identifier when the caller gave none
                    if identifier is None:
                        identifier = record[62:66].strip() or None
                    continue
                if record.startswith("ATOM") or (
                    record.startswith("HETATM") and not self.skip_hetatm
                ):
                    parsed.append(Atom.loads(record))
                    continue
                if record.startswith("ENDMDL"):
                    if self.ignore_endmdl:
                        continue
                    break
                if record.lower().startswith(("data_", "loop_")):
                    raise ValueError("mmCIF data tags found, file is not in PDB format")

        return molecule_type(parsed, id=identifier)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
cdef class _CIFMoleculeParser(_MoleculeParser):
    """Parser building a molecule from the ``_atom_site`` table of a CIF file.

    The ``gemmi`` module is imported when the parser is constructed, so
    creating a parser fails if ``gemmi`` cannot be imported.
    """
    cdef object gemmi
    cdef bint use_author
    cdef bint skip_hetatm

    # Columns requested from `_atom_site` when `use_author` is false. The
    # position of a name in this list is the index used on each row below.
    # NOTE(review): the `?` prefix presumably marks a column as optional
    # for gemmi's `Block.find` -- confirm against the gemmi documentation.
    _PRIMARY_COLUMNS = [
        'id', 'type_symbol', 'label_atom_id', 'label_alt_id', 'label_comp_id',
        'label_asym_id', 'label_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
        'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
        '?pdbx_formal_charge', '?group_PDB',
    ]

    # Same layout as `_PRIMARY_COLUMNS`, with the atom name, residue name,
    # chain and residue number taken from the `auth_*` columns instead.
    _AUTH_COLUMNS = [
        'id', 'type_symbol', 'auth_atom_id', 'label_alt_id', 'auth_comp_id',
        'auth_asym_id', 'auth_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
        'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
        '?pdbx_formal_charge', '?group_PDB',
    ]

    def __init__(self, str id = None, bint use_author = False, bint skip_hetatm = False):
        super().__init__(id=id)
        self.gemmi = __import__('gemmi')
        self.use_author = use_author
        self.skip_hetatm = skip_hetatm

    def _load_block(self, document, molecule_type):
        """Build a ``molecule_type`` from the sole block of ``document``.

        Raises:
            `ValueError`: When the ``_atom_site`` lookup yields a falsy
                table.

        """
        block = document.sole_block()
        cols = self._AUTH_COLUMNS if self.use_author else self._PRIMARY_COLUMNS
        table = block.find('_atom_site.', cols)
        # running maximum of the residue numbers seen so far
        max_residue_number = 0

        if not table:
            raise ValueError("missing columns in CIF files")

        atoms = []
        for row in table:
            # row[14] is `group_PDB`: keep ATOM rows, and HETATM rows unless
            # they were disabled; any other value is skipped.
            # NOTE(review): row[14] is read without a `row.has(14)` check
            # although the column is requested with a `?` prefix -- confirm
            # what gemmi does when the column is absent.
            if row[14] != "ATOM" and (row[14] != "HETATM" or self.skip_hetatm):
                continue

            # row[6] is the residue number column (label or author)
            if row[6] == "." and row[14] == "HETATM":
                PyErr_WarnEx(
                    UserWarning,
                    "HETATM line found without residue number. Consider "
                    "parsing with use_author=True to use author-defined "
                    "residue numbers, or skip_hetatm=True to disable "
                    "parsing of HETATM altogether.",
                    3,
                )
                # NOTE(review): the atom receives the current running
                # maximum (i.e. the same number as the highest residue seen
                # so far) before the maximum is incremented -- confirm this
                # is intended rather than `max + 1`.
                residue_number = max_residue_number
                max_residue_number += 1
            else:
                residue_number = int(row[6])
                max_residue_number = max(residue_number, max_residue_number)

            atom = Atom(
                serial=int(row[0]),
                element=row[1],
                name=row[2],
                altloc=' ' if row[3] == "." else row[3],  # FIXME: replace with None?
                residue_name=row[4],
                chain_id=row[5],
                residue_number=residue_number,
                insertion_code=' ' if not row.has(7) or row[7] == "?" else row[7],
                x=float(row[8]),
                y=float(row[9]),
                z=float(row[10]),
                occupancy=0.0 if row[11] == '.' else float(row[11]),
                temperature_factor=float(row[12]),
                charge=0 if not row.has(13) or row[13] == "?" else int(row[13]),
            )
            atoms.append(atom)

        # fall back to the block name when the caller gave no identifier
        id = block.name if self.id is None else self.id
        return molecule_type(atoms, id=id)

    def loads(self, text, molecule_type):
        """Parse the CIF document contained in the string ``text``."""
        document = self.gemmi.cif.read_string(text)
        return self._load_block(document, molecule_type)

    def load(self, file, molecule_type):
        """Parse a CIF document from a readable object or from a path."""
        # objects exposing `read` are consumed entirely and parsed as text;
        # anything else is handed to gemmi as a file name
        if hasattr(file, "read"):
            document = self.gemmi.cif.read_string(file.read())
        else:
            document = self.gemmi.cif.read_file(file)
        return self._load_block(document, molecule_type)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
cdef class Molecule:
|
|
285
|
+
"""A molecule structure, as a sequence of `Atom` objects.
|
|
286
|
+
|
|
287
|
+
.. versionadded:: 0.2.2
|
|
288
|
+
Support identifiers of arbitrary length.
|
|
289
|
+
|
|
290
|
+
.. versionadded:: 0.4.0
|
|
291
|
+
Equality, hashing and pickle protocol support.
|
|
292
|
+
|
|
293
|
+
"""
|
|
294
|
+
cdef _Molecule* _mol
|
|
295
|
+
cdef str _id
|
|
296
|
+
|
|
297
|
+
@classmethod
def loads(
    cls,
    text,
    str format = "pdb",
    *,
    str id = None,
    bint ignore_endmdl = False,
    bint use_author = False,
    bint skip_hetatm = False,
):
    """Load a molecule from a string.

    Arguments:
        text (`str`): The serialized molecule to parse into a new
            object.
        format (`str`): The format to parse the text with. Supported
            formats are: ``pdb`` for the Protein Data Bank format (the
            default), ``cif`` for Crystallographic Information File
            format (additionally requires the `gemmi` module), or
            ``detect`` to attempt auto-detection.

    Keyword Arguments:
        id (`str`, optional): The identifier of the molecule. If `None`
            given, the parser will attempt to extract it from the
            ``HEADER`` line (for PDB files) or the block name (for CIF
            files).
        ignore_endmdl (`bool`): Pass `True` to make the parser read all
            the atoms from the PDB file. By default, the parser only
            reads the atoms of the first model, and stops at the first
            ``ENDMDL`` line. *Ignored for CIF files*.
        use_author (`bool`): Pass `True` to use the author-defined
            labels while parsing CIF files, e.g. read the chain name
            from ``_atom_site.auth_asym_id`` rather than
            ``_atom_site.label_asym_id``. *Ignored for PDB files*.
        skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
            (``HETATM``) in the input text.

    Returns:
        `~pyjess.Molecule`: The molecule parsed from the text.

    Raises:
        `ValueError`: When ``format`` is not a supported value.

    See Also:
        `Molecule.load` to load a molecule from a file-like
        object or from a path.

    Caution:
        mmCIF files store heteroatoms but do not require them to
        have an associated residue number, which can throw off the way
        atoms are modeled in Jess. Consider ``use_author=True`` or
        ``skip_hetatm=True`` when parsing such files.

    .. versionadded:: 0.7.0
        The ``format`` argument, and support for CIF parsing.

    """
    if format == "detect":
        # ignore leading whitespace before looking for an mmCIF tag
        format = "cif" if text.lstrip().startswith(("data_", "loop_")) else "pdb"
    return cls.load(
        io.StringIO(text),
        format=format,
        id=id,
        ignore_endmdl=ignore_endmdl,
        # previously dropped, which made `use_author=True` a silent no-op
        use_author=use_author,
        skip_hetatm=skip_hetatm,
    )
|
|
360
|
+
|
|
361
|
+
@classmethod
def load(
    cls,
    file,
    str format = "detect",
    *,
    str id = None,
    bint ignore_endmdl = False,
    bint use_author = False,
    bint skip_hetatm = False,
):
    """Load a molecule from a PDB or mmCIF file.

    Arguments:
        file (`str`, `os.PathLike`, or file-like object): Either the path
            to a file, or a file-like object opened in **text mode**
            containing a molecule.
        format (`str`): The format to parse the file. Supported formats
            are: ``pdb`` for the Protein Data Bank format, ``cif``
            for Crystallographic Information File format (additionally
            requires the `gemmi` module), or ``detect`` to attempt
            auto-detection (the default).

    Keyword Arguments:
        id (`str`, optional): The identifier of the molecule. If `None`
            given, the parser will attempt to extract it from the
            ``HEADER`` line (for PDB files) or the block name (for CIF
            files).
        ignore_endmdl (`bool`): Pass `True` to make the parser read all
            the atoms from the PDB file. By default, the parser only
            reads the atoms of the first model, and stops at the first
            ``ENDMDL`` line. *Ignored for CIF files*.
        use_author (`bool`): Pass `True` to use the author-defined
            labels while parsing CIF files, e.g. read the chain name
            from ``_atom_site.auth_asym_id`` rather than
            ``_atom_site.label_asym_id``. *Ignored for PDB files*.
        skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
            (``HETATM``) in the input file.

    Returns:
        `~pyjess.Molecule`: The molecule parsed from the file.

    Raises:
        `ValueError`: When ``format`` is not one of ``detect``, ``pdb``
            or ``cif``.

    See Also:
        `Molecule.loads` to load a molecule from a string.

    Caution:
        mmCIF files store heteroatoms but do not require them to
        have an associated residue number, which can throw off the way
        atoms are modeled in Jess. Consider ``use_author=True`` or
        ``skip_hetatm=True`` when parsing such files.

    .. versionadded:: 0.7.0
        The ``format`` and ``skip_hetatm`` arguments, and mmCIF support.

    """
    cdef _MoleculeParser parser
    cdef str peek

    if format == "detect":
        try:
            handle = open(file)
        except TypeError:
            # not something `open` understands: treat it as an open stream
            handle = nullcontext(file)
        with handle as f:
            if f.seekable():
                # peek at the first characters, then rewind for the parser
                peek = f.read(5)
                f.seek(0)
                text = None
            else:
                # cannot rewind: consume the stream and parse from memory
                text = f.read()
                # first five characters (this used to be `[5:]`, i.e.
                # everything *after* them, which broke CIF detection)
                peek = text[:5]
            if peek.startswith(("data_", "loop_")):
                parser = _CIFMoleculeParser(
                    id=id,
                    use_author=use_author,
                    skip_hetatm=skip_hetatm,
                )
            else:
                parser = _PDBMoleculeParser(
                    id=id,
                    ignore_endmdl=ignore_endmdl,
                    skip_hetatm=skip_hetatm,
                )
            if text is not None:
                return parser.loads(text, molecule_type=cls)
            return parser.load(f, molecule_type=cls)

    if format == "pdb":
        parser = _PDBMoleculeParser(
            id=id,
            ignore_endmdl=ignore_endmdl,
            skip_hetatm=skip_hetatm,
        )
    elif format == "cif":
        parser = _CIFMoleculeParser(
            id=id,
            use_author=use_author,
            skip_hetatm=skip_hetatm,
        )
    else:
        raise ValueError(f"invalid value for `format` argument: {format!r}")
    return parser.load(file, molecule_type=cls)
|
|
462
|
+
|
|
463
|
+
@classmethod
def from_biopython(cls, object structure, str id = None):
    """Create a new `~pyjess.Molecule` from a `Bio.PDB.Structure`.

    Arguments:
        structure (`Bio.PDB.Structure` or `Bio.PDB.Model`): The
            Biopython object containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule. If `None` given, will use the value of
            ``structure.id``.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for c in structure.get_chains():
        for r in c.get_residues():
            # the first field of the residue id is not used here
            _, residue_number, insertion_code = r.id
            for a in r.get_atoms():
                coord = a.get_coord()
                atom = Atom(
                    name=a.fullname,
                    x=coord[0],
                    y=coord[1],
                    z=coord[2],
                    altloc=a.altloc,
                    # a falsy charge (e.g. `None`) is stored as 0
                    charge=a.pqr_charge or 0,
                    occupancy=a.occupancy,
                    serial=a.serial_number,
                    residue_name=r.resname,
                    residue_number=residue_number,
                    segment=r.segid,
                    insertion_code=insertion_code,
                    chain_id=c.id,
                    temperature_factor=a.bfactor,
                    element=a.element,
                )
                atoms.append(atom)
    # honour an explicit identifier (it used to be silently ignored)
    return cls(atoms, id=structure.id if id is None else id)
|
|
506
|
+
|
|
507
|
+
@classmethod
def from_gemmi(cls, object model, str id=None):
    """Create a new `~pyjess.Molecule` from a `gemmi.Model`.

    Arguments:
        model (`gemmi.Model`): The ``gemmi`` object
            containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    # each item yielded by `model.all()` exposes the atom together with
    # its residue and chain
    for cra in model.all():
        a = cra.atom
        r = cra.residue
        c = cra.chain
        atom = Atom(
            name=a.padded_name(),
            x=a.pos[0],
            y=a.pos[1],
            z=a.pos[2],
            # a NUL alternate location is stored as a blank
            altloc=' ' if a.altloc == '\0' else a.altloc,
            charge=a.charge,
            element=a.element.name.upper(),
            occupancy=a.occ,
            temperature_factor=a.b_iso,
            serial=a.serial,
            segment=r.segment,
            residue_name=r.name,
            residue_number=r.seqid.num,
            chain_id=c.name,
            insertion_code=r.seqid.icode,
        )
        atoms.append(atom)
    return cls(atoms, id=id)
|
|
548
|
+
|
|
549
|
+
@classmethod
def from_biotite(cls, object atom_array, str id=None):
    """Create a new `~pyjess.Molecule` from a `biotite.structure.AtomArray`.

    Arguments:
        atom_array (`biotite.structure.AtomArray`): The ``biotite``
            object containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    Caution:
        If loading data with the `biotite.structure.io.pdb.PDBFile` module,
        ensure that you are requesting all atoms and all extra fields
        in `~biotite.structure.io.pdb.PDBFile.get_structure`::

            pdb_file = PDBFile.read("data/1AMY.pdb")
            structure = pdb_file.get_structure(
                altloc="all",
                extra_fields=["atom_id", "b_factor", "occupancy", "charge"],
            )
            molecule = Molecule.from_biotite(structure[0])

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for a in atom_array:
        atom = Atom(
            name=str(a.atom_name),
            x=a.coord[0],
            y=a.coord[1],
            z=a.coord[2],
            # these annotations fall back to a default when absent
            altloc=str(getattr(a, 'altloc', ' ')),
            charge=getattr(a, 'charge', 0),
            element=str(a.element),
            occupancy=getattr(a, 'occupancy', 1.0),
            temperature_factor=a.b_factor,
            serial=a.atom_id,
            segment=str(getattr(a, 'segment', '')),
            residue_name=str(a.res_name),
            residue_number=a.res_id,
            chain_id=str(a.chain_id),
            # pad an empty insertion code to a single blank
            insertion_code=str(a.ins_code).ljust(1),
        )
        atoms.append(atom)
    # forward the identifier (it used to be silently dropped)
    return cls(atoms, id=id)
|
|
597
|
+
|
|
598
|
+
def __cinit__(self):
    # no C molecule exists until `__init__` (or `copy`) allocates one
    self._mol = NULL
|
|
600
|
+
|
|
601
|
+
def __dealloc__(self):
    # NOTE(review): `_mol` is still NULL when `__init__` never ran;
    # presumably `Molecule_free` tolerates a NULL pointer -- confirm.
    jess.molecule.Molecule_free(self._mol)
|
|
603
|
+
|
|
604
|
+
def __init__(self, object atoms = (), str id = None):
    """__init__(self, atoms=(), id=None)\n--\n

    Create a new molecule.

    Arguments:
        atoms (sequence of `~pyjess.Atom`): The atoms of the molecule.
        id (`str`, optional): The identifier of the molecule.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the molecule storage.

    """
    cdef Atom atom
    cdef int i
    # `len` is required, so `atoms` must be sized (not a bare iterator)
    cdef int count = len(atoms)

    # a single allocation holds the header plus `count` atom pointers
    self._mol = <_Molecule*> malloc(sizeof(_Molecule) + count * sizeof(_Atom*))
    if self._mol is NULL:
        raise MemoryError("Failed to allocate molecule")

    # clear the index and every atom slot before anything below can raise
    self._mol.index = NULL
    self._mol.count = count
    for i in range(count):
        self._mol.atom[i] = NULL
    # the C-side identifier is blanked; the Python string is kept in `_id`
    memset(self._mol.id, b' ', 5)
    self._id = id

    # each atom is copied by value into storage allocated for this molecule
    for i, atom in enumerate(atoms):
        self._mol.atom[i] = <_Atom*> malloc(sizeof(_Atom))
        if self._mol.atom[i] is NULL:
            raise MemoryError("Failed to allocate atom")
        memcpy(self._mol.atom[i], atom._atom, sizeof(_Atom))

    # build the residue index once all the atoms are in place
    self._mol.index = jess.res_index.ResIndex_create(self._mol.atom, count)
    if self._mol.index is NULL:
        raise MemoryError("Failed to allocate residue index")
|
|
642
|
+
|
|
643
|
+
def __len__(self):
    # number of atoms recorded in the C molecule
    assert self._mol is not NULL
    return self._mol.count
|
|
646
|
+
|
|
647
|
+
def __getitem__(self, object index):
    # A slice yields a new molecule of the same type and identifier built
    # from the selected atoms; an integer yields an `Atom` that points into
    # this molecule's storage and keeps this molecule alive through `owner`.
    assert self._mol is not NULL

    cdef Atom    view
    cdef ssize_t position
    cdef ssize_t total = self._mol.count

    if isinstance(index, slice):
        start, stop, step = index.indices(total)
        selected = [self[k] for k in range(start, stop, step)]
        return type(self)(atoms=selected, id=self.id)

    position = index
    if position < 0:
        # negative indices count from the end
        position += total
    if position < 0 or position >= total:
        raise IndexError(index)

    view = Atom.__new__(Atom)
    view.owner = self
    view.owned = True
    view._atom = <_Atom*> jess.molecule.Molecule_atom(self._mol, position)
    return view
|
|
668
|
+
|
|
669
|
+
def __copy__(self):
    # support for `copy.copy`, delegating to `Molecule.copy`
    return self.copy()
|
|
671
|
+
|
|
672
|
+
def __eq__(self, object other):
    # Molecules are equal when their identifiers match, they hold the same
    # number of atoms, and the atoms compare equal pairwise in order.
    cdef Molecule rhs
    if not isinstance(other, Molecule):
        return NotImplemented
    rhs = other
    if self._id != rhs._id or self._mol.count != rhs._mol.count:
        return False
    for mine, theirs in zip(self, rhs):
        if not (mine == theirs):
            return False
    return True
|
|
682
|
+
|
|
683
|
+
def __hash__(self):
    # combine the identifier with the hash of every atom, in order
    return hash((self._id, *(hash(x) for x in self)))
|
|
685
|
+
|
|
686
|
+
def __reduce__(self):
    # pickle as a constructor call: `type(self)(list_of_atoms, id)`
    return type(self), (list(self), self.id)
|
|
688
|
+
|
|
689
|
+
def __sizeof__(self):
    # Sum of the `sizeof(self)` term, the C molecule header, and for each
    # atom one pointer slot plus one atom record. The residue index is not
    # part of the sum.
    assert self._mol is not NULL
    return (
        sizeof(self)
        + sizeof(_Molecule)
        + self._mol.count*(sizeof(_Atom*) + sizeof(_Atom))
    )
|
|
696
|
+
|
|
697
|
+
@property
def id(self):
    # identifier given at construction time, or `None`
    return self._id
|
|
700
|
+
|
|
701
|
+
cpdef Molecule conserved(self, double cutoff = 0.0):
    """Build a molecule restricted to the sufficiently conserved atoms.

    Arguments:
        cutoff (`float`): The conservation threshold. Only atoms whose
            `~Atom.temperature_factor` is greater than or equal to this
            value are kept.

    Returns:
        `~pyjess.Molecule`: A new molecule of the same type and with
        the same identifier. When ``cutoff`` is zero or negative, a
        plain copy of this molecule is returned instead.

    """
    assert self._mol is not NULL

    cdef size_t idx
    cdef list   kept

    # nothing can be filtered out: hand back a copy straight away
    if cutoff <= 0.0:
        return self.copy()

    kept = []
    for idx in range(self._mol.count):
        # written as a negated `>=` so NaN factors are still excluded
        if not (self._mol.atom[idx].tempFactor >= cutoff):
            continue
        kept.append(self[idx])

    return type(self)(id=self.id, atoms=kept)
|
|
728
|
+
|
|
729
|
+
cpdef Molecule copy(self):
    """Create a copy of this molecule and its atoms.

    Returns:
        `~pyjess.Molecule`: A newly allocated molecule with the same
        identifier and atoms.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the molecule storage.

    .. versionadded:: 0.4.0

    """
    # same guard as the other accessors: `_mol` is NULL until `__init__`
    assert self._mol is not NULL

    cdef size_t   i
    cdef Molecule copy = Molecule.__new__(Molecule)
    cdef size_t   size = sizeof(_Molecule) + self._mol.count * sizeof(_Atom*)

    with nogil:
        # allocate molecule storage
        copy._mol = <_Molecule*> malloc(size)
        if copy._mol is NULL:
            raise MemoryError("Failed to allocate molecule")
        # copy molecule attributes
        copy._mol.index = NULL
        copy._mol.count = self._mol.count
        memset(copy._mol.id, b' ', 5)
        # clear every slot first, as `__init__` does, so that a failure
        # below leaves only NULL or valid pointers for the deallocator
        for i in range(self._mol.count):
            copy._mol.atom[i] = NULL
        # copy molecule atoms
        for i in range(self._mol.count):
            copy._mol.atom[i] = <_Atom*> malloc(sizeof(_Atom))
            if copy._mol.atom[i] is NULL:
                raise MemoryError("Failed to allocate atom")
            memcpy(copy._mol.atom[i], self._mol.atom[i], sizeof(_Atom))
        # regenerate index
        copy._mol.index = jess.res_index.ResIndex_create(copy._mol.atom, copy._mol.count)
        if copy._mol.index is NULL:
            raise MemoryError("Failed to allocate residue index")

    copy._id = self._id
    return copy
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
cdef class Atom:
|
|
767
|
+
"""A single atom in a molecule.
|
|
768
|
+
|
|
769
|
+
.. versionadded:: 0.4.0
|
|
770
|
+
Equality, hashing and pickle protocol support.
|
|
771
|
+
|
|
772
|
+
"""
|
|
773
|
+
cdef object owner
|
|
774
|
+
cdef bint owned
|
|
775
|
+
cdef _Atom* _atom
|
|
776
|
+
|
|
777
|
+
@classmethod
def load(cls, file):
    """Load an atom from the given file.

    The whole remaining content of ``file`` is read and handed to
    `Atom.loads`.

    Arguments:
        file (file-like object): A file-like object opened in text
            mode to read the atom from.

    """
    return cls.loads(file.read())
|
|
787
|
+
|
|
788
|
+
@classmethod
def loads(cls, text):
    """Parse a single atom record from a string or a bytes-like object.

    Arguments:
        text (`str`, `bytes` or `bytearray`): The atom line holding the
            atom metadata. `str` input is encoded as UTF-8 before being
            given to the parser.

    Returns:
        `~pyjess.Atom`: A new instance of ``cls`` backed by freshly
        allocated storage filled in by the parser.

    Raises:
        `MemoryError`: When the atom storage cannot be allocated.
        `ValueError`: When the parser rejects the record.

    """
    cdef const unsigned char* record
    cdef bytearray buffer
    cdef Atom parsed

    # work on a private, mutable copy of the input
    if not isinstance(text, str):
        buffer = bytearray(text)
    else:
        buffer = bytearray(text, 'utf-8')
    # the parser is always given a newline-terminated, NUL-terminated line
    if not buffer.endswith(b'\n'):
        buffer.append(b'\n')
    buffer.append(b'\0')
    record = buffer

    parsed = cls.__new__(cls)
    with nogil:
        parsed._atom = <_Atom*> malloc(sizeof(_Atom))
        if parsed._atom == NULL:
            raise MemoryError("Failed to allocate atom")
        if not jess.atom.Atom_parse(parsed._atom, <const char*> record):
            raise ValueError(f"Failed to parse atom: {text!r}")

    return parsed
|
|
819
|
+
|
|
820
|
+
def __cinit__(self):
|
|
821
|
+
self._atom = NULL
|
|
822
|
+
self.owner = None
|
|
823
|
+
self.owned = False
|
|
824
|
+
|
|
825
|
+
def __dealloc__(self):
|
|
826
|
+
if not self.owned:
|
|
827
|
+
free(self._atom)
|
|
828
|
+
|
|
829
|
+
def __init__(
    self,
    *,
    int serial,
    str name,
    str residue_name,
    str chain_id,
    int residue_number,
    double x,
    double y,
    double z,
    double occupancy = 0.0,
    double temperature_factor = 0.0,
    str altloc = ' ',
    str insertion_code = ' ',
    str segment = '',
    str element = '',
    int charge = 0,
):
    """__init__(self, *, serial, name, residue_name, chain_id, residue_number, x, y, z, occupancy=0.0, temperature_factor=0.0, altloc=' ', insertion_code=' ', segment='', element='', charge=0)\n--\n

    Create a new atom.

    All arguments are keyword-only.

    Arguments:
        serial (`int`): The atom serial number.
        name (`str`): The atom name, at most 4 ASCII characters.
        residue_name (`str`): The residue name, at most 3 ASCII
            characters.
        chain_id (`str`): The chain identifier, at most 2 characters.
        residue_number (`int`): The residue sequence number.
        x (`float`): The atom coordinate in the 1st dimension.
        y (`float`): The atom coordinate in the 2nd dimension.
        z (`float`): The atom coordinate in the 3rd dimension.
        occupancy (`float`): The atom occupancy.
        temperature_factor (`float`): The atom temperature factor.
        altloc (`str`): The alternate location indicator. It is passed
            to `ord`, so it must be exactly one character.
        insertion_code (`str`): The residue insertion code. It is
            passed to `ord`, so it must be exactly one character.
        segment (`str`): The segment identifier, at most 4 ASCII
            characters.
        element (`str`): The element symbol, at most 2 ASCII
            characters.
        charge (`int`): The atom charge.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the atom storage.
        `ValueError`: When either of the ``name``, ``residue_name``,
            ``segment``, ``element`` or ``chain_id`` strings is too
            long.

    """
    cdef bytearray _name
    cdef bytes     _residue_name
    cdef bytes     _segment
    cdef bytes     _element

    # reject over-long identifiers before any storage is allocated
    if len(name) > 4:
        raise ValueError(f"Invalid atom name: {name!r}")
    if len(residue_name) > 3:
        raise ValueError(f"Invalid residue name: {residue_name!r}")
    if len(segment) > 4:
        raise ValueError(f"Invalid segment: {segment!r}")
    if len(element) > 2:
        raise ValueError(f"Invalid element: {element!r}")
    if len(chain_id) > 2:
        raise ValueError(f"Invalid chain ID: {chain_id!r}")

    # the atom owns this storage (`owned` stays False), so `__dealloc__`
    # releases it with `free`
    self._atom = <_Atom*> malloc(sizeof(_Atom))
    if self._atom is NULL:
        raise MemoryError("Failed to allocate atom")

    # these conversions raise for non-ASCII text
    _residue_name = PyUnicode_AsASCIIString(residue_name)
    _segment = PyUnicode_AsASCIIString(segment)
    _element = PyUnicode_AsASCIIString(element)

    self._atom.serial = serial
    self._atom.altLoc = ord(altloc)
    # NOTE(review): an empty chain ID stores 0 in `chainID1`, whereas a
    # missing second character is stored as a space -- confirm that this
    # asymmetry is intended.
    self._atom.chainID1 = ord(chain_id[0]) if len(chain_id) > 0 else 0
    self._atom.chainID2 = ord(chain_id[1]) if len(chain_id) > 1 else ord(' ')
    self._atom.resSeq = residue_number
    self._atom.iCode = ord(insertion_code)
    self._atom.x[0] = x
    self._atom.x[1] = y
    self._atom.x[2] = z
    self._atom.occupancy = occupancy
    self._atom.tempFactor = temperature_factor
    self._atom.charge = charge
    # every text field is NUL-padded to its fixed width before being
    # handed to `encode_token`
    encode_token(self._atom.resName, _residue_name.ljust(3, b'\0'), 3)
    encode_token(self._atom.segID, _segment.ljust(4, b'\0'), 4)
    encode_token(self._atom.element, _element.ljust(2, b'\0'), 2)

    # FIXME: is alignment proper?
    # names shorter than 4 characters get a single leading underscore
    # before being NUL-padded to 4 bytes
    _name = bytearray(name, 'ascii')
    if len(_name) < 4:
        _name.insert(0, ord('_'))
    encode_token(self._atom.name, _name.ljust(4, b'\0'), 4)
|
|
905
|
+
|
|
906
|
+
def __copy__(self):
|
|
907
|
+
return self.copy()
|
|
908
|
+
|
|
909
|
+
cdef dict _state(self):
|
|
910
|
+
return {
|
|
911
|
+
"serial": self.serial,
|
|
912
|
+
"name": self.name,
|
|
913
|
+
"altloc": self.altloc,
|
|
914
|
+
"residue_name": self.residue_name,
|
|
915
|
+
"chain_id": self.chain_id,
|
|
916
|
+
"residue_number": self.residue_number,
|
|
917
|
+
"insertion_code": self.insertion_code,
|
|
918
|
+
"x": self.x,
|
|
919
|
+
"y": self.y,
|
|
920
|
+
"z": self.z,
|
|
921
|
+
"temperature_factor": self.temperature_factor,
|
|
922
|
+
"occupancy": self.occupancy,
|
|
923
|
+
"segment": self.segment,
|
|
924
|
+
"element": self.element,
|
|
925
|
+
"charge": self.charge,
|
|
926
|
+
}
|
|
927
|
+
|
|
928
|
+
def __reduce__(self):
|
|
929
|
+
cdef dict state = self._state()
|
|
930
|
+
return functools.partial(type(self), **state), ()
|
|
931
|
+
|
|
932
|
+
def __repr__(self):
    """Build a constructor-like representation of the atom.

    Every entry of the state dictionary whose value is not `None` is
    rendered as ``key=value``, in the order given by `_state`.

    """
    cdef dict state = self._state()
    cdef list parts = [
        f"{key}={value!r}"
        for key, value in state.items()
        if value is not None
    ]
    cdef str cls_name = type(self).__name__
    return f"{cls_name}({', '.join(parts)})"
|
|
939
|
+
|
|
940
|
+
def __sizeof__(self):
|
|
941
|
+
cdef size_t size = sizeof(self)
|
|
942
|
+
if not self.owned:
|
|
943
|
+
size += sizeof(_Atom)
|
|
944
|
+
return size
|
|
945
|
+
|
|
946
|
+
def __eq__(self, object other):
    """Compare two atoms attribute by attribute.

    Returns `NotImplemented` when ``other`` is not an `Atom`, so that
    Python can try the reflected comparison.

    """
    if isinstance(other, Atom):
        # FIXME: it should be possible to do a memcmp here.
        return self._state() == (<Atom> other)._state()
    return NotImplemented
|
|
953
|
+
|
|
954
|
+
def __hash__(self):
|
|
955
|
+
return hash(tuple(self._state().values()))
|
|
956
|
+
|
|
957
|
+
@property
|
|
958
|
+
def serial(self):
|
|
959
|
+
"""`int`: The atom serial number.
|
|
960
|
+
"""
|
|
961
|
+
assert self._atom is not NULL
|
|
962
|
+
return self._atom.serial
|
|
963
|
+
|
|
964
|
+
@property
|
|
965
|
+
def altloc(self):
|
|
966
|
+
"""`str`: The alternate location indicator for the atom.
|
|
967
|
+
"""
|
|
968
|
+
assert self._atom is not NULL
|
|
969
|
+
return chr(self._atom.altLoc)
|
|
970
|
+
|
|
971
|
+
@property
|
|
972
|
+
def name(self):
|
|
973
|
+
"""`str`: The atom name.
|
|
974
|
+
"""
|
|
975
|
+
assert self._atom is not NULL
|
|
976
|
+
return self._atom.name[:4].decode('ascii').strip("_")
|
|
977
|
+
|
|
978
|
+
@property
|
|
979
|
+
def residue_name(self):
|
|
980
|
+
"""`str`: The residue name.
|
|
981
|
+
"""
|
|
982
|
+
assert self._atom is not NULL
|
|
983
|
+
return self._atom.resName[:3].decode('ascii').strip("_")
|
|
984
|
+
|
|
985
|
+
@property
|
|
986
|
+
def residue_number(self):
|
|
987
|
+
"""`int`: The residue sequence number.
|
|
988
|
+
"""
|
|
989
|
+
assert self._atom is not NULL
|
|
990
|
+
return self._atom.resSeq
|
|
991
|
+
|
|
992
|
+
@property
|
|
993
|
+
def segment(self):
|
|
994
|
+
"""`str`: The segment identifier.
|
|
995
|
+
"""
|
|
996
|
+
assert self._atom is not NULL
|
|
997
|
+
return self._atom.segID[:4].decode('ascii').strip('_')
|
|
998
|
+
|
|
999
|
+
@property
|
|
1000
|
+
def element(self):
|
|
1001
|
+
"""`str`: The element symbol.
|
|
1002
|
+
"""
|
|
1003
|
+
assert self._atom is not NULL
|
|
1004
|
+
return self._atom.element[:2].decode('ascii').strip('_')
|
|
1005
|
+
|
|
1006
|
+
@property
|
|
1007
|
+
def insertion_code(self):
|
|
1008
|
+
"""`str`: The code for insertion of residues.
|
|
1009
|
+
"""
|
|
1010
|
+
assert self._atom is not NULL
|
|
1011
|
+
return chr(self._atom.iCode)
|
|
1012
|
+
|
|
1013
|
+
@property
|
|
1014
|
+
def chain_id(self):
|
|
1015
|
+
"""`str`: The identifier of the chain the atom belongs to.
|
|
1016
|
+
"""
|
|
1017
|
+
assert self._atom is not NULL
|
|
1018
|
+
return PyUnicode_FromFormat("%c%c", self._atom.chainID1, self._atom.chainID2).strip()
|
|
1019
|
+
|
|
1020
|
+
@property
|
|
1021
|
+
def occupancy(self):
|
|
1022
|
+
"""`float`: The atom occupancy.
|
|
1023
|
+
"""
|
|
1024
|
+
assert self._atom is not NULL
|
|
1025
|
+
return self._atom.occupancy
|
|
1026
|
+
|
|
1027
|
+
@property
|
|
1028
|
+
def temperature_factor(self):
|
|
1029
|
+
"""`float`: The atom temperature factor.
|
|
1030
|
+
"""
|
|
1031
|
+
assert self._atom is not NULL
|
|
1032
|
+
return self._atom.tempFactor
|
|
1033
|
+
|
|
1034
|
+
@property
|
|
1035
|
+
def charge(self):
|
|
1036
|
+
"""`int`: The atom charge.
|
|
1037
|
+
"""
|
|
1038
|
+
assert self._atom is not NULL
|
|
1039
|
+
return self._atom.charge
|
|
1040
|
+
|
|
1041
|
+
@property
|
|
1042
|
+
def x(self):
|
|
1043
|
+
"""`float`: The atom coordinate in the 1st dimension.
|
|
1044
|
+
"""
|
|
1045
|
+
assert self._atom is not NULL
|
|
1046
|
+
return self._atom.x[0]
|
|
1047
|
+
|
|
1048
|
+
@property
|
|
1049
|
+
def y(self):
|
|
1050
|
+
"""`float`: The atom coordinate in the 2nd dimension.
|
|
1051
|
+
"""
|
|
1052
|
+
assert self._atom is not NULL
|
|
1053
|
+
return self._atom.x[1]
|
|
1054
|
+
|
|
1055
|
+
@property
|
|
1056
|
+
def z(self):
|
|
1057
|
+
"""`float`: The atom coordinate in the 3rd dimension.
|
|
1058
|
+
"""
|
|
1059
|
+
assert self._atom is not NULL
|
|
1060
|
+
return self._atom.x[2]
|
|
1061
|
+
|
|
1062
|
+
cpdef Atom copy(self):
    """Create a copy of this atom.

    Returns:
        `~pyjess.Atom`: A newly allocated atom with identical attributes.
        The copy has its own storage, even when this atom is a view
        owned by another object.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the atom storage.

    .. versionadded:: 0.4.0

    """
    # `__cinit__` leaves the storage NULL until the atom is initialised;
    # guard here like every accessor does instead of reading from NULL
    assert self._atom is not NULL

    cdef Atom copy = Atom.__new__(Atom)
    copy._atom = <_Atom*> malloc(sizeof(_Atom))
    if copy._atom is NULL:
        raise MemoryError("Failed to allocate atom")
    memcpy(copy._atom, self._atom, sizeof(_Atom))
    return copy
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
cdef class TemplateAtom:
|
|
1080
|
+
"""A single template atom.
|
|
1081
|
+
|
|
1082
|
+
.. versionadded:: 0.4.0
|
|
1083
|
+
Equality, hashing and pickle protocol support.
|
|
1084
|
+
|
|
1085
|
+
"""
|
|
1086
|
+
cdef object owner
|
|
1087
|
+
cdef bint owned
|
|
1088
|
+
cdef _TessAtom* _atom
|
|
1089
|
+
|
|
1090
|
+
@classmethod
def load(cls, file):
    """Read a template atom from a path or an open file.

    Arguments:
        file (str, os.PathLike or file-like object): Either something
            `open` accepts, or an object that already exposes a
            ``read`` method.

    Returns:
        The object returned by ``cls.loads`` for the content read
        from ``file``.

    """
    try:
        source = open(file)
    except TypeError:
        # `open` rejected the argument: treat it as an already-open
        # file and leave closing it to the caller
        source = nullcontext(file)
    with source as stream:
        content = stream.read()
    return cls.loads(content)
|
|
1105
|
+
|
|
1106
|
+
@classmethod
def loads(cls, text):
    """Load a template atom from the given string.

    Arguments:
        text (`str`, `bytes` or `bytearray`): The atom line to read the
            atom metadata from. `str` input is encoded as UTF-8.

    Returns:
        `~pyjess.TemplateAtom`: The parsed template atom, created as an
        instance of ``cls`` so that subclasses get their own type back.

    Raises:
        `ValueError`: When the line cannot be parsed, or when the
            parsed match mode is outside the supported ranges.

    """
    cdef bytearray    b
    cdef TemplateAtom atom

    # work on a private, newline-terminated and NUL-terminated copy
    if isinstance(text, str):
        b = bytearray(text, 'utf-8')
    else:
        b = bytearray(text)
    if not b.endswith(b'\n'):
        b.append(b'\n')
    b.append(b'\0')

    # honour `cls` (as `Atom.loads` does) instead of hard-coding the
    # base class, so the classmethod works for subclasses too
    atom = cls.__new__(cls)
    atom._atom = jess.tess_atom.TessAtom_create(<const char*> b)
    if atom._atom == NULL:
        raise ValueError(f"Failed to parse template atom: {text!r}")

    # validate match mode *now* to avoid Jess exiting when it does so later
    if atom.match_mode not in range(-1, 9) and atom.match_mode not in range(100, 108):
        raise ValueError(f"Invalid match mode: {atom.match_mode!r}")

    return atom
|
|
1136
|
+
|
|
1137
|
+
def __cinit__(self):
|
|
1138
|
+
self.owner = None
|
|
1139
|
+
self.owned = False
|
|
1140
|
+
self._atom = NULL
|
|
1141
|
+
|
|
1142
|
+
def __dealloc__(self):
|
|
1143
|
+
if not self.owned:
|
|
1144
|
+
jess.tess_atom.TessAtom_free(self._atom)
|
|
1145
|
+
|
|
1146
|
+
def __init__(
    self,
    *,
    str chain_id,
    int residue_number,
    double x,
    double y,
    double z,
    object residue_names,
    object atom_names,
    double distance_weight = 0.0,
    int match_mode = 0,
):
    """__init__(self, *, chain_id, residue_number, x, y, z, residue_names, atom_names, distance_weight=0.0, match_mode=0)\n--\n

    Create a new template atom.

    All arguments are keyword-only.

    Arguments:
        chain_id (`str`): The chain identifier, at most 2 characters;
            shorter values are padded with spaces.
        residue_number (`int`): The residue sequence number.
        x (`float`): The x coordinate of the atom.
        y (`float`): The y coordinate of the atom.
        z (`float`): The z coordinate of the atom.
        residue_names (sized iterable of `str` or `bytes`): The
            accepted residue names, each at most 3 characters.
        atom_names (sized iterable of `str` or `bytes`): The accepted
            atom names, each at most 4 characters.
        distance_weight (`float`): The distance weight for this atom.
        match_mode (`int`): The match mode; must lie in
            ``range(-1, 9)`` or ``range(100, 108)``.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the template atom storage.
        `ValueError`: When ``match_mode`` is not supported, or when
            ``chain_id``, an atom name or a residue name is too long.

    """
    cdef size_t m
    cdef char*  p
    cdef size_t ac
    cdef size_t rc
    cdef size_t alloc_size

    # validate match mode to avoid a potential hard exit later
    if match_mode not in range(-1, 9) and match_mode not in range(100, 108):
        raise ValueError(f"Invalid match mode: {match_mode!r}")
    if len(chain_id) > 2:
        raise ValueError(f"Invalid chain ID: {chain_id!r}")

    # compute total allocation: the struct itself, one pointer per name,
    # then 5 bytes per atom name slot and 4 bytes per residue name slot
    ac = len(atom_names)
    rc = len(residue_names)
    alloc_size = sizeof(_TessAtom) + sizeof(char*) * (ac + rc) + sizeof(char) * (5*ac + 4*rc)

    # allocate base memory (struct, pointer tables and strings share a
    # single block)
    self._atom = <_TessAtom*> malloc(alloc_size)
    if self._atom is NULL:
        raise MemoryError("Failed to allocate template atom")

    # copy base data
    self._atom.code = match_mode
    self._atom.resSeq = residue_number
    self._atom.pos[0] = x
    self._atom.pos[1] = y
    self._atom.pos[2] = z
    # `ljust(2)` pads the chain ID with spaces so both bytes are defined
    self._atom.chainID1, self._atom.chainID2 = map(ord, chain_id.ljust(2))
    self._atom.nameCount = ac
    self._atom.resNameCount = rc
    self._atom.distWeight = distance_weight

    # setup string pointers: `p` walks the memory right after the
    # struct, laying out the atom name pointer table, the atom name
    # slots, the residue name pointer table and the residue name slots
    p = <char*> &self._atom[1]
    self._atom.name = <char**> p
    p += sizeof(char*)*ac
    for m in range(ac):
        self._atom.name[m] = <char*> p
        p += 5
    self._atom.resName = <char**> p
    p += sizeof(char*)*rc
    for m in range(rc):
        self._atom.resName[m] = <char*> p
        p += 4

    # copy atom names
    for m, name in enumerate(atom_names):
        if isinstance(name, str):
            _name = bytearray(name, 'ascii')
        else:
            _name = bytearray(name)
        # FIXME: is alignment proper?
        # names of up to 3 characters get a single leading underscore
        # before being NUL-padded to 4 bytes
        if len(_name) > 4:
            raise ValueError(f"Invalid atom name: {name!r}")
        elif len(_name) <= 3:
            _name.insert(0, ord('_'))
        encode_token(self._atom.name[m], _name.ljust(4, b'\0'), 4)

    # copy residue names (NUL-padded to 3 bytes)
    for m, name in enumerate(residue_names):
        _name = name.encode('ascii') if isinstance(name, str) else name
        if len(_name) > 3:
            raise ValueError(f"Invalid residue name: {name!r}")
        encode_token(self._atom.resName[m], _name.ljust(3, b'\0'), 3)
|
|
1233
|
+
|
|
1234
|
+
cdef dict _state(self):
|
|
1235
|
+
return {
|
|
1236
|
+
"chain_id": self.chain_id,
|
|
1237
|
+
"residue_number": self.residue_number,
|
|
1238
|
+
"x": self.x,
|
|
1239
|
+
"y": self.y,
|
|
1240
|
+
"z": self.z,
|
|
1241
|
+
"residue_names": self.residue_names,
|
|
1242
|
+
"atom_names": self.atom_names,
|
|
1243
|
+
"distance_weight": self.distance_weight,
|
|
1244
|
+
"match_mode": self.match_mode,
|
|
1245
|
+
}
|
|
1246
|
+
|
|
1247
|
+
def __repr__(self):
|
|
1248
|
+
cdef str ty = type(self).__name__
|
|
1249
|
+
cdef list args = []
|
|
1250
|
+
for k, v in self._state().items():
|
|
1251
|
+
args.append(f"{k}={v!r}")
|
|
1252
|
+
return f"{ty}({', '.join(args)})"
|
|
1253
|
+
|
|
1254
|
+
def __copy__(self):
|
|
1255
|
+
return self.copy()
|
|
1256
|
+
|
|
1257
|
+
def __eq__(self, object other):
|
|
1258
|
+
cdef TemplateAtom other_
|
|
1259
|
+
if not isinstance(other, TemplateAtom):
|
|
1260
|
+
return NotImplemented
|
|
1261
|
+
other_ = other
|
|
1262
|
+
return self._state() == other_._state()
|
|
1263
|
+
|
|
1264
|
+
def __hash__(self):
|
|
1265
|
+
return hash(tuple(self._state().values()))
|
|
1266
|
+
|
|
1267
|
+
def __reduce__(self):
|
|
1268
|
+
return functools.partial(type(self), **self._state()), ()
|
|
1269
|
+
|
|
1270
|
+
def __sizeof__(self):
|
|
1271
|
+
assert self._atom is not NULL
|
|
1272
|
+
|
|
1273
|
+
cdef size_t ac = self._atom.nameCount
|
|
1274
|
+
cdef size_t rc = self._atom.resNameCount
|
|
1275
|
+
cdef size_t size = sizeof(self)
|
|
1276
|
+
|
|
1277
|
+
if not self.owned:
|
|
1278
|
+
size += (
|
|
1279
|
+
sizeof(_TessAtom)
|
|
1280
|
+
+ sizeof(char*) * (ac + rc)
|
|
1281
|
+
+ sizeof(char) * (5*ac + 4*rc)
|
|
1282
|
+
)
|
|
1283
|
+
return size
|
|
1284
|
+
|
|
1285
|
+
@property
|
|
1286
|
+
def match_mode(self):
|
|
1287
|
+
"""`int`: The match mode for this particular atom.
|
|
1288
|
+
"""
|
|
1289
|
+
assert self._atom is not NULL
|
|
1290
|
+
return self._atom.code
|
|
1291
|
+
|
|
1292
|
+
@property
|
|
1293
|
+
def residue_number(self):
|
|
1294
|
+
"""`int`: The residue sequence number.
|
|
1295
|
+
"""
|
|
1296
|
+
assert self._atom is not NULL
|
|
1297
|
+
return self._atom.resSeq
|
|
1298
|
+
|
|
1299
|
+
@property
|
|
1300
|
+
def chain_id(self):
|
|
1301
|
+
"""`str`: The identifier of the chain the atom belongs to.
|
|
1302
|
+
"""
|
|
1303
|
+
assert self._atom is not NULL
|
|
1304
|
+
cdef char c1 = jess.tess_atom.TessAtom_chainID1(self._atom)
|
|
1305
|
+
cdef char c2 = jess.tess_atom.TessAtom_chainID2(self._atom)
|
|
1306
|
+
return PyUnicode_FromFormat("%c%c", c1, c2).strip()
|
|
1307
|
+
|
|
1308
|
+
@property
|
|
1309
|
+
def x(self):
|
|
1310
|
+
"""`float`: The x coordinate of the atom.
|
|
1311
|
+
"""
|
|
1312
|
+
assert self._atom is not NULL
|
|
1313
|
+
return self._atom.pos[0]
|
|
1314
|
+
|
|
1315
|
+
@property
|
|
1316
|
+
def y(self):
|
|
1317
|
+
"""`float`: The y coordinate of the atom.
|
|
1318
|
+
"""
|
|
1319
|
+
assert self._atom is not NULL
|
|
1320
|
+
return self._atom.pos[1]
|
|
1321
|
+
|
|
1322
|
+
@property
|
|
1323
|
+
def z(self):
|
|
1324
|
+
"""`float`: The z coordinate of the atom.
|
|
1325
|
+
"""
|
|
1326
|
+
assert self._atom is not NULL
|
|
1327
|
+
return self._atom.pos[2]
|
|
1328
|
+
|
|
1329
|
+
@property
|
|
1330
|
+
def atom_names(self):
|
|
1331
|
+
"""`tuple` of `str`: The different atom names for this atom.
|
|
1332
|
+
|
|
1333
|
+
.. versionchanged:: 0.4.1
|
|
1334
|
+
Property now returns a `tuple` rather than a `list`.
|
|
1335
|
+
|
|
1336
|
+
"""
|
|
1337
|
+
assert self._atom is not NULL
|
|
1338
|
+
|
|
1339
|
+
cdef int i
|
|
1340
|
+
cdef list l = []
|
|
1341
|
+
|
|
1342
|
+
for i in range(self._atom.nameCount):
|
|
1343
|
+
l.append(self._atom.name[i].replace(b'_', b'').decode())
|
|
1344
|
+
return tuple(l)
|
|
1345
|
+
|
|
1346
|
+
@property
|
|
1347
|
+
def residue_names(self):
|
|
1348
|
+
"""`tuple` of `str`: The different residue names for this atom.
|
|
1349
|
+
|
|
1350
|
+
.. versionchanged:: 0.4.1
|
|
1351
|
+
Property now returns a `tuple` rather than a `list`.
|
|
1352
|
+
|
|
1353
|
+
"""
|
|
1354
|
+
assert self._atom is not NULL
|
|
1355
|
+
|
|
1356
|
+
cdef int i
|
|
1357
|
+
cdef list l = []
|
|
1358
|
+
|
|
1359
|
+
for i in range(self._atom.resNameCount):
|
|
1360
|
+
l.append(self._atom.resName[i].replace(b'_', b'').decode())
|
|
1361
|
+
return tuple(l)
|
|
1362
|
+
|
|
1363
|
+
@property
|
|
1364
|
+
def distance_weight(self):
|
|
1365
|
+
"""`float`: The distance weight for this atom.
|
|
1366
|
+
"""
|
|
1367
|
+
assert self._atom is not NULL
|
|
1368
|
+
return self._atom.distWeight
|
|
1369
|
+
|
|
1370
|
+
cpdef TemplateAtom copy(self):
    """Create a copy of this template atom.

    Returns:
        `~pyjess.TemplateAtom`: A new template atom object with
        identical attributes.

    Raises:
        `MemoryError`: When the copy of the template atom storage
            cannot be allocated.

    .. versionadded:: 0.4.0

    """
    # `__cinit__` leaves the storage NULL until the atom is initialised
    assert self._atom is not NULL

    cdef TemplateAtom atom = TemplateAtom.__new__(TemplateAtom)
    with nogil:
        atom._atom = jess.tess_atom.TessAtom_copy(self._atom)
    # `Template.__init__` checks the result of the same call; do so
    # here as well rather than returning an atom without storage
    if atom._atom is NULL:
        raise MemoryError("Failed to allocate template atom")
    return atom
|
|
1384
|
+
|
|
1385
|
+
|
|
1386
|
+
cdef class Template:
|
|
1387
|
+
"""A template, as a sequence of `TemplateAtom` objects.
|
|
1388
|
+
|
|
1389
|
+
.. versionadded:: 0.4.0
|
|
1390
|
+
Equality, hashing and pickle protocol support.
|
|
1391
|
+
|
|
1392
|
+
"""
|
|
1393
|
+
cdef object owner
|
|
1394
|
+
cdef bint owned
|
|
1395
|
+
cdef _Template* _tpl
|
|
1396
|
+
cdef _TessTemplate* _tess
|
|
1397
|
+
|
|
1398
|
+
@classmethod
def loads(cls, text, str id = None):
    """Load a template from a string.

    Arguments:
        text (`str`): The content of the template. It is wrapped in
            an `io.StringIO` and handed to `Template.load`.
        id (`str`, optional): The identifier of the template. By default,
            the parser will take the one from the ``PDB_ID`` remark if
            found in the header.

    Returns:
        `~pyjess.Template`: The template parsed from the given string.

    See Also:
        `Template.load` to load a template from a file-like object or
        from a path.

    """
    return cls.load(io.StringIO(text), id=id)
|
|
1419
|
+
|
|
1420
|
+
@classmethod
def load(cls, file, str id = None):
    """Load a template from the given file.

    Only lines starting with ``ATOM`` (parsed with `TemplateAtom.loads`)
    and, when no ``id`` was given, the first usable ``REMARK PDB_ID``
    line are taken into account.

    Arguments:
        file (`str`, `os.PathLike` or file-like object): Either the
            path to a file, or a file-like object opened in **text mode**
            to read the template from.
        id (`str`, optional): The identifier of the template. By default,
            the parser will take the one from the ``PDB_ID`` remark if
            found in the header.

    Returns:
        `~pyjess.Template`: The template parsed from the given file.

    """
    cdef str  line
    cdef list fields
    cdef list atoms = []
    try:
        handle = open(file)
    except TypeError:
        # `open` rejected the argument: treat it as an already-open
        # file and leave closing it to the caller
        handle = nullcontext(file)
    with handle as f:
        for line in f:
            if line.startswith("ATOM"):
                atoms.append(TemplateAtom.loads(line))
            elif id is None and line.startswith("REMARK PDB_ID"):
                # a remark without any value after the keyword has only
                # two fields: skip it instead of raising `IndexError`
                fields = line.split(" ", maxsplit=2)
                if len(fields) > 2:
                    id = fields[2].strip()
    return cls(atoms, id=id)
|
|
1449
|
+
|
|
1450
|
+
def __cinit__(self):
|
|
1451
|
+
self._tpl = NULL
|
|
1452
|
+
self._tess = NULL
|
|
1453
|
+
self.owner = None
|
|
1454
|
+
self.owned = False
|
|
1455
|
+
|
|
1456
|
+
def __dealloc__(self):
|
|
1457
|
+
if not self.owned:
|
|
1458
|
+
jess.tess_template.TessTemplate_free(self._tpl)
|
|
1459
|
+
|
|
1460
|
+
def __init__(self, object atoms = (), str id = None):
    """__init__(self, atoms=(), id=None)\n--\n

    Create a new template.

    Every given atom is duplicated with ``TessAtom_copy``: the template
    stores its own copies rather than pointers to the input atoms.

    Arguments:
        atoms (sequence of `~pyjess.TemplateAtom`): The atoms of the
            templates.
        id (`str`, optional): The identifier of the template.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the template storage.

    """
    cdef int          i
    cdef int          j
    cdef double       dist
    cdef TemplateAtom atom
    cdef size_t       alloc_size
    cdef int          count      = len(atoms)

    # a single zero-initialised block holds, in order: the generic
    # template header, the Tess template, the atom pointer table, the
    # distance row pointer table, and the count x count distance matrix
    alloc_size = (
        sizeof(_Template) + sizeof(_TessTemplate)
        + count * sizeof(_TessAtom*)
        + count * sizeof(double*)
        + count * count * sizeof(double)
    )

    self._tpl = <_Template*> calloc(1, alloc_size)
    if self._tpl is NULL:
        raise MemoryError("Failed to allocate template")

    # setup memory for atoms
    self._tess = <_TessTemplate*> &self._tpl[1]
    self._tess.atom = <_TessAtom**> &self._tess[1]
    for i in range(count):
        self._tess.atom[i] = NULL

    # setup memory and pointers for distances: row 0 starts right after
    # the row pointer table, each following row starts `count` doubles
    # after the previous one
    self._tess.distance = <double**> &self._tess.atom[count]
    if count > 0:
        self._tess.distance[0] = <double*> &self._tess.distance[count]
    for i in range(1, count):
        self._tess.distance[i] = <double*> &self._tess.distance[i-1][count]

    # setup template function pointers
    self._tpl.free = jess.tess_template.TessTemplate_free
    self._tpl.match = jess.tess_template.TessTemplate_match
    self._tpl.position = jess.tess_template.TessTemplate_position
    self._tpl.count = jess.tess_template.TessTemplate_count
    self._tpl.range = jess.tess_template.TessTemplate_range
    self._tpl.check = jess.tess_template.TessTemplate_check
    self._tpl.candidates = jess.tess_template.TessTemplate_candidates
    self._tpl.name = jess.tess_template.TessTemplate_name
    self._tpl.logE = jess.tess_template.TessTemplate_logE
    self._tpl.distWeight = jess.tess_template.TessTemplate_distWeight
    self._tpl.copy = jess.tess_template.TessTemplate_copy

    # copy name and atom count (the identifier is duplicated with
    # `strdup`, or left NULL when no identifier was given)
    self._tess.count = count
    self._tess.symbol = NULL if id is None else strdup(id.encode())

    # copy atom data
    for i, atom in enumerate(atoms):
        assert i < count
        self._tess.atom[i] = jess.tess_atom.TessAtom_copy(atom._atom)
        if self._tess.atom[i] is NULL:
            raise MemoryError("Failed to allocate template atom")

    # compute distances (symmetric matrix, zero diagonal)
    for i in range(count):
        self._tess.distance[i][i] = 0.0
        for j in range(i+1, count):
            dist = jess.tess_atom.TessAtom_distance(self._tess.atom[i], self._tess.atom[j])
            self._tess.distance[i][j] = dist
            self._tess.distance[j][i] = dist

    # compute dimension: the number of distinct
    # (residue number, chain ID) combinations among the atoms
    residues = {
        (
            self._tess.atom[i].resSeq ,
            self._tess.atom[i].chainID1,
            self._tess.atom[i].chainID2,
        )
        for i in range(count)
    }
    self._tess.dim = len(residues)
|
|
1548
|
+
|
|
1549
|
+
def __copy__(self):
|
|
1550
|
+
return self.copy()
|
|
1551
|
+
|
|
1552
|
+
def __len__(self):
|
|
1553
|
+
assert self._tpl is not NULL
|
|
1554
|
+
return self._tess.count
|
|
1555
|
+
|
|
1556
|
+
def __getitem__(self, object index):
    """Get one template atom, or a sub-template for a slice.

    A slice builds a new template of the same type and identifier from
    the selected atoms. An integer (negative values count from the end)
    returns a `TemplateAtom` view that points into this template's
    storage and references the template as its owner.

    Raises:
        `IndexError`: When the integer index is out of range.

    """
    assert self._tess is not NULL

    cdef TemplateAtom view
    cdef ssize_t      n    = self._tess.count
    cdef ssize_t      pos

    if isinstance(index, slice):
        start, stop, step = index.indices(n)
        picked = [self[k] for k in range(start, stop, step)]
        return type(self)(atoms=picked, id=self.id)

    pos = index
    if pos < 0:
        pos += n
    if pos >= n or pos < 0:
        raise IndexError(index)

    # the view borrows the atom storage: mark it as owned so that it
    # is not freed when the view is deallocated
    view = TemplateAtom.__new__(TemplateAtom)
    view._atom = self._tess.atom[pos]
    view.owned = True
    view.owner = self
    return view
|
|
1577
|
+
|
|
1578
|
+
def __eq__(self, object other):
    """Compare two templates.

    Templates are equal when they share the same identifier, dimension
    and length, and when their atoms compare equal pairwise. Returns
    `NotImplemented` when ``other`` is not a `Template`.

    """
    cdef Template rhs

    if not isinstance(other, Template):
        return NotImplemented
    rhs = other

    # cheap scalar attributes first, in the same order as before
    if (
        self.id != rhs.id
        or self.dimension != rhs.dimension
        or len(self) != len(rhs)
    ):
        return False

    for mine, theirs in zip(self, rhs):
        if not (mine == theirs):
            return False
    return True
|
|
1590
|
+
|
|
1591
|
+
def __hash__(self):
|
|
1592
|
+
return hash((
|
|
1593
|
+
self.id,
|
|
1594
|
+
*(hash(x) for x in self)
|
|
1595
|
+
))
|
|
1596
|
+
|
|
1597
|
+
def __reduce__(self):
|
|
1598
|
+
return type(self), (list(self), self.id)
|
|
1599
|
+
|
|
1600
|
+
def __sizeof__(self):
    """Compute the memory footprint of the template.

    Returns:
        `int`: The size of the object itself, plus the template
        storage (headers, atom pointer table, distance row pointers
        and distance matrix), plus the storage of every atom (struct,
        name pointer tables and name slots).

    """
    assert self._tess is not NULL

    cdef size_t     i
    cdef size_t     ac
    cdef size_t     rc
    cdef _TessAtom* atom
    cdef size_t     size = sizeof(self)

    # accumulate on top of the object size: a plain assignment here
    # would silently discard the `sizeof(self)` initial value
    size += (
        sizeof(_Template)
        + sizeof(_TessTemplate)
        + self._tess.count * sizeof(_TessAtom*)
        + self._tess.count * sizeof(double*)
        + self._tess.count * self._tess.count * sizeof(double)
    )
    # same per-atom formula as the one used to allocate a template atom
    for i in range(self._tess.count):
        atom = self._tess.atom[i]
        ac = atom.nameCount
        rc = atom.resNameCount
        size += (
            sizeof(_TessAtom)
            + sizeof(char*) * (ac + rc)
            + sizeof(char) * (5*ac + 4*rc)
        )
    return size
|
|
1626
|
+
|
|
1627
|
+
@property
|
|
1628
|
+
def id(self):
|
|
1629
|
+
"""`str` or `None`: An identifier for the template, if any.
|
|
1630
|
+
"""
|
|
1631
|
+
assert self._tpl is not NULL
|
|
1632
|
+
|
|
1633
|
+
cdef const char* name = self._tpl.name(self._tpl)
|
|
1634
|
+
if name is NULL:
|
|
1635
|
+
return None
|
|
1636
|
+
return name.decode()
|
|
1637
|
+
|
|
1638
|
+
@property
|
|
1639
|
+
def dimension(self):
|
|
1640
|
+
"""`int`: The dimension of the template (i.e. number of residues).
|
|
1641
|
+
"""
|
|
1642
|
+
assert self._tess is not NULL
|
|
1643
|
+
return self._tess.dim
|
|
1644
|
+
|
|
1645
|
+
cpdef Template copy(self):
    """Create a copy of the template.

    Returns:
        `~pyjess.Template`: A new template object with identical
        attributes and a copy of the `TemplateAtom` it contains.

    Raises:
        `MemoryError`: When the copy of the template storage cannot
            be allocated.

    """
    # `__cinit__` leaves the storage NULL until the template is
    # initialised; the copy goes through a function pointer stored in it
    assert self._tpl is not NULL

    cdef Template tpl = Template.__new__(Template)
    with nogil:
        tpl._tpl = self._tpl.copy(self._tpl)
    # check before deriving `_tess`: offsetting a NULL pointer would
    # produce a non-NULL garbage pointer that passes every later assert
    if tpl._tpl is NULL:
        raise MemoryError("Failed to allocate template")
    tpl._tess = <_TessTemplate*> &tpl._tpl[1]
    return tpl
|
|
1658
|
+
|
|
1659
|
+
cdef class Query:
    """A query over templates with a given molecule.

    Jess iterates over the templates and attempts to match each of them
    against the query molecule, so the hits can actually be generated
    iteratively. This class allows accessing the hits as a Python iterator.

    Attributes:
        jess (`~pyjess.Jess`): The templates this object is currently
            scanning.
        molecule (`~pyjess.Molecule`): The query molecule to align to
            the templates.
        rmsd_threshold (`float`): The RMSD threshold for reporting
            results.
        best_match (`bool`): Whether the query will return only the
            best match to each template.

    """
    cdef _JessQuery* _jq
    cdef bint _partial
    cdef int _candidates
    cdef uintptr_t _prev_tpl
    cdef int _max_candidates
    cdef _IgnoreType _ignore_chain

    cdef readonly Jess jess
    cdef readonly Molecule molecule
    cdef readonly bint best_match
    cdef readonly double rmsd_threshold

    def __cinit__(self):
        self._jq = NULL
        self._candidates = 0
        self._partial = False
        self._prev_tpl = 0

    def __dealloc__(self):
        # a `Query` that was never bound to a C query keeps a NULL handle,
        # which must not be handed to the C deallocator
        if self._jq is not NULL:
            jess.jess.JessQuery_free(self._jq)

    def __iter__(self):
        return self

    @property
    def ignore_chain(self):
        """`str` or `None`: The way atom chains are considered or discarded.
        """
        if self._ignore_chain == _IgnoreType.ignoreNone:
            return None
        elif self._ignore_chain == _IgnoreType.ignoreResidues:
            return "residues"
        elif self._ignore_chain == _IgnoreType.ignoreAtoms:
            return "atoms"

    @ignore_chain.setter
    def ignore_chain(self, ignore_chain):
        if ignore_chain is None:
            self._ignore_chain = _IgnoreType.ignoreNone
        elif ignore_chain == "residues":
            self._ignore_chain = _IgnoreType.ignoreResidues
        elif ignore_chain == "atoms":
            self._ignore_chain = _IgnoreType.ignoreAtoms
        else:
            raise ValueError(f"invalid value for `ignore_chain`: {ignore_chain!r}")

    @property
    def max_candidates(self):
        """`int` or `None`: The maximum number of candidate hits to report *by template*.

        `None` means that no limit is applied (stored internally as ``-1``).
        """
        return None if self._max_candidates == -1 else self._max_candidates

    @max_candidates.setter
    def max_candidates(self, max_candidates):
        if max_candidates is None:
            self._max_candidates = -1
        elif max_candidates >= 0:
            self._max_candidates = max_candidates
        else:
            raise ValueError(f"invalid value for `max_candidates` argument: {max_candidates!r}")

    cdef bint _advance(self) noexcept nogil:
        # a pending "rewound" candidate is consumed first, without
        # stepping the underlying C iterator
        if self._partial:
            self._partial = False
            return True
        return jess.jess.JessQuery_next(self._jq, self._ignore_chain)

    cdef bint _rewind(self) noexcept nogil:
        # mark the current candidate as not consumed, so that the next
        # call to `_advance` yields it again
        self._partial = True
        return False

    cdef int _copy_atoms(self, const _Template* tpl, Hit hit) except -1 nogil:
        # Copy the atoms matched by the current candidate into `hit`,
        # returning the number of atoms copied.
        cdef int i
        cdef _Atom** atoms = jess.jess.JessQuery_atoms(self._jq)
        cdef int count = tpl.count(tpl)
        # always request at least one slot so that a NULL result can only
        # mean an allocation failure (and the old buffer is still valid)
        cdef size_t slots = <size_t> count if count > 0 else 1
        cdef _Atom* buffer = <_Atom*> realloc(hit._atoms, slots * sizeof(_Atom))

        if buffer is NULL:
            # `hit._atoms` is left untouched and gets released by `Hit.__dealloc__`
            raise MemoryError("Failed to allocate hit atoms")
        hit._atoms = buffer
        for i in range(count):
            memcpy(&hit._atoms[i], atoms[i], sizeof(_Atom))
        return count

    cdef int _copy_superposition(self, _Superposition* sup, Hit hit) noexcept nogil:
        # Copy the rotation matrix and both centroids of `sup` into `hit`.
        cdef const double* M = jess.super.Superposition_rotation(sup)
        cdef const double* c = jess.super.Superposition_centroid(sup, 0)
        cdef const double* v = jess.super.Superposition_centroid(sup, 1)
        memcpy(hit._rotation, M, 9*sizeof(double))
        memcpy(hit._centre[0], c, 3*sizeof(double))
        memcpy(hit._centre[1], v, 3*sizeof(double))
        return 0

    def __next__(self):
        assert self._jq is not NULL

        cdef int i
        cdef bint nan
        cdef bint hit_found = False
        cdef double rmsd
        cdef const double* rot
        cdef _Template* tpl = NULL
        cdef _Template* hit_tpl = NULL
        cdef _Superposition* sup = NULL
        cdef Hit hit = Hit.__new__(Hit)

        # prepare the hit to be returned
        hit.rmsd = INFINITY
        hit._atoms = NULL
        hit._molecule = self.molecule

        # search the next hit without the GIL to allow parallel queries.
        with nogil:
            while self._advance():
                # load current iteration template, and check that the hit
                # was obtained with the current template and not with the
                # previous one
                # NOTE(review): `tpl` is a local reset to NULL on every call,
                # so the first iteration of each call stores 0 here and the
                # candidate counter below restarts; confirm this is intended
                # when `best_match` is disabled.
                self._prev_tpl = <uintptr_t> tpl
                tpl = jess.jess.JessQuery_template(self._jq)
                if hit_found and hit_tpl != tpl:
                    self._rewind()
                    break

                # load superposition for the current iteration; it is always
                # released in the `finally` clause, even when the warning
                # below is turned into an exception or the atom copy fails
                sup = jess.jess.JessQuery_superposition(self._jq)
                try:
                    rmsd = jess.super.Superposition_rmsd(sup)

                    # NB(@althonos): we don't need to compute the E-value to get the
                    #                best match by molecule/template pair since the
                    #                logE-value for a fixed pair varies by the RMSD
                    #                term only (see `TessTemplate_logE`)

                    # check that the candidate passes threshold, and return it
                    # if not in best match, otherwise record it until the next
                    # template is reached (or the iterator finished)
                    if rmsd <= self.rmsd_threshold and rmsd < hit.rmsd:
                        # check if the rotation matrix contains NaN values
                        rot = jess.super.Superposition_rotation(sup)
                        nan = False
                        for i in range(9):
                            nan |= isnan(rot[i])

                        if nan:
                            with gil:
                                PyErr_WarnEx(
                                    UserWarning,
                                    "Jess returned a superposition matrix with NaN values",
                                    2,
                                )
                        else:
                            self._copy_atoms(tpl, hit)
                            self._copy_superposition(sup, hit)
                            hit.rmsd = rmsd
                            hit_tpl = tpl
                            hit_found = True

                    # check if we already made it to the next template,
                    # or if we need to short-circuit the iteration and
                    # force the query to move to the next template as
                    # we found too many candidates already.
                    if <uintptr_t> tpl != self._prev_tpl:
                        self._candidates = 0
                    else:
                        self._candidates += 1
                    if self._max_candidates != -1 and self._candidates > self._max_candidates:
                        self._candidates = 0
                        jess.jess.JessQuery_nextTemplate(self._jq)
                finally:
                    # free superposition items (relevant data was copied in
                    # the Hit if needed)
                    jess.super.Superposition_free(sup)

                # return hits immediately if we are not in best match mode
                if hit_found and not self.best_match:
                    break

        if not hit_found:
            raise StopIteration

        # get the template object for the hit
        hit.template = self.jess._templates[self.jess._indices[<size_t> hit_tpl]]
        return hit
|
|
1854
|
+
|
|
1855
|
+
|
|
1856
|
+
cdef class Hit:
    """A hit identified between a query molecule and a target template.

    Attributes:
        rmsd (`float`): The RMSD between the aligned structures.
        template (`~pyjess.Template`): The template that matched the
            query molecule.
        molecule (`~pyjess.Molecule`): The query molecule.

    """
    cdef double[9] _rotation
    cdef double[2][3] _centre
    cdef _Atom* _atoms

    cdef readonly double rmsd
    cdef readonly Template template
    cdef Molecule _molecule

    def __dealloc__(self):
        free(self._atoms)

    def __getstate__(self):
        return {
            "rotation": list(self._rotation),
            "centre": list(self._centre),
            "atoms": self.atoms(transform=False),
            "rmsd": self.rmsd,
            "template": self.template,
            "molecule": self.molecule(transform=False),
        }

    def __setstate__(self, state):
        cdef size_t i
        cdef size_t count
        cdef Atom atom
        cdef _Atom* buffer

        self.rmsd = state["rmsd"]
        self.template = state["template"]
        self._molecule = state["molecule"]
        self._rotation = state["rotation"]
        self._centre = state["centre"]

        # check number of atoms is consistent
        count = len(self.template)
        if len(state["atoms"]) != count:
            raise ValueError(f"unexpected number of atoms: {len(state['atoms'])!r} (expected {count!r})")
        # allocate or reallocate memory for atoms; at least one slot is
        # requested so a NULL result always means failure, in which case
        # the previous buffer is kept (and later freed by `__dealloc__`)
        buffer = <_Atom*> realloc(self._atoms, (count if count > 0 else 1) * sizeof(_Atom))
        if buffer is NULL:
            raise MemoryError("Failed to allocate hit atoms")
        self._atoms = buffer
        # copy atom data
        for i, atom in enumerate(state["atoms"]):
            memcpy(&self._atoms[i], atom._atom, sizeof(_Atom))

    cdef void _transform_atom(self, double* x, const double* src):
        # Compute `x = v + M * (src - c)`, where `M` is the stored rotation
        # and `c`, `v` are the two stored centres.
        cdef size_t i
        cdef size_t j
        cdef const double* M = self._rotation
        cdef const double* c = self._centre[0]
        cdef const double* v = self._centre[1]

        for i in range(3):
            x[i] = v[i]
            for j in range(3):
                x[i] += M[3*i + j] * (src[j] - c[j])

    @property
    def determinant(self):
        """`float`: The determinant of the rotation matrix.
        """
        cdef const double* p = self._rotation
        cdef double det = 0.0

        # cofactor expansion along the first row of the 3x3 matrix
        with nogil:
            det += p[0] * (p[4] * p[8] - p[5] * p[7])
            det -= p[1] * (p[3] * p[8] - p[5] * p[6])
            det += p[2] * (p[3] * p[7] - p[4] * p[6])
        return det

    @property
    def log_evalue(self):
        """`float`: The logarithm of the E-value estimated for the hit.
        """
        assert self.template._tpl is not NULL

        cdef int n
        cdef double e

        with nogil:
            n = jess.molecule.Molecule_count(self._molecule._mol)
            e = self.template._tpl.logE(self.template._tpl, self.rmsd, n)
        return e

    @property
    def evalue(self):
        """`float`: The E-value estimated for the hit.
        """
        assert self.template._tpl is not NULL

        cdef int n
        cdef double e

        with nogil:
            n = jess.molecule.Molecule_count(self._molecule._mol)
            e = exp(self.template._tpl.logE(self.template._tpl, self.rmsd, n))
        return e

    cpdef list atoms(self, bint transform=True):
        """Get the list of query atoms matching the template.

        Arguments:
            transform (`bool`): Whether or not to transform coordinates
                of hits into template frame.

        Returns:
            `list` of `~pyjess.Atom`: The list of matching atoms.

        Raises:
            `MemoryError`: When a transformed atom cannot be allocated.

        """
        assert self.template._tpl is not NULL

        cdef Atom atom
        cdef int k
        cdef int count = self.template._tpl.count(self.template._tpl)
        cdef list atoms = []

        for k in range(count):
            atom = Atom.__new__(Atom)
            if transform:
                # transformed atoms get their own storage, so that the
                # coordinates recorded in the hit stay untouched
                atom._atom = <_Atom*> malloc(sizeof(_Atom))
                if atom._atom is NULL:
                    raise MemoryError("Failed to allocate atom")
                memcpy(atom._atom, &self._atoms[k], sizeof(_Atom))
                self._transform_atom(atom._atom.x, self._atoms[k].x)
            else:
                # untransformed atoms are views over the hit buffer, which
                # the `owner` reference keeps alive
                atom.owned = True
                atom.owner = self
                atom._atom = &self._atoms[k]
            atoms.append(atom)

        return atoms

    cpdef Molecule molecule(self, bint transform=False):
        """Get the molecule matching the template.

        Arguments:
            transform (`bool`): Whether or not to transform coordinates
                of the molecule atoms into template frame.

        Returns:
            `~pyjess.Molecule`: The matching molecule, optionally
            rotated to match the template coordinate.

        .. versionadded:: 0.5.0

        """
        assert self.template._tpl is not NULL

        cdef _Atom* atom
        cdef Molecule mol
        cdef size_t k

        if not transform:
            return self._molecule

        # work on a copy so the query molecule is left unmodified
        mol = self._molecule.copy()
        for k in range(mol._mol.count):
            atom = mol._mol.atom[k]
            self._transform_atom(atom.x, self._molecule._mol.atom[k].x)

        return mol

    cpdef str dumps(self, str format="pdb", bint transform=True):
        """Write the hit to a string.

        Arguments:
            format (`str`): The format in which to write the hit.
                Currently only supports ``pdb``, which writes the hits
                in the same format as Jess.
            transform (`bool`): Whether or not to transform coordinates
                of the molecule atoms into template frame.

        Raises:
            `RuntimeError`: When attempting to dump a `Hit` which was
                obtained from a `Template` which has no `~Template.id`.
            `ValueError`: When ``format`` is not a supported format.

        .. versionadded:: 0.7.0

        """
        file = io.StringIO()
        self.dump(file, format=format, transform=transform)
        return file.getvalue()

    cpdef void dump(self, object file, str format="pdb", bint transform=True):
        """Write the hit to a file.

        Arguments:
            file (file-like object): A file opened in *text* mode where the
                hit will be written.
            format (`str`): The format in which to write the hit.
                Currently only supports ``pdb``, which writes the hits
                in the same format as Jess.
            transform (`bool`): Whether or not to transform coordinates
                of the molecule atoms into template frame.

        Raises:
            `RuntimeError`: When attempting to dump a `Hit` which was
                obtained from a `Template` which has no `~Template.id`.
            `ValueError`: When ``format`` is not a supported format.

        .. versionadded:: 0.7.0

        """
        assert self.template._tpl is not NULL
        assert self._molecule._mol is not NULL

        cdef _Atom* atom
        cdef size_t k
        cdef int written
        # `sprintf` is unbounded: the buffer is sized so that even the
        # longest `%f` rendering of five huge floating-point fields fits,
        # instead of the exact width of a well-formed PDB record
        cdef char[2048] buffer
        cdef char[5] name
        cdef char[5] resname
        cdef double[3] x
        cdef int count = self.template._tpl.count(self.template._tpl)

        # reject unknown formats rather than silently writing PDB records
        if format != "pdb":
            raise ValueError(f"invalid value for `format`: {format!r}")
        if self.template.id is None:
            raise RuntimeError("cannot dump `Hit` where `self.template.id` is `None`")

        file.write("REMARK ")
        file.write(self._molecule.id)
        file.write(f" {self.rmsd:5.3f} ")
        file.write(self.template.id)
        file.write(f" Det={self.determinant:4,.1f} log(E)~ {self.log_evalue:4.2f}\n")

        for k in range(count):
            atom = &self._atoms[k]
            decode_token(name, atom.name, 4)
            decode_token(resname, atom.resName, 3)
            if transform:
                self._transform_atom(x, atom.x)
            else:
                memcpy(x, atom.x, 3*sizeof(double))
            # only the fields consumed by the format string are passed
            written = sprintf(
                buffer,
                "ATOM %5i%5s%c%-3s%c%c%4i%-4c%8.3f%8.3f%8.3f%6.2f%6.2f\n",
                atom.serial,
                name,
                atom.altLoc,
                resname,
                atom.chainID1,
                atom.chainID2,
                atom.resSeq,
                atom.iCode,
                x[0],
                x[1],
                x[2],
                atom.occupancy,
                atom.tempFactor,
            )
            file.write(PyUnicode_FromStringAndSize(buffer, written))
        file.write("ENDMDL\n")
|
|
2124
|
+
|
|
2125
|
+
cdef class Jess:
    """A handle to run Jess over a list of templates.

    Example:
        Create a `Jess` object from a list of templates::

            >>> t1 = Template.load("1.3.3.tpl")
            >>> t2 = Template.load("4.1.2.tpl")
            >>> jess = Jess([t1, t2])

        Once initialized, the `Jess` object cannot be modified further.
        Use the `~Jess.query` method to query the templates with a
        molecule::

            >>> molecule = Molecule.load("1AMY.pdb")
            >>> query = jess.query(molecule, 2, 2, 2)

        The returned `Query` object is an iterator that can be
        advanced through a ``for`` loop, or with the `next` built-in
        function to get the first hit:

            >>> hit = next(query)
            >>> hit.rmsd
            1.4386...

        The hit can also be formatted in PDB format like in the
        original JESS code::

            >>> print(hit.dumps(format="pdb"), end="")
            REMARK 1AMY 1.439 2om2 Det= 1.0 log(E)~ 1.11
            ATOM 729 CA THR A 94 34.202 -24.426 8.851 1.00 2.00
            ATOM 732 CB THR A 94 35.157 -23.467 8.101 1.00 4.66
            ATOM 733 OG1 THR A 94 36.338 -23.247 8.871 1.00 9.85
            ATOM 746 CD GLU A 96 41.454 -29.509 8.013 1.00 24.05
            ATOM 748 OE2 GLU A 96 42.536 -29.680 7.441 1.00 34.44
            ATOM 747 OE1 GLU A 96 41.212 -28.521 8.708 1.00 18.56
            ATOM 437 CZ ARG A 55 44.471 -26.619 10.181 1.00 8.51
            ATOM 436 NE ARG A 55 44.334 -27.346 11.290 1.00 9.05
            ATOM 438 NH1 ARG A 55 43.590 -26.751 9.179 1.00 13.17
            ENDMDL

    .. versionadded:: 0.4.0
       Equality, hashing and pickle protocol support.

    """
    cdef _Jess* _jess
    cdef dict _indices
    cdef tuple _templates
    cdef size_t length

    def __cinit__(self):
        self._jess = NULL
        self.length = 0

    def __dealloc__(self):
        # `_jess` stays NULL when `__init__` never ran (or failed early)
        if self._jess is not NULL:
            jess.jess.Jess_free(self._jess)

    def __init__(self, object templates = ()):
        """__init__(self, templates=())\n--\n

        Create a new Jess database containing the given templates.

        Arguments:
            templates (sequence of `~pyjess.Template`): The templates to
                index in the database for further querying.

        Raises:
            `TypeError`: When one of the given templates is `None`.
            `MemoryError`: When the database or a template copy cannot
                be allocated.

        Caution:
            The `~pyjess.Template` objects given in argument will be copied
            because the internal C data structure requires ownership of the
            data. Modification to the original `~pyjess.Template` objects will
            not have an effect on the newly created `~pyjess.Jess` templates.

        """
        cdef Template template
        cdef _Template* tpl
        cdef list _templates = []

        self._jess = jess.jess.Jess_create()
        if self._jess is NULL:
            raise MemoryError("Failed to allocate Jess database")
        self._indices = {}

        for template in templates:
            # a typed variable accepts `None`, which has no `_tpl` to read
            if template is None:
                raise TypeError("expected `Template`, found `None`")
            # NOTE: the Jess storage owns the data, so we make a copy of the
            #       template given as argument to avoid a double-free.
            tpl = template._tpl.copy(template._tpl)
            if tpl is NULL:
                raise MemoryError("Failed to allocate template copy")
            jess.jess.Jess_addTemplate(self._jess, tpl)
            # map the address of the C copy to the position of the Python
            # template, so hits can be traced back to their `Template`
            self._indices[<size_t> tpl] = self.length
            _templates.append(template)
            self.length += 1

        self._templates = tuple(_templates)

    def __copy__(self):
        return self.copy()

    def __reduce__(self):
        return type(self), (self._templates,)

    def __eq__(self, object other):
        cdef Jess other_
        if not isinstance(other, Jess):
            return NotImplemented
        other_ = other
        return self._templates == other_._templates

    def __hash__(self):
        return hash((Jess, self._templates))

    def __len__(self):
        return self.length

    def __getitem__(self, object index):
        cdef ssize_t index_

        if isinstance(index, slice):
            indices = range(*index.indices(self.length))
            return type(self)(map(self.__getitem__, indices))
        else:
            index_ = index
            if index_ < 0:
                index_ += self.length
            if index_ < 0 or index_ >= self.length:
                raise IndexError(index)
            return self._templates[index_]

    cpdef Jess copy(self):
        """Create a copy of the `Jess` object.

        Returns:
            `~pyjess.Jess`: A `Jess` object containing the same templates.

        .. versionadded:: 0.4.0

        """
        return type(self)(self._templates)

    def query(
        self,
        Molecule molecule not None,
        double rmsd_threshold,
        double distance_cutoff,
        double max_dynamic_distance,
        *,
        object max_candidates = None,
        object ignore_chain = None,
        bint best_match = False,
        bint reorder = True,
    ):
        """Scan for templates matching the given molecule.

        Arguments:
            molecule (`~pyjess.Molecule`): The protein to match the
                templates to. Passing `None` raises a `TypeError`.
            rmsd_threshold (`float`): The RMSD threshold for reporting
                results.
            distance_cutoff (`float`): The global distance cutoff
                used to guide the search.
            max_dynamic_distance (`float`): The maximum template/query
                dynamic distance after adding the global distance cutoff
                and the individual atom distance cutoff defined for each
                atom of the template.
            max_candidates (`int` or `None`): The maximum number of candidate
                hits to report by template. If a non-`None` value is given,
                it may speed up querying for unspecific templates, but also
                produce results potentially inconsistent with Jess.
            ignore_chain (`str` or `None`): Whether to check or ignore the
                chain of the atoms to match. The different supported modes
                are:

                - `None`: Force the atoms in the molecule to belong
                  to different (resp. same) chains if so is the case
                  in the template.
                - ``residues``: Allow atoms to belong to different
                  (resp. same) chains even if it is not the case in
                  the template, but force all atoms of a residue to
                  belong to the same chain.
                - ``atoms``: Allow atoms to belong to any chain,
                  independently to the template or the residue they
                  belong to.

            best_match (`bool`): Pass `True` to return only the best match
                to each template, based on RMSD. In case of ties, the
                first match is returned. Note that a match must still
                be passing the RMSD threshold given in ``rmsd_threshold``
                to be returned.
            reorder (`bool`): Whether to enable template atom reordering
                to accelerate matching in the scanner algorithm. Pass
                `False` to reverse to the original, slower algorithm
                which matches atoms in the same order as they appear in
                the template, at the cost of longer run times.

        Returns:
            `~pyjess.Query`: An iterator over the query hits.

        Raises:
            `ValueError`: When ``max_candidates`` or ``ignore_chain``
                is given an invalid value.
            `MemoryError`: When the C query cannot be created.

        Caution:
            Since ``v0.6.0``, this function uses an optimized variant of
            the Jess scanning algorithm which minimized the number of steps
            needed to generate matches, by re-ordering the order the
            template atoms are iterated upon. Because of this change,
            the query may return *exactly* the same matches but in an order
            that *differs* from the original Jess version. If you really
            need results in the original order, set ``reorder`` to `False`.

        .. versionadded:: 0.6.0
           The ``reorder`` argument, defaulting to `True`.

        .. versionchanged:: 0.7.0
           Default value of ``max_candidates`` argument to `None`.

        .. versionchanged:: 0.7.0
           ``ignore_chain`` now expects string variants rather than `bool`.

        """

        # legacy boolean values are still accepted, with a warning, and
        # mapped to their string (or `None`) equivalent
        if ignore_chain is True:
            PyErr_WarnEx(
                DeprecationWarning,
                "`ignore_chain` parameter expects string parameters "
                "to specificy the mode since PyJess v0.7.0. "
                "Use `ignore_chain='atoms'` instead of `ignore_chain=True`",
                2,
            )
            ignore_chain="atoms"
        elif ignore_chain is False:
            PyErr_WarnEx(
                DeprecationWarning,
                "`ignore_chain` parameter expects string parameters "
                "to specificy the mode since PyJess v0.7.0. "
                "Use `ignore_chain=None` instead of `ignore_chain=False`",
                2,
            )
            ignore_chain=None

        cdef Query query = Query.__new__(Query)
        # the property setters validate both values before any C state
        # is created for the query
        query.max_candidates = max_candidates
        query.ignore_chain = ignore_chain
        query.rmsd_threshold = rmsd_threshold
        query.best_match = best_match
        query.molecule = molecule
        query.jess = self
        query._jq = jess.jess.Jess_query(
            self._jess,
            molecule._mol,
            distance_cutoff,
            max_dynamic_distance,
            reorder,
        )
        if query._jq is NULL:
            raise MemoryError("Failed to allocate query")
        return query
|