pyjess 0.7.0__cp38-abi3-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjess might be problematic. Click here for more details.

pyjess/_jess.pyx ADDED
@@ -0,0 +1,2371 @@
1
+ # coding: utf-8
2
+ # cython: language_level=3, linetrace=True, binding=True
3
+ """Bindings to Jess, a 3D template matching software.
4
+
5
+ Jess is an algorithm for constraint-based structural template matching
6
+ proposed by Jonathan Barker *et al.*. It can be used to identify
7
+ catalytic residues from a known template inside a protein structure.
8
+ Jess is an evolution of TESS, a geometric hashing algorithm developed by
9
+ Andrew Wallace *et al.*, removing some pre-computation and
10
+ structural requirements from the original algorithm.
11
+
12
+ PyJess is a Python module that provides bindings to Jess using
13
+ `Cython <https://cython.org/>`_. It allows creating templates, querying
14
+ them with protein structures, and retrieving the hits using a Python API
15
+ without performing any external I/O. It's also more than 10x faster than
16
+ Jess thanks to algorithmic optimizations added to improve the original Jess
17
+ code while producing consistent results.
18
+
19
+ Example:
20
+ Load templates from a file, either as a file-like object or
21
+ given a filename::
22
+
23
+ >>> t1 = pyjess.Template.load("1.3.3.tpl") # load from filename
24
+ >>> with open("4.1.2.tpl") as f: # load from a file object
25
+ ... t2 = pyjess.Template.load(f)
26
+
27
+ Load molecules from a file, either as a file-like object or given
28
+ a filename::
29
+
30
+ >>> mol = pyjess.Molecule.load("1AMY.pdb")
31
+ >>> mol[0]
32
+ Atom(serial=1, name='N', altloc=' ', residue_name='GLN', ...)
33
+
34
+ Create a `Jess` object storing the templates to support running
35
+ queries on them. The individual templates can still be accessed by
36
+ index::
37
+
38
+ >>> jess = pyjess.Jess([t1, t2])
39
+ >>> jess[0].id
40
+ '3r6v'
41
+
42
+ Run a query on the Jess object to retrieve all templates matching
43
+ a `Molecule`, *in no particular order*::
44
+
45
+ >>> hits = jess.query(mol, 2, 2, 2)
46
+ >>> for hit in hits:
47
+ ... print(hit.template.id, hit.rmsd)
48
+ 2om2 1.4386...
49
+ 2om2 1.4877...
50
+ 2om2 1.4376...
51
+ 2om2 1.5284...
52
+ 2om2 1.4863...
53
+ 2om2 1.4369...
54
+ 2om2 1.4790...
55
+ 2om2 1.1414...
56
+ 2om2 1.0755...
57
+ 2om2 1.1973...
58
+ 2om2 1.1353...
59
+ 2om2 1.0711...
60
+ 2om2 1.1494...
61
+
62
+ By default, a template can match a molecule in more than one way,
63
+ if several sets of atoms match the geometric constraints. Use the
64
+ ``best_match`` argument of `~Jess.query` to only retrieve the
65
+ best match per template::
66
+
67
+ >>> hits = jess.query(mol, 2, 2, 2, best_match=True)
68
+ >>> for hit in hits:
69
+ ... print(hit.template.id, hit.rmsd)
70
+ 2om2 1.071...
71
+
72
+ References:
73
+ - Barker, J. A., & Thornton, J. M. (2003). *An algorithm for
74
+ constraint-based structural template matching: application to
75
+ 3D templates with statistical analysis*. Bioinformatics (Oxford,
76
+ England), 19(13), 1644–1649. :doi:`10.1093/bioinformatics/btg226`.
77
+ - Wallace, A. C., Borkakoti, N., & Thornton, J. M. (1997).
78
+ *TESS: a geometric hashing algorithm for deriving 3D coordinate
79
+ templates for searching structural databases. Application to enzyme
80
+ active sites*. Protein science : a publication of the Protein
81
+ Society, 6(11), 2308–2323. :doi:`10.1002/pro.5560061104`.
82
+
83
+ """
84
+
85
+ # --- C imports --------------------------------------------------------------
86
+
87
+ cimport cython
88
+ from cpython.exc cimport PyErr_WarnEx
89
+ from cpython.unicode cimport (
90
+ PyUnicode_FromStringAndSize,
91
+ PyUnicode_FromFormat,
92
+ PyUnicode_AsASCIIString,
93
+ )
94
+
95
+ from libc.math cimport isnan, exp, INFINITY, NAN
96
+ from libc.stdio cimport FILE, fclose, fdopen, printf, sprintf
97
+ from libc.stdint cimport uintptr_t
98
+ from libc.stdlib cimport calloc, realloc, free, malloc
99
+ from libc.string cimport memcpy, memset, strncpy, strdup
100
+
101
+ cimport jess.atom
102
+ cimport jess.jess
103
+ cimport jess.molecule
104
+ cimport jess.super
105
+ cimport jess.tess_template
106
+ cimport jess.tess_atom
107
+ cimport jess.res_index
108
+ from jess.atom cimport Atom as _Atom
109
+ from jess.jess cimport Jess as _Jess
110
+ from jess.jess cimport JessQuery as _JessQuery
111
+ from jess.molecule cimport Molecule as _Molecule
112
+ from jess.super cimport Superposition as _Superposition
113
+ from jess.template cimport Template as _Template, IgnoreType as _IgnoreType
114
+ from jess.tess_template cimport TessTemplate as _TessTemplate
115
+ from jess.tess_atom cimport TessAtom as _TessAtom
116
+
117
+ # --- Python imports ---------------------------------------------------------
118
+
119
+ import functools
120
+ import io
121
+
122
+ __version__ = PROJECT_VERSION
123
+
124
+ # --- Utils ------------------------------------------------------------------
125
+
126
cdef inline void encode_token(char* dst, const char* src, size_t n) noexcept nogil:
    # Copy `n` bytes from `src` to `dst`, writing an underscore wherever the
    # source holds a space or a NUL byte, then NUL-terminate the result.
    # `dst` must have room for `n + 1` bytes since `dst[n]` is written.
    cdef size_t pos
    cdef char   ch
    for pos in range(n):
        ch = src[pos]
        if ch != ord(' ') and ch != 0:
            dst[pos] = ch
        else:
            dst[pos] = ord('_')
    dst[n] = 0
134
+
135
cdef inline void decode_token(char* dst, const char* src, size_t n) noexcept nogil:
    # Inverse of `encode_token`: copy `n` bytes from `src` to `dst`, writing
    # a space wherever the source holds an underscore or a NUL byte, then
    # NUL-terminate the result. `dst` must have room for `n + 1` bytes.
    cdef size_t pos
    cdef char   ch
    for pos in range(n):
        ch = src[pos]
        if ch != ord('_') and ch != 0:
            dst[pos] = ch
        else:
            dst[pos] = ord(' ')
    dst[n] = 0
143
+
144
class nullcontext:
    """A context manager that does nothing besides yielding a preset value.

    Used to treat an already-open file-like object the same way as a
    handle returned by `open` inside a ``with`` statement.
    """

    def __init__(self, return_value=None):
        # Value handed back by `__enter__`, i.e. bound by ``with ... as``.
        self.retval = return_value

    def __enter__(self):
        return self.retval

    def __exit__(self, exc_type, exc_value, traceback):
        # A false return value lets exceptions from the `with` body propagate.
        return False
151
+
152
+ # --- Classes ----------------------------------------------------------------
153
+
154
cdef class _MoleculeParser:
    # Common base of the molecule parsers: it only records the identifier
    # requested by the caller (`None` when no identifier was given).
    cdef str id

    def __init__(self, str id = None):
        self.id = id
159
+
160
cdef class _PDBMoleculeParser(_MoleculeParser):
    """Line-based parser building a molecule from PDB-formatted text.

    Attributes are set from the constructor arguments: ``ignore_endmdl``
    keeps reading past ``ENDMDL`` lines, and ``skip_hetatm`` drops
    ``HETATM`` lines.
    """
    cdef bint ignore_endmdl
    cdef bint skip_hetatm

    def __init__(self, str id = None, bint ignore_endmdl = False, bint skip_hetatm = False):
        super().__init__(id=id)
        self.skip_hetatm = skip_hetatm
        self.ignore_endmdl = ignore_endmdl

    def loads(self, text, molecule_type):
        """Parse a molecule from a string by wrapping it in `io.StringIO`."""
        buffer = io.StringIO(text)
        return self.load(buffer, molecule_type)

    def load(self, file, molecule_type):
        """Parse a molecule from a path or from an iterable of text lines.

        Arguments:
            file: Anything accepted by `open`, or an object yielding
                `str` lines when iterated.
            molecule_type: The callable used to build the result; it is
                called as ``molecule_type(atoms, id=id)``.

        Raises:
            `ValueError`: When a line starts with ``data_`` or ``loop_``
                (case-insensitively), which indicates mmCIF content.

        """
        cdef str  line
        cdef str  id = self.id
        cdef list atoms = []

        # `open` raises `TypeError` for objects that are not paths: those
        # are iterated as-is, and `nullcontext` leaves them open afterwards.
        try:
            handle = open(file)
        except TypeError:
            handle = nullcontext(file)

        with handle as f:
            for line in f:
                if line.startswith("HEADER"):
                    # only read the identifier when the caller gave none
                    if id is None:
                        id = line[62:66].strip() or None
                    continue
                if line.startswith("ATOM") or (
                    line.startswith("HETATM") and not self.skip_hetatm
                ):
                    atoms.append(Atom.loads(line))
                    continue
                if line.startswith("ENDMDL"):
                    if self.ignore_endmdl:
                        continue
                    break
                if line.lower().startswith(("data_", "loop_")):
                    raise ValueError("mmCIF data tags found, file is not in PDB format")

        return molecule_type(atoms, id=id)
195
+
196
+
197
cdef class _CIFMoleculeParser(_MoleculeParser):
    """Parser building a molecule from the ``_atom_site`` table of an mmCIF block.

    The ``gemmi`` module is imported when the parser is created. With
    ``use_author`` set, the ``auth_*`` columns are read in place of the
    ``label_*`` ones; with ``skip_hetatm`` set, ``HETATM`` rows are dropped.
    """
    cdef object gemmi
    cdef bint use_author
    cdef bint skip_hetatm

    # Columns requested from `_atom_site.`; a leading `?` marks a column as
    # optional, so its presence must be tested with `row.has` before use.
    _PRIMARY_COLUMNS = [
        'id', 'type_symbol', 'label_atom_id', 'label_alt_id', 'label_comp_id',
        'label_asym_id', 'label_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
        'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
        '?pdbx_formal_charge', '?group_PDB',
    ]

    _AUTH_COLUMNS = [
        'id', 'type_symbol', 'auth_atom_id', 'label_alt_id', 'auth_comp_id',
        'auth_asym_id', 'auth_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
        'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
        '?pdbx_formal_charge', '?group_PDB',
    ]

    def __init__(self, str id = None, bint use_author = False, bint skip_hetatm = False):
        super().__init__(id=id)
        self.gemmi = __import__('gemmi')
        self.use_author = use_author
        self.skip_hetatm = skip_hetatm

    def _load_block(self, document, molecule_type):
        """Build a molecule from the only block of a parsed CIF document.

        Arguments:
            document: The object returned by ``gemmi.cif.read_string`` or
                ``gemmi.cif.read_file``.
            molecule_type: The callable used to build the result; it is
                called as ``molecule_type(atoms, id=id)``.

        Raises:
            `ValueError`: When the required ``_atom_site`` columns cannot
                be found in the block.

        """
        # CIF marks missing values with either `.` or `?`
        null = (".", "?")
        # raw CIF values may be quoted (e.g. atom names containing a prime),
        # `as_string` strips the quoting before the value reaches `Atom`
        as_string = self.gemmi.cif.as_string

        block = document.sole_block()
        cols = self._AUTH_COLUMNS if self.use_author else self._PRIMARY_COLUMNS
        table = block.find('_atom_site.', cols)
        max_residue_number = 0

        if not table:
            raise ValueError("missing columns in CIF files")

        atoms = []
        for row in table:
            # `group_PDB` is requested as optional: rows of a table without
            # that column are treated as regular `ATOM` records
            group = row[14] if row.has(14) else "ATOM"
            if group != "ATOM" and (group != "HETATM" or self.skip_hetatm):
                continue

            if row[6] in null and group == "HETATM":
                PyErr_WarnEx(
                    UserWarning,
                    "HETATM line found without residue number. Consider "
                    "parsing with use_author=True to use author-defined "
                    "residue numbers, or skip_hetatm=True to disable "
                    "parsing of HETATM altogether.",
                    3,
                )
                # NOTE(review): the first unnumbered HETATM reuses the highest
                # residue number seen so far, and every following unnumbered
                # atom gets its own number -- confirm this is intended.
                residue_number = max_residue_number
                max_residue_number += 1
            else:
                residue_number = int(row[6])
                max_residue_number = max(residue_number, max_residue_number)

            atom = Atom(
                serial=int(row[0]),
                element=as_string(row[1]),
                name=as_string(row[2]),
                altloc=' ' if row[3] in null else as_string(row[3]),  # FIXME: replace with None?
                residue_name=as_string(row[4]),
                chain_id=as_string(row[5]),
                residue_number=residue_number,
                insertion_code=' ' if not row.has(7) or row[7] in null else row[7],
                x=float(row[8]),
                y=float(row[9]),
                z=float(row[10]),
                occupancy=0.0 if row[11] in null else float(row[11]),
                temperature_factor=float(row[12]),
                charge=0 if not row.has(13) or row[13] in null else int(row[13]),
            )
            atoms.append(atom)

        id = block.name if self.id is None else self.id
        return molecule_type(atoms, id=id)

    def loads(self, text, molecule_type):
        """Parse a molecule from a string containing a CIF document."""
        document = self.gemmi.cif.read_string(text)
        return self._load_block(document, molecule_type)

    def load(self, file, molecule_type):
        """Parse a molecule from a path, or from an object with a ``read`` method."""
        if hasattr(file, "read"):
            document = self.gemmi.cif.read_string(file.read())
        else:
            document = self.gemmi.cif.read_file(file)
        return self._load_block(document, molecule_type)
282
+
283
+
284
+ cdef class Molecule:
285
+ """A molecule structure, as a sequence of `Atom` objects.
286
+
287
+ .. versionadded:: 0.2.2
288
+ Support identifiers of arbitrary length.
289
+
290
+ .. versionadded:: 0.4.0
291
+ Equality, hashing and pickle protocol support.
292
+
293
+ """
294
+ cdef _Molecule* _mol
295
+ cdef str _id
296
+
297
@classmethod
def loads(
    cls,
    text,
    str format = "pdb",
    *,
    str id = None,
    bint ignore_endmdl = False,
    bint use_author = False,
    bint skip_hetatm = False,
):
    """Load a molecule from a string.

    Arguments:
        text (`str`): The serialized molecule to parse into a new
            object.
        format (`str`): The format to parse the file. Supported formats
            are: ``pdb`` for the Protein Data Bank format (the default),
            ``cif`` for Crystallographic Information File format
            (additionally requires the `gemmi` module), or ``detect``
            to attempt auto-detection.

    Keyword Arguments:
        id (`str`, optional): The identifier of the molecule. If `None`
            given, the parser will attempt to extract it from the
            ``HEADER`` line (for PDB files) or the block name (for CIF
            files).
        ignore_endmdl (`bool`): Pass `True` to make the parser read all
            the atoms from the PDB file. By default, the parser only
            reads the atoms of the first model, and stops at the first
            ``ENDMDL`` line. *Ignored for CIF files*.
        use_author (`bool`): Pass `True` to use the author-defined
            labels while parsing CIF files, e.g. read the chain name
            from ``_atom_site.auth_asym_id`` rather than
            ``_atom_site.label_asym_id``. *Ignored for PDB files*.
        skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
            (``HETATM``) in the input.

    Returns:
        `~pyjess.Molecule`: The molecule parsed from the string.

    See Also:
        `Molecule.load` to load a molecule from a file-like
        object or from a path.

    Caution:
        Parsing from PDB file retains the heteroatoms (``HETATM`` lines)
        while parsing from mmCIF usually discard them. This is because
        mmCIF files store heteroatoms but do not require them to
        have an associated residue number, which can throw off the way
        atoms are modeled in Jess.

    .. versionadded:: 0.7.0
        The ``format`` argument, and support for CIF parsing.

    """
    # resolve the format here, so that `load` does not have to peek into
    # the in-memory buffer again
    if format == "detect":
        format = "cif" if text.lstrip().startswith(("data_", "loop_")) else "pdb"
    return cls.load(
        io.StringIO(text),
        format=format,
        id=id,
        ignore_endmdl=ignore_endmdl,
        # forward `use_author`, otherwise the CIF parser always reads the
        # `label_*` columns regardless of what the caller asked for
        use_author=use_author,
        skip_hetatm=skip_hetatm,
    )
360
+
361
@classmethod
def load(
    cls,
    file,
    str format = "detect",
    *,
    str id = None,
    bint ignore_endmdl = False,
    bint use_author = False,
    bint skip_hetatm = False,
):
    """Load a molecule from a file.

    Arguments:
        file (`str`, `os.PathLike`, or file-like object): Either the path
            to a file, or a file-like object opened in **text mode**
            containing a molecule.
        format (`str`): The format to parse the file. Supported formats
            are: ``pdb`` for the Protein Data Bank format, ``cif``
            for Crystallographic Information File format (additionally
            requires the `gemmi` module), or ``detect`` to attempt
            auto-detection (the default).

    Keyword Arguments:
        id (`str`, optional): The identifier of the molecule. If `None`
            given, the parser will attempt to extract it from the
            ``HEADER`` line (for PDB files) or the block name (for CIF
            files).
        ignore_endmdl (`bool`): Pass `True` to make the parser read all
            the atoms from the PDB file. By default, the parser only
            reads the atoms of the first model, and stops at the first
            ``ENDMDL`` line. *Ignored for CIF files*.
        use_author (`bool`): Pass `True` to use the author-defined
            labels while parsing CIF files, e.g. read the chain name
            from ``_atom_site.auth_asym_id`` rather than
            ``_atom_site.label_asym_id``. *Ignored for PDB files*.
        skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
            (``HETATM``) in the input file.

    Returns:
        `~pyjess.Molecule`: The molecule parsed from the file.

    Raises:
        `ValueError`: When ``format`` is not one of ``pdb``, ``cif``
            or ``detect``.

    See Also:
        `Molecule.loads` to load a molecule from a string.

    Caution:
        Parsing from PDB file retains the heteroatoms (``HETATM`` lines)
        while parsing from mmCIF usually discard them. This is because
        mmCIF files store heteroatoms but do not require them to
        have an associated residue number, which can throw off the way
        atoms are modeled in Jess.

    .. versionadded:: 0.7.0
        The ``format`` and ``skip_hetatm`` arguments, and mmCIF support.

    """
    cdef _MoleculeParser parser
    cdef str             peek

    if format == "detect":
        # `open` raises `TypeError` for objects that are not paths: those
        # are used as-is, and left open by `nullcontext`
        try:
            handle = open(file)
        except TypeError:
            handle = nullcontext(file)
        with handle as f:
            if f.seekable():
                peek = f.read(5)
                f.seek(0)
            else:
                # the stream cannot be rewound: read it whole, and look at
                # the *first* five characters of the text
                f = f.read()
                peek = f[:5]
            if peek.startswith(("data_", "loop_")):
                parser = _CIFMoleculeParser(
                    id=id,
                    use_author=use_author,
                    skip_hetatm=skip_hetatm,
                )
            else:
                parser = _PDBMoleculeParser(
                    id=id,
                    ignore_endmdl=ignore_endmdl,
                    skip_hetatm=skip_hetatm,
                )
            # `f` is a string only when the stream had to be read whole
            if isinstance(f, str):
                return parser.loads(f, molecule_type=cls)
            return parser.load(f, molecule_type=cls)
    if format == "pdb":
        parser = _PDBMoleculeParser(
            id=id,
            ignore_endmdl=ignore_endmdl,
            skip_hetatm=skip_hetatm
        )
    elif format == "cif":
        parser = _CIFMoleculeParser(
            id=id,
            use_author=use_author,
            skip_hetatm=skip_hetatm,
        )
    else:
        raise ValueError(f"invalid value for `format` argument: {format!r}")
    return parser.load(file, molecule_type=cls)
462
+
463
@classmethod
def from_biopython(cls, object structure, str id = None):
    """Create a new `~pyjess.Molecule` from a `Bio.PDB.Structure`.

    Arguments:
        structure (`Bio.PDB.Structure` or `Bio.PDB.Model`): The
            Biopython object containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule. If `None` given, will use the value of
            ``structure.id``.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for c in structure.get_chains():
        for r in c.get_residues():
            # the first field of the residue identifier is not used
            _, residue_number, insertion_code = r.id
            for a in r.get_atoms():
                coord = a.get_coord()
                atom = Atom(
                    name=a.fullname,
                    x=coord[0],
                    y=coord[1],
                    z=coord[2],
                    altloc=a.altloc,
                    charge=a.pqr_charge or 0,
                    occupancy=a.occupancy,
                    serial=a.serial_number,
                    residue_name=r.resname,
                    residue_number=residue_number,
                    segment=r.segid,
                    insertion_code=insertion_code,
                    chain_id=c.id,
                    temperature_factor=a.bfactor,
                    element=a.element,
                )
                atoms.append(atom)
    # honor an explicit identifier, only fall back to the one of the
    # Biopython object when none was given
    # NOTE(review): `structure.id` is passed through unchanged and the
    # constructor requires a `str` -- confirm for `Bio.PDB.Model` inputs.
    return cls(atoms, id=structure.id if id is None else id)
506
+
507
@classmethod
def from_gemmi(cls, object model, str id=None):
    """Create a new `~pyjess.Molecule` from a `gemmi.Model`.

    Arguments:
        model (`gemmi.Model`): The ``gemmi`` object containing the
            structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    .. versionadded:: 0.7.0

    """
    cdef list converted = []
    for entry in model.all():
        # each entry exposes the atom with its parent residue and chain
        src_atom, src_residue, src_chain = entry.atom, entry.residue, entry.chain
        converted.append(
            Atom(
                name=src_atom.padded_name(),
                x=src_atom.pos[0],
                y=src_atom.pos[1],
                z=src_atom.pos[2],
                # a NUL alternate location is turned into a blank
                altloc=' ' if src_atom.altloc == '\0' else src_atom.altloc,
                charge=src_atom.charge,
                element=src_atom.element.name.upper(),
                occupancy=src_atom.occ,
                temperature_factor=src_atom.b_iso,
                serial=src_atom.serial,
                segment=src_residue.segment,
                residue_name=src_residue.name,
                residue_number=src_residue.seqid.num,
                chain_id=src_chain.name,
                insertion_code=src_residue.seqid.icode,
            )
        )
    return cls(converted, id=id)
548
+
549
@classmethod
def from_biotite(cls, object atom_array, str id=None):
    """Create a new `~pyjess.Molecule` from a `biotite.structure.AtomArray`.

    Arguments:
        atom_array (`biotite.structure.AtomArray`): The ``biotite``
            object containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    Caution:
        If loading data with the `biotite.structure.io.pdb.PDBFile` module,
        ensure that you are requesting all atoms and all extra fields
        in `~biotite.structure.io.pdb.PDBFile.get_structure`::

            pdb_file = PDBFile.read("data/1AMY.pdb")
            structure = pdb_file.get_structure(
                altloc="all",
                extra_fields=["atom_id", "b_factor", "occupancy", "charge"],
            )
            molecule = Molecule.from_biotite(structure[0])

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for a in atom_array:
        atom = Atom(
            name=str(a.atom_name),
            x=a.coord[0],
            y=a.coord[1],
            z=a.coord[2],
            # these annotations fall back to a default when missing
            altloc=str(getattr(a, 'altloc', ' ')),
            charge=getattr(a, 'charge', 0),
            element=str(a.element),
            occupancy=getattr(a, 'occupancy', 1.0),
            temperature_factor=a.b_factor,
            serial=a.atom_id,
            segment=str(getattr(a, 'segment', '')),
            residue_name=str(a.res_name),
            residue_number=a.res_id,
            chain_id=str(a.chain_id),
            # pad an empty insertion code to a single blank character
            insertion_code=str(a.ins_code).ljust(1),
        )
        atoms.append(atom)
    # forward the identifier, which was previously dropped
    return cls(atoms, id=id)
597
+
598
def __cinit__(self):
    # start without any C storage; `__init__` and `copy` allocate it
    self._mol = NULL
600
+
601
def __dealloc__(self):
    # hand the C molecule back to Jess for release
    # NOTE(review): `_mol` is still NULL if `__init__` never ran; this
    # presumably relies on `Molecule_free` accepting NULL -- confirm.
    jess.molecule.Molecule_free(self._mol)
603
+
604
def __init__(self, object atoms = (), str id = None):
    """__init__(self, atoms=(), id=None)\n--\n

    Create a new molecule.

    Arguments:
        atoms (sequence of `~pyjess.Atom`): The atoms of the molecule.
        id (`str`, optional): The identifier of the molecule.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the molecule storage.
        `TypeError`: When ``atoms`` contains `None`.
        `ValueError`: When ``atoms`` contains an atom without any
            allocated storage.

    """
    cdef Atom atom
    cdef int i
    cdef int count = len(atoms)

    self._mol = <_Molecule*> malloc(sizeof(_Molecule) + count * sizeof(_Atom*))
    if self._mol is NULL:
        raise MemoryError("Failed to allocate molecule")

    self._mol.index = NULL
    self._mol.count = count
    # clear every slot before filling them, so that the storage is never
    # left with uninitialized pointers if an error interrupts the loop below
    for i in range(count):
        self._mol.atom[i] = NULL
    memset(self._mol.id, b' ', 5)
    self._id = id

    for i, atom in enumerate(atoms):
        # `None` passes the `Atom` type test, and an atom created with
        # `Atom.__new__` has no storage: both would be dereferenced below
        if atom is None:
            raise TypeError("expected Atom, found None")
        if atom._atom is NULL:
            raise ValueError("cannot add an uninitialized Atom to a molecule")
        self._mol.atom[i] = <_Atom*> malloc(sizeof(_Atom))
        if self._mol.atom[i] is NULL:
            raise MemoryError("Failed to allocate atom")
        # the molecule stores its own copy of the atom data
        memcpy(self._mol.atom[i], atom._atom, sizeof(_Atom))

    self._mol.index = jess.res_index.ResIndex_create(self._mol.atom, count)
    if self._mol.index is NULL:
        raise MemoryError("Failed to allocate residue index")
642
+
643
def __len__(self):
    # number of atoms recorded in the C molecule
    assert self._mol is not NULL
    return self._mol.count
646
+
647
def __getitem__(self, object index):
    # A slice gives a new molecule of the same type built from the selected
    # atoms; an integer gives an `Atom` pointing into this molecule.
    assert self._mol is not NULL

    cdef Atom    view
    cdef ssize_t position
    cdef ssize_t total = self._mol.count

    if isinstance(index, slice):
        selected = [self[k] for k in range(*index.indices(total))]
        return type(self)(atoms=selected, id=self.id)

    position = index
    # negative indices count from the end
    if position < 0:
        position += total
    if position >= total or position < 0:
        raise IndexError(index)

    view = Atom.__new__(Atom)
    # the atom keeps a reference to this molecule and is flagged as owned,
    # so that it does not free storage belonging to the molecule
    view.owner = self
    view.owned = True
    view._atom = <_Atom*> jess.molecule.Molecule_atom(self._mol, position)
    return view
668
+
669
def __copy__(self):
    # `copy.copy` support, delegating to `Molecule.copy`
    return self.copy()
671
+
672
def __eq__(self, object other):
    # Molecules are equal when they have the same identifier, the same
    # number of atoms, and pairwise equal atoms.
    cdef Molecule rhs
    if not isinstance(other, Molecule):
        return NotImplemented
    rhs = other
    if self._id != rhs._id or self._mol.count != rhs._mol.count:
        return False
    for mine, theirs in zip(self, rhs):
        if not (mine == theirs):
            return False
    return True
682
+
683
def __hash__(self):
    # combine the identifier with the hash of every atom, in order
    return hash((self._id, *map(hash, self)))
685
+
686
def __reduce__(self):
    # pickle support: rebuild by calling the type with the atoms and the id
    return type(self), (list(self), self.id)
688
+
689
def __sizeof__(self):
    # Size of the object, plus the C molecule header, plus one pointer and
    # one atom record for each atom.
    assert self._mol is not NULL
    cdef size_t per_atom = sizeof(_Atom*) + sizeof(_Atom)
    return sizeof(self) + sizeof(_Molecule) + self._mol.count * per_atom
696
+
697
@property
def id(self):
    # the identifier given at creation, or `None` when there is none
    return self._id
700
+
701
cpdef Molecule conserved(self, double cutoff = 0.0):
    """Get a molecule containing only a subset of conserved atoms.

    Arguments:
        cutoff (`float`): The conservation cutoff for atoms. Only atoms
            with a `~Atom.temperature_factor` greater than or equal to
            this value are kept in the result.

    Returns:
        `~pyjess.Molecule`: A new molecule with atoms below the
        conservation cutoff removed. When ``cutoff`` is zero or
        negative, a plain copy of the molecule is returned.

    """
    assert self._mol is not NULL

    cdef size_t k
    cdef list   kept = []

    # nothing is filtered for a non-positive cutoff
    if cutoff <= 0.0:
        return self.copy()

    for k in range(self._mol.count):
        if not (self._mol.atom[k].tempFactor >= cutoff):
            continue
        kept.append(self[k])

    return type(self)(id=self.id, atoms=kept)
728
+
729
cpdef Molecule copy(self):
    """Create a copy of this molecule and its atoms.

    Returns:
        `~pyjess.Molecule`: A newly allocated molecule with the same
        identifier and atoms.

    Raises:
        `MemoryError`: When the system allocator fails to allocate
            enough memory for the copy.

    .. versionadded:: 0.4.0

    """
    assert self._mol is not NULL

    cdef int      i
    cdef Molecule copy = Molecule.__new__(Molecule)
    cdef size_t   size = sizeof(_Molecule) + self._mol.count * sizeof(_Atom*)

    with nogil:
        # allocate molecule storage
        copy._mol = <_Molecule*> malloc(size)
        if copy._mol is NULL:
            raise MemoryError("Failed to allocate molecule")
        # copy molecule attributes
        copy._mol.index = NULL
        copy._mol.count = self._mol.count
        memset(copy._mol.id, b' ', 5)
        # clear every slot first (as `__init__` does), so that the storage
        # holds no uninitialized pointer if an allocation fails below and
        # the partially built copy gets deallocated
        for i in range(self._mol.count):
            copy._mol.atom[i] = NULL
        # copy molecule atoms
        for i in range(self._mol.count):
            copy._mol.atom[i] = <_Atom*> malloc(sizeof(_Atom))
            if copy._mol.atom[i] is NULL:
                raise MemoryError("Failed to allocate atom")
            memcpy(copy._mol.atom[i], self._mol.atom[i], sizeof(_Atom))
        # regenerate index
        copy._mol.index = jess.res_index.ResIndex_create(copy._mol.atom, copy._mol.count)
        if copy._mol.index is NULL:
            raise MemoryError("Failed to allocate residue index")

    copy._id = self._id
    return copy
764
+
765
+
766
+ cdef class Atom:
767
+ """A single atom in a molecule.
768
+
769
+ .. versionadded:: 0.4.0
770
+ Equality, hashing and pickle protocol support.
771
+
772
+ """
773
+ cdef object owner
774
+ cdef bint owned
775
+ cdef _Atom* _atom
776
+
777
+ @classmethod
778
+ def load(cls, file):
779
+ """Load an atom from the given file.
780
+
781
+ Arguments:
782
+ file (file-like object): A file-like object opened in text
783
+ mode to read the atom from.
784
+
785
+ """
786
+ return cls.loads(file.read())
787
+
788
+ @classmethod
789
+ def loads(cls, text):
790
+ """Load an atom from the given string.
791
+
792
+ Arguments:
793
+ text (`str`, `bytes` or `bytearray`): The atom line to read the
794
+ atom metadata from.
795
+
796
+ """
797
+ cdef const unsigned char* s
798
+ cdef bytearray b
799
+ cdef Atom atom
800
+
801
+ if isinstance(text, str):
802
+ b = bytearray(text, 'utf-8')
803
+ else:
804
+ b = bytearray(text)
805
+ if not b.endswith(b'\n'):
806
+ b.append(b'\n')
807
+ b.append(b'\0')
808
+ s = b
809
+
810
+ atom = cls.__new__(cls)
811
+ with nogil:
812
+ atom._atom = <_Atom*> malloc(sizeof(_Atom))
813
+ if atom._atom == NULL:
814
+ raise MemoryError("Failed to allocate atom")
815
+ if not jess.atom.Atom_parse(atom._atom, <const char*> s):
816
+ raise ValueError(f"Failed to parse atom: {text!r}")
817
+
818
+ return atom
819
+
820
+ def __cinit__(self):
821
+ self._atom = NULL
822
+ self.owner = None
823
+ self.owned = False
824
+
825
+ def __dealloc__(self):
826
+ if not self.owned:
827
+ free(self._atom)
828
+
829
+ def __init__(
830
+ self,
831
+ *,
832
+ int serial,
833
+ str name,
834
+ str residue_name,
835
+ str chain_id,
836
+ int residue_number,
837
+ double x,
838
+ double y,
839
+ double z,
840
+ double occupancy = 0.0,
841
+ double temperature_factor = 0.0,
842
+ str altloc = ' ',
843
+ str insertion_code = ' ',
844
+ str segment = '',
845
+ str element = '',
846
+ int charge = 0,
847
+ ):
848
+ """__init__(self, *, serial, name, residue_name, chain_id, residue_number, x, y, z, occupancy=0.0, temperature_factor=0.0, altloc=' ', insertion_code=' ', segment='', element='', charge=0)\n--\n
849
+
850
+ Create a new atom.
851
+
852
+ Raises:
853
+ `MemoryError`: When the system allocator fails to allocate
854
+ enough memory for the atom storage.
855
+ `ValueError`: When either of the ``name``, ``residue_name``,
856
+ ``segment``, ``element`` or ``chain_id`` strings is too
857
+ long.
858
+
859
+ """
860
+ cdef bytearray _name
861
+ cdef bytes _residue_name
862
+ cdef bytes _segment
863
+ cdef bytes _element
864
+
865
+ if len(name) > 4:
866
+ raise ValueError(f"Invalid atom name: {name!r}")
867
+ if len(residue_name) > 3:
868
+ raise ValueError(f"Invalid residue name: {residue_name!r}")
869
+ if len(segment) > 4:
870
+ raise ValueError(f"Invalid segment: {segment!r}")
871
+ if len(element) > 2:
872
+ raise ValueError(f"Invalid element: {element!r}")
873
+ if len(chain_id) > 2:
874
+ raise ValueError(f"Invalid chain ID: {chain_id!r}")
875
+
876
+ self._atom = <_Atom*> malloc(sizeof(_Atom))
877
+ if self._atom is NULL:
878
+ raise MemoryError("Failed to allocate atom")
879
+
880
+ _residue_name = PyUnicode_AsASCIIString(residue_name)
881
+ _segment = PyUnicode_AsASCIIString(segment)
882
+ _element = PyUnicode_AsASCIIString(element)
883
+
884
+ self._atom.serial = serial
885
+ self._atom.altLoc = ord(altloc)
886
+ self._atom.chainID1 = ord(chain_id[0]) if len(chain_id) > 0 else 0
887
+ self._atom.chainID2 = ord(chain_id[1]) if len(chain_id) > 1 else ord(' ')
888
+ self._atom.resSeq = residue_number
889
+ self._atom.iCode = ord(insertion_code)
890
+ self._atom.x[0] = x
891
+ self._atom.x[1] = y
892
+ self._atom.x[2] = z
893
+ self._atom.occupancy = occupancy
894
+ self._atom.tempFactor = temperature_factor
895
+ self._atom.charge = charge
896
+ encode_token(self._atom.resName, _residue_name.ljust(3, b'\0'), 3)
897
+ encode_token(self._atom.segID, _segment.ljust(4, b'\0'), 4)
898
+ encode_token(self._atom.element, _element.ljust(2, b'\0'), 2)
899
+
900
+ # FIXME: is alignment proper?
901
+ _name = bytearray(name, 'ascii')
902
+ if len(_name) < 4:
903
+ _name.insert(0, ord('_'))
904
+ encode_token(self._atom.name, _name.ljust(4, b'\0'), 4)
905
+
906
+ def __copy__(self):
907
+ return self.copy()
908
+
909
+ cdef dict _state(self):
910
+ return {
911
+ "serial": self.serial,
912
+ "name": self.name,
913
+ "altloc": self.altloc,
914
+ "residue_name": self.residue_name,
915
+ "chain_id": self.chain_id,
916
+ "residue_number": self.residue_number,
917
+ "insertion_code": self.insertion_code,
918
+ "x": self.x,
919
+ "y": self.y,
920
+ "z": self.z,
921
+ "temperature_factor": self.temperature_factor,
922
+ "occupancy": self.occupancy,
923
+ "segment": self.segment,
924
+ "element": self.element,
925
+ "charge": self.charge,
926
+ }
927
+
928
+ def __reduce__(self):
929
+ cdef dict state = self._state()
930
+ return functools.partial(type(self), **state), ()
931
+
932
+ def __repr__(self):
933
+ cdef str ty = type(self).__name__
934
+ cdef list args = []
935
+ for k,v in self._state().items():
936
+ if v is not None:
937
+ args.append(f"{k}={v!r}")
938
+ return f"{ty}({', '.join(args)})"
939
+
940
+ def __sizeof__(self):
941
+ cdef size_t size = sizeof(self)
942
+ if not self.owned:
943
+ size += sizeof(_Atom)
944
+ return size
945
+
946
+ def __eq__(self, object other):
947
+ cdef Atom other_
948
+ if not isinstance(other, Atom):
949
+ return NotImplemented
950
+ other_ = other
951
+ # FIXME: it should be possible to do a memcmp here.
952
+ return self._state() == other_._state()
953
+
954
+ def __hash__(self):
955
+ return hash(tuple(self._state().values()))
956
+
957
+ @property
958
+ def serial(self):
959
+ """`int`: The atom serial number.
960
+ """
961
+ assert self._atom is not NULL
962
+ return self._atom.serial
963
+
964
+ @property
965
+ def altloc(self):
966
+ """`str`: The alternate location indicator for the atom.
967
+ """
968
+ assert self._atom is not NULL
969
+ return chr(self._atom.altLoc)
970
+
971
+ @property
972
+ def name(self):
973
+ """`str`: The atom name.
974
+ """
975
+ assert self._atom is not NULL
976
+ return self._atom.name[:4].decode('ascii').strip("_")
977
+
978
+ @property
979
+ def residue_name(self):
980
+ """`str`: The residue name.
981
+ """
982
+ assert self._atom is not NULL
983
+ return self._atom.resName[:3].decode('ascii').strip("_")
984
+
985
@property
def residue_number(self):
    """`int`: The residue sequence number.
    """
    # the underlying record must be attached before it is dereferenced
    assert self._atom is not NULL
    return self._atom.resSeq
991
+
992
@property
def segment(self):
    """`str`: The segment identifier.

    Decoded as ASCII from the first four bytes of the stored
    identifier, with leading and trailing ``_`` characters removed.

    """
    assert self._atom is not NULL
    return self._atom.segID[:4].decode('ascii').strip('_')
998
+
999
@property
def element(self):
    """`str`: The element symbol.

    Decoded as ASCII from the first two bytes of the stored symbol,
    with leading and trailing ``_`` characters removed.

    """
    assert self._atom is not NULL
    return self._atom.element[:2].decode('ascii').strip('_')
1005
+
1006
@property
def insertion_code(self):
    """`str`: The code for insertion of residues.

    Returned as a one-character string built from the stored
    character code.

    """
    assert self._atom is not NULL
    return chr(self._atom.iCode)
1012
+
1013
@property
def chain_id(self):
    """`str`: The identifier of the chain the atom belongs to.

    Built from the two stored chain identifier characters, with
    surrounding whitespace stripped.

    """
    assert self._atom is not NULL
    return PyUnicode_FromFormat("%c%c", self._atom.chainID1, self._atom.chainID2).strip()
1019
+
1020
@property
def occupancy(self):
    """`float`: The atom occupancy.
    """
    # the underlying record must be attached before it is dereferenced
    assert self._atom is not NULL
    return self._atom.occupancy
1026
+
1027
@property
def temperature_factor(self):
    """`float`: The atom temperature factor.
    """
    # the underlying record must be attached before it is dereferenced
    assert self._atom is not NULL
    return self._atom.tempFactor
1033
+
1034
@property
def charge(self):
    """`int`: The atom charge.
    """
    # the underlying record must be attached before it is dereferenced
    assert self._atom is not NULL
    return self._atom.charge
1040
+
1041
@property
def x(self):
    """`float`: The atom coordinate in the 1st dimension.

    This is the first component of the stored coordinate array.

    """
    assert self._atom is not NULL
    return self._atom.x[0]
1047
+
1048
@property
def y(self):
    """`float`: The atom coordinate in the 2nd dimension.

    This is the second component of the stored coordinate array.

    """
    assert self._atom is not NULL
    return self._atom.x[1]
1054
+
1055
@property
def z(self):
    """`float`: The atom coordinate in the 3rd dimension.

    This is the third component of the stored coordinate array.

    """
    assert self._atom is not NULL
    return self._atom.x[2]
1061
+
1062
cpdef Atom copy(self):
    """Create a copy of this atom.

    The copy gets its own freshly allocated storage, so it does not
    depend on this atom (or on whatever owns this atom's storage)
    staying alive.

    Returns:
        `~pyjess.Atom`: A newly allocated atom with identical attributes.

    Raises:
        `MemoryError`: When the storage for the copy cannot be allocated.

    .. versionadded:: 0.4.0

    """
    # same guard as the attribute getters: never read from a NULL record
    assert self._atom is not NULL

    cdef Atom copy = Atom.__new__(Atom)
    copy._atom = <_Atom*> malloc(sizeof(_Atom))
    if copy._atom is NULL:
        raise MemoryError("Failed to allocate atom")
    memcpy(copy._atom, self._atom, sizeof(_Atom))
    return copy
1077
+
1078
+
1079
cdef class TemplateAtom:
    """A single template atom.

    A template atom stores a position, the chain and residue number it
    belongs to, the alternative atom and residue names recorded for it,
    a match mode and a distance weight.

    .. versionadded:: 0.4.0
       Equality, hashing and pickle protocol support.

    """
    # Object that holds the underlying storage when this atom is a view
    # (e.g. the `Template` it was indexed from); `None` otherwise.
    cdef object owner
    # True when `_atom` points into storage held by `owner`, in which case
    # it must not be released by this object.
    cdef bint owned
    # Pointer to the underlying native template atom.
    cdef _TessAtom* _atom

    @classmethod
    def load(cls, file):
        """Load a template atom from the given file.

        Arguments:
            file (str, os.PathLike or file-like object): Either the path
                to a file, or a file-like object opened in text or binary
                mode to read the template atom from.

        Returns:
            `~pyjess.TemplateAtom`: The atom parsed from the file contents.

        Raises:
            `ValueError`: When the contents cannot be parsed, see `loads`.

        """
        try:
            handle = open(file)
        except TypeError:
            # not something `open` accepts: treat it as a file-like object
            # and leave closing it to the caller
            handle = nullcontext(file)
        with handle as f:
            return cls.loads(f.read())

    @classmethod
    def loads(cls, text):
        """Load a template atom from the given string.

        Arguments:
            text (`str`, `bytes` or `bytearray`): The atom line to read the
                atom metadata from.

        Returns:
            `~pyjess.TemplateAtom`: The atom parsed from the given line.

        Raises:
            `ValueError`: When the line cannot be parsed, or when the
                parsed match mode is not a supported one.

        """
        cdef bytearray b
        cdef TemplateAtom atom

        # work on a mutable, NUL-terminated copy ending with a newline,
        # which is what gets handed to the native parser
        if isinstance(text, str):
            b = bytearray(text, 'utf-8')
        else:
            b = bytearray(text)
        if not b.endswith(b'\n'):
            b.append(b'\n')
        b.append(b'\0')

        atom = TemplateAtom.__new__(TemplateAtom)
        atom._atom = jess.tess_atom.TessAtom_create(<const char*> b)
        if atom._atom == NULL:
            raise ValueError(f"Failed to parse template atom: {text!r}")

        # validate match mode *now* to avoid Jess exiting when it does so later
        if atom.match_mode not in range(-1, 9) and atom.match_mode not in range(100, 108):
            raise ValueError(f"Invalid match mode: {atom.match_mode!r}")

        return atom

    def __cinit__(self):
        self.owner = None
        self.owned = False
        self._atom = NULL

    def __dealloc__(self):
        # storage flagged as owned belongs to `owner` and is left alone
        if not self.owned:
            jess.tess_atom.TessAtom_free(self._atom)

    def __init__(
        self,
        *,
        str chain_id,
        int residue_number,
        double x,
        double y,
        double z,
        object residue_names,
        object atom_names,
        double distance_weight = 0.0,
        int match_mode = 0,
    ):
        """__init__(self, *, chain_id, residue_number, x, y, z, residue_names, atom_names, distance_weight=0.0, match_mode=0)\n--\n

        Create a new template atom.

        Arguments:
            chain_id (`str`): The chain identifier, at most two
                characters long.
            residue_number (`int`): The residue sequence number.
            x (`float`): The x coordinate of the atom.
            y (`float`): The y coordinate of the atom.
            z (`float`): The z coordinate of the atom.
            residue_names (sequence of `str` or `bytes`): The residue
                names for this atom, each at most three characters long.
            atom_names (sequence of `str` or `bytes`): The atom names for
                this atom, each at most four characters long.
            distance_weight (`float`): The distance weight for this atom.
            match_mode (`int`): The match mode for this atom, between
                -1 and 8 or between 100 and 107 (inclusive).

        Raises:
            `MemoryError`: When the system allocator fails to allocate
                enough memory for the template atom storage.
            `ValueError`: When the match mode, the chain identifier, an
                atom name or a residue name is invalid.

        """
        cdef size_t m
        cdef char* p
        cdef size_t ac
        cdef size_t rc
        cdef size_t alloc_size

        # validate match mode to avoid a potential hard exit later
        if match_mode not in range(-1, 9) and match_mode not in range(100, 108):
            raise ValueError(f"Invalid match mode: {match_mode!r}")
        if len(chain_id) > 2:
            raise ValueError(f"Invalid chain ID: {chain_id!r}")

        # compute total allocation: the struct itself, one pointer per
        # name, then 5 bytes per atom name and 4 bytes per residue name
        ac = len(atom_names)
        rc = len(residue_names)
        alloc_size = sizeof(_TessAtom) + sizeof(char*) * (ac + rc) + sizeof(char) * (5*ac + 4*rc)

        # allocate base memory
        self._atom = <_TessAtom*> malloc(alloc_size)
        if self._atom is NULL:
            raise MemoryError("Failed to allocate template atom")

        # copy base data
        self._atom.code = match_mode
        self._atom.resSeq = residue_number
        self._atom.pos[0] = x
        self._atom.pos[1] = y
        self._atom.pos[2] = z
        # the chain identifier is padded with spaces to two characters
        self._atom.chainID1, self._atom.chainID2 = map(ord, chain_id.ljust(2))
        self._atom.nameCount = ac
        self._atom.resNameCount = rc
        self._atom.distWeight = distance_weight

        # setup string pointers: the pointer tables and the string buffers
        # live in the same allocation, right after the struct
        p = <char*> &self._atom[1]
        self._atom.name = <char**> p
        p += sizeof(char*)*ac
        for m in range(ac):
            self._atom.name[m] = <char*> p
            p += 5
        self._atom.resName = <char**> p
        p += sizeof(char*)*rc
        for m in range(rc):
            self._atom.resName[m] = <char*> p
            p += 4

        # copy atom names
        for m, name in enumerate(atom_names):
            if isinstance(name, str):
                _name = bytearray(name, 'ascii')
            else:
                _name = bytearray(name)
            # FIXME: is alignment proper?
            if len(_name) > 4:
                raise ValueError(f"Invalid atom name: {name!r}")
            elif len(_name) <= 3:
                # names shorter than four characters get a leading `_`
                _name.insert(0, ord('_'))
            encode_token(self._atom.name[m], _name.ljust(4, b'\0'), 4)

        # copy residue names
        for m, name in enumerate(residue_names):
            _name = name.encode('ascii') if isinstance(name, str) else name
            if len(_name) > 3:
                raise ValueError(f"Invalid residue name: {name!r}")
            encode_token(self._atom.resName[m], _name.ljust(3, b'\0'), 3)

    cdef dict _state(self):
        # Keyword arguments accepted by `__init__`, read back from the
        # native record; shared by `__repr__`, `__eq__`, `__hash__` and
        # `__reduce__`.
        return {
            "chain_id": self.chain_id,
            "residue_number": self.residue_number,
            "x": self.x,
            "y": self.y,
            "z": self.z,
            "residue_names": self.residue_names,
            "atom_names": self.atom_names,
            "distance_weight": self.distance_weight,
            "match_mode": self.match_mode,
        }

    def __repr__(self):
        cdef str ty = type(self).__name__
        cdef list args = []
        for k, v in self._state().items():
            args.append(f"{k}={v!r}")
        return f"{ty}({', '.join(args)})"

    def __copy__(self):
        return self.copy()

    def __eq__(self, object other):
        cdef TemplateAtom other_
        if not isinstance(other, TemplateAtom):
            return NotImplemented
        other_ = other
        return self._state() == other_._state()

    def __hash__(self):
        # hash the same values `__eq__` compares
        return hash(tuple(self._state().values()))

    def __reduce__(self):
        # rebuilt on unpickling by calling the type with the saved keywords
        return functools.partial(type(self), **self._state()), ()

    def __sizeof__(self):
        assert self._atom is not NULL

        cdef size_t ac = self._atom.nameCount
        cdef size_t rc = self._atom.resNameCount
        cdef size_t size = sizeof(self)

        # the native record is only counted when it is not held by `owner`
        if not self.owned:
            size += (
                sizeof(_TessAtom)
                + sizeof(char*) * (ac + rc)
                + sizeof(char) * (5*ac + 4*rc)
            )
        return size

    @property
    def match_mode(self):
        """`int`: The match mode for this particular atom.
        """
        assert self._atom is not NULL
        return self._atom.code

    @property
    def residue_number(self):
        """`int`: The residue sequence number.
        """
        assert self._atom is not NULL
        return self._atom.resSeq

    @property
    def chain_id(self):
        """`str`: The identifier of the chain the atom belongs to.

        Built from the two chain identifier characters, with surrounding
        whitespace stripped.

        """
        assert self._atom is not NULL
        cdef char c1 = jess.tess_atom.TessAtom_chainID1(self._atom)
        cdef char c2 = jess.tess_atom.TessAtom_chainID2(self._atom)
        return PyUnicode_FromFormat("%c%c", c1, c2).strip()

    @property
    def x(self):
        """`float`: The x coordinate of the atom.
        """
        assert self._atom is not NULL
        return self._atom.pos[0]

    @property
    def y(self):
        """`float`: The y coordinate of the atom.
        """
        assert self._atom is not NULL
        return self._atom.pos[1]

    @property
    def z(self):
        """`float`: The z coordinate of the atom.
        """
        assert self._atom is not NULL
        return self._atom.pos[2]

    @property
    def atom_names(self):
        """`tuple` of `str`: The different atom names for this atom.

        Names are returned with every ``_`` character removed.

        .. versionchanged:: 0.4.1
           Property now returns a `tuple` rather than a `list`.

        """
        assert self._atom is not NULL

        cdef int i
        cdef list l = []

        for i in range(self._atom.nameCount):
            l.append(self._atom.name[i].replace(b'_', b'').decode())
        return tuple(l)

    @property
    def residue_names(self):
        """`tuple` of `str`: The different residue names for this atom.

        Names are returned with every ``_`` character removed.

        .. versionchanged:: 0.4.1
           Property now returns a `tuple` rather than a `list`.

        """
        assert self._atom is not NULL

        cdef int i
        cdef list l = []

        for i in range(self._atom.resNameCount):
            l.append(self._atom.resName[i].replace(b'_', b'').decode())
        return tuple(l)

    @property
    def distance_weight(self):
        """`float`: The distance weight for this atom.
        """
        assert self._atom is not NULL
        return self._atom.distWeight

    cpdef TemplateAtom copy(self):
        """Create a copy of this template atom.

        Returns:
            `~pyjess.TemplateAtom`: A new template atom object with
            identical attributes.

        Raises:
            `MemoryError`: When the native copy cannot be allocated.

        .. versionadded:: 0.4.0

        """
        assert self._atom is not NULL

        cdef TemplateAtom atom = TemplateAtom.__new__(TemplateAtom)
        with nogil:
            atom._atom = jess.tess_atom.TessAtom_copy(self._atom)
        # a NULL result is reported the same way `Template.__init__` reports
        # it, instead of handing out an atom whose getters would fail
        if atom._atom is NULL:
            raise MemoryError("Failed to allocate template atom")
        return atom
1384
+
1385
+
1386
cdef class Template:
    """A template, as a sequence of `TemplateAtom` objects.

    .. versionadded:: 0.4.0
       Equality, hashing and pickle protocol support.

    """
    # Object holding the native template when this object is only a view
    # on it; `None` otherwise.
    cdef object owner
    # True when the native template is held by `owner` and must not be
    # released by this object.
    cdef bint owned
    # Pointer to the generic native template (table of function pointers).
    cdef _Template* _tpl
    # Pointer to the template data stored right after `_tpl`.
    cdef _TessTemplate* _tess

    @classmethod
    def loads(cls, text, str id = None):
        """Load a template from a string.

        Arguments:
            text (`str`): The contents of the template, in the same
                format `Template.load` reads from a file opened in
                **text mode**.
            id (`str`, optional): The identifier of the template. By default,
                the parser will take the one from the ``PDB_ID`` remark if
                found in the header.

        Returns:
            `~pyjess.Template`: The template parsed from the given string.

        See Also:
            `Template.load` to load a template from a file-like object or
            from a path.

        """
        return cls.load(io.StringIO(text), id=id)

    @classmethod
    def load(cls, file, str id = None):
        """Load a template from the given file.

        Only lines starting with ``ATOM`` (parsed as template atoms) and,
        when no identifier was given, the first ``REMARK PDB_ID`` line
        carrying a value are used; other lines are ignored.

        Arguments:
            file (`str`, `os.PathLike` or file-like object): Either the
                path to a file, or a file-like object opened in **text mode**
                to read the template from.
            id (`str`, optional): The identifier of the template. By default,
                the parser will take the one from the ``PDB_ID`` remark if
                found in the header.

        Returns:
            `~pyjess.Template`: The template parsed from the given file.

        Raises:
            `ValueError`: When an ``ATOM`` line cannot be parsed.

        """
        cdef str line
        cdef list fields
        cdef list atoms = []
        try:
            handle = open(file)
        except TypeError:
            # not something `open` accepts: treat it as a file-like object
            handle = nullcontext(file)
        with handle as f:
            for line in f:
                if line.startswith("ATOM"):
                    atoms.append(TemplateAtom.loads(line))
                elif id is None and line.startswith("REMARK PDB_ID"):
                    # the identifier is whatever follows the two keywords;
                    # a remark without any value is skipped rather than
                    # failing on a missing third field
                    fields = line.split(" ", maxsplit=2)
                    if len(fields) > 2:
                        id = fields[2].strip()
        return cls(atoms, id=id)

    def __cinit__(self):
        self._tpl = NULL
        self._tess = NULL
        self.owner = None
        self.owned = False

    def __dealloc__(self):
        # `_tpl` is still NULL when `__init__` never ran or failed before
        # allocating; nothing needs releasing then.
        # NOTE(review): the native free routine is not visible from here, so
        # it is not assumed to accept NULL.
        if not self.owned and self._tpl is not NULL:
            jess.tess_template.TessTemplate_free(self._tpl)

    def __init__(self, object atoms = (), str id = None):
        """__init__(self, atoms=(), id=None)\n--\n

        Create a new template.

        Arguments:
            atoms (sequence of `~pyjess.TemplateAtom`): The atoms of the
                templates.
            id (`str`, optional): The identifier of the template.

        Raises:
            `MemoryError`: When the system allocator fails to allocate
                enough memory for the template storage.
            `TypeError`: When ``atoms`` contains something that is not a
                `~pyjess.TemplateAtom`.

        """
        cdef int i
        cdef int j
        cdef double dist
        cdef TemplateAtom atom
        cdef size_t alloc_size
        cdef int count = len(atoms)

        # single allocation holding, in order: the generic template, the
        # template data, the atom pointers, the distance row pointers and
        # the `count * count` distance values
        alloc_size = (
            sizeof(_Template) + sizeof(_TessTemplate)
            + count * sizeof(_TessAtom*)
            + count * sizeof(double*)
            + count * count * sizeof(double)
        )

        self._tpl = <_Template*> calloc(1, alloc_size)
        if self._tpl is NULL:
            raise MemoryError("Failed to allocate template")

        # setup memory for atoms
        self._tess = <_TessTemplate*> &self._tpl[1]
        self._tess.atom = <_TessAtom**> &self._tess[1]
        for i in range(count):
            self._tess.atom[i] = NULL

        # setup memory and pointers for distances
        self._tess.distance = <double**> &self._tess.atom[count]
        if count > 0:
            self._tess.distance[0] = <double*> &self._tess.distance[count]
            for i in range(1, count):
                self._tess.distance[i] = <double*> &self._tess.distance[i-1][count]

        # setup template function pointers
        self._tpl.free = jess.tess_template.TessTemplate_free
        self._tpl.match = jess.tess_template.TessTemplate_match
        self._tpl.position = jess.tess_template.TessTemplate_position
        self._tpl.count = jess.tess_template.TessTemplate_count
        self._tpl.range = jess.tess_template.TessTemplate_range
        self._tpl.check = jess.tess_template.TessTemplate_check
        self._tpl.candidates = jess.tess_template.TessTemplate_candidates
        self._tpl.name = jess.tess_template.TessTemplate_name
        self._tpl.logE = jess.tess_template.TessTemplate_logE
        self._tpl.distWeight = jess.tess_template.TessTemplate_distWeight
        self._tpl.copy = jess.tess_template.TessTemplate_copy

        # copy name and atom count
        self._tess.count = count
        self._tess.symbol = NULL if id is None else strdup(id.encode())

        # copy atom data
        for i, atom in enumerate(atoms):
            assert i < count
            # `None` passes the typed assignment above but has no native
            # record to copy from
            if atom is None:
                raise TypeError("expected TemplateAtom, found None")
            self._tess.atom[i] = jess.tess_atom.TessAtom_copy(atom._atom)
            if self._tess.atom[i] is NULL:
                raise MemoryError("Failed to allocate template atom")

        # compute distances (symmetric matrix with a zero diagonal)
        for i in range(count):
            self._tess.distance[i][i] = 0.0
            for j in range(i+1, count):
                dist = jess.tess_atom.TessAtom_distance(self._tess.atom[i], self._tess.atom[j])
                self._tess.distance[i][j] = dist
                self._tess.distance[j][i] = dist

        # compute dimension: the number of distinct (residue number,
        # chain identifier) combinations among the atoms
        residues = {
            (
                self._tess.atom[i].resSeq ,
                self._tess.atom[i].chainID1,
                self._tess.atom[i].chainID2,
            )
            for i in range(count)
        }
        self._tess.dim = len(residues)

    def __copy__(self):
        return self.copy()

    def __len__(self):
        assert self._tpl is not NULL
        return self._tess.count

    def __getitem__(self, object index):
        # Slices build a new template (same id) from the selected atoms;
        # integer indices return a `TemplateAtom` view whose storage stays
        # held by this template.
        assert self._tess is not NULL

        cdef TemplateAtom atom
        cdef ssize_t length = self._tess.count
        cdef ssize_t index_

        if isinstance(index, slice):
            indices = range(*index.indices(length))
            return type(self)(atoms=[self[i] for i in indices], id=self.id)
        else:
            index_ = index
            if index_ < 0:
                index_ += length
            if index_ < 0 or index_ >= length:
                raise IndexError(index)
            atom = TemplateAtom.__new__(TemplateAtom)
            atom.owner = self
            atom.owned = True
            atom._atom = self._tess.atom[index_]
            return atom

    def __eq__(self, object other):
        cdef Template other_
        if not isinstance(other, Template):
            return NotImplemented
        other_ = other
        if self.id != other_.id:
            return False
        if self.dimension != other_.dimension:
            return False
        if len(self) != len(other_):
            return False
        return all(x == y for x,y in zip(self, other_))

    def __hash__(self):
        return hash((
            self.id,
            *(hash(x) for x in self)
        ))

    def __reduce__(self):
        # rebuilt on unpickling from the list of atoms and the identifier
        return type(self), (list(self), self.id)

    def __sizeof__(self):
        assert self._tess is not NULL

        cdef size_t i
        cdef size_t ac
        cdef size_t rc
        cdef _TessAtom* atom
        cdef size_t size = sizeof(self)

        # add the template allocation on top of the object itself (the
        # object size must be kept, not overwritten)
        size += (
            sizeof(_Template)
            + sizeof(_TessTemplate)
            + self._tess.count * sizeof(_TessAtom*)
            + self._tess.count * sizeof(double*)
            + self._tess.count * self._tess.count * sizeof(double)
        )
        # then every atom record, using the same layout as
        # `TemplateAtom.__sizeof__`
        for i in range(self._tess.count):
            atom = self._tess.atom[i]
            ac = atom.nameCount
            rc = atom.resNameCount
            size += (
                sizeof(_TessAtom)
                + sizeof(char*) * (ac + rc)
                + sizeof(char) * (5*ac + 4*rc)
            )
        return size

    @property
    def id(self):
        """`str` or `None`: An identifier for the template, if any.
        """
        assert self._tpl is not NULL

        cdef const char* name = self._tpl.name(self._tpl)
        if name is NULL:
            return None
        return name.decode()

    @property
    def dimension(self):
        """`int`: The dimension of the template (i.e. number of residues).
        """
        assert self._tess is not NULL
        return self._tess.dim

    cpdef Template copy(self):
        """Create a copy of the template.

        Returns:
            `~pyjess.Template`: A new template object with identical
            attributes and a copy of the `TemplateAtom` it contains.

        Raises:
            `MemoryError`: When the native copy cannot be allocated.

        """
        assert self._tpl is not NULL

        cdef Template tpl = Template.__new__(Template)
        with nogil:
            tpl._tpl = self._tpl.copy(self._tpl)
        # NOTE(review): assumes the native copy routine reports failure by
        # returning NULL - confirm against its implementation.
        if tpl._tpl is NULL:
            raise MemoryError("Failed to allocate template")
        tpl._tess = <_TessTemplate*> &tpl._tpl[1]
        return tpl
1658
+
1659
+ cdef class Query:
1660
+ """A query over templates with a given molecule.
1661
+
1662
+ Jess iterates over the templates and attempt matches the query
1663
+ molecule, so the hits can actually be generated iteratively. This
1664
+ class allows accessing the hits as a Python iterator.
1665
+
1666
+ Attributes:
1667
+ jess (`~pyjess.Jess`): The templates this object is currently
1668
+ scanning.
1669
+ molecule (`~pyjess.Molecule`): The query molecule to align to
1670
+ the templates.
1671
+ rmsd_threshold (`float`): The RMSD threshold for reporting
1672
+ results.
1673
+ best_match (`bool`): Whether the query will return only the
1674
+ best match to each template.
1675
+
1676
+ """
1677
+ cdef _JessQuery* _jq
1678
+ cdef bint _partial
1679
+ cdef int _candidates
1680
+ cdef uintptr_t _prev_tpl
1681
+ cdef int _max_candidates
1682
+ cdef _IgnoreType _ignore_chain
1683
+
1684
+ cdef readonly Jess jess
1685
+ cdef readonly Molecule molecule
1686
+ cdef readonly bint best_match
1687
+ cdef readonly double rmsd_threshold
1688
+
1689
def __cinit__(self):
    # Start with no native query attached, no result waiting to be
    # replayed, and the candidate bookkeeping cleared.
    self._jq = NULL
    self._partial = False
    self._prev_tpl = 0
    self._candidates = 0
1694
+
1695
def __dealloc__(self):
    # `_jq` stays NULL (see `__cinit__`) until a native query is attached,
    # so there may be nothing to release.
    # NOTE(review): the native free routine is not visible from here, so it
    # is not assumed to accept NULL.
    if self._jq is not NULL:
        jess.jess.JessQuery_free(self._jq)
1697
+
1698
def __iter__(self):
    # A query is its own iterator: hits are produced by `__next__`.
    return self
1700
+
1701
@property
def ignore_chain(self):
    """`str` or `None`: The way atom chains are considered or discarded.

    Either ``"residues"``, ``"atoms"``, or `None`.

    """
    if self._ignore_chain == _IgnoreType.ignoreResidues:
        return "residues"
    if self._ignore_chain == _IgnoreType.ignoreAtoms:
        return "atoms"
    # `ignoreNone`, as well as any other value, maps to `None`
    return None
1711
+
1712
@ignore_chain.setter
def ignore_chain(self, ignore_chain):
    # Accepts `None`, "residues" or "atoms"; anything else is rejected
    # without modifying the current setting.
    if ignore_chain is None:
        self._ignore_chain = _IgnoreType.ignoreNone
        return
    if ignore_chain == "residues":
        self._ignore_chain = _IgnoreType.ignoreResidues
        return
    if ignore_chain == "atoms":
        self._ignore_chain = _IgnoreType.ignoreAtoms
        return
    raise ValueError(f"invalid value for `ignore_chain`: {ignore_chain!r}")
1722
+
1723
@property
def max_candidates(self):
    """`int` or `None`: The maximum number of candidate hits to report *by template*.

    `None` is returned when no maximum is set.

    """
    # -1 is the internal marker for "no maximum"
    return None if self._max_candidates == -1 else self._max_candidates
1728
+
1729
@max_candidates.setter
def max_candidates(self, max_candidates):
    # `None` removes the maximum (stored as -1); otherwise the value must
    # compare as non-negative to be accepted.
    if max_candidates is None:
        self._max_candidates = -1
        return
    if not (max_candidates >= 0):
        raise ValueError(f"invalid value for `max_candidates` argument: {max_candidates!r}")
    self._max_candidates = max_candidates
1737
+
1738
cdef bint _advance(self) noexcept nogil:
    # Move to the next native result, unless `_rewind` flagged the current
    # one as pending: in that case the flag is cleared and the current
    # result is reported once more without touching the native query.
    if not self._partial:
        return jess.jess.JessQuery_next(self._jq, self._ignore_chain)
    self._partial = False
    return True
1743
+
1744
cdef bint _rewind(self) noexcept nogil:
    # Flag the current native result as pending, so that the next call to
    # `_advance` reports it again instead of moving the native query forward.
    self._partial = True
1746
+
1747
cdef int _copy_atoms(self, const _Template* tpl, Hit hit) except -1 nogil:
    # Copy the atoms of the current native result into `hit`, resizing the
    # hit's buffer to hold one `_Atom` per template atom.
    #
    # Returns the number of atoms copied; raises `MemoryError` (reported
    # as -1 to C callers) when the buffer cannot be resized, in which case
    # `hit` keeps its previous buffer.
    cdef _Atom** atoms = jess.jess.JessQuery_atoms(self._jq)
    cdef int count = tpl.count(tpl)
    # never issue a zero-byte request: what `realloc` returns for it is
    # implementation-defined and could be mistaken for a failure
    cdef size_t slots = count if count > 0 else 1
    cdef _Atom* buffer
    cdef int i

    # resize through a temporary so that the previous buffer is not lost
    # (and leaked) if the reallocation fails
    buffer = <_Atom*> realloc(hit._atoms, slots * sizeof(_Atom))
    if buffer is NULL:
        raise MemoryError("Failed to allocate hit atoms")
    hit._atoms = buffer
    for i in range(count):
        memcpy(&hit._atoms[i], atoms[i], sizeof(_Atom))
    return count
1757
+
1758
cdef int _copy_superposition(self, _Superposition* sup, Hit hit) noexcept nogil:
    # Save the 9 rotation values and the two 3-value centroids of `sup`
    # into `hit`, so that the hit no longer depends on the superposition.
    # Always returns 0.
    cdef const double* rotation = jess.super.Superposition_rotation(sup)
    cdef const double* centroid0 = jess.super.Superposition_centroid(sup, 0)
    cdef const double* centroid1 = jess.super.Superposition_centroid(sup, 1)

    memcpy(hit._centre[0], centroid0, 3*sizeof(double))
    memcpy(hit._centre[1], centroid1, 3*sizeof(double))
    memcpy(hit._rotation, rotation, 9*sizeof(double))
    return 0
1766
+
1767
+ def __next__(self):
1768
+ assert self._jq is not NULL
1769
+
1770
+ cdef double rmsd
1771
+ cdef const double* rot
1772
+ cdef _Template* tpl = NULL
1773
+ cdef _Template* hit_tpl = NULL
1774
+ cdef _Superposition* sup = NULL
1775
+ cdef Hit hit = Hit.__new__(Hit)
1776
+
1777
+ # prepare the hit to be returned
1778
+ hit.rmsd = INFINITY
1779
+ hit._atoms = NULL
1780
+ hit._molecule = self.molecule
1781
+ hit_tpl = NULL
1782
+ hit_found = False
1783
+
1784
+ # search the next hit without the GIL to allow parallel queries.
1785
+ with nogil:
1786
+ while self._advance():
1787
+ # load current iteration template, and check that the hit
1788
+ # was obtained with the current template and not with the
1789
+ # previous one
1790
+ self._prev_tpl = <uintptr_t> tpl
1791
+ tpl = jess.jess.JessQuery_template(self._jq)
1792
+ if hit_found and hit_tpl != tpl:
1793
+ self._rewind()
1794
+ break
1795
+
1796
+ # load superposition and compute RMSD for the current iteration
1797
+ sup = jess.jess.JessQuery_superposition(self._jq)
1798
+ rmsd = jess.super.Superposition_rmsd(sup)
1799
+
1800
+ # NB(@althonos): we don't need to compute the E-value to get the
1801
+ # best match by molecule/template pair since the
1802
+ # logE-value for a fixed pair varies by the RMSD
1803
+ # term only (see `TessTemplate_logE`)
1804
+
1805
+ # check that the candidate passes threshold, and return it
1806
+ # if not in best match, otherwise record it until the next
1807
+ # template is reached (or the iterator finished)
1808
+ if rmsd <= self.rmsd_threshold and rmsd < hit.rmsd:
1809
+ # check if the rotation matrix contains NaN values
1810
+ rot = jess.super.Superposition_rotation(sup)
1811
+ nan = False
1812
+ for i in range(9):
1813
+ nan |= isnan(rot[i])
1814
+
1815
+ if nan:
1816
+ with gil:
1817
+ PyErr_WarnEx(
1818
+ UserWarning,
1819
+ "Jess returned a superposition matrix with NaN values",
1820
+ 2,
1821
+ )
1822
+ else:
1823
+ self._copy_atoms(tpl, hit)
1824
+ self._copy_superposition(sup, hit)
1825
+ hit.rmsd = rmsd
1826
+ hit_tpl = tpl
1827
+ hit_found = True
1828
+
1829
+ # check if we already made it to the next template,
1830
+ # or if we need to short-circuit the iteration and
1831
+ # force the query to move to the next template as
1832
+ # we found too many candidates already.
1833
+ if <uintptr_t> tpl != self._prev_tpl:
1834
+ self._candidates = 0
1835
+ else:
1836
+ self._candidates += 1
1837
+ if self._max_candidates != -1 and self._candidates > self._max_candidates:
1838
+ self._candidates = 0
1839
+ jess.jess.JessQuery_nextTemplate(self._jq)
1840
+
1841
+ # free superposition items (as relevant data was copied in
1842
+ # the Hit if needed) and return hits immediately if we are
1843
+ # not in best match mode
1844
+ jess.super.Superposition_free(sup)
1845
+ if hit_found and not self.best_match:
1846
+ break
1847
+
1848
+ if not hit_found:
1849
+ raise StopIteration
1850
+
1851
+ # get the template object for the hit
1852
+ hit.template = self.jess._templates[self.jess._indices[<size_t> hit_tpl]]
1853
+ return hit
1854
+
1855
+
1856
+ cdef class Hit:
1857
+ """A hit identified between a query molecule and a target template.
1858
+
1859
+ Attributes:
1860
+ rmsd (`float`): The RMSD between the aligned structures.
1861
+ template (`~pyjess.Template`): The template that matched the
1862
+ query molecule.
1863
+ molecule (`~pyjess.Molecule`): The query molecule.
1864
+
1865
+ """
1866
+ cdef double[9] _rotation
1867
+ cdef double[2][3] _centre
1868
+ cdef _Atom* _atoms
1869
+
1870
+ cdef readonly double rmsd
1871
+ cdef readonly Template template
1872
+ cdef Molecule _molecule
1873
+
1874
def __dealloc__(self):
    # Release the buffer of matched atoms held by this hit.
    free(self._atoms)
1876
+
1877
def __getstate__(self):
    # Pickle support: export the superposition data, the untransformed
    # atoms and molecule, the RMSD and the template as plain Python
    # objects, under the keys that `__setstate__` reads back.
    state = {}
    state["rotation"] = list(self._rotation)
    state["centre"] = list(self._centre)
    state["atoms"] = self.atoms(transform=False)
    state["rmsd"] = self.rmsd
    state["template"] = self.template
    state["molecule"] = self.molecule(transform=False)
    return state
1886
+
1887
def __setstate__(self, state):
    # Pickle support: restore a hit from the dictionary produced by
    # `__getstate__`.
    #
    # Raises `ValueError` when the number of saved atoms does not match
    # the template length, and `MemoryError` when the atom buffer cannot
    # be (re)allocated.
    cdef size_t i
    cdef size_t count
    cdef size_t slots
    cdef Atom atom
    cdef _Atom* buffer

    self.rmsd = state["rmsd"]
    self.template = state["template"]
    self._molecule = state["molecule"]
    self._rotation = state["rotation"]
    self._centre = state["centre"]

    # check number of atoms is consistent
    count = len(self.template)
    if len(state["atoms"]) != count:
        raise ValueError(f"unexpected number of atoms: {len(state['atoms'])!r} (expected {count!r})")
    # allocate or reallocate memory for atoms; never issue a zero-byte
    # request (its result is implementation-defined), and go through a
    # temporary so that the previous buffer is not leaked on failure
    slots = count if count > 0 else 1
    buffer = <_Atom*> realloc(self._atoms, slots * sizeof(_Atom))
    if buffer is NULL:
        raise MemoryError("Failed to allocate hit atoms")
    self._atoms = buffer
    # copy atom data
    for i, atom in enumerate(state["atoms"]):
        memcpy(&self._atoms[i], atom._atom, sizeof(_Atom))
1909
+
1910
cdef void _transform_atom(self, double* x, const double* src):
    # Write into `x` the coordinates of `src` after applying the stored
    # superposition: x = v + M * (src - c), where M is the rotation read
    # as a row-major 3x3 matrix, c the first stored centre and v the
    # second one.
    cdef size_t row
    cdef size_t col
    cdef const double* rotation = self._rotation
    cdef const double* origin = self._centre[0]
    cdef const double* offset = self._centre[1]

    for row in range(3):
        x[row] = offset[row]
        for col in range(3):
            x[row] += rotation[3*row + col] * (src[col] - origin[col])
1921
+
1922
@property
def determinant(self):
    """`float`: The determinant of the rotation matrix.

    Computed by expanding the stored 3x3 matrix (row-major) along
    its first row.

    """
    cdef const double* m = self._rotation
    cdef double total = 0.0

    with nogil:
        total += m[0] * (m[4] * m[8] - m[5] * m[7])
        total -= m[1] * (m[3] * m[8] - m[5] * m[6])
        total += m[2] * (m[3] * m[7] - m[4] * m[6])
    return total
1934
+
1935
@property
def log_evalue(self):
    """`float`: The logarithm of the E-value estimated for the hit.

    Obtained from the template, given the RMSD of the hit and the
    count reported for the query molecule.

    """
    assert self.template._tpl is not NULL

    cdef int total
    cdef double log_e

    with nogil:
        total = jess.molecule.Molecule_count(self._molecule._mol)
        log_e = self.template._tpl.logE(self.template._tpl, self.rmsd, total)
    return log_e
1948
+
1949
@property
def evalue(self):
    """`float`: The E-value estimated for the hit.

    This is the exponential of the value reported by `log_evalue`.

    """
    # same guard as `log_evalue`: the template function table is about to
    # be dereferenced
    assert self.template._tpl is not NULL

    cdef int n
    cdef double e

    with nogil:
        n = jess.molecule.Molecule_count(self._molecule._mol)
        e = exp(self.template._tpl.logE(self.template._tpl, self.rmsd, n))
    return e
1960
+
1961
cpdef list atoms(self, bint transform=True):
    """Get the list of query atoms matching the template.

    Arguments:
        transform (`bool`): Whether or not to transform coordinates
            of hits into template frame.

    Returns:
        `list` of `~pyjess.Atom`: The list of matching atoms. With
        ``transform=True`` every atom is an independent copy with
        transformed coordinates; otherwise the atoms are views on the
        storage of this hit, which they keep alive.

    Raises:
        `MemoryError`: When a transformed atom cannot be allocated.

    """
    assert self.template._tpl is not NULL

    cdef Atom atom
    cdef int k
    cdef int count = self.template._tpl.count(self.template._tpl)
    cdef list atoms = []

    for k in range(count):
        atom = Atom.__new__(Atom)
        if transform:
            atom._atom = <_Atom*> malloc(sizeof(_Atom))
            # check the allocation before writing to it, as `Atom.copy` does
            if atom._atom is NULL:
                raise MemoryError("Failed to allocate atom")
            memcpy(atom._atom, &self._atoms[k], sizeof(_Atom))
            self._transform_atom(atom._atom.x, self._atoms[k].x)
        else:
            # view on the hit's own buffer: the atom references the hit
            # and must not release the storage itself
            atom.owned = True
            atom.owner = self
            atom._atom = &self._atoms[k]
        atoms.append(atom)

    return atoms
1998
+
1999
cpdef Molecule molecule(self, bint transform=False):
    """Get the molecule matching the template.

    Arguments:
        transform (`bool`): Whether or not to transform coordinates
            of the molecule atoms into template frame.

    Returns:
        `~pyjess.Molecule`: The matching molecule, optionally
        rotated to match the template coordinate. Without
        transformation the query molecule itself is returned, not
        a copy.

    .. versionadded:: 0.5.0

    """
    assert self.template._tpl is not NULL

    cdef Molecule rotated
    cdef size_t idx

    if transform:
        # transform a copy, reading the coordinates from the original
        rotated = self._molecule.copy()
        for idx in range(rotated._mol.count):
            self._transform_atom(
                rotated._mol.atom[idx].x,
                self._molecule._mol.atom[idx].x,
            )
        return rotated

    return self._molecule
2033
+
2034
+ cpdef str dumps(self, str format="pdb", bint transform=True):
2035
+ """Write the hit to a string.
2036
+
2037
+ Arguments:
2038
+ format (`str`): The format in which to write the hit.
2039
+ Currently only supports ``pdb``, which writes the hits
2040
+ in the same format as Jess.
2041
+ transform (`bool`): Whether or not to transform coordinates
2042
+ of the molecule atoms into template frame.
2043
+
2044
+ Raises:
2045
+ `RuntimeError`: When attempting to dump a `Hit` which was
2046
+ obtained from a `Template` which has no `~Template.id`.
2047
+
2048
+ .. versionadded:: 0.7.0
2049
+
2050
+ """
2051
+ file = io.StringIO()
2052
+ self.dump(file, format=format, transform=transform)
2053
+ return file.getvalue()
2054
+
2055
    cpdef void dump(self, object file, str format="pdb", bint transform=True):
        """Write the hit to a file.

        The output consists of one ``REMARK`` line (molecule id, RMSD,
        template id, determinant and log E-value), one ``ATOM`` record per
        template atom, and a final ``ENDMDL`` line.

        Arguments:
            file (file-like object): A file opened in *text* mode where the
                hit will be written.
            format (`str`): The format in which to write the hit.
                Currently only supports ``pdb``, which writes the hits
                in the same format as Jess.
            transform (`bool`): Whether or not to transform coordinates
                of the written atoms into template frame.

        Raises:
            `RuntimeError`: When attempting to dump a `Hit` which was
                obtained from a `Template` which has no `~Template.id`.

        .. versionadded:: 0.7.0

        """
        assert self.template._tpl is not NULL
        assert self._molecule._mol is not NULL

        cdef _Atom* atom
        cdef size_t k
        cdef char[80] buffer
        cdef char[5] name
        cdef char[5] resname
        cdef double[3] x
        cdef int count = self.template._tpl.count(self.template._tpl)

        # NOTE(review): `format` is never inspected in this body; the same
        # PDB-style output is produced whatever value is passed.

        # Checked before anything is written, so that a failure on the
        # template id leaves `file` untouched.
        if self.template.id is None:
            raise RuntimeError("cannot dump `Hit` where `self.template.id` is `None`")

        # NOTE(review): `self._molecule.id` is written without a similar
        # check; presumably a `None` id would make `file.write` fail after
        # "REMARK " has already been emitted -- confirm whether molecule
        # ids can be `None`.
        file.write("REMARK ")
        file.write(self._molecule.id)
        file.write(f" {self.rmsd:5.3f} ")
        file.write(self.template.id)
        file.write(f" Det={self.determinant:4,.1f} log(E)~ {self.log_evalue:4.2f}\n")

        for k in range(count):
            atom = &self._atoms[k]
            # Extract the atom and residue names into the local buffers
            # used as `%s` arguments below.
            decode_token(name, atom.name, 4)
            decode_token(resname, atom.resName, 3)
            # `x` receives either the transformed coordinates or a plain
            # copy of the stored ones.
            if transform:
                self._transform_atom(x, atom.x)
            else:
                memcpy(x, atom.x, 3*sizeof(double))
            # NOTE(review): `sprintf` does not bound its output to the
            # 80-byte `buffer`, and the field widths are minimums, so
            # unusually large values could overrun it -- consider
            # `snprintf`.
            # NOTE(review): the format string has 13 conversions, so the
            # last three arguments (`segID`, `element`, `charge`) are not
            # consumed by it.
            n = sprintf(
                buffer,
                "ATOM %5i%5s%c%-3s%c%c%4i%-4c%8.3f%8.3f%8.3f%6.2f%6.2f\n",
                atom.serial,
                name,
                atom.altLoc,
                resname,
                atom.chainID1,
                atom.chainID2,
                atom.resSeq,
                atom.iCode,
                x[0],
                x[1],
                x[2],
                atom.occupancy,
                atom.tempFactor,
                atom.segID,
                atom.element,
                atom.charge
            )
            # `n` is the number of characters written by `sprintf`.
            file.write(PyUnicode_FromStringAndSize(buffer, n))
        file.write("ENDMDL\n")
2124
+
2125
cdef class Jess:
    """A handle to run Jess over a list of templates.

    Example:
        Create a `Jess` object from a list of templates::

            >>> t1 = Template.load("1.3.3.tpl")
            >>> t2 = Template.load("4.1.2.tpl")
            >>> jess = Jess([t1, t2])

        Once initialized, the `Jess` object cannot be modified further.
        Use the `~Jess.query` method to query the templates with a
        molecule::

            >>> molecule = Molecule.load("1AMY.pdb")
            >>> query = jess.query(molecule, 2, 2, 2)

        The returned `Query` object is an iterator that can be
        advanced through a ``for`` loop, or with the `next` built-in
        function to get the first hit:

            >>> hit = next(query)
            >>> hit.rmsd
            1.4386...

        The hit can also be formatted in PDB format like in the
        original JESS code::

            >>> print(hit.dumps(format="pdb"), end="")
            REMARK 1AMY 1.439 2om2 Det= 1.0 log(E)~ 1.11
            ATOM 729 CA THR A 94 34.202 -24.426 8.851 1.00 2.00
            ATOM 732 CB THR A 94 35.157 -23.467 8.101 1.00 4.66
            ATOM 733 OG1 THR A 94 36.338 -23.247 8.871 1.00 9.85
            ATOM 746 CD GLU A 96 41.454 -29.509 8.013 1.00 24.05
            ATOM 748 OE2 GLU A 96 42.536 -29.680 7.441 1.00 34.44
            ATOM 747 OE1 GLU A 96 41.212 -28.521 8.708 1.00 18.56
            ATOM 437 CZ ARG A 55 44.471 -26.619 10.181 1.00 8.51
            ATOM 436 NE ARG A 55 44.334 -27.346 11.290 1.00 9.05
            ATOM 438 NH1 ARG A 55 43.590 -26.751 9.179 1.00 13.17
            ENDMDL

    .. versionadded:: 0.4.0
       Equality, hashing and pickle protocol support.

    """
    # C handle created in `__init__`; NULL until then.
    cdef _Jess* _jess
    # Maps the address of each template copy handed to the C storage to
    # the position of the corresponding Python template.
    cdef dict _indices
    # The `Template` objects given to `__init__`, in order.
    cdef tuple _templates
    # Number of templates added to the C storage.
    cdef size_t length

    def __cinit__(self):
        self._jess = NULL
        self.length = 0

    def __dealloc__(self):
        # `__init__` may never have run (e.g. `Jess.__new__(Jess)`) or may
        # have failed before the handle was created, so only release a
        # handle that actually exists.
        if self._jess is not NULL:
            jess.jess.Jess_free(self._jess)

    def __init__(self, object templates = ()):
        """__init__(self, templates=())\n--\n

        Create a new Jess database containing the given templates.

        Arguments:
            templates (sequence of `~pyjess.Template`): The templates to
                index in the database for further querying.

        Raises:
            `TypeError`: When an item of ``templates`` is `None` or is
                not a `~pyjess.Template`.
            `MemoryError`: When the internal storage or a template copy
                cannot be allocated.

        Caution:
            The `~pyjess.Template` objects given in argument will be copied
            because the internal C data structure requires ownership of the
            data. Modification to the original `~pyjess.Template` objects will
            not have an effect on the newly created `~pyjess.Jess` templates.

        """
        cdef Template template
        cdef _Template* tpl
        cdef list _templates = []

        self._jess = jess.jess.Jess_create()
        if self._jess is NULL:
            raise MemoryError("failed to allocate Jess storage")
        self._indices = {}

        for template in templates:
            # A typed variable still accepts `None`, and reading `_tpl`
            # from it would dereference an invalid object.
            if template is None:
                raise TypeError("expected Template, found None")
            # NOTE: the Jess storage owns the data, so we make a copy of the
            #       template given as argument to avoid a double-free.
            tpl = template._tpl.copy(template._tpl)
            # NOTE(review): assumes `copy` reports failure by returning
            # NULL -- confirm against the C implementation.
            if tpl is NULL:
                raise MemoryError("failed to copy template")
            jess.jess.Jess_addTemplate(self._jess, tpl)
            self._indices[<size_t> tpl] = self.length
            _templates.append(template)
            self.length += 1

        self._templates = tuple(_templates)

    def __copy__(self):
        return self.copy()

    def __reduce__(self):
        # Rebuild from the stored templates when unpickling.
        return type(self), (self._templates,)

    def __eq__(self, object other):
        cdef Jess other_
        if not isinstance(other, Jess):
            return NotImplemented
        other_ = other
        return self._templates == other_._templates

    def __hash__(self):
        return hash((Jess, self._templates))

    def __len__(self):
        return self.length

    def __getitem__(self, object index):
        cdef ssize_t index_

        if isinstance(index, slice):
            # Slicing the stored tuple selects the same templates as
            # indexing them one by one.
            return type(self)(self._templates[index])

        index_ = index
        if index_ < 0:
            index_ += self.length
        if index_ < 0 or index_ >= self.length:
            raise IndexError(index)
        return self._templates[index_]

    cpdef Jess copy(self):
        """Create a copy of the `Jess` object.

        Returns:
            `~pyjess.Jess`: A `Jess` object containing the same templates.

        .. versionadded:: 0.4.0

        """
        return type(self)(self._templates)

    def query(
        self,
        Molecule molecule not None,
        double rmsd_threshold,
        double distance_cutoff,
        double max_dynamic_distance,
        *,
        object max_candidates = None,
        object ignore_chain = None,
        bint best_match = False,
        bint reorder = True,
    ):
        """Scan for templates matching the given molecule.

        Arguments:
            molecule (`~pyjess.Molecule`): The protein to match the
                templates to.
            rmsd_threshold (`float`): The RMSD threshold for reporting
                results.
            distance_cutoff (`float`): The global distance cutoff
                used to guide the search.
            max_dynamic_distance (`float`): The maximum template/query
                dynamic distance after adding the global distance cutoff
                and the individual atom distance cutoff defined for each
                atom of the template.
            max_candidates (`int` or `None`): The maximum number of candidate
                hits to report by template. If a non-`None` value is given,
                it may speed up querying for unspecific templates, but also
                produce results potentially inconsistent with Jess.
            ignore_chain (`str` or `None`): Whether to check or ignore the
                chain of the atoms to match. The different supported modes
                are:

                - `None`: Force the atoms in the molecule to belong
                  to different (resp. same) chains if so is the case
                  in the template.
                - ``residues``: Allow atoms to belong to different
                  (resp. same) chains even if it is not the case in
                  the template, but force all atoms of a residue to
                  belong to the same chain.
                - ``atoms``: Allow atoms to belong to any chain,
                  independently to the template or the residue they
                  belong to.

            best_match (`bool`): Pass `True` to return only the best match
                to each template, based on RMSD. In case of ties, the
                first match is returned. Note that a match must still
                be passing the RMSD threshold given in ``rmsd_threshold``
                to be returned.
            reorder (`bool`): Whether to enable template atom reordering
                to accelerate matching in the scanner algorithm. Pass
                `False` to reverse to the original, slower algorithm
                which matches atoms in the same order as they appear in
                the template, at the cost of longer run times.

        Returns:
            `~pyjess.Query`: An iterator over the query hits.

        Raises:
            `TypeError`: When ``molecule`` is `None`.

        Caution:
            Since ``v0.6.0``, this function uses an optimized variant of
            the Jess scanning algorithm which minimized the number of steps
            needed to generate matches, by re-ordering the order the
            template atoms are iterated upon. Because of this change,
            the query may return *exactly* the same matches but in an order
            that *differs* from the original Jess version. If you really
            need results in the original order, set ``reorder`` to `False`.

        .. versionadded:: 0.6.0
           The ``reorder`` argument, defaulting to `True`.

        .. versionchanged:: 0.7.0
           Default value of ``max_candidates`` argument to `None`.

        .. versionchanged:: 0.7.0
           ``ignore_chain`` now expects string variants rather than `bool`.

        """

        # Booleans are still accepted for backward compatibility: they are
        # mapped to the new modes after emitting a deprecation warning.
        if ignore_chain is True:
            PyErr_WarnEx(
                DeprecationWarning,
                "`ignore_chain` parameter expects string parameters "
                "to specificy the mode since PyJess v0.7.0. "
                "Use `ignore_chain='atoms'` instead of `ignore_chain=True`",
                2,
            )
            ignore_chain="atoms"
        elif ignore_chain is False:
            PyErr_WarnEx(
                DeprecationWarning,
                "`ignore_chain` parameter expects string parameters "
                "to specificy the mode since PyJess v0.7.0. "
                "Use `ignore_chain=None` instead of `ignore_chain=False`",
                2,
            )
            ignore_chain=None

        # The query stores references to both the molecule and this object
        # next to the C query created from them.
        cdef Query query = Query.__new__(Query)
        query.max_candidates = max_candidates
        query.ignore_chain = ignore_chain
        query.rmsd_threshold = rmsd_threshold
        query.best_match = best_match
        query.molecule = molecule
        query.jess = self
        query._jq = jess.jess.Jess_query(
            self._jess,
            molecule._mol,
            distance_cutoff,
            max_dynamic_distance,
            reorder,
        )
        return query