pyjess 0.5.2__pp310-pypy310_pp73-win_amd64.whl → 0.7.0__pp310-pypy310_pp73-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjess might be problematic. Click here for more details.

pyjess/_jess.pyx CHANGED
@@ -2,6 +2,73 @@
2
2
  # cython: language_level=3, linetrace=True, binding=True
3
3
  """Bindings to Jess, a 3D template matching software.
4
4
 
5
+ Jess is an algorithm for constraint-based structural template matching
6
+ proposed by Jonathan Barker *et al.*. It can be used to identify
7
+ catalytic residues from a known template inside a protein structure.
8
+ Jess is an evolution of TESS, a geometric hashing algorithm developed by
9
+ Andrew Wallace *et al.*, removing some pre-computation and
10
+ structural requirements from the original algorithm.
11
+
12
+ PyJess is a Python module that provides bindings to Jess using
13
+ `Cython <https://cython.org/>`_. It allows creating templates, querying
14
+ them with protein structures, and retrieving the hits using a Python API
15
+ without performing any external I/O. It's also more than 10x faster than
16
+ Jess thanks to algorithmic optimizations added to improve the original Jess
17
+ code while producing consistent results.
18
+
19
+ Example:
20
+ Load templates from a file, either as a file-like object or
21
+ given a filename::
22
+
23
+ >>> t1 = pyjess.Template.load("1.3.3.tpl") # load from filename
24
+ >>> with open("4.1.2.tpl") as f: # load from a file object
25
+ ... t2 = pyjess.Template.load(f)
26
+
27
+ Load molecules from a file, either as a file-like object or given
28
+ a filename::
29
+
30
+ >>> mol = pyjess.Molecule.load("1AMY.pdb")
31
+ >>> mol[0]
32
+ Atom(serial=1, name='N', altloc=' ', residue_name='GLN', ...)
33
+
34
+ Create a `Jess` object storing the templates to support running
35
+ queries on them. The individual templates can still be accessed by
36
+ index::
37
+
38
+ >>> jess = pyjess.Jess([t1, t2])
39
+ >>> jess[0].id
40
+ '3r6v'
41
+
42
+ Run a query on the Jess object to retrieve all templates matching
43
+ a `Molecule`, *in no particular order*::
44
+
45
+ >>> hits = jess.query(mol, 2, 2, 2)
46
+ >>> for hit in hits:
47
+ ... print(hit.template.id, hit.rmsd)
48
+ 2om2 1.4386...
49
+ 2om2 1.4877...
50
+ 2om2 1.4376...
51
+ 2om2 1.5284...
52
+ 2om2 1.4863...
53
+ 2om2 1.4369...
54
+ 2om2 1.4790...
55
+ 2om2 1.1414...
56
+ 2om2 1.0755...
57
+ 2om2 1.1973...
58
+ 2om2 1.1353...
59
+ 2om2 1.0711...
60
+ 2om2 1.1494...
61
+
62
+ By default, a template can match a molecule in more than one way,
63
+ if several sets of atoms match the geometric constraints. Use the
64
+ ``best_match`` argument of `~Jess.query` to only retrieve the
65
+ best match per template::
66
+
67
+ >>> hits = jess.query(mol, 2, 2, 2, best_match=True)
68
+ >>> for hit in hits:
69
+ ... print(hit.template.id, hit.rmsd)
70
+ 2om2 1.071...
71
+
5
72
  References:
6
73
  - Barker, J. A., & Thornton, J. M. (2003). *An algorithm for
7
74
  constraint-based structural template matching: application to
@@ -18,10 +85,16 @@ References:
18
85
  # --- C imports --------------------------------------------------------------
19
86
 
20
87
  cimport cython
21
- from cpython.unicode cimport PyUnicode_FromStringAndSize
88
+ from cpython.exc cimport PyErr_WarnEx
89
+ from cpython.unicode cimport (
90
+ PyUnicode_FromStringAndSize,
91
+ PyUnicode_FromFormat,
92
+ PyUnicode_AsASCIIString,
93
+ )
22
94
 
23
95
  from libc.math cimport isnan, exp, INFINITY, NAN
24
- from libc.stdio cimport FILE, fclose, fdopen, printf
96
+ from libc.stdio cimport FILE, fclose, fdopen, printf, sprintf
97
+ from libc.stdint cimport uintptr_t
25
98
  from libc.stdlib cimport calloc, realloc, free, malloc
26
99
  from libc.string cimport memcpy, memset, strncpy, strdup
27
100
 
@@ -37,24 +110,20 @@ from jess.jess cimport Jess as _Jess
37
110
  from jess.jess cimport JessQuery as _JessQuery
38
111
  from jess.molecule cimport Molecule as _Molecule
39
112
  from jess.super cimport Superposition as _Superposition
40
- from jess.template cimport Template as _Template
113
+ from jess.template cimport Template as _Template, IgnoreType as _IgnoreType
41
114
  from jess.tess_template cimport TessTemplate as _TessTemplate
42
115
  from jess.tess_atom cimport TessAtom as _TessAtom
43
116
 
44
117
  # --- Python imports ---------------------------------------------------------
45
118
 
46
- import contextlib
47
119
  import functools
48
120
  import io
49
- import itertools
50
- import os
51
- import warnings
52
121
 
53
122
  __version__ = PROJECT_VERSION
54
123
 
55
124
  # --- Utils ------------------------------------------------------------------
56
125
 
57
- cdef inline void copy_token(char* dst, const char* src, size_t n) noexcept nogil:
126
+ cdef inline void encode_token(char* dst, const char* src, size_t n) noexcept nogil:
58
127
  cdef size_t i
59
128
  for i in range(n):
60
129
  if src[i] == ord(' ') or src[i] == 0:
@@ -63,12 +132,155 @@ cdef inline void copy_token(char* dst, const char* src, size_t n) noexcept nogil
63
132
  dst[i] = src[i]
64
133
  dst[n] = 0
65
134
 
66
- @contextlib.contextmanager
67
- def nullcontext(return_value=None):
68
- yield return_value
135
+ cdef inline void decode_token(char* dst, const char* src, size_t n) noexcept nogil:
136
+ cdef size_t i
137
+ for i in range(n):
138
+ if src[i] == ord('_') or src[i] == 0:
139
+ dst[i] = ord(' ')
140
+ else:
141
+ dst[i] = src[i]
142
+ dst[n] = 0
143
+
144
+ class nullcontext:
145
+ def __init__(self, return_value=None):
146
+ self.retval = return_value
147
+ def __enter__(self):
148
+ return self.retval
149
+ def __exit__(self, exc_type, exc_value, traceback):
150
+ return False
69
151
 
70
152
  # --- Classes ----------------------------------------------------------------
71
153
 
154
+ cdef class _MoleculeParser:
155
+ cdef str id
156
+
157
+ def __init__(self, str id = None):
158
+ self.id = id
159
+
160
+ cdef class _PDBMoleculeParser(_MoleculeParser):
161
+ cdef bint ignore_endmdl
162
+ cdef bint skip_hetatm
163
+
164
+ def __init__(self, str id = None, bint ignore_endmdl = False, bint skip_hetatm = False):
165
+ super().__init__(id=id)
166
+ self.ignore_endmdl = ignore_endmdl
167
+ self.skip_hetatm = skip_hetatm
168
+
169
+ def loads(self, text, molecule_type):
170
+ return self.load(io.StringIO(text), molecule_type)
171
+
172
+ def load(self, file, molecule_type):
173
+ cdef str line
174
+ cdef str id = self.id
175
+ cdef list atoms = []
176
+ try:
177
+ handle = open(file)
178
+ except TypeError:
179
+ handle = nullcontext(file)
180
+ with handle as f:
181
+ for line in f:
182
+ if line.startswith("HEADER"):
183
+ if id is None:
184
+ id = line[62:66].strip() or None
185
+ elif line.startswith("ATOM"):
186
+ atoms.append(Atom.loads(line))
187
+ elif line.startswith("HETATM") and not self.skip_hetatm:
188
+ atoms.append(Atom.loads(line))
189
+ elif line.startswith("ENDMDL"):
190
+ if not self.ignore_endmdl:
191
+ break
192
+ elif line.lower().startswith(("data_", "loop_")):
193
+ raise ValueError("mmCIF data tags found, file is not in PDB format")
194
+ return molecule_type(atoms, id=id)
195
+
196
+
197
+ cdef class _CIFMoleculeParser(_MoleculeParser):
198
+ cdef object gemmi
199
+ cdef bint use_author
200
+ cdef bint skip_hetatm
201
+
202
+ _PRIMARY_COLUMNS = [
203
+ 'id', 'type_symbol', 'label_atom_id', 'label_alt_id', 'label_comp_id',
204
+ 'label_asym_id', 'label_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
205
+ 'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
206
+ '?pdbx_formal_charge', '?group_PDB',
207
+ ]
208
+
209
+ _AUTH_COLUMNS = [
210
+ 'id', 'type_symbol', 'auth_atom_id', 'label_alt_id', 'auth_comp_id',
211
+ 'auth_asym_id', 'auth_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
212
+ 'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
213
+ '?pdbx_formal_charge', '?group_PDB',
214
+ ]
215
+
216
+ def __init__(self, str id = None, bint use_author = False, bint skip_hetatm = False):
217
+ super().__init__(id=id)
218
+ self.gemmi = __import__('gemmi')
219
+ self.use_author = use_author
220
+ self.skip_hetatm = skip_hetatm
221
+
222
+ def _load_block(self, document, molecule_type):
223
+ block = document.sole_block()
224
+ cols = self._AUTH_COLUMNS if self.use_author else self._PRIMARY_COLUMNS
225
+ table = block.find('_atom_site.', cols)
226
+ max_residue_number = 0
227
+
228
+ if not table:
229
+ raise ValueError("missing columns in CIF files")
230
+
231
+ atoms = []
232
+ for row in table:
233
+ if row[14] != "ATOM" and (row[14] != "HETATM" or self.skip_hetatm):
234
+ continue
235
+
236
+ if row[6] == "." and row[14] == "HETATM":
237
+ PyErr_WarnEx(
238
+ UserWarning,
239
+ "HETATM line found without residue number. Consider "
240
+ "parsing with use_author=True to use author-defined "
241
+ "residue numbers, or skip_hetatm=True to disable "
242
+ "parsing of HETATM altogether.",
243
+ 3,
244
+ )
245
+ residue_number = max_residue_number
246
+ max_residue_number += 1
247
+ else:
248
+ residue_number = int(row[6])
249
+ max_residue_number = max(residue_number, max_residue_number)
250
+
251
+ atom = Atom(
252
+ serial=int(row[0]),
253
+ element=row[1],
254
+ name=row[2],
255
+ altloc=' ' if row[3] == "." else row[3], # FIXME: replace with None?
256
+ residue_name=row[4],
257
+ chain_id=row[5],
258
+ residue_number=residue_number,
259
+ insertion_code=' ' if not row.has(7) or row[7] == "?" else row[7],
260
+ x=float(row[8]),
261
+ y=float(row[9]),
262
+ z=float(row[10]),
263
+ occupancy=0.0 if row[11] == '.' else float(row[11]),
264
+ temperature_factor=float(row[12]),
265
+ charge=0 if not row.has(13) or row[13] == "?" else int(row[13]),
266
+ )
267
+ atoms.append(atom)
268
+
269
+ id = block.name if self.id is None else self.id
270
+ return molecule_type(atoms, id=id)
271
+
272
+ def loads(self, text, molecule_type):
273
+ document = self.gemmi.cif.read_string(text)
274
+ return self._load_block(document, molecule_type)
275
+
276
+ def load(self, file, molecule_type):
277
+ if hasattr(file, "read"):
278
+ document = self.gemmi.cif.read_string(file.read())
279
+ else:
280
+ document = self.gemmi.cif.read_file(file)
281
+ return self._load_block(document, molecule_type)
282
+
283
+
72
284
  cdef class Molecule:
73
285
  """A molecule structure, as a sequence of `Atom` objects.
74
286
 
@@ -83,20 +295,40 @@ cdef class Molecule:
83
295
  cdef str _id
84
296
 
85
297
  @classmethod
86
- def loads(cls, text, str id = None, bint ignore_endmdl = False):
298
+ def loads(
299
+ cls,
300
+ text,
301
+ str format = "pdb",
302
+ *,
303
+ str id = None,
304
+ bint ignore_endmdl = False,
305
+ bint use_author = False,
306
+ bint skip_hetatm = False,
307
+ ):
87
308
  """Load a molecule from a PDB string.
88
309
 
89
310
  Arguments:
90
- file (`str`, `os.PathLike`, or file-like object): Either the path
91
- to a file, or a file-like object opened in **text mode**
92
- containing a PDB molecule.
311
+ text (`str`): The serialized molecule to parse into a new
312
+ object.
313
+ format (`str`): The format to parse the file. Supported formats
314
+ are: ``pdb`` for the Protein Data Bank format, ``cif``
315
+ for Crystallographic Information File format (additionally
316
+ requires the `gemmi` module), or ``detect`` to attempt
317
+ auto-detection (the default).
318
+
319
+ Keyword Arguments:
93
320
  id (`str`, optional): The identifier of the molecule. If `None`
94
321
  given, the parser will attempt to extract it from the
95
- ``HEADER`` line.
322
+ ``HEADER`` line (for PDB files) or the block name (for CIF
323
+ files).
96
324
  ignore_endmdl (`bool`): Pass `True` to make the parser read all
97
325
  the atoms from the PDB file. By default, the parser only
98
326
  reads the atoms of the first model, and stops at the first
99
- ``ENDMDL`` line.
327
+ ``ENDMDL`` line. *Ignored for CIF files*.
328
+ use_author (`bool`): Pass `True` to use the author-defined
329
+ labels while parsing CIF files, e.g. read the chain name
330
+ from ``_atom_site.auth_asym_id`` rather than
331
+ ``_atom_site.label_asym_id``. *Ignored for PDB files*.
100
332
 
101
333
  Returns:
102
334
  `~pyjess.Molecule`: The molecule parsed from the PDB file.
@@ -105,46 +337,264 @@ cdef class Molecule:
105
337
  `Molecule.load` to load a PDB molecule from a file-like
106
338
  object or from a path.
107
339
 
340
+ Caution:
341
+ Parsing from PDB file retains the heteroatoms (``HETATM`` lines)
342
+ while parsing from mmCIF usually discard them. This is because
343
+ mmCIF files store heteroatoms but do not require them to
344
+ have an associated residue number, which can throw off the way
345
+ atoms are modeled in Jess.
346
+
347
+ .. versionadded:: 0.7.0
348
+ The ``format`` argument, and support for CIF parsing.
349
+
108
350
  """
109
- return cls.load(io.StringIO(text), id=id, ignore_endmdl=ignore_endmdl)
351
+ if format == "detect":
352
+ format = "cif" if text.lstrip().startswith(("data_", "loop_")) else "pdb"
353
+ return cls.load(
354
+ io.StringIO(text),
355
+ format=format,
356
+ id=id,
357
+ ignore_endmdl=ignore_endmdl,
358
+ skip_hetatm=skip_hetatm,
359
+ )
110
360
 
111
361
  @classmethod
112
- def load(cls, file, str id = None, bint ignore_endmdl = False):
362
+ def load(
363
+ cls,
364
+ file,
365
+ str format = "detect",
366
+ *,
367
+ str id = None,
368
+ bint ignore_endmdl = False,
369
+ bint use_author = False,
370
+ bint skip_hetatm = False,
371
+ ):
113
372
  """Load a molecule from a PDB file.
114
373
 
115
374
  Arguments:
116
375
  file (`str`, `os.PathLike`, or file-like object): Either the path
117
376
  to a file, or a file-like object opened in **text mode**
118
- containing a PDB molecule.
377
+ containing a molecule.
378
+ format (`str`): The format to parse the file. Supported formats
379
+ are: ``pdb`` for the Protein Data Bank format, ``cif``
380
+ for Crystallographic Information File format (additionally
381
+ requires the `gemmi` module), or ``detect`` to attempt
382
+ auto-detection (the default).
383
+
384
+ Keyword Arguments:
119
385
  id (`str`, optional): The identifier of the molecule. If `None`
120
386
  given, the parser will attempt to extract it from the
121
- ``HEADER`` line.
387
+ ``HEADER`` line (for PDB files) or the block name (for CIF
388
+ files).
122
389
  ignore_endmdl (`bool`): Pass `True` to make the parser read all
123
390
  the atoms from the PDB file. By default, the parser only
124
391
  reads the atoms of the first model, and stops at the first
125
- ``ENDMDL`` line.
392
+ ``ENDMDL`` line. *Ignored for CIF files*.
393
+ use_author (`bool`): Pass `True` to use the author-defined
394
+ labels while parsing CIF files, e.g. read the chain name
395
+ from ``_atom_site.auth_asym_id`` rather than
396
+ ``_atom_site.label_asym_id``. *Ignored for PDB files*.
397
+ skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
398
+ (``HETATM``) in the input file.
126
399
 
127
400
  Returns:
128
401
  `~pyjess.Molecule`: The molecule parsed from the PDB file.
129
402
 
403
+ See Also:
404
+ `Molecule.loads` to load a PDB molecule from a string.
405
+
406
+ Caution:
407
+ Parsing from PDB file retains the heteroatoms (``HETATM`` lines)
408
+ while parsing from mmCIF usually discard them. This is because
409
+ mmCIF files store heteroatoms but do not require them to
410
+ have an associated residue number, which can throw off the way
411
+ atoms are modeled in Jess.
412
+
413
+ .. versionadded:: 0.7.0
414
+ The ``format`` and ``skip_hetatm`` arguments, and mmCIF support.
415
+
130
416
  """
131
- try:
132
- handle = open(file)
133
- except TypeError:
134
- handle = nullcontext(file)
135
- with handle as f:
136
- atoms = []
137
- for line in f:
138
- if line.startswith("HEADER"):
139
- if id is None:
140
- id = line[62:66].strip() or None
141
- elif line.startswith(("ATOM", "HETATM")):
142
- atoms.append(Atom.loads(line))
143
- elif line.startswith("ENDMDL"):
144
- if not ignore_endmdl:
145
- break
417
+ cdef _MoleculeParser parser
418
+ cdef str peek
419
+
420
+ if format == "detect":
421
+ try:
422
+ handle = open(file)
423
+ except TypeError:
424
+ handle = nullcontext(file)
425
+ with handle as f:
426
+ if f.seekable():
427
+ peek = f.read(5)
428
+ f.seek(0)
429
+ else:
430
+ f = f.read()
431
+ peek = f[5:]
432
+ if peek.startswith(("data_", "loop_")):
433
+ parser = _CIFMoleculeParser(
434
+ id=id,
435
+ use_author=use_author,
436
+ skip_hetatm=skip_hetatm,
437
+ )
438
+ else:
439
+ parser = _PDBMoleculeParser(
440
+ id=id,
441
+ ignore_endmdl=ignore_endmdl,
442
+ skip_hetatm=skip_hetatm,
443
+ )
444
+ if isinstance(f, str):
445
+ return parser.loads(f, molecule_type=cls)
446
+ return parser.load(f, molecule_type=cls)
447
+ if format == "pdb":
448
+ parser = _PDBMoleculeParser(
449
+ id=id,
450
+ ignore_endmdl=ignore_endmdl,
451
+ skip_hetatm=skip_hetatm
452
+ )
453
+ elif format == "cif":
454
+ parser = _CIFMoleculeParser(
455
+ id=id,
456
+ use_author=use_author,
457
+ skip_hetatm=skip_hetatm,
458
+ )
459
+ else:
460
+ raise ValueError(f"invalid value for `format` argument: {format!r}")
461
+ return parser.load(file, molecule_type=cls)
462
+
463
+ @classmethod
464
+ def from_biopython(cls, object structure, str id = None):
465
+ """Create a new `~pyjess.Molecule` from a `Bio.PDB.Structure`.
466
+
467
+ Arguments:
468
+ structure (`Bio.PDB.Structure` or `Bio.PDB.Model`): The
469
+ Biopython object containing the structure data.
470
+ id (`str` or `None`): The identifier to give to the newly
471
+ created molecule. If `None` given, will use the value of
472
+ ``structure.id``.
473
+
474
+ Returns:
475
+ `~pyjess.Molecule`: A molecule object suitable for using
476
+ in `Jess.query`.
477
+
478
+ .. versionadded:: 0.7.0
479
+
480
+ """
481
+ cdef list atoms = []
482
+ for c in structure.get_chains():
483
+ for r in c.get_residues():
484
+ _, residue_number, insertion_code = r.id
485
+ for a in r.get_atoms():
486
+ coord = a.get_coord()
487
+ atom = Atom(
488
+ name=a.fullname,
489
+ x=coord[0],
490
+ y=coord[1],
491
+ z=coord[2],
492
+ altloc=a.altloc,
493
+ charge=a.pqr_charge or 0,
494
+ occupancy=a.occupancy,
495
+ serial=a.serial_number,
496
+ residue_name=r.resname,
497
+ residue_number=residue_number,
498
+ segment=r.segid,
499
+ insertion_code=insertion_code,
500
+ chain_id=c.id,
501
+ temperature_factor=a.bfactor,
502
+ element=a.element,
503
+ )
504
+ atoms.append(atom)
505
+ return cls(atoms, id=structure.id)
506
+
507
+ @classmethod
508
+ def from_gemmi(cls, object model, str id=None):
509
+ """Create a new `~pyjess.Molecule` from a `gemmi.Model`.
510
+
511
+ Arguments:
512
+ model (`gemmi.Model`): The ``gemmi`` object
513
+ containing the structure data.
514
+ id (`str` or `None`): The identifier to give to the newly
515
+ created molecule.
516
+
517
+ Returns:
518
+ `~pyjess.Molecule`: A molecule object suitable for using
519
+ in `Jess.query`.
520
+
521
+ .. versionadded:: 0.7.0
522
+
523
+ """
524
+ cdef list atoms = []
525
+ for cra in model.all():
526
+ a = cra.atom
527
+ r = cra.residue
528
+ c = cra.chain
529
+ atom = Atom(
530
+ name=a.padded_name(),
531
+ x=a.pos[0],
532
+ y=a.pos[1],
533
+ z=a.pos[2],
534
+ altloc=' ' if a.altloc == '\0' else a.altloc,
535
+ charge=a.charge,
536
+ element=a.element.name.upper(),
537
+ occupancy=a.occ,
538
+ temperature_factor=a.b_iso,
539
+ serial=a.serial,
540
+ segment=r.segment,
541
+ residue_name=r.name,
542
+ residue_number=r.seqid.num,
543
+ chain_id=c.name,
544
+ insertion_code=r.seqid.icode,
545
+ )
546
+ atoms.append(atom)
146
547
  return cls(atoms, id=id)
147
548
 
549
+ @classmethod
550
+ def from_biotite(cls, object atom_array, str id=None):
551
+ """Create a new `~pyjess.Molecule` from a `biotite.structure.AtomArray`.
552
+
553
+ Arguments:
554
+ structure (`biotite.structure.AtomArray`): The ``biotite``
555
+ object containing the structure data.
556
+
557
+ Returns:
558
+ `~pyjess.Molecule`: A molecule object suitable for using
559
+ in `Jess.query`.
560
+
561
+ Caution:
562
+ If loading data with the `biotite.structure.io.pdb.PDBFile` module,
563
+ ensure that you are requesting all atoms and all extra fields
564
+ in `~biotite.structure.io.pdb.PDBFile.get_structure`::
565
+
566
+ db_file = PDBFile.read("data/1AMY.pdb")
567
+ structure = pdb_file.get_structure(
568
+ altloc="all",
569
+ extra_fields=["atom_id", "b_factor", "occupancy", "charge"],
570
+ )
571
+ molecule = Molecule.from_biotite(structure[0])
572
+
573
+ .. versionadded:: 0.7.0
574
+
575
+ """
576
+ cdef list atoms = []
577
+ for a in atom_array:
578
+ atom = Atom(
579
+ name=str(a.atom_name),
580
+ x=a.coord[0],
581
+ y=a.coord[1],
582
+ z=a.coord[2],
583
+ altloc=str(getattr(a, 'altloc', ' ')),
584
+ charge=getattr(a, 'charge', 0),
585
+ element=str(a.element),
586
+ occupancy=getattr(a, 'occupancy', 1.0),
587
+ temperature_factor=a.b_factor,
588
+ serial=a.atom_id,
589
+ segment=str(getattr(a, 'segment', '')),
590
+ residue_name=str(a.res_name),
591
+ residue_number=a.res_id,
592
+ chain_id=str(a.chain_id),
593
+ insertion_code=str(a.ins_code).ljust(1),
594
+ )
595
+ atoms.append(atom)
596
+ return cls(atoms)
597
+
148
598
  def __cinit__(self):
149
599
  self._mol = NULL
150
600
 
@@ -249,17 +699,32 @@ cdef class Molecule:
249
699
  return self._id
250
700
 
251
701
  cpdef Molecule conserved(self, double cutoff = 0.0):
702
+ """Get a molecule containing only a subset of conserved atoms.
703
+
704
+ Arguments:
705
+ cutoff (`float`): The conservation cutoff for atoms. Atoms
706
+ with a `~Atom.temperature_factor` lower than this value
707
+ will be removed from the result.
708
+
709
+ Returns:
710
+ `~pyjess.Molecule`: A new molecule with atoms below the
711
+ conservation cutoff removed.
712
+
713
+ """
252
714
  assert self._mol is not NULL
253
- cdef Atom atom
254
- return type(self)(
255
- id=self.id,
256
- atoms=[
257
- atom
258
- for atom in self
259
- if cutoff <= 0.0
260
- or atom._atom.tempFactor >= cutoff
261
- ]
262
- )
715
+
716
+ cdef size_t i
717
+ cdef list atoms
718
+
719
+ if cutoff <= 0.0:
720
+ return self.copy()
721
+
722
+ atoms = []
723
+ for i in range(self._mol.count):
724
+ if self._mol.atom[i].tempFactor >= cutoff:
725
+ atoms.append(self[i])
726
+
727
+ return type(self)(id=self.id, atoms=atoms)
263
728
 
264
729
  cpdef Molecule copy(self):
265
730
  """Create a copy of this molecule and its atoms.
@@ -329,8 +794,9 @@ cdef class Atom:
329
794
  atom metadata from.
330
795
 
331
796
  """
332
- cdef bytearray b
333
- cdef Atom atom
797
+ cdef const unsigned char* s
798
+ cdef bytearray b
799
+ cdef Atom atom
334
800
 
335
801
  if isinstance(text, str):
336
802
  b = bytearray(text, 'utf-8')
@@ -339,14 +805,15 @@ cdef class Atom:
339
805
  if not b.endswith(b'\n'):
340
806
  b.append(b'\n')
341
807
  b.append(b'\0')
808
+ s = b
342
809
 
343
810
  atom = cls.__new__(cls)
344
- atom._atom = <_Atom*> malloc(sizeof(_Atom))
345
- if atom._atom == NULL:
346
- raise MemoryError("Failed to allocate atom")
347
-
348
- if not jess.atom.Atom_parse(atom._atom, b):
349
- raise ValueError(f"Failed to parse atom: {text!r}")
811
+ with nogil:
812
+ atom._atom = <_Atom*> malloc(sizeof(_Atom))
813
+ if atom._atom == NULL:
814
+ raise MemoryError("Failed to allocate atom")
815
+ if not jess.atom.Atom_parse(atom._atom, <const char*> s):
816
+ raise ValueError(f"Failed to parse atom: {text!r}")
350
817
 
351
818
  return atom
352
819
 
@@ -364,21 +831,21 @@ cdef class Atom:
364
831
  *,
365
832
  int serial,
366
833
  str name,
367
- str altloc,
368
834
  str residue_name,
369
835
  str chain_id,
370
836
  int residue_number,
371
- str insertion_code,
372
837
  double x,
373
838
  double y,
374
839
  double z,
375
840
  double occupancy = 0.0,
376
841
  double temperature_factor = 0.0,
842
+ str altloc = ' ',
843
+ str insertion_code = ' ',
377
844
  str segment = '',
378
845
  str element = '',
379
846
  int charge = 0,
380
847
  ):
381
- """__init__(self, *, serial, name, altloc, residue_name, chain_id, residue_number, insertion_code, x, y, z, occupancy=0.0, temperature_factor=0.0, segment='', element='', charge=0)\n--\n
848
+ """__init__(self, *, serial, name, residue_name, chain_id, residue_number, x, y, z, occupancy=0.0, temperature_factor=0.0, altloc=' ', insertion_code=' ', segment='', element='', charge=0)\n--\n
382
849
 
383
850
  Create a new atom.
384
851
 
@@ -390,11 +857,16 @@ cdef class Atom:
390
857
  long.
391
858
 
392
859
  """
860
+ cdef bytearray _name
861
+ cdef bytes _residue_name
862
+ cdef bytes _segment
863
+ cdef bytes _element
864
+
393
865
  if len(name) > 4:
394
866
  raise ValueError(f"Invalid atom name: {name!r}")
395
867
  if len(residue_name) > 3:
396
868
  raise ValueError(f"Invalid residue name: {residue_name!r}")
397
- if len(segment) > 3:
869
+ if len(segment) > 4:
398
870
  raise ValueError(f"Invalid segment: {segment!r}")
399
871
  if len(element) > 2:
400
872
  raise ValueError(f"Invalid element: {element!r}")
@@ -405,6 +877,10 @@ cdef class Atom:
405
877
  if self._atom is NULL:
406
878
  raise MemoryError("Failed to allocate atom")
407
879
 
880
+ _residue_name = PyUnicode_AsASCIIString(residue_name)
881
+ _segment = PyUnicode_AsASCIIString(segment)
882
+ _element = PyUnicode_AsASCIIString(element)
883
+
408
884
  self._atom.serial = serial
409
885
  self._atom.altLoc = ord(altloc)
410
886
  self._atom.chainID1 = ord(chain_id[0]) if len(chain_id) > 0 else 0
@@ -417,14 +893,15 @@ cdef class Atom:
417
893
  self._atom.occupancy = occupancy
418
894
  self._atom.tempFactor = temperature_factor
419
895
  self._atom.charge = charge
420
- copy_token(self._atom.resName, residue_name.encode('ascii').ljust(3, b'\0'), 3)
421
- copy_token(self._atom.segID, segment.encode('ascii').ljust(3, b'\0'), 3)
422
- copy_token(self._atom.element, element.encode('ascii').ljust(2, b'\0'), 2)
896
+ encode_token(self._atom.resName, _residue_name.ljust(3, b'\0'), 3)
897
+ encode_token(self._atom.segID, _segment.ljust(4, b'\0'), 4)
898
+ encode_token(self._atom.element, _element.ljust(2, b'\0'), 2)
423
899
 
900
+ # FIXME: is alignment proper?
424
901
  _name = bytearray(name, 'ascii')
425
902
  if len(_name) < 4:
426
903
  _name.insert(0, ord('_'))
427
- copy_token(self._atom.name, _name.ljust(4, b'\0'), 4)
904
+ encode_token(self._atom.name, _name.ljust(4, b'\0'), 4)
428
905
 
429
906
  def __copy__(self):
430
907
  return self.copy()
@@ -517,7 +994,7 @@ cdef class Atom:
517
994
  """`str`: The segment identifier.
518
995
  """
519
996
  assert self._atom is not NULL
520
- return self._atom.segID[:3].decode('ascii').strip('_')
997
+ return self._atom.segID[:4].decode('ascii').strip('_')
521
998
 
522
999
  @property
523
1000
  def element(self):
@@ -538,7 +1015,7 @@ cdef class Atom:
538
1015
  """`str`: The identifier of the chain the atom belongs to.
539
1016
  """
540
1017
  assert self._atom is not NULL
541
- return "{}{}".format(chr(self._atom.chainID1), chr(self._atom.chainID2)).strip()
1018
+ return PyUnicode_FromFormat("%c%c", self._atom.chainID1, self._atom.chainID2).strip()
542
1019
 
543
1020
  @property
544
1021
  def occupancy(self):
@@ -563,16 +1040,22 @@ cdef class Atom:
563
1040
 
564
1041
  @property
565
1042
  def x(self):
1043
+ """`float`: The atom coordinate in the 1st dimension.
1044
+ """
566
1045
  assert self._atom is not NULL
567
1046
  return self._atom.x[0]
568
1047
 
569
1048
  @property
570
1049
  def y(self):
1050
+ """`float`: The atom coordinate in the 2nd dimension.
1051
+ """
571
1052
  assert self._atom is not NULL
572
1053
  return self._atom.x[1]
573
1054
 
574
1055
  @property
575
1056
  def z(self):
1057
+ """`float`: The atom coordinate in the 3rd dimension.
1058
+ """
576
1059
  assert self._atom is not NULL
577
1060
  return self._atom.x[2]
578
1061
 
@@ -734,18 +1217,19 @@ cdef class TemplateAtom:
734
1217
  _name = bytearray(name, 'ascii')
735
1218
  else:
736
1219
  _name = bytearray(name)
1220
+ # FIXME: is alignment proper?
737
1221
  if len(_name) > 4:
738
1222
  raise ValueError(f"Invalid atom name: {name!r}")
739
- elif len(_name) < 3:
1223
+ elif len(_name) <= 3:
740
1224
  _name.insert(0, ord('_'))
741
- copy_token(self._atom.name[m], _name.ljust(4, b'\0'), 4)
1225
+ encode_token(self._atom.name[m], _name.ljust(4, b'\0'), 4)
742
1226
 
743
1227
  # copy residue names
744
1228
  for m, name in enumerate(residue_names):
745
1229
  _name = name.encode('ascii') if isinstance(name, str) else name
746
1230
  if len(_name) > 3:
747
1231
  raise ValueError(f"Invalid residue name: {name!r}")
748
- copy_token(self._atom.resName[m], _name.ljust(3, b'\0'), 3)
1232
+ encode_token(self._atom.resName[m], _name.ljust(3, b'\0'), 3)
749
1233
 
750
1234
  cdef dict _state(self):
751
1235
  return {
@@ -819,7 +1303,7 @@ cdef class TemplateAtom:
819
1303
  assert self._atom is not NULL
820
1304
  cdef char c1 = jess.tess_atom.TessAtom_chainID1(self._atom)
821
1305
  cdef char c2 = jess.tess_atom.TessAtom_chainID2(self._atom)
822
- return "{}{}".format(chr(c1), chr(c2)).strip()
1306
+ return PyUnicode_FromFormat("%c%c", c1, c2).strip()
823
1307
 
824
1308
  @property
825
1309
  def x(self):
@@ -893,7 +1377,10 @@ cdef class TemplateAtom:
893
1377
  .. versionadded:: 0.4.0
894
1378
 
895
1379
  """
896
- return type(self)(**self._state())
1380
+ cdef TemplateAtom atom = TemplateAtom.__new__(TemplateAtom)
1381
+ with nogil:
1382
+ atom._atom = jess.tess_atom.TessAtom_copy(self._atom)
1383
+ return atom
897
1384
 
898
1385
 
899
1386
  cdef class Template:
@@ -946,12 +1433,13 @@ cdef class Template:
946
1433
  `~pyjess.Template`: The template parsed from the given file.
947
1434
 
948
1435
  """
1436
+ cdef str line
1437
+ cdef list atoms = []
949
1438
  try:
950
1439
  handle = open(file)
951
1440
  except TypeError:
952
1441
  handle = nullcontext(file)
953
1442
  with handle as f:
954
- atoms = []
955
1443
  for line in f:
956
1444
  if line.startswith("ATOM"):
957
1445
  atoms.append(TemplateAtom.loads(line))
@@ -1048,7 +1536,14 @@ cdef class Template:
1048
1536
  self._tess.distance[j][i] = dist
1049
1537
 
1050
1538
  # compute dimension
1051
- residues = { self._tess.atom[i].resSeq for i in range(count) }
1539
+ residues = {
1540
+ (
1541
+ self._tess.atom[i].resSeq ,
1542
+ self._tess.atom[i].chainID1,
1543
+ self._tess.atom[i].chainID2,
1544
+ )
1545
+ for i in range(count)
1546
+ }
1052
1547
  self._tess.dim = len(residues)
1053
1548
 
1054
1549
  def __copy__(self):
@@ -1131,6 +1626,8 @@ cdef class Template:
1131
1626
 
1132
1627
  @property
1133
1628
  def id(self):
1629
+ """`str` or `None`: An identifier for the template, if any.
1630
+ """
1134
1631
  assert self._tpl is not NULL
1135
1632
 
1136
1633
  cdef const char* name = self._tpl.name(self._tpl)
@@ -1146,11 +1643,18 @@ cdef class Template:
1146
1643
  return self._tess.dim
1147
1644
 
1148
1645
  cpdef Template copy(self):
1149
- return Template(
1150
- self,
1151
- self.id
1152
- )
1646
+ """Create a copy of the template.
1153
1647
 
1648
+ Returns:
1649
+ `~pyjess.Template`: A new template object with identical
1650
+ attributes and a copy of the `TemplateAtom` it contains.
1651
+
1652
+ """
1653
+ cdef Template tpl = Template.__new__(Template)
1654
+ with nogil:
1655
+ tpl._tpl = self._tpl.copy(self._tpl)
1656
+ tpl._tess = <_TessTemplate*> &tpl._tpl[1]
1657
+ return tpl
1154
1658
 
1155
1659
  cdef class Query:
1156
1660
  """A query over templates with a given molecule.
@@ -1166,10 +1670,6 @@ cdef class Query:
1166
1670
  the templates.
1167
1671
  rmsd_threshold (`float`): The RMSD threshold for reporting
1168
1672
  results.
1169
- max_candidates (`int`): The maximum number of candidate hits
1170
- to report.
1171
- ignore_chain (`bool`): Whether to check or ignore the chain of
1172
- the atoms to match.
1173
1673
  best_match (`bool`): Whether the query will return only the
1174
1674
  best match to each template.
1175
1675
 
@@ -1177,18 +1677,20 @@ cdef class Query:
1177
1677
  cdef _JessQuery* _jq
1178
1678
  cdef bint _partial
1179
1679
  cdef int _candidates
1680
+ cdef uintptr_t _prev_tpl
1681
+ cdef int _max_candidates
1682
+ cdef _IgnoreType _ignore_chain
1180
1683
 
1181
1684
  cdef readonly Jess jess
1182
1685
  cdef readonly Molecule molecule
1183
- cdef readonly bint ignore_chain
1184
1686
  cdef readonly bint best_match
1185
1687
  cdef readonly double rmsd_threshold
1186
- cdef readonly int max_candidates
1187
1688
 
1188
1689
  def __cinit__(self):
1189
1690
  self._jq = NULL
1190
1691
  self._candidates = 0
1191
1692
  self._partial = False
1693
+ self._prev_tpl = 0
1192
1694
 
1193
1695
  def __dealloc__(self):
1194
1696
  jess.jess.JessQuery_free(self._jq)
@@ -1196,11 +1698,48 @@ cdef class Query:
1196
1698
  def __iter__(self):
1197
1699
  return self
1198
1700
 
1701
+ @property
1702
+ def ignore_chain(self):
1703
+ """`str` or `None`: The way atom chains are considered or discarded.
1704
+ """
1705
+ if self._ignore_chain == _IgnoreType.ignoreNone:
1706
+ return None
1707
+ elif self._ignore_chain == _IgnoreType.ignoreResidues:
1708
+ return "residues"
1709
+ elif self._ignore_chain == _IgnoreType.ignoreAtoms:
1710
+ return "atoms"
1711
+
1712
+ @ignore_chain.setter
1713
+ def ignore_chain(self, ignore_chain):
1714
+ if ignore_chain is None:
1715
+ self._ignore_chain = _IgnoreType.ignoreNone
1716
+ elif ignore_chain == "residues":
1717
+ self._ignore_chain = _IgnoreType.ignoreResidues
1718
+ elif ignore_chain == "atoms":
1719
+ self._ignore_chain = _IgnoreType.ignoreAtoms
1720
+ else:
1721
+ raise ValueError(f"invalid value for `ignore_chain`: {ignore_chain!r}")
1722
+
1723
+ @property
1724
+ def max_candidates(self):
1725
+ """`int` or `None`: The maximum number of candidate hits to report *by template*.
1726
+ """
1727
+ return None if self._max_candidates == -1 else self._max_candidates
1728
+
1729
+ @max_candidates.setter
1730
+ def max_candidates(self, max_candidates):
1731
+ if max_candidates is None:
1732
+ self._max_candidates = -1
1733
+ elif max_candidates >= 0:
1734
+ self._max_candidates = max_candidates
1735
+ else:
1736
+ raise ValueError(f"invalid value for `max_candidates` argument: {max_candidates!r}")
1737
+
1199
1738
  cdef bint _advance(self) noexcept nogil:
1200
1739
  if self._partial:
1201
1740
  self._partial = False
1202
1741
  return True
1203
- return jess.jess.JessQuery_next(self._jq, self.ignore_chain)
1742
+ return jess.jess.JessQuery_next(self._jq, self._ignore_chain)
1204
1743
 
1205
1744
  cdef bint _rewind(self) noexcept nogil:
1206
1745
  self._partial = True
@@ -1244,10 +1783,11 @@ cdef class Query:
1244
1783
 
1245
1784
  # search the next hit without the GIL to allow parallel queries.
1246
1785
  with nogil:
1247
- while self._advance() and self._candidates < self.max_candidates:
1786
+ while self._advance():
1248
1787
  # load current iteration template, and check that the hit
1249
1788
  # was obtained with the current template and not with the
1250
1789
  # previous one
1790
+ self._prev_tpl = <uintptr_t> tpl
1251
1791
  tpl = jess.jess.JessQuery_template(self._jq)
1252
1792
  if hit_found and hit_tpl != tpl:
1253
1793
  self._rewind()
@@ -1274,10 +1814,10 @@ cdef class Query:
1274
1814
 
1275
1815
  if nan:
1276
1816
  with gil:
1277
- warnings.warn(
1278
- "Jess returned a superposition matrix with NaN values",
1817
+ PyErr_WarnEx(
1279
1818
  UserWarning,
1280
- stacklevel=2,
1819
+ "Jess returned a superposition matrix with NaN values",
1820
+ 2,
1281
1821
  )
1282
1822
  else:
1283
1823
  self._copy_atoms(tpl, hit)
@@ -1286,9 +1826,21 @@ cdef class Query:
1286
1826
  hit_tpl = tpl
1287
1827
  hit_found = True
1288
1828
 
1289
- # free superposition items that are not used in a hit, and
1290
- # return hits immediately if we are not in best match mode
1291
- self._candidates += 1
1829
+ # check if we already made it to the next template,
1830
+ # or if we need to short-circuit the iteration and
1831
+ # force the query to move to the next template as
1832
+ # we found too many candidates already.
1833
+ if <uintptr_t> tpl != self._prev_tpl:
1834
+ self._candidates = 0
1835
+ else:
1836
+ self._candidates += 1
1837
+ if self._max_candidates != -1 and self._candidates > self._max_candidates:
1838
+ self._candidates = 0
1839
+ jess.jess.JessQuery_nextTemplate(self._jq)
1840
+
1841
+ # free superposition items (as relevant data was copied in
1842
+ # the Hit if needed) and return hits immediately if we are
1843
+ # not in best match mode
1292
1844
  jess.super.Superposition_free(sup)
1293
1845
  if hit_found and not self.best_match:
1294
1846
  break
@@ -1355,6 +1907,18 @@ cdef class Hit:
1355
1907
  for i, atom in enumerate(state["atoms"]):
1356
1908
  memcpy(&self._atoms[i], atom._atom, sizeof(_Atom))
1357
1909
 
1910
+ cdef void _transform_atom(self, double* x, const double* src):
1911
+ cdef size_t i
1912
+ cdef size_t j
1913
+ cdef const double* M = self._rotation
1914
+ cdef const double* c = self._centre[0]
1915
+ cdef const double* v = self._centre[1]
1916
+
1917
+ for i in range(3):
1918
+ x[i] = v[i]
1919
+ for j in range(3):
1920
+ x[i] += M[3*i + j] * (src[j] - c[j])
1921
+
1358
1922
  @property
1359
1923
  def determinant(self):
1360
1924
  """`float`: The determinant of the rotation matrix.
@@ -1423,15 +1987,11 @@ cdef class Hit:
1423
1987
  if transform:
1424
1988
  atom._atom = <_Atom*> malloc(sizeof(_Atom))
1425
1989
  memcpy(atom._atom, &self._atoms[k], sizeof(_Atom))
1426
- for i in range(3):
1427
- atom._atom.x[i] = v[i]
1428
- for j in range(3):
1429
- atom._atom.x[i] += M[3*i + j] * (self._atoms[k].x[j] - c[j])
1990
+ self._transform_atom(atom._atom.x, self._atoms[k].x)
1430
1991
  else:
1431
1992
  atom.owned = True
1432
1993
  atom.owner = self
1433
1994
  atom._atom = &self._atoms[k]
1434
-
1435
1995
  atoms.append(atom)
1436
1996
 
1437
1997
  return atoms
@@ -1467,17 +2027,142 @@ cdef class Hit:
1467
2027
  mol = self._molecule.copy()
1468
2028
  for k in range(mol._mol.count):
1469
2029
  atom = mol._mol.atom[k]
1470
- for i in range(3):
1471
- atom.x[i] = v[i]
1472
- for j in range(3):
1473
- atom.x[i] += M[3*i + j] * (self._molecule._mol.atom[k].x[j] - c[j])
2030
+ self._transform_atom(atom.x, self._molecule._mol.atom[k].x)
1474
2031
 
1475
2032
  return mol
1476
2033
 
2034
+ cpdef str dumps(self, str format="pdb", bint transform=True):
2035
+ """Write the hit to a string.
2036
+
2037
+ Arguments:
2038
+ format (`str`): The format in which to write the hit.
2039
+ Currently only supports ``pdb``, which writes the hits
2040
+ in the same format as Jess.
2041
+ transform (`bool`): Whether or not to transform coordinates
2042
+ of the molecule atoms into template frame.
2043
+
2044
+ Raises:
2045
+ `RuntimeError`: When attempting to dump a `Hit` which was
2046
+ obtained from a `Template` which has no `~Template.id`.
2047
+
2048
+ .. versionadded:: 0.7.0
2049
+
2050
+ """
2051
+ file = io.StringIO()
2052
+ self.dump(file, format=format, transform=transform)
2053
+ return file.getvalue()
2054
+
2055
+ cpdef void dump(self, object file, str format="pdb", bint transform=True):
2056
+ """Write the hit to a file.
2057
+
2058
+ Arguments:
2059
+ file (file-like object): A file opened in *text* mode where the
2060
+ hit will be written.
2061
+ format (`str`): The format in which to write the hit.
2062
+ Currently only supports ``pdb``, which writes the hits
2063
+ in the same format as Jess.
2064
+ transform (`bool`): Whether or not to transform coordinates
2065
+ of the molecule atoms into template frame.
2066
+
2067
+ Raises:
2068
+ `RuntimeError`: When attempting to dump a `Hit` which was
2069
+ obtained from a `Template` which has no `~Template.id`.
2070
+
2071
+ .. versionadded:: 0.7.0
2072
+
2073
+ """
2074
+ assert self.template._tpl is not NULL
2075
+ assert self._molecule._mol is not NULL
2076
+
2077
+ cdef _Atom* atom
2078
+ cdef size_t k
2079
+ cdef char[80] buffer
2080
+ cdef char[5] name
2081
+ cdef char[5] resname
2082
+ cdef double[3] x
2083
+ cdef int count = self.template._tpl.count(self.template._tpl)
2084
+
2085
+ if self.template.id is None:
2086
+ raise RuntimeError("cannot dump `Hit` where `self.template.id` is `None`")
2087
+
2088
+ file.write("REMARK ")
2089
+ file.write(self._molecule.id)
2090
+ file.write(f" {self.rmsd:5.3f} ")
2091
+ file.write(self.template.id)
2092
+ file.write(f" Det={self.determinant:4,.1f} log(E)~ {self.log_evalue:4.2f}\n")
2093
+
2094
+ for k in range(count):
2095
+ atom = &self._atoms[k]
2096
+ decode_token(name, atom.name, 4)
2097
+ decode_token(resname, atom.resName, 3)
2098
+ if transform:
2099
+ self._transform_atom(x, atom.x)
2100
+ else:
2101
+ memcpy(x, atom.x, 3*sizeof(double))
2102
+ n = sprintf(
2103
+ buffer,
2104
+ "ATOM %5i%5s%c%-3s%c%c%4i%-4c%8.3f%8.3f%8.3f%6.2f%6.2f\n",
2105
+ atom.serial,
2106
+ name,
2107
+ atom.altLoc,
2108
+ resname,
2109
+ atom.chainID1,
2110
+ atom.chainID2,
2111
+ atom.resSeq,
2112
+ atom.iCode,
2113
+ x[0],
2114
+ x[1],
2115
+ x[2],
2116
+ atom.occupancy,
2117
+ atom.tempFactor,
2118
+ atom.segID,
2119
+ atom.element,
2120
+ atom.charge
2121
+ )
2122
+ file.write(PyUnicode_FromStringAndSize(buffer, n))
2123
+ file.write("ENDMDL\n")
1477
2124
 
1478
2125
  cdef class Jess:
1479
2126
  """A handle to run Jess over a list of templates.
1480
2127
 
2128
+ Example:
2129
+ Create a `Jess` object from a list of templates::
2130
+
2131
+ >>> t1 = Template.load("1.3.3.tpl")
2132
+ >>> t2 = Template.load("4.1.2.tpl")
2133
+ >>> jess = Jess([t1, t2])
2134
+
2135
+ Once initialized, the `Jess` object cannot be modified further.
2136
+ Use the `~Jess.query` method to query the templates with a
2137
+ molecule::
2138
+
2139
+ >>> molecule = Molecule.load("1AMY.pdb")
2140
+ >>> query = jess.query(molecule, 2, 2, 2)
2141
+
2142
+ The returned `Query` object is an iterator that can be
2143
+ advanced through a ``for`` loop, or with the `next` built-in
2144
+ function to get the first hit:
2145
+
2146
+ >>> hit = next(query)
2147
+ >>> hit.rmsd
2148
+ 1.4386...
2149
+
2150
+ The hit can also be formatted in PDB format like in the
2151
+ original JESS code::
2152
+
2153
+ >>> print(hit.dumps(format="pdb"), end="")
2154
+ REMARK 1AMY 1.439 2om2 Det= 1.0 log(E)~ 1.11
2155
+ ATOM 729 CA THR A 94 34.202 -24.426 8.851 1.00 2.00
2156
+ ATOM 732 CB THR A 94 35.157 -23.467 8.101 1.00 4.66
2157
+ ATOM 733 OG1 THR A 94 36.338 -23.247 8.871 1.00 9.85
2158
+ ATOM 746 CD GLU A 96 41.454 -29.509 8.013 1.00 24.05
2159
+ ATOM 748 OE2 GLU A 96 42.536 -29.680 7.441 1.00 34.44
2160
+ ATOM 747 OE1 GLU A 96 41.212 -28.521 8.708 1.00 18.56
2161
+ ATOM 437 CZ ARG A 55 44.471 -26.619 10.181 1.00 8.51
2162
+ ATOM 436 NE ARG A 55 44.334 -27.346 11.290 1.00 9.05
2163
+ ATOM 438 NH1 ARG A 55 43.590 -26.751 9.179 1.00 13.17
2164
+ ENDMDL
2165
+
1481
2166
  .. versionadded:: 0.4.0
1482
2167
  Equality, hashing and pickle protocol support.
1483
2168
 
@@ -1579,9 +2264,10 @@ cdef class Jess:
1579
2264
  double distance_cutoff,
1580
2265
  double max_dynamic_distance,
1581
2266
  *,
1582
- int max_candidates = 1000,
1583
- bint ignore_chain = False,
2267
+ object max_candidates = None,
2268
+ object ignore_chain = None,
1584
2269
  bint best_match = False,
2270
+ bint reorder = True,
1585
2271
  ):
1586
2272
  """Scan for templates matching the given molecule.
1587
2273
 
@@ -1596,18 +2282,81 @@ cdef class Jess:
1596
2282
  dynamic distance after adding the global distance cutoff
1597
2283
  and the individual atom distance cutoff defined for each
1598
2284
  atom of the template.
1599
- ignore_chain (`bool`): Whether to check or ignore the chain of
1600
- the atoms to match.
2285
+ max_candidates (`int` or `None`): The maximum number of candidate
2286
+ hits to report by template. If a non-`None` value is given,
2287
+ it may speed up querying for unspecific templates, but also
2288
+ produce results potentially inconsistent with Jess.
2289
+ ignore_chain (`str` or `None`): Whether to check or ignore the
2290
+ chain of the atoms to match. The different supported modes
2291
+ are:
2292
+
2293
+ - `None`: Force the atoms in the molecule to belong
2294
+ to different (resp. same) chains if so is the case
2295
+ in the template.
2296
+ - ``residues``: Allow atoms to belong to different
2297
+ (resp. same) chains even if it is not the case in
2298
+ the template, but force all atoms of a residue to
2299
+ belong to the same chain.
2300
+ - ``atoms``: Allow atoms to belong to any chain,
2301
+ independently of the template or the residue they
2302
+ belong to.
2303
+
1601
2304
  best_match (`bool`): Pass `True` to return only the best match
1602
- to each template.
2305
+ to each template, based on RMSD. In case of ties, the
2306
+ first match is returned. Note that a match must still
2307
+ be passing the RMSD threshold given in ``rmsd_threshold``
2308
+ to be returned.
2309
+ reorder (`bool`): Whether to enable template atom reordering
2310
+ to accelerate matching in the scanner algorithm. Pass
2311
+ `False` to reverse to the original, slower algorithm
2312
+ which matches atoms in the same order as they appear in
2313
+ the template, at the cost of longer run times.
1603
2314
 
1604
2315
  Returns:
1605
2316
  `~pyjess.Query`: An iterator over the query hits.
1606
2317
 
2318
+ Caution:
2319
+ Since ``v0.6.0``, this function uses an optimized variant of
2320
+ the Jess scanning algorithm which minimized the number of steps
2321
+ needed to generate matches, by re-ordering the order the
2322
+ template atoms are iterated upon. Because of this change,
2323
+ the query may return *exactly* the same matches but in an order
2324
+ that *differs* from the original Jess version. If you really
2325
+ need results in the original order, set ``reorder`` to `False`.
2326
+
2327
+ .. versionadded:: 0.6.0
2328
+ The ``reorder`` argument, defaulting to `True`.
2329
+
2330
+ .. versionchanged:: 0.7.0
2331
+ Default value of ``max_candidates`` argument to `None`.
2332
+
2333
+ .. versionchanged:: 0.7.0
2334
+ ``ignore_chain`` now expects string variants rather than `bool`.
2335
+
1607
2336
  """
2337
+
2338
+ if ignore_chain is True:
2339
+ PyErr_WarnEx(
2340
+ DeprecationWarning,
2341
+ "`ignore_chain` parameter expects string parameters "
2342
+ "to specificy the mode since PyJess v0.7.0. "
2343
+ "Use `ignore_chain='atoms'` instead of `ignore_chain=True`",
2344
+ 2,
2345
+ )
2346
+ ignore_chain="atoms"
2347
+ elif ignore_chain is False:
2348
+ PyErr_WarnEx(
2349
+ DeprecationWarning,
2350
+ "`ignore_chain` parameter expects string parameters "
2351
+ "to specificy the mode since PyJess v0.7.0. "
2352
+ "Use `ignore_chain=None` instead of `ignore_chain=False`",
2353
+ 2,
2354
+ )
2355
+ ignore_chain=None
2356
+
1608
2357
  cdef Query query = Query.__new__(Query)
1609
- query.ignore_chain = ignore_chain
1610
2358
  query.max_candidates = max_candidates
2359
+ query.ignore_chain = ignore_chain
1611
2360
  query.rmsd_threshold = rmsd_threshold
1612
2361
  query.best_match = best_match
1613
2362
  query.molecule = molecule
@@ -1617,5 +2366,6 @@ cdef class Jess:
1617
2366
  molecule._mol,
1618
2367
  distance_cutoff,
1619
2368
  max_dynamic_distance,
2369
+ reorder,
1620
2370
  )
1621
2371
  return query