pyjess 0.6.0__pp39-pypy39_pp73-win_amd64.whl → 0.7.0__pp39-pypy39_pp73-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyjess/_jess.pyx CHANGED
@@ -2,6 +2,73 @@
2
2
  # cython: language_level=3, linetrace=True, binding=True
3
3
  """Bindings to Jess, a 3D template matching software.
4
4
 
5
+ Jess is an algorithm for constraint-based structural template matching
6
+ proposed by Jonathan Barker *et al.*. It can be used to identify
7
+ catalytic residues from a known template inside a protein structure.
8
+ Jess is an evolution of TESS, a geometric hashing algorithm developed by
9
+ Andrew Wallace *et al.*, removing some pre-computation and
10
+ structural requirements from the original algorithm.
11
+
12
+ PyJess is a Python module that provides bindings to Jess using
13
+ `Cython <https://cython.org/>`_. It allows creating templates, querying
14
+ them with protein structures, and retrieving the hits using a Python API
15
+ without performing any external I/O. It's also more than 10x faster than
16
+ Jess thanks to algorithmic optimizations added to improve the original Jess
17
+ code while producing consistent results.
18
+
19
+ Example:
20
+ Load templates from a file, either as a file-like object or
21
+ given a filename::
22
+
23
+ >>> t1 = pyjess.Template.load("1.3.3.tpl") # load from filename
24
+ >>> with open("4.1.2.tpl") as f: # load from a file object
25
+ ... t2 = pyjess.Template.load(f)
26
+
27
+ Load molecules from a file, either as a file-like object or given
28
+ a filename::
29
+
30
+ >>> mol = pyjess.Molecule.load("1AMY.pdb")
31
+ >>> mol[0]
32
+ Atom(serial=1, name='N', altloc=' ', residue_name='GLN', ...)
33
+
34
+ Create a `Jess` object storing the templates to support running
35
+ queries on them. The individual templates can still be accessed by
36
+ index::
37
+
38
+ >>> jess = pyjess.Jess([t1, t2])
39
+ >>> jess[0].id
40
+ '3r6v'
41
+
42
+ Run a query on the Jess object to retrieve all templates matching
43
+ a `Molecule`, *in no particular order*::
44
+
45
+ >>> hits = jess.query(mol, 2, 2, 2)
46
+ >>> for hit in hits:
47
+ ... print(hit.template.id, hit.rmsd)
48
+ 2om2 1.4386...
49
+ 2om2 1.4877...
50
+ 2om2 1.4376...
51
+ 2om2 1.5284...
52
+ 2om2 1.4863...
53
+ 2om2 1.4369...
54
+ 2om2 1.4790...
55
+ 2om2 1.1414...
56
+ 2om2 1.0755...
57
+ 2om2 1.1973...
58
+ 2om2 1.1353...
59
+ 2om2 1.0711...
60
+ 2om2 1.1494...
61
+
62
+ By default, a template can match a molecule in more than one way,
63
+ if several sets of atoms match the geometric constraints. Use the
64
+ ``best_match`` argument of `~Jess.query` to only retrieve the
65
+ best match per template::
66
+
67
+ >>> hits = jess.query(mol, 2, 2, 2, best_match=True)
68
+ >>> for hit in hits:
69
+ ... print(hit.template.id, hit.rmsd)
70
+ 2om2 1.071...
71
+
5
72
  References:
6
73
  - Barker, J. A., & Thornton, J. M. (2003). *An algorithm for
7
74
  constraint-based structural template matching: application to
@@ -18,10 +85,16 @@ References:
18
85
  # --- C imports --------------------------------------------------------------
19
86
 
20
87
  cimport cython
21
- from cpython.unicode cimport PyUnicode_FromStringAndSize
88
+ from cpython.exc cimport PyErr_WarnEx
89
+ from cpython.unicode cimport (
90
+ PyUnicode_FromStringAndSize,
91
+ PyUnicode_FromFormat,
92
+ PyUnicode_AsASCIIString,
93
+ )
22
94
 
23
95
  from libc.math cimport isnan, exp, INFINITY, NAN
24
- from libc.stdio cimport FILE, fclose, fdopen, printf
96
+ from libc.stdio cimport FILE, fclose, fdopen, printf, sprintf
97
+ from libc.stdint cimport uintptr_t
25
98
  from libc.stdlib cimport calloc, realloc, free, malloc
26
99
  from libc.string cimport memcpy, memset, strncpy, strdup
27
100
 
@@ -37,24 +110,20 @@ from jess.jess cimport Jess as _Jess
37
110
  from jess.jess cimport JessQuery as _JessQuery
38
111
  from jess.molecule cimport Molecule as _Molecule
39
112
  from jess.super cimport Superposition as _Superposition
40
- from jess.template cimport Template as _Template
113
+ from jess.template cimport Template as _Template, IgnoreType as _IgnoreType
41
114
  from jess.tess_template cimport TessTemplate as _TessTemplate
42
115
  from jess.tess_atom cimport TessAtom as _TessAtom
43
116
 
44
117
  # --- Python imports ---------------------------------------------------------
45
118
 
46
- import contextlib
47
119
  import functools
48
120
  import io
49
- import itertools
50
- import os
51
- import warnings
52
121
 
53
122
  __version__ = PROJECT_VERSION
54
123
 
55
124
  # --- Utils ------------------------------------------------------------------
56
125
 
57
- cdef inline void copy_token(char* dst, const char* src, size_t n) noexcept nogil:
126
+ cdef inline void encode_token(char* dst, const char* src, size_t n) noexcept nogil:
58
127
  cdef size_t i
59
128
  for i in range(n):
60
129
  if src[i] == ord(' ') or src[i] == 0:
@@ -63,12 +132,155 @@ cdef inline void copy_token(char* dst, const char* src, size_t n) noexcept nogil
63
132
  dst[i] = src[i]
64
133
  dst[n] = 0
65
134
 
66
- @contextlib.contextmanager
67
- def nullcontext(return_value=None):
68
- yield return_value
135
cdef inline void decode_token(char* dst, const char* src, size_t n) noexcept nogil:
    # Copy `n` bytes from `src` into `dst`, writing a space wherever the
    # source holds an underscore or a NUL byte, then write a terminating
    # NUL at index `n` (so `dst` must have room for `n + 1` bytes).
    cdef size_t k
    cdef char c
    for k in range(n):
        c = src[k]
        if c != ord('_') and c != 0:
            dst[k] = c
        else:
            dst[k] = ord(' ')
    dst[n] = 0
143
+
144
class nullcontext:
    """A do-nothing context manager handing back a preset value.

    It lets the parsers use a single ``with`` statement whether they
    were given a path (opened with `open`) or an already-open file-like
    object (wrapped in this class and left open on exit).

    """

    def __init__(self, return_value=None):
        self.retval = return_value

    def __enter__(self):
        return self.retval

    def __exit__(self, exc_type, exc_value, traceback):
        # a falsy result lets any exception raised in the block propagate
        return False
69
151
 
70
152
  # --- Classes ----------------------------------------------------------------
71
153
 
154
cdef class _MoleculeParser:
    """Base class of the molecule parsers.

    It only stores the identifier requested by the caller (`None` when
    none was given), which the subclasses read when building a molecule.

    """
    cdef str id

    def __init__(self, str id = None):
        self.id = id
159
+
160
cdef class _PDBMoleculeParser(_MoleculeParser):
    """Parser building a molecule from PDB-formatted text.

    Attributes are set from the constructor arguments: ``ignore_endmdl``
    makes the parser continue past ``ENDMDL`` lines, and ``skip_hetatm``
    makes it drop ``HETATM`` lines.

    """
    cdef bint ignore_endmdl
    cdef bint skip_hetatm

    def __init__(self, str id = None, bint ignore_endmdl = False, bint skip_hetatm = False):
        super().__init__(id=id)
        self.ignore_endmdl = ignore_endmdl
        self.skip_hetatm = skip_hetatm

    def loads(self, text, molecule_type):
        """Parse a PDB string by wrapping it into an in-memory text buffer."""
        return self.load(io.StringIO(text), molecule_type)

    def load(self, file, molecule_type):
        """Parse a PDB file and return ``molecule_type(atoms, id=id)``.

        Arguments:
            file: Either something accepted by `open`, or an object that
                can be iterated line by line (used as-is when `open`
                raises a `TypeError`).
            molecule_type: The callable used to build the result.

        Raises:
            ValueError: When a line starting with ``data_`` or ``loop_``
                (case-insensitive) is found, hinting at mmCIF content.

        """
        cdef str line
        cdef str id = self.id
        cdef list atoms = []

        try:
            handle = open(file)
        except TypeError:
            # not a path: assume an already-open file-like object
            handle = nullcontext(file)

        with handle as f:
            for line in f:
                if line.startswith("HEADER"):
                    # only read the identifier when none was requested
                    if id is None:
                        id = line[62:66].strip() or None
                    continue
                if line.startswith("ATOM") or (
                    line.startswith("HETATM") and not self.skip_hetatm
                ):
                    atoms.append(Atom.loads(line))
                    continue
                if line.startswith("ENDMDL"):
                    if self.ignore_endmdl:
                        continue
                    # end of the first model: stop reading
                    break
                if line.lower().startswith(("data_", "loop_")):
                    raise ValueError("mmCIF data tags found, file is not in PDB format")

        return molecule_type(atoms, id=id)
195
+
196
+
197
cdef class _CIFMoleculeParser(_MoleculeParser):
    """Parser building a molecule from mmCIF data using ``gemmi``.

    The ``gemmi`` module is imported when the parser is created, so
    creating a parser fails if ``gemmi`` is not installed.

    """
    cdef object gemmi
    cdef bint use_author
    cdef bint skip_hetatm

    # `_atom_site` columns read when using the primary (label) identifiers;
    # the row indices used in `_load_block` follow this order.
    _PRIMARY_COLUMNS = [
        'id', 'type_symbol', 'label_atom_id', 'label_alt_id', 'label_comp_id',
        'label_asym_id', 'label_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
        'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
        '?pdbx_formal_charge', '?group_PDB',
    ]

    # same layout, but with the author-defined identifiers.
    _AUTH_COLUMNS = [
        'id', 'type_symbol', 'auth_atom_id', 'label_alt_id', 'auth_comp_id',
        'auth_asym_id', 'auth_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
        'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
        '?pdbx_formal_charge', '?group_PDB',
    ]

    def __init__(self, str id = None, bint use_author = False, bint skip_hetatm = False):
        super().__init__(id=id)
        self.gemmi = __import__('gemmi')
        self.use_author = use_author
        self.skip_hetatm = skip_hetatm

    def _load_block(self, document, molecule_type):
        """Build a molecule from the sole block of a parsed CIF document.

        Raises:
            ValueError: When the ``_atom_site`` table cannot be found
                with the expected columns.

        """
        block = document.sole_block()
        columns = self._AUTH_COLUMNS if self.use_author else self._PRIMARY_COLUMNS
        table = block.find('_atom_site.', columns)
        if not table:
            raise ValueError("missing columns in CIF files")

        atoms = []
        max_residue_number = 0
        for row in table:
            # keep ATOM rows, and HETATM rows unless asked to skip them
            group = row[14]
            if group == "HETATM":
                if self.skip_hetatm:
                    continue
            elif group != "ATOM":
                continue

            seq_id = row[6]
            if seq_id == "." and group == "HETATM":
                PyErr_WarnEx(
                    UserWarning,
                    "HETATM line found without residue number. Consider "
                    "parsing with use_author=True to use author-defined "
                    "residue numbers, or skip_hetatm=True to disable "
                    "parsing of HETATM altogether.",
                    3,
                )
                # NOTE(review): the substitute number is the largest one
                # seen so far, i.e. it may equal an existing residue
                # number -- confirm this is intended.
                residue_number = max_residue_number
                max_residue_number += 1
            else:
                residue_number = int(seq_id)
                max_residue_number = max(residue_number, max_residue_number)

            atoms.append(
                Atom(
                    serial=int(row[0]),
                    element=row[1],
                    name=row[2],
                    altloc=' ' if row[3] == "." else row[3],  # FIXME: replace with None?
                    residue_name=row[4],
                    chain_id=row[5],
                    residue_number=residue_number,
                    insertion_code=' ' if not row.has(7) or row[7] == "?" else row[7],
                    x=float(row[8]),
                    y=float(row[9]),
                    z=float(row[10]),
                    occupancy=0.0 if row[11] == '.' else float(row[11]),
                    temperature_factor=float(row[12]),
                    charge=0 if not row.has(13) or row[13] == "?" else int(row[13]),
                )
            )

        # fall back to the block name when no identifier was requested
        id = block.name if self.id is None else self.id
        return molecule_type(atoms, id=id)

    def loads(self, text, molecule_type):
        """Parse a molecule from a string containing CIF data."""
        document = self.gemmi.cif.read_string(text)
        return self._load_block(document, molecule_type)

    def load(self, file, molecule_type):
        """Parse a molecule from a path or from an object with a ``read`` method."""
        if not hasattr(file, "read"):
            document = self.gemmi.cif.read_file(file)
        else:
            document = self.gemmi.cif.read_string(file.read())
        return self._load_block(document, molecule_type)
282
+
283
+
72
284
  cdef class Molecule:
73
285
  """A molecule structure, as a sequence of `Atom` objects.
74
286
 
@@ -83,20 +295,40 @@ cdef class Molecule:
83
295
  cdef str _id
84
296
 
85
297
  @classmethod
86
- def loads(cls, text, str id = None, bint ignore_endmdl = False):
298
def loads(
    cls,
    text,
    str format = "pdb",
    *,
    str id = None,
    bint ignore_endmdl = False,
    bint use_author = False,
    bint skip_hetatm = False,
):
    """Load a molecule from a string.

    Arguments:
        text (`str`): The serialized molecule to parse into a new
            object.
        format (`str`): The format to parse the file. Supported formats
            are: ``pdb`` for the Protein Data Bank format (the
            default), ``cif`` for Crystallographic Information File
            format (additionally requires the `gemmi` module), or
            ``detect`` to attempt auto-detection.

    Keyword Arguments:
        id (`str`, optional): The identifier of the molecule. If `None`
            given, the parser will attempt to extract it from the
            ``HEADER`` line (for PDB files) or the block name (for CIF
            files).
        ignore_endmdl (`bool`): Pass `True` to make the parser read all
            the atoms from the PDB file. By default, the parser only
            reads the atoms of the first model, and stops at the first
            ``ENDMDL`` line. *Ignored for CIF files*.
        use_author (`bool`): Pass `True` to use the author-defined
            labels while parsing CIF files, e.g. read the chain name
            from ``_atom_site.auth_asym_id`` rather than
            ``_atom_site.label_asym_id``. *Ignored for PDB files*.
        skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
            (``HETATM``) in the input text.

    Returns:
        `~pyjess.Molecule`: The molecule parsed from the given text.

    See Also:
        `Molecule.load` to load a PDB molecule from a file-like
        object or from a path.

    Caution:
        Parsing from PDB file retains the heteroatoms (``HETATM`` lines)
        while parsing from mmCIF usually discards them. This is because
        mmCIF files store heteroatoms but do not require them to
        have an associated residue number, which can throw off the way
        atoms are modeled in Jess.

    .. versionadded:: 0.7.0
        The ``format`` argument, and support for CIF parsing.

    """
    # resolve auto-detection here, from the first non-blank characters
    if format == "detect":
        format = "cif" if text.lstrip().startswith(("data_", "loop_")) else "pdb"
    return cls.load(
        io.StringIO(text),
        format=format,
        id=id,
        ignore_endmdl=ignore_endmdl,
        # forward `use_author` too, otherwise it is silently dropped
        use_author=use_author,
        skip_hetatm=skip_hetatm,
    )
110
360
 
111
361
  @classmethod
112
- def load(cls, file, str id = None, bint ignore_endmdl = False):
362
def load(
    cls,
    file,
    str format = "detect",
    *,
    str id = None,
    bint ignore_endmdl = False,
    bint use_author = False,
    bint skip_hetatm = False,
):
    """Load a molecule from a file.

    Arguments:
        file (`str`, `os.PathLike`, or file-like object): Either the path
            to a file, or a file-like object opened in **text mode**
            containing a molecule.
        format (`str`): The format to parse the file. Supported formats
            are: ``pdb`` for the Protein Data Bank format, ``cif``
            for Crystallographic Information File format (additionally
            requires the `gemmi` module), or ``detect`` to attempt
            auto-detection (the default).

    Keyword Arguments:
        id (`str`, optional): The identifier of the molecule. If `None`
            given, the parser will attempt to extract it from the
            ``HEADER`` line (for PDB files) or the block name (for CIF
            files).
        ignore_endmdl (`bool`): Pass `True` to make the parser read all
            the atoms from the PDB file. By default, the parser only
            reads the atoms of the first model, and stops at the first
            ``ENDMDL`` line. *Ignored for CIF files*.
        use_author (`bool`): Pass `True` to use the author-defined
            labels while parsing CIF files, e.g. read the chain name
            from ``_atom_site.auth_asym_id`` rather than
            ``_atom_site.label_asym_id``. *Ignored for PDB files*.
        skip_hetatm (`bool`): Pass `True` to skip parsing of heteroatoms
            (``HETATM``) in the input file.

    Returns:
        `~pyjess.Molecule`: The molecule parsed from the given file.

    Raises:
        ValueError: When ``format`` is not one of ``pdb``, ``cif``
            or ``detect``.

    See Also:
        `Molecule.loads` to load a PDB molecule from a string.

    Caution:
        Parsing from PDB file retains the heteroatoms (``HETATM`` lines)
        while parsing from mmCIF usually discards them. This is because
        mmCIF files store heteroatoms but do not require them to
        have an associated residue number, which can throw off the way
        atoms are modeled in Jess.

    .. versionadded:: 0.7.0
        The ``format`` and ``skip_hetatm`` arguments, and mmCIF support.

    """
    cdef _MoleculeParser parser
    cdef str peek

    if format == "detect":
        try:
            handle = open(file)
        except TypeError:
            # not a path: assume an already-open file-like object
            handle = nullcontext(file)
        with handle as f:
            if f.seekable():
                # look at the first characters, then rewind for the parser
                peek = f.read(5)
                f.seek(0)
            else:
                # cannot rewind: read everything and parse from the string
                f = f.read()
                peek = f[:5]
            if peek.startswith(("data_", "loop_")):
                parser = _CIFMoleculeParser(
                    id=id,
                    use_author=use_author,
                    skip_hetatm=skip_hetatm,
                )
            else:
                parser = _PDBMoleculeParser(
                    id=id,
                    ignore_endmdl=ignore_endmdl,
                    skip_hetatm=skip_hetatm,
                )
            # parse while the handle is still open
            if isinstance(f, str):
                return parser.loads(f, molecule_type=cls)
            return parser.load(f, molecule_type=cls)

    if format == "pdb":
        parser = _PDBMoleculeParser(
            id=id,
            ignore_endmdl=ignore_endmdl,
            skip_hetatm=skip_hetatm,
        )
    elif format == "cif":
        parser = _CIFMoleculeParser(
            id=id,
            use_author=use_author,
            skip_hetatm=skip_hetatm,
        )
    else:
        raise ValueError(f"invalid value for `format` argument: {format!r}")
    return parser.load(file, molecule_type=cls)
462
+
463
@classmethod
def from_biopython(cls, object structure, str id = None):
    """Create a new `~pyjess.Molecule` from a `Bio.PDB.Structure`.

    Arguments:
        structure (`Bio.PDB.Structure` or `Bio.PDB.Model`): The
            Biopython object containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule. If `None` given, will use the value of
            ``structure.id``.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for c in structure.get_chains():
        for r in c.get_residues():
            # the first field of the residue identifier is not used here
            _, residue_number, insertion_code = r.id
            for a in r.get_atoms():
                coord = a.get_coord()
                atom = Atom(
                    name=a.fullname,
                    x=coord[0],
                    y=coord[1],
                    z=coord[2],
                    altloc=a.altloc,
                    charge=a.pqr_charge or 0,
                    occupancy=a.occupancy,
                    serial=a.serial_number,
                    residue_name=r.resname,
                    residue_number=residue_number,
                    segment=r.segid,
                    insertion_code=insertion_code,
                    chain_id=c.id,
                    temperature_factor=a.bfactor,
                    element=a.element,
                )
                atoms.append(atom)
    # honor an explicit identifier, only falling back to `structure.id`
    return cls(atoms, id=structure.id if id is None else id)
506
+
507
@classmethod
def from_gemmi(cls, object model, str id=None):
    """Create a new `~pyjess.Molecule` from a `gemmi.Model`.

    Arguments:
        model (`gemmi.Model`): The ``gemmi`` object
            containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for cra in model.all():
        a = cra.atom
        r = cra.residue
        c = cra.chain
        atom = Atom(
            name=a.padded_name(),
            x=a.pos[0],
            y=a.pos[1],
            z=a.pos[2],
            # a NUL alternate location is stored as a blank
            altloc=' ' if a.altloc == '\0' else a.altloc,
            charge=a.charge,
            element=a.element.name.upper(),
            occupancy=a.occ,
            temperature_factor=a.b_iso,
            serial=a.serial,
            segment=r.segment,
            residue_name=r.name,
            residue_number=r.seqid.num,
            chain_id=c.name,
            insertion_code=r.seqid.icode,
        )
        atoms.append(atom)
    return cls(atoms, id=id)
147
548
 
549
@classmethod
def from_biotite(cls, object atom_array, str id=None):
    """Create a new `~pyjess.Molecule` from a `biotite.structure.AtomArray`.

    Arguments:
        atom_array (`biotite.structure.AtomArray`): The ``biotite``
            object containing the structure data.
        id (`str` or `None`): The identifier to give to the newly
            created molecule.

    Returns:
        `~pyjess.Molecule`: A molecule object suitable for using
        in `Jess.query`.

    Caution:
        If loading data with the `biotite.structure.io.pdb.PDBFile` module,
        ensure that you are requesting all atoms and all extra fields
        in `~biotite.structure.io.pdb.PDBFile.get_structure`::

            pdb_file = PDBFile.read("data/1AMY.pdb")
            structure = pdb_file.get_structure(
                altloc="all",
                extra_fields=["atom_id", "b_factor", "occupancy", "charge"],
            )
            molecule = Molecule.from_biotite(structure[0])

    .. versionadded:: 0.7.0

    """
    cdef list atoms = []
    for a in atom_array:
        atom = Atom(
            name=str(a.atom_name),
            x=a.coord[0],
            y=a.coord[1],
            z=a.coord[2],
            # these annotations fall back to a default when absent
            altloc=str(getattr(a, 'altloc', ' ')),
            charge=getattr(a, 'charge', 0),
            element=str(a.element),
            occupancy=getattr(a, 'occupancy', 1.0),
            temperature_factor=a.b_factor,
            serial=a.atom_id,
            segment=str(getattr(a, 'segment', '')),
            residue_name=str(a.res_name),
            residue_number=a.res_id,
            chain_id=str(a.chain_id),
            # pad an empty insertion code to a single blank
            insertion_code=str(a.ins_code).ljust(1),
        )
        atoms.append(atom)
    # pass the requested identifier instead of dropping it
    return cls(atoms, id=id)
597
+
148
598
  def __cinit__(self):
149
599
  self._mol = NULL
150
600
 
@@ -249,17 +699,32 @@ cdef class Molecule:
249
699
  return self._id
250
700
 
251
701
  cpdef Molecule conserved(self, double cutoff = 0.0):
702
+ """Get a molecule containing only a subset of conserved atoms.
703
+
704
+ Arguments:
705
+ cutoff (`float`): The conservation cutoff for atoms. Atoms
706
+ with a `~Atom.temperature_factor` lower than this value
707
+ will be removed from the result.
708
+
709
+ Returns:
710
+ `~pyjess.Molecule`: A new molecule with atoms below the
711
+ conservation cutoff removed.
712
+
713
+ """
252
714
  assert self._mol is not NULL
253
- cdef Atom atom
254
- return type(self)(
255
- id=self.id,
256
- atoms=[
257
- atom
258
- for atom in self
259
- if cutoff <= 0.0
260
- or atom._atom.tempFactor >= cutoff
261
- ]
262
- )
715
+
716
+ cdef size_t i
717
+ cdef list atoms
718
+
719
+ if cutoff <= 0.0:
720
+ return self.copy()
721
+
722
+ atoms = []
723
+ for i in range(self._mol.count):
724
+ if self._mol.atom[i].tempFactor >= cutoff:
725
+ atoms.append(self[i])
726
+
727
+ return type(self)(id=self.id, atoms=atoms)
263
728
 
264
729
  cpdef Molecule copy(self):
265
730
  """Create a copy of this molecule and its atoms.
@@ -366,21 +831,21 @@ cdef class Atom:
366
831
  *,
367
832
  int serial,
368
833
  str name,
369
- str altloc,
370
834
  str residue_name,
371
835
  str chain_id,
372
836
  int residue_number,
373
- str insertion_code,
374
837
  double x,
375
838
  double y,
376
839
  double z,
377
840
  double occupancy = 0.0,
378
841
  double temperature_factor = 0.0,
842
+ str altloc = ' ',
843
+ str insertion_code = ' ',
379
844
  str segment = '',
380
845
  str element = '',
381
846
  int charge = 0,
382
847
  ):
383
- """__init__(self, *, serial, name, altloc, residue_name, chain_id, residue_number, insertion_code, x, y, z, occupancy=0.0, temperature_factor=0.0, segment='', element='', charge=0)\n--\n
848
+ """__init__(self, *, serial, name, residue_name, chain_id, residue_number, x, y, z, occupancy=0.0, temperature_factor=0.0, altloc=' ', insertion_code=' ', segment='', element='', charge=0)\n--\n
384
849
 
385
850
  Create a new atom.
386
851
 
@@ -392,11 +857,16 @@ cdef class Atom:
392
857
  long.
393
858
 
394
859
  """
860
+ cdef bytearray _name
861
+ cdef bytes _residue_name
862
+ cdef bytes _segment
863
+ cdef bytes _element
864
+
395
865
  if len(name) > 4:
396
866
  raise ValueError(f"Invalid atom name: {name!r}")
397
867
  if len(residue_name) > 3:
398
868
  raise ValueError(f"Invalid residue name: {residue_name!r}")
399
- if len(segment) > 3:
869
+ if len(segment) > 4:
400
870
  raise ValueError(f"Invalid segment: {segment!r}")
401
871
  if len(element) > 2:
402
872
  raise ValueError(f"Invalid element: {element!r}")
@@ -407,6 +877,10 @@ cdef class Atom:
407
877
  if self._atom is NULL:
408
878
  raise MemoryError("Failed to allocate atom")
409
879
 
880
+ _residue_name = PyUnicode_AsASCIIString(residue_name)
881
+ _segment = PyUnicode_AsASCIIString(segment)
882
+ _element = PyUnicode_AsASCIIString(element)
883
+
410
884
  self._atom.serial = serial
411
885
  self._atom.altLoc = ord(altloc)
412
886
  self._atom.chainID1 = ord(chain_id[0]) if len(chain_id) > 0 else 0
@@ -419,14 +893,15 @@ cdef class Atom:
419
893
  self._atom.occupancy = occupancy
420
894
  self._atom.tempFactor = temperature_factor
421
895
  self._atom.charge = charge
422
- copy_token(self._atom.resName, residue_name.encode('ascii').ljust(3, b'\0'), 3)
423
- copy_token(self._atom.segID, segment.encode('ascii').ljust(3, b'\0'), 3)
424
- copy_token(self._atom.element, element.encode('ascii').ljust(2, b'\0'), 2)
896
+ encode_token(self._atom.resName, _residue_name.ljust(3, b'\0'), 3)
897
+ encode_token(self._atom.segID, _segment.ljust(4, b'\0'), 4)
898
+ encode_token(self._atom.element, _element.ljust(2, b'\0'), 2)
425
899
 
900
+ # FIXME: is alignment proper?
426
901
  _name = bytearray(name, 'ascii')
427
902
  if len(_name) < 4:
428
903
  _name.insert(0, ord('_'))
429
- copy_token(self._atom.name, _name.ljust(4, b'\0'), 4)
904
+ encode_token(self._atom.name, _name.ljust(4, b'\0'), 4)
430
905
 
431
906
  def __copy__(self):
432
907
  return self.copy()
@@ -519,7 +994,7 @@ cdef class Atom:
519
994
  """`str`: The segment identifier.
520
995
  """
521
996
  assert self._atom is not NULL
522
- return self._atom.segID[:3].decode('ascii').strip('_')
997
+ return self._atom.segID[:4].decode('ascii').strip('_')
523
998
 
524
999
  @property
525
1000
  def element(self):
@@ -540,7 +1015,7 @@ cdef class Atom:
540
1015
  """`str`: The identifier of the chain the atom belongs to.
541
1016
  """
542
1017
  assert self._atom is not NULL
543
- return "{}{}".format(chr(self._atom.chainID1), chr(self._atom.chainID2)).strip()
1018
+ return PyUnicode_FromFormat("%c%c", self._atom.chainID1, self._atom.chainID2).strip()
544
1019
 
545
1020
  @property
546
1021
  def occupancy(self):
@@ -565,16 +1040,22 @@ cdef class Atom:
565
1040
 
566
1041
  @property
567
1042
  def x(self):
1043
+ """`float`: The atom coordinate in the 1st dimension.
1044
+ """
568
1045
  assert self._atom is not NULL
569
1046
  return self._atom.x[0]
570
1047
 
571
1048
  @property
572
1049
  def y(self):
1050
+ """`float`: The atom coordinate in the 2nd dimension.
1051
+ """
573
1052
  assert self._atom is not NULL
574
1053
  return self._atom.x[1]
575
1054
 
576
1055
  @property
577
1056
  def z(self):
1057
+ """`float`: The atom coordinate in the 3rd dimension.
1058
+ """
578
1059
  assert self._atom is not NULL
579
1060
  return self._atom.x[2]
580
1061
 
@@ -736,18 +1217,19 @@ cdef class TemplateAtom:
736
1217
  _name = bytearray(name, 'ascii')
737
1218
  else:
738
1219
  _name = bytearray(name)
1220
+ # FIXME: is alignment proper?
739
1221
  if len(_name) > 4:
740
1222
  raise ValueError(f"Invalid atom name: {name!r}")
741
- elif len(_name) < 3:
1223
+ elif len(_name) <= 3:
742
1224
  _name.insert(0, ord('_'))
743
- copy_token(self._atom.name[m], _name.ljust(4, b'\0'), 4)
1225
+ encode_token(self._atom.name[m], _name.ljust(4, b'\0'), 4)
744
1226
 
745
1227
  # copy residue names
746
1228
  for m, name in enumerate(residue_names):
747
1229
  _name = name.encode('ascii') if isinstance(name, str) else name
748
1230
  if len(_name) > 3:
749
1231
  raise ValueError(f"Invalid residue name: {name!r}")
750
- copy_token(self._atom.resName[m], _name.ljust(3, b'\0'), 3)
1232
+ encode_token(self._atom.resName[m], _name.ljust(3, b'\0'), 3)
751
1233
 
752
1234
  cdef dict _state(self):
753
1235
  return {
@@ -821,7 +1303,7 @@ cdef class TemplateAtom:
821
1303
  assert self._atom is not NULL
822
1304
  cdef char c1 = jess.tess_atom.TessAtom_chainID1(self._atom)
823
1305
  cdef char c2 = jess.tess_atom.TessAtom_chainID2(self._atom)
824
- return "{}{}".format(chr(c1), chr(c2)).strip()
1306
+ return PyUnicode_FromFormat("%c%c", c1, c2).strip()
825
1307
 
826
1308
  @property
827
1309
  def x(self):
@@ -895,7 +1377,10 @@ cdef class TemplateAtom:
895
1377
  .. versionadded:: 0.4.0
896
1378
 
897
1379
  """
898
- return type(self)(**self._state())
1380
+ cdef TemplateAtom atom = TemplateAtom.__new__(TemplateAtom)
1381
+ with nogil:
1382
+ atom._atom = jess.tess_atom.TessAtom_copy(self._atom)
1383
+ return atom
899
1384
 
900
1385
 
901
1386
  cdef class Template:
@@ -948,12 +1433,13 @@ cdef class Template:
948
1433
  `~pyjess.Template`: The template parsed from the given file.
949
1434
 
950
1435
  """
1436
+ cdef str line
1437
+ cdef list atoms = []
951
1438
  try:
952
1439
  handle = open(file)
953
1440
  except TypeError:
954
1441
  handle = nullcontext(file)
955
1442
  with handle as f:
956
- atoms = []
957
1443
  for line in f:
958
1444
  if line.startswith("ATOM"):
959
1445
  atoms.append(TemplateAtom.loads(line))
@@ -1050,7 +1536,14 @@ cdef class Template:
1050
1536
  self._tess.distance[j][i] = dist
1051
1537
 
1052
1538
  # compute dimension
1053
- residues = { self._tess.atom[i].resSeq for i in range(count) }
1539
+ residues = {
1540
+ (
1541
+ self._tess.atom[i].resSeq ,
1542
+ self._tess.atom[i].chainID1,
1543
+ self._tess.atom[i].chainID2,
1544
+ )
1545
+ for i in range(count)
1546
+ }
1054
1547
  self._tess.dim = len(residues)
1055
1548
 
1056
1549
  def __copy__(self):
@@ -1133,6 +1626,8 @@ cdef class Template:
1133
1626
 
1134
1627
  @property
1135
1628
  def id(self):
1629
+ """`str` or `None`: An identifier for the template, if any.
1630
+ """
1136
1631
  assert self._tpl is not NULL
1137
1632
 
1138
1633
  cdef const char* name = self._tpl.name(self._tpl)
@@ -1148,11 +1643,18 @@ cdef class Template:
1148
1643
  return self._tess.dim
1149
1644
 
1150
1645
  cpdef Template copy(self):
1151
- return Template(
1152
- self,
1153
- self.id
1154
- )
1646
+ """Create a copy of the template.
1647
+
1648
+ Returns:
1649
+ `~pyjess.Template`: A new template object with identical
1650
+ attributes and a copy of the `TemplateAtom` it contains.
1155
1651
 
1652
+ """
1653
+ cdef Template tpl = Template.__new__(Template)
1654
+ with nogil:
1655
+ tpl._tpl = self._tpl.copy(self._tpl)
1656
+ tpl._tess = <_TessTemplate*> &tpl._tpl[1]
1657
+ return tpl
1156
1658
 
1157
1659
  cdef class Query:
1158
1660
  """A query over templates with a given molecule.
@@ -1168,10 +1670,6 @@ cdef class Query:
1168
1670
  the templates.
1169
1671
  rmsd_threshold (`float`): The RMSD threshold for reporting
1170
1672
  results.
1171
- max_candidates (`int`): The maximum number of candidate hits
1172
- to report.
1173
- ignore_chain (`bool`): Whether to check or ignore the chain of
1174
- the atoms to match.
1175
1673
  best_match (`bool`): Whether the query will return only the
1176
1674
  best match to each template.
1177
1675
 
@@ -1179,18 +1677,20 @@ cdef class Query:
1179
1677
  cdef _JessQuery* _jq
1180
1678
  cdef bint _partial
1181
1679
  cdef int _candidates
1680
+ cdef uintptr_t _prev_tpl
1681
+ cdef int _max_candidates
1682
+ cdef _IgnoreType _ignore_chain
1182
1683
 
1183
1684
  cdef readonly Jess jess
1184
1685
  cdef readonly Molecule molecule
1185
- cdef readonly bint ignore_chain
1186
1686
  cdef readonly bint best_match
1187
1687
  cdef readonly double rmsd_threshold
1188
- cdef readonly int max_candidates
1189
1688
 
1190
1689
  def __cinit__(self):
1191
1690
  self._jq = NULL
1192
1691
  self._candidates = 0
1193
1692
  self._partial = False
1693
+ self._prev_tpl = 0
1194
1694
 
1195
1695
  def __dealloc__(self):
1196
1696
  jess.jess.JessQuery_free(self._jq)
@@ -1198,11 +1698,48 @@ cdef class Query:
1198
1698
  def __iter__(self):
1199
1699
  return self
1200
1700
 
1701
+ @property
1702
+ def ignore_chain(self):
1703
+ """`str` or `None`: The way atom chains are considered or discarded.
1704
+ """
1705
+ if self._ignore_chain == _IgnoreType.ignoreNone:
1706
+ return None
1707
+ elif self._ignore_chain == _IgnoreType.ignoreResidues:
1708
+ return "residues"
1709
+ elif self._ignore_chain == _IgnoreType.ignoreAtoms:
1710
+ return "atoms"
1711
+
1712
+ @ignore_chain.setter
1713
+ def ignore_chain(self, ignore_chain):
1714
+ if ignore_chain is None:
1715
+ self._ignore_chain = _IgnoreType.ignoreNone
1716
+ elif ignore_chain == "residues":
1717
+ self._ignore_chain = _IgnoreType.ignoreResidues
1718
+ elif ignore_chain == "atoms":
1719
+ self._ignore_chain = _IgnoreType.ignoreAtoms
1720
+ else:
1721
+ raise ValueError(f"invalid value for `ignore_chain`: {ignore_chain!r}")
1722
+
1723
+ @property
1724
+ def max_candidates(self):
1725
+ """`int` or `None`: The maximum number of candidate hits to report *by template*.
1726
+ """
1727
+ return None if self._max_candidates == -1 else self._max_candidates
1728
+
1729
+ @max_candidates.setter
1730
+ def max_candidates(self, max_candidates):
1731
+ if max_candidates is None:
1732
+ self._max_candidates = -1
1733
+ elif max_candidates >= 0:
1734
+ self._max_candidates = max_candidates
1735
+ else:
1736
+ raise ValueError(f"invalid value for `max_candidates` argument: {max_candidates!r}")
1737
+
1201
1738
  cdef bint _advance(self) noexcept nogil:
1202
1739
  if self._partial:
1203
1740
  self._partial = False
1204
1741
  return True
1205
- return jess.jess.JessQuery_next(self._jq, self.ignore_chain)
1742
+ return jess.jess.JessQuery_next(self._jq, self._ignore_chain)
1206
1743
 
1207
1744
  cdef bint _rewind(self) noexcept nogil:
1208
1745
  self._partial = True
@@ -1246,10 +1783,11 @@ cdef class Query:
1246
1783
 
1247
1784
  # search the next hit without the GIL to allow parallel queries.
1248
1785
  with nogil:
1249
- while self._advance() and self._candidates < self.max_candidates:
1786
+ while self._advance():
1250
1787
  # load current iteration template, and check that the hit
1251
1788
  # was obtained with the current template and not with the
1252
1789
  # previous one
1790
+ self._prev_tpl = <uintptr_t> tpl
1253
1791
  tpl = jess.jess.JessQuery_template(self._jq)
1254
1792
  if hit_found and hit_tpl != tpl:
1255
1793
  self._rewind()
@@ -1276,10 +1814,10 @@ cdef class Query:
1276
1814
 
1277
1815
  if nan:
1278
1816
  with gil:
1279
- warnings.warn(
1280
- "Jess returned a superposition matrix with NaN values",
1817
+ PyErr_WarnEx(
1281
1818
  UserWarning,
1282
- stacklevel=2,
1819
+ "Jess returned a superposition matrix with NaN values",
1820
+ 2,
1283
1821
  )
1284
1822
  else:
1285
1823
  self._copy_atoms(tpl, hit)
@@ -1288,9 +1826,21 @@ cdef class Query:
1288
1826
  hit_tpl = tpl
1289
1827
  hit_found = True
1290
1828
 
1291
- # free superposition items that are not used in a hit, and
1292
- # return hits immediately if we are not in best match mode
1293
- self._candidates += 1
1829
+ # check if we already made it to the next template,
1830
+ # or if we need to short-circuit the iteration and
1831
+ # force the query to move to the next template as
1832
+ # we found too many candidates already.
1833
+ if <uintptr_t> tpl != self._prev_tpl:
1834
+ self._candidates = 0
1835
+ else:
1836
+ self._candidates += 1
1837
+ if self._max_candidates != -1 and self._candidates > self._max_candidates:
1838
+ self._candidates = 0
1839
+ jess.jess.JessQuery_nextTemplate(self._jq)
1840
+
1841
+ # free superposition items (as relevant data was copied in
1842
+ # the Hit if needed) and return hits immediately if we are
1843
+ # not in best match mode
1294
1844
  jess.super.Superposition_free(sup)
1295
1845
  if hit_found and not self.best_match:
1296
1846
  break
@@ -1357,6 +1907,18 @@ cdef class Hit:
1357
1907
  for i, atom in enumerate(state["atoms"]):
1358
1908
  memcpy(&self._atoms[i], atom._atom, sizeof(_Atom))
1359
1909
 
1910
+ cdef void _transform_atom(self, double* x, const double* src):
1911
+ cdef size_t i
1912
+ cdef size_t j
1913
+ cdef const double* M = self._rotation
1914
+ cdef const double* c = self._centre[0]
1915
+ cdef const double* v = self._centre[1]
1916
+
1917
+ for i in range(3):
1918
+ x[i] = v[i]
1919
+ for j in range(3):
1920
+ x[i] += M[3*i + j] * (src[j] - c[j])
1921
+
1360
1922
  @property
1361
1923
  def determinant(self):
1362
1924
  """`float`: The determinant of the rotation matrix.
@@ -1425,15 +1987,11 @@ cdef class Hit:
1425
1987
  if transform:
1426
1988
  atom._atom = <_Atom*> malloc(sizeof(_Atom))
1427
1989
  memcpy(atom._atom, &self._atoms[k], sizeof(_Atom))
1428
- for i in range(3):
1429
- atom._atom.x[i] = v[i]
1430
- for j in range(3):
1431
- atom._atom.x[i] += M[3*i + j] * (self._atoms[k].x[j] - c[j])
1990
+ self._transform_atom(atom._atom.x, self._atoms[k].x)
1432
1991
  else:
1433
1992
  atom.owned = True
1434
1993
  atom.owner = self
1435
1994
  atom._atom = &self._atoms[k]
1436
-
1437
1995
  atoms.append(atom)
1438
1996
 
1439
1997
  return atoms
@@ -1469,17 +2027,142 @@ cdef class Hit:
1469
2027
  mol = self._molecule.copy()
1470
2028
  for k in range(mol._mol.count):
1471
2029
  atom = mol._mol.atom[k]
1472
- for i in range(3):
1473
- atom.x[i] = v[i]
1474
- for j in range(3):
1475
- atom.x[i] += M[3*i + j] * (self._molecule._mol.atom[k].x[j] - c[j])
2030
+ self._transform_atom(atom.x, self._molecule._mol.atom[k].x)
1476
2031
 
1477
2032
  return mol
1478
2033
 
2034
+ cpdef str dumps(self, str format="pdb", bint transform=True):
2035
+ """Write the hit to a string.
2036
+
2037
+ Arguments:
2038
+ format (`str`): The format in which to write the hit.
2039
+ Currently only supports ``pdb``, which writes the hits
2040
+ in the same format as Jess.
2041
+ transform (`bool`): Whether or not to transform coordinates
2042
+ of the molecule atoms into the template frame.
2043
+
2044
+ Raises:
2045
+ `RuntimeError`: When attempting to dump a `Hit` which was
2046
+ obtained from a `Template` which has no `~Template.id`.
2047
+
2048
+ .. versionadded:: 0.7.0
2049
+
2050
+ """
2051
+ file = io.StringIO()
2052
+ self.dump(file, format=format, transform=transform)
2053
+ return file.getvalue()
2054
+
2055
+ cpdef void dump(self, object file, str format="pdb", bint transform=True):
2056
+ """Write the hit to a file.
2057
+
2058
+ Arguments:
2059
+ file (file-like object): A file opened in *text* mode where the
2060
+ hit will be written.
2061
+ format (`str`): The format in which to write the hit.
2062
+ Currently only supports ``pdb``, which writes the hits
2063
+ in the same format as Jess.
2064
+ transform (`bool`): Whether or not to transform coordinates
2065
+ of the molecule atoms into the template frame.
2066
+
2067
+ Raises:
2068
+ `RuntimeError`: When attempting to dump a `Hit` which was
2069
+ obtained from a `Template` which has no `~Template.id`.
2070
+
2071
+ .. versionadded:: 0.7.0
2072
+
2073
+ """
2074
+ assert self.template._tpl is not NULL
2075
+ assert self._molecule._mol is not NULL
2076
+
2077
+ cdef _Atom* atom
2078
+ cdef size_t k
2079
+ cdef char[80] buffer
2080
+ cdef char[5] name
2081
+ cdef char[5] resname
2082
+ cdef double[3] x
2083
+ cdef int count = self.template._tpl.count(self.template._tpl)
2084
+
2085
+ if self.template.id is None:
2086
+ raise RuntimeError("cannot dump `Hit` where `self.template.id` is `None`")
2087
+
2088
+ file.write("REMARK ")
2089
+ file.write(self._molecule.id)
2090
+ file.write(f" {self.rmsd:5.3f} ")
2091
+ file.write(self.template.id)
2092
+ file.write(f" Det={self.determinant:4,.1f} log(E)~ {self.log_evalue:4.2f}\n")
2093
+
2094
+ for k in range(count):
2095
+ atom = &self._atoms[k]
2096
+ decode_token(name, atom.name, 4)
2097
+ decode_token(resname, atom.resName, 3)
2098
+ if transform:
2099
+ self._transform_atom(x, atom.x)
2100
+ else:
2101
+ memcpy(x, atom.x, 3*sizeof(double))
2102
+ n = sprintf(
2103
+ buffer,
2104
+ "ATOM %5i%5s%c%-3s%c%c%4i%-4c%8.3f%8.3f%8.3f%6.2f%6.2f\n",
2105
+ atom.serial,
2106
+ name,
2107
+ atom.altLoc,
2108
+ resname,
2109
+ atom.chainID1,
2110
+ atom.chainID2,
2111
+ atom.resSeq,
2112
+ atom.iCode,
2113
+ x[0],
2114
+ x[1],
2115
+ x[2],
2116
+ atom.occupancy,
2117
+ atom.tempFactor,
2118
+ atom.segID,
2119
+ atom.element,
2120
+ atom.charge
2121
+ )
2122
+ file.write(PyUnicode_FromStringAndSize(buffer, n))
2123
+ file.write("ENDMDL\n")
1479
2124
 
1480
2125
  cdef class Jess:
1481
2126
  """A handle to run Jess over a list of templates.
1482
2127
 
2128
+ Example:
2129
+ Create a `Jess` object from a list of templates::
2130
+
2131
+ >>> t1 = Template.load("1.3.3.tpl")
2132
+ >>> t2 = Template.load("4.1.2.tpl")
2133
+ >>> jess = Jess([t1, t2])
2134
+
2135
+ Once initialized, the `Jess` object cannot be modified further.
2136
+ Use the `~Jess.query` method to query the templates with a
2137
+ molecule::
2138
+
2139
+ >>> molecule = Molecule.load("1AMY.pdb")
2140
+ >>> query = jess.query(molecule, 2, 2, 2)
2141
+
2142
+ The returned `Query` object is an iterator that can be
2143
+ advanced through a ``for`` loop, or with the `next` built-in
2144
+ function to get the first hit:
2145
+
2146
+ >>> hit = next(query)
2147
+ >>> hit.rmsd
2148
+ 1.4386...
2149
+
2150
+ The hit can also be formatted in PDB format like in the
2151
+ original JESS code::
2152
+
2153
+ >>> print(hit.dumps(format="pdb"), end="")
2154
+ REMARK 1AMY 1.439 2om2 Det= 1.0 log(E)~ 1.11
2155
+ ATOM 729 CA THR A 94 34.202 -24.426 8.851 1.00 2.00
2156
+ ATOM 732 CB THR A 94 35.157 -23.467 8.101 1.00 4.66
2157
+ ATOM 733 OG1 THR A 94 36.338 -23.247 8.871 1.00 9.85
2158
+ ATOM 746 CD GLU A 96 41.454 -29.509 8.013 1.00 24.05
2159
+ ATOM 748 OE2 GLU A 96 42.536 -29.680 7.441 1.00 34.44
2160
+ ATOM 747 OE1 GLU A 96 41.212 -28.521 8.708 1.00 18.56
2161
+ ATOM 437 CZ ARG A 55 44.471 -26.619 10.181 1.00 8.51
2162
+ ATOM 436 NE ARG A 55 44.334 -27.346 11.290 1.00 9.05
2163
+ ATOM 438 NH1 ARG A 55 43.590 -26.751 9.179 1.00 13.17
2164
+ ENDMDL
2165
+
1483
2166
  .. versionadded:: 0.4.0
1484
2167
  Equality, hashing and pickle protocol support.
1485
2168
 
@@ -1581,8 +2264,8 @@ cdef class Jess:
1581
2264
  double distance_cutoff,
1582
2265
  double max_dynamic_distance,
1583
2266
  *,
1584
- int max_candidates = 1000,
1585
- bint ignore_chain = False,
2267
+ object max_candidates = None,
2268
+ object ignore_chain = None,
1586
2269
  bint best_match = False,
1587
2270
  bint reorder = True,
1588
2271
  ):
@@ -1599,15 +2282,35 @@ cdef class Jess:
1599
2282
  dynamic distance after adding the global distance cutoff
1600
2283
  and the individual atom distance cutoff defined for each
1601
2284
  atom of the template.
1602
- ignore_chain (`bool`): Whether to check or ignore the chain of
1603
- the atoms to match.
2285
+ max_candidates (`int` or `None`): The maximum number of candidate
2286
+ hits to report by template. If a non-`None` value is given,
2287
+ it may speed up querying for unspecific templates, but also
2288
+ produce results potentially inconsistent with Jess.
2289
+ ignore_chain (`str` or `None`): Whether to check or ignore the
2290
+ chain of the atoms to match. The different supported modes
2291
+ are:
2292
+
2293
+ - `None`: Force the atoms in the molecule to belong
2294
+ to different (resp. same) chains if that is the case
2295
+ in the template.
2296
+ - ``residues``: Allow atoms to belong to different
2297
+ (resp. same) chains even if it is not the case in
2298
+ the template, but force all atoms of a residue to
2299
+ belong to the same chain.
2300
+ - ``atoms``: Allow atoms to belong to any chain,
2301
+ independently to the template or the residue they
2302
+ belong to.
2303
+
1604
2304
  best_match (`bool`): Pass `True` to return only the best match
1605
- to each template.
2305
+ to each template, based on RMSD. In case of ties, the
2306
+ first match is returned. Note that a match must still
2307
+ be passing the RMSD threshold given in ``rmsd_threshold``
2308
+ to be returned.
1606
2309
  reorder (`bool`): Whether to enable template atom reordering
1607
- to accelerate matching in the scanner algorithm. Pass
2310
+ to accelerate matching in the scanner algorithm. Pass
1608
2311
  `False` to revert to the original, slower algorithm
1609
2312
  which matches atoms in the same order as they appear in
1610
- the template, at the cost
2313
+ the template, at the cost of longer run times.
1611
2314
 
1612
2315
  Returns:
1613
2316
  `~pyjess.Query`: An iterator over the query hits.
@@ -1615,19 +2318,45 @@ cdef class Jess:
1615
2318
  Caution:
1616
2319
  Since ``v0.6.0``, this function uses an optimized variant of
1617
2320
  the Jess scanning algorithm which minimizes the number of steps
1618
- needed to generate matches, by re-ordering the order the
2321
+ needed to generate matches, by re-ordering the order the
1619
2322
  template atoms are iterated upon. Because of this change,
1620
- the query may return *exactly* the same matches but in an order
2323
+ the query may return *exactly* the same matches but in an order
1621
2324
  that *differs* from the original Jess version. If you really
1622
2325
  need results in the original order, set ``reorder`` to `False`.
1623
2326
 
1624
2327
  .. versionadded:: 0.6.0
1625
2328
  The ``reorder`` argument, defaulting to `True`.
1626
2329
 
2330
+ .. versionchanged:: 0.7.0
2331
+ Default value of ``max_candidates`` argument to `None`.
2332
+
2333
+ .. versionchanged:: 0.7.0
2334
+ ``ignore_chain`` now expects string variants rather than `bool`.
2335
+
1627
2336
  """
2337
+
2338
+ if ignore_chain is True:
2339
+ PyErr_WarnEx(
2340
+ DeprecationWarning,
2341
+ "`ignore_chain` parameter expects string parameters "
2342
+ "to specificy the mode since PyJess v0.7.0. "
2343
+ "Use `ignore_chain='atoms'` instead of `ignore_chain=True`",
2344
+ 2,
2345
+ )
2346
+ ignore_chain="atoms"
2347
+ elif ignore_chain is False:
2348
+ PyErr_WarnEx(
2349
+ DeprecationWarning,
2350
+ "`ignore_chain` parameter expects string parameters "
2351
+ "to specificy the mode since PyJess v0.7.0. "
2352
+ "Use `ignore_chain=None` instead of `ignore_chain=False`",
2353
+ 2,
2354
+ )
2355
+ ignore_chain=None
2356
+
1628
2357
  cdef Query query = Query.__new__(Query)
1629
- query.ignore_chain = ignore_chain
1630
2358
  query.max_candidates = max_candidates
2359
+ query.ignore_chain = ignore_chain
1631
2360
  query.rmsd_threshold = rmsd_threshold
1632
2361
  query.best_match = best_match
1633
2362
  query.molecule = molecule