pyjess 0.5.2__cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 0.6.0__cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjess might be problematic. Click here for more details.

pyjess/_jess.pyi CHANGED
@@ -221,4 +221,5 @@ class Jess(Generic[_T], Sequence[_T]):
221
221
  max_candidates: int = 1000,
222
222
  ignore_chain: bool = False,
223
223
  best_match: bool = False,
224
+ reorder: bool = True,
224
225
  ) -> Query[_T]: ...
pyjess/_jess.pyx CHANGED
@@ -329,8 +329,9 @@ cdef class Atom:
329
329
  atom metadata from.
330
330
 
331
331
  """
332
- cdef bytearray b
333
- cdef Atom atom
332
+ cdef const unsigned char* s
333
+ cdef bytearray b
334
+ cdef Atom atom
334
335
 
335
336
  if isinstance(text, str):
336
337
  b = bytearray(text, 'utf-8')
@@ -339,14 +340,15 @@ cdef class Atom:
339
340
  if not b.endswith(b'\n'):
340
341
  b.append(b'\n')
341
342
  b.append(b'\0')
343
+ s = b
342
344
 
343
345
  atom = cls.__new__(cls)
344
- atom._atom = <_Atom*> malloc(sizeof(_Atom))
345
- if atom._atom == NULL:
346
- raise MemoryError("Failed to allocate atom")
347
-
348
- if not jess.atom.Atom_parse(atom._atom, b):
349
- raise ValueError(f"Failed to parse atom: {text!r}")
346
+ with nogil:
347
+ atom._atom = <_Atom*> malloc(sizeof(_Atom))
348
+ if atom._atom == NULL:
349
+ raise MemoryError("Failed to allocate atom")
350
+ if not jess.atom.Atom_parse(atom._atom, <const char*> s):
351
+ raise ValueError(f"Failed to parse atom: {text!r}")
350
352
 
351
353
  return atom
352
354
 
@@ -1582,6 +1584,7 @@ cdef class Jess:
1582
1584
  int max_candidates = 1000,
1583
1585
  bint ignore_chain = False,
1584
1586
  bint best_match = False,
1587
+ bint reorder = True,
1585
1588
  ):
1586
1589
  """Scan for templates matching the given molecule.
1587
1590
 
@@ -1600,10 +1603,27 @@ cdef class Jess:
1600
1603
  the atoms to match.
1601
1604
  best_match (`bool`): Pass `True` to return only the best match
1602
1605
  to each template.
1606
+ reorder (`bool`): Whether to enable template atom reordering
1607
+ to accelerate matching in the scanner algorithm. Pass
1608
+ `False` to revert to the original, slower algorithm
1609
+ which matches atoms in the same order as they appear in
1610
+ the template, at the cost of a longer runtime.
1603
1611
 
1604
1612
  Returns:
1605
1613
  `~pyjess.Query`: An iterator over the query hits.
1606
1614
 
1615
+ Caution:
1616
+ Since ``v0.6.0``, this function uses an optimized variant of
1617
+ the Jess scanning algorithm which minimizes the number of steps
1618
+ needed to generate matches, by changing the order in which the
1619
+ template atoms are iterated upon. Because of this change,
1620
+ the query may return *exactly* the same matches but in an order
1621
+ that *differs* from the original Jess version. If you really
1622
+ need results in the original order, set ``reorder`` to `False`.
1623
+
1624
+ .. versionadded:: 0.6.0
1625
+ The ``reorder`` argument, defaulting to `True`.
1626
+
1607
1627
  """
1608
1628
  cdef Query query = Query.__new__(Query)
1609
1629
  query.ignore_chain = ignore_chain
@@ -1617,5 +1637,6 @@ cdef class Jess:
1617
1637
  molecule._mol,
1618
1638
  distance_cutoff,
1619
1639
  max_dynamic_distance,
1640
+ reorder,
1620
1641
  )
1621
1642
  return query
pyjess/tests/test_jess.py CHANGED
@@ -204,7 +204,7 @@ class TestJess(unittest.TestCase):
204
204
  self.assertAlmostEqual(hit.log_evalue, -2.04, places=1)
205
205
 
206
206
  @unittest.skipUnless(files, "importlib.resources not available")
207
- def test_mcsa_query(self):
207
+ def test_mcsa_query_no_reorder(self):
208
208
  with files(data).joinpath("1.3.3.tpl").open() as f:
209
209
  template = Template.load(f)
210
210
  jess = Jess([template])
@@ -213,7 +213,7 @@ class TestJess(unittest.TestCase):
213
213
  with files(data).joinpath("1AMY+1.3.3.txt").open() as f:
214
214
  results = list(filter(None, f.read().split("REMARK")))
215
215
 
216
- hits = list(jess.query(molecule, 2, 4, 4))
216
+ hits = list(jess.query(molecule, 2, 4, 4, reorder=False))
217
217
  self.assertEqual(len(hits), len(results))
218
218
  for hit, block in zip(hits, results):
219
219
  self.assertIs(hit.template, template)
@@ -251,3 +251,63 @@ class TestJess(unittest.TestCase):
251
251
  self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
252
252
  self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
253
253
 
254
+ @unittest.skipUnless(files, "importlib.resources not available")
255
+ def test_mcsa_query_reorder(self):
256
+ with files(data).joinpath("1.3.3.tpl").open() as f:
257
+ template = Template.load(f)
258
+ jess = Jess([template])
259
+ with files(data).joinpath("1AMY.pdb").open() as f:
260
+ molecule = Molecule.load(f)
261
+ with files(data).joinpath("1AMY+1.3.3.txt").open() as f:
262
+ results = list(filter(None, f.read().split("REMARK")))
263
+
264
+ hits = list(jess.query(molecule, 2, 4, 4, reorder=True))
265
+ self.assertEqual(len(hits), len(results))
266
+
267
+ # `reorder=True` means that we may get results in a different order
268
+ # to Jess, so we need to match the hits in the file by atom serial numbers
269
+ # to make sure we compare them consistently.
270
+
271
+ results_by_serials = {}
272
+ for block in results:
273
+ lines = block.strip().splitlines()
274
+ serials = tuple([ int(line.split()[1]) for line in lines[1:-1] ])
275
+ results_by_serials[serials] = block
276
+
277
+ for hit in hits:
278
+ self.assertIs(hit.template, template)
279
+ block = results_by_serials[tuple(atom.serial for atom in hit.atoms(False))]
280
+
281
+ lines = block.strip().splitlines()
282
+ query_id, rmsd, template_id, _, determinant, _, logE = lines[0].split()
283
+ self.assertEqual(query_id, "1AMY")
284
+ self.assertAlmostEqual(float(rmsd), hit.rmsd, places=3)
285
+ self.assertAlmostEqual(float(determinant), hit.determinant, places=1)
286
+ self.assertAlmostEqual(float(logE), hit.log_evalue, places=1)
287
+
288
+ atom_lines = lines[1:-1]
289
+ atoms = hit.atoms()
290
+ self.assertEqual(len(atoms), len(atom_lines))
291
+ for atom, atom_line in zip(atoms, atom_lines):
292
+ self.assertEqual(atom.serial, int(atom_line[7:12]))
293
+ self.assertEqual(atom.name, atom_line[13:17].strip())
294
+ self.assertEqual(atom.residue_name, atom_line[17:21].strip())
295
+ self.assertEqual(atom.chain_id, atom_line[21:23].strip())
296
+ self.assertEqual(atom.residue_number, int(atom_line[23:27]))
297
+ self.assertAlmostEqual(atom.x, float(atom_line[31:39]), places=3)
298
+ self.assertAlmostEqual(atom.y, float(atom_line[39:47]), places=3)
299
+ self.assertAlmostEqual(atom.z, float(atom_line[47:55]), places=3)
300
+ self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
301
+ self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
302
+
303
+ atoms = hit.atoms(transform=False)
304
+ self.assertEqual(len(atoms), len(atom_lines))
305
+ for atom, atom_line in zip(atoms, atom_lines):
306
+ self.assertEqual(atom.serial, int(atom_line[7:12]))
307
+ self.assertEqual(atom.name, atom_line[13:17].strip())
308
+ self.assertEqual(atom.residue_name, atom_line[17:21].strip())
309
+ self.assertEqual(atom.chain_id, atom_line[21:23].strip())
310
+ self.assertEqual(atom.residue_number, int(atom_line[23:27]))
311
+ self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
312
+ self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
313
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pyjess
3
- Version: 0.5.2
3
+ Version: 0.6.0
4
4
  Summary: Cython bindings and Python interface to JESS, a 3D template matching software.
5
5
  Keywords: bioinformatics,structure,template,matching
6
6
  Author-Email: Martin Larralde <martin.larralde@embl.de>
@@ -92,7 +92,9 @@ during his PhD in the [Thornton group](https://www.ebi.ac.uk/research/thornton/)
92
92
  PyJess is a Python module that provides bindings to Jess using
93
93
  [Cython](https://cython.org/). It allows creating templates, querying them
94
94
  with protein structures, and retrieving the hits using a Python API without
95
- performing any external I/O.
95
+ performing any external I/O. It's also more than 10x faster than Jess thanks to
96
+ [algorithmic optimizations](https://pyjess.readthedocs.io/en/latest/guide/optimizations.html)
97
+ added to improve the original Jess code while producing consistent results.
96
98
 
97
99
 
98
100
  ## 🔧 Installing
@@ -126,7 +128,8 @@ Jess if you are using it in an academic work, for instance as:
126
128
 
127
129
  ## 💡 Example
128
130
 
129
- Load templates to be used as references from different template files:
131
+ Load [`Template`](https://pyjess.readthedocs.io/en/latest/api/template.html#pyjess.Template)
132
+ objects to be used as references from different template files:
130
133
 
131
134
  ```python
132
135
  import pathlib
@@ -134,11 +137,10 @@ import pyjess
134
137
 
135
138
  templates = []
136
139
  for path in sorted(pathlib.Path("vendor/jess/examples").glob("template_*.qry")):
137
- with path.open() as file:
138
- templates.append(pyjess.Template.load(file, id=path.stem))
140
+ templates.append(pyjess.Template.load(path, id=path.stem))
139
141
  ```
140
142
 
141
- Create a `Jess` instance and use it to query a molecule (a PDB structure)
143
+ Create a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess) instance and use it to query a [`Molecule`](https://pyjess.readthedocs.io/en/latest/api/molecule.html#pyjess.Molecule) (a PDB structure)
142
144
  against the stored templates:
143
145
 
144
146
  ```python
@@ -160,9 +162,11 @@ for hit in query:
160
162
 
161
163
  ## 🧶 Thread-safety
162
164
 
163
- Once a `Jess` instance has been created, the templates cannot be edited anymore,
164
- making the `Jess.query` method re-entrant. This allows querying several
165
- molecules against the same templates in parallel using a thread pool:
165
+ Once a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess)
166
+ instance has been created, the templates cannot be edited anymore,
167
+ making the [`Jess.query`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess.query) method re-entrant and thread-safe. This allows querying
168
+ several molecules against the same templates in parallel using e.g. a
169
+ [`ThreadPool`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.ThreadPool):
166
170
 
167
171
  ```python
168
172
  molecules = []
@@ -176,8 +180,22 @@ with multiprocessing.ThreadPool() as pool:
176
180
  *⚠️ Prior to PyJess `v0.2.1`, the Jess code was running some thread-unsafe operations which have now been patched.
177
181
  If running Jess in parallel, make sure to use `v0.2.1` or later to use the code patched with re-entrant functions*.
178
182
 
179
- <!-- ## ⏱️ Benchmarks -->
183
+ ## ⏱️ Benchmarks
180
184
 
185
+ The following table reports the runtime of PyJess to match $n=132$ protein
186
+ structures to the $m=7607$ templates of
187
+ [EnzyMM](https://github.com/RayHackett/enzymm), using $J=12$ threads to parallelize.
188
+
189
+ | Version | Runtime (s) | Match Speed (N * M / (s * J)) | Speedup |
190
+ | ----------- | ----------- | --------------------------- | ----------- |
191
+ | ``v0.4.2`` | 618.1 | 135.4 | N/A |
192
+ | ``v0.5.0`` | 586.3 | 142.7 | x1.05 |
193
+ | ``v0.5.1`` | 365.6 | 228.9 | x1.69 |
194
+ | ``v0.5.2`` | 327.2 | 255.7 | x1.88 |
195
+ | ``v0.6.0`` | 54.5 | 1535.4 | **x11.34** |
196
+
197
+ *Benchmarks were run on a quiet [i7-1255U](https://www.intel.com/content/www/us/en/products/sku/226259/intel-core-i71255u-processor-12m-cache-up-to-4-70-ghz/specifications.html) CPU running @4.70GHz with 10 physical cores / 12 logical
198
+ cores.*
181
199
 
182
200
  ## 💭 Feedback
183
201
 
@@ -210,7 +228,7 @@ This library is provided under the [MIT License](https://choosealicense.com/lice
210
228
  *This project is in no way affiliated, sponsored, or otherwise endorsed
211
229
  by the JESS authors. It was developed
212
230
  by [Martin Larralde](https://github.com/althonos/) during his PhD project
213
- at the [European Molecular Biology Laboratory](https://www.embl.de/) in
231
+ at the [Leiden University Medical Center](https://www.lumc.nl/en/) in
214
232
  the [Zeller team](https://github.com/zellerlab).*
215
233
 
216
234
 
@@ -1,17 +1,17 @@
1
- pyjess-0.5.2.dist-info/RECORD,,
2
- pyjess-0.5.2.dist-info/WHEEL,sha256=3pwvh1oM8-y4943HioszAJjngUGY7ols47C1CujoGoM,156
3
- pyjess-0.5.2.dist-info/METADATA,sha256=mzv50eM3MPlmmNJzhs18ibYOnyTywH3DPJJlTG99SFI,11218
4
- pyjess-0.5.2.dist-info/licenses/COPYING,sha256=gLCfHtBLTrghVX7GkpmZqoozWMNN46502m_OUiYy01Y,1103
1
+ pyjess-0.6.0.dist-info/RECORD,,
2
+ pyjess-0.6.0.dist-info/WHEEL,sha256=3pwvh1oM8-y4943HioszAJjngUGY7ols47C1CujoGoM,156
3
+ pyjess-0.6.0.dist-info/METADATA,sha256=KOWfj5ekQjz2OW7isBjI-3rcV-FUghBm06cilWhL9g8,12852
4
+ pyjess-0.6.0.dist-info/licenses/COPYING,sha256=gLCfHtBLTrghVX7GkpmZqoozWMNN46502m_OUiYy01Y,1103
5
5
  pyjess/CMakeLists.txt,sha256=Oa0pniEQx9jXyFCJGyrswn9ahWSSVuW1madyeP6StoI,35
6
- pyjess/_jess.pyi,sha256=PhDo0l5LoYyLQPC2l-ZwyXKmyrC7y8T0i-yQYwcx--4,7015
7
- pyjess/_jess.pyx,sha256=7fZoPrLnUBXmE9eCq1wiBBrbGIcuOxyn9Szbct-nBKw,51879
6
+ pyjess/_jess.pyi,sha256=3FLL_E4q_fWlThfTwnas56zpdDFoN5z_xYD0aY-0-dk,7045
7
+ pyjess/_jess.pyx,sha256=5qCasXyFa0e2BfhiFfx5Qlt7xiZeb7CFfwzfFgfVfTs,53007
8
8
  pyjess/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  pyjess/.gitignore,sha256=uQBOufp4v50qn0aZKv6zbSo00cjfB-v9KySog7rlmIU,19
10
10
  pyjess/__init__.py,sha256=h4XXLdS4FnyVa-MBs_k3eZMG1jWxeiOJnwfBaJA9gyQ,745
11
- pyjess/_jess.cpython-37m-aarch64-linux-gnu.so,sha256=xr-CZBkKBjXx8bduOl8XyTbB1SH_jBu_sDEXGAAPSyc,417184
11
+ pyjess/_jess.cpython-37m-aarch64-linux-gnu.so,sha256=t7zkD0eoFEg6LY9p4EJ289podnT_bBmDGW5mg4_l9lY,418096
12
12
  pyjess/tests/test_template_atom.py,sha256=s9tJ_SAgvKeGwbVjaTWY-EtsUeQp3eu4NF5ja3oO_84,3405
13
13
  pyjess/tests/test_hit.py,sha256=qN0qcGWHdvM9PZzBLWwuORhAXaZLp9c-CuZgO3GAbr8,1212
14
- pyjess/tests/test_jess.py,sha256=tEu0K2B2-zLEMPFgQ1wk7lXhsZuRDyMzm58crT6G_ZY,11363
14
+ pyjess/tests/test_jess.py,sha256=qNXtgQ9IWJ0NH4MRhFPMQ54ocpLaxwJRP-0lZv_N0Ns,14710
15
15
  pyjess/tests/test_atom.py,sha256=omNznNbRXMDt2j1plAUlfWPGCfmtkYpj2jysEX1zQuY,4631
16
16
  pyjess/tests/test_template.py,sha256=XMLELYRB4j7xavziZ-ntq15PjhNHNfJJkctUq9BkvEI,4541
17
17
  pyjess/tests/test_molecule.py,sha256=RHBE0yL_j71nQOwXJ6t_PzkZi4BaFPY5Q0VwKWM6elk,5311
File without changes