pyjess 0.5.2__cp313-cp313-macosx_11_0_arm64.whl → 0.6.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjess might be problematic. Click here for more details.

Binary file
pyjess/_jess.pyi CHANGED
@@ -221,4 +221,5 @@ class Jess(Generic[_T], Sequence[_T]):
221
221
  max_candidates: int = 1000,
222
222
  ignore_chain: bool = False,
223
223
  best_match: bool = False,
224
+ reorder: bool = True,
224
225
  ) -> Query[_T]: ...
pyjess/_jess.pyx CHANGED
@@ -329,8 +329,9 @@ cdef class Atom:
329
329
  atom metadata from.
330
330
 
331
331
  """
332
- cdef bytearray b
333
- cdef Atom atom
332
+ cdef const unsigned char* s
333
+ cdef bytearray b
334
+ cdef Atom atom
334
335
 
335
336
  if isinstance(text, str):
336
337
  b = bytearray(text, 'utf-8')
@@ -339,14 +340,15 @@ cdef class Atom:
339
340
  if not b.endswith(b'\n'):
340
341
  b.append(b'\n')
341
342
  b.append(b'\0')
343
+ s = b
342
344
 
343
345
  atom = cls.__new__(cls)
344
- atom._atom = <_Atom*> malloc(sizeof(_Atom))
345
- if atom._atom == NULL:
346
- raise MemoryError("Failed to allocate atom")
347
-
348
- if not jess.atom.Atom_parse(atom._atom, b):
349
- raise ValueError(f"Failed to parse atom: {text!r}")
346
+ with nogil:
347
+ atom._atom = <_Atom*> malloc(sizeof(_Atom))
348
+ if atom._atom == NULL:
349
+ raise MemoryError("Failed to allocate atom")
350
+ if not jess.atom.Atom_parse(atom._atom, <const char*> s):
351
+ raise ValueError(f"Failed to parse atom: {text!r}")
350
352
 
351
353
  return atom
352
354
 
@@ -1582,6 +1584,7 @@ cdef class Jess:
1582
1584
  int max_candidates = 1000,
1583
1585
  bint ignore_chain = False,
1584
1586
  bint best_match = False,
1587
+ bint reorder = True,
1585
1588
  ):
1586
1589
  """Scan for templates matching the given molecule.
1587
1590
 
@@ -1600,10 +1603,27 @@ cdef class Jess:
1600
1603
  the atoms to match.
1601
1604
  best_match (`bool`): Pass `True` to return only the best match
1602
1605
  to each template.
1606
+ reorder (`bool`): Whether to enable template atom reordering
1607
+ to accelerate matching in the scanner algorithm. Pass
1608
+ `False` to revert to the original, slower algorithm
1609
+ which matches atoms in the same order as they appear in
1610
+ the template, at the cost of a longer runtime.
1603
1611
 
1604
1612
  Returns:
1605
1613
  `~pyjess.Query`: An iterator over the query hits.
1606
1614
 
1615
+ Caution:
1616
+ Since ``v0.6.0``, this function uses an optimized variant of
1617
+ the Jess scanning algorithm which minimizes the number of steps
1618
+ needed to generate matches, by changing the order in which the
1619
+ template atoms are iterated upon. Because of this change,
1620
+ the query may return *exactly* the same matches but in an order
1621
+ that *differs* from the original Jess version. If you really
1622
+ need results in the original order, set ``reorder`` to `False`.
1623
+
1624
+ .. versionadded:: 0.6.0
1625
+ The ``reorder`` argument, defaulting to `True`.
1626
+
1607
1627
  """
1608
1628
  cdef Query query = Query.__new__(Query)
1609
1629
  query.ignore_chain = ignore_chain
@@ -1617,5 +1637,6 @@ cdef class Jess:
1617
1637
  molecule._mol,
1618
1638
  distance_cutoff,
1619
1639
  max_dynamic_distance,
1640
+ reorder,
1620
1641
  )
1621
1642
  return query
pyjess/tests/test_jess.py CHANGED
@@ -204,7 +204,7 @@ class TestJess(unittest.TestCase):
204
204
  self.assertAlmostEqual(hit.log_evalue, -2.04, places=1)
205
205
 
206
206
  @unittest.skipUnless(files, "importlib.resources not available")
207
- def test_mcsa_query(self):
207
+ def test_mcsa_query_no_reorder(self):
208
208
  with files(data).joinpath("1.3.3.tpl").open() as f:
209
209
  template = Template.load(f)
210
210
  jess = Jess([template])
@@ -213,7 +213,7 @@ class TestJess(unittest.TestCase):
213
213
  with files(data).joinpath("1AMY+1.3.3.txt").open() as f:
214
214
  results = list(filter(None, f.read().split("REMARK")))
215
215
 
216
- hits = list(jess.query(molecule, 2, 4, 4))
216
+ hits = list(jess.query(molecule, 2, 4, 4, reorder=False))
217
217
  self.assertEqual(len(hits), len(results))
218
218
  for hit, block in zip(hits, results):
219
219
  self.assertIs(hit.template, template)
@@ -251,3 +251,63 @@ class TestJess(unittest.TestCase):
251
251
  self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
252
252
  self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
253
253
 
254
+ @unittest.skipUnless(files, "importlib.resources not available")
255
+ def test_mcsa_query_reorder(self):
256
+ with files(data).joinpath("1.3.3.tpl").open() as f:
257
+ template = Template.load(f)
258
+ jess = Jess([template])
259
+ with files(data).joinpath("1AMY.pdb").open() as f:
260
+ molecule = Molecule.load(f)
261
+ with files(data).joinpath("1AMY+1.3.3.txt").open() as f:
262
+ results = list(filter(None, f.read().split("REMARK")))
263
+
264
+ hits = list(jess.query(molecule, 2, 4, 4, reorder=True))
265
+ self.assertEqual(len(hits), len(results))
266
+
267
+ # `reorder=True` means that we may get results in a different order
268
+ # to Jess, so we need to match the hits in the file by atom serials
269
+ # to make sure we compare them consistently.
270
+
271
+ results_by_serials = {}
272
+ for block in results:
273
+ lines = block.strip().splitlines()
274
+ serials = tuple([ int(line.split()[1]) for line in lines[1:-1] ])
275
+ results_by_serials[serials] = block
276
+
277
+ for hit in hits:
278
+ self.assertIs(hit.template, template)
279
+ block = results_by_serials[tuple(atom.serial for atom in hit.atoms(False))]
280
+
281
+ lines = block.strip().splitlines()
282
+ query_id, rmsd, template_id, _, determinant, _, logE = lines[0].split()
283
+ self.assertEqual(query_id, "1AMY")
284
+ self.assertAlmostEqual(float(rmsd), hit.rmsd, places=3)
285
+ self.assertAlmostEqual(float(determinant), hit.determinant, places=1)
286
+ self.assertAlmostEqual(float(logE), hit.log_evalue, places=1)
287
+
288
+ atom_lines = lines[1:-1]
289
+ atoms = hit.atoms()
290
+ self.assertEqual(len(atoms), len(atom_lines))
291
+ for atom, atom_line in zip(atoms, atom_lines):
292
+ self.assertEqual(atom.serial, int(atom_line[7:12]))
293
+ self.assertEqual(atom.name, atom_line[13:17].strip())
294
+ self.assertEqual(atom.residue_name, atom_line[17:21].strip())
295
+ self.assertEqual(atom.chain_id, atom_line[21:23].strip())
296
+ self.assertEqual(atom.residue_number, int(atom_line[23:27]))
297
+ self.assertAlmostEqual(atom.x, float(atom_line[31:39]), places=3)
298
+ self.assertAlmostEqual(atom.y, float(atom_line[39:47]), places=3)
299
+ self.assertAlmostEqual(atom.z, float(atom_line[47:55]), places=3)
300
+ self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
301
+ self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
302
+
303
+ atoms = hit.atoms(transform=False)
304
+ self.assertEqual(len(atoms), len(atom_lines))
305
+ for atom, atom_line in zip(atoms, atom_lines):
306
+ self.assertEqual(atom.serial, int(atom_line[7:12]))
307
+ self.assertEqual(atom.name, atom_line[13:17].strip())
308
+ self.assertEqual(atom.residue_name, atom_line[17:21].strip())
309
+ self.assertEqual(atom.chain_id, atom_line[21:23].strip())
310
+ self.assertEqual(atom.residue_number, int(atom_line[23:27]))
311
+ self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
312
+ self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
313
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pyjess
3
- Version: 0.5.2
3
+ Version: 0.6.0
4
4
  Summary: Cython bindings and Python interface to JESS, a 3D template matching software.
5
5
  Keywords: bioinformatics,structure,template,matching
6
6
  Author-Email: Martin Larralde <martin.larralde@embl.de>
@@ -93,7 +93,9 @@ during his PhD in the [Thornton group](https://www.ebi.ac.uk/research/thornton/)
93
93
  PyJess is a Python module that provides bindings to Jess using
94
94
  [Cython](https://cython.org/). It allows creating templates, querying them
95
95
  with protein structures, and retrieving the hits using a Python API without
96
- performing any external I/O.
96
+ performing any external I/O. It's also more than 10x faster than Jess thanks to
97
+ [algorithmic optimizations](https://pyjess.readthedocs.io/en/latest/guide/optimizations.html)
98
+ added to improve the original Jess code while producing consistent results.
97
99
 
98
100
 
99
101
  ## 🔧 Installing
@@ -127,7 +129,8 @@ Jess if you are using it in an academic work, for instance as:
127
129
 
128
130
  ## 💡 Example
129
131
 
130
- Load templates to be used as references from different template files:
132
+ Load [`Template`](https://pyjess.readthedocs.io/en/latest/api/template.html#pyjess.Template)
133
+ objects to be used as references from different template files:
131
134
 
132
135
  ```python
133
136
  import pathlib
@@ -135,11 +138,10 @@ import pyjess
135
138
 
136
139
  templates = []
137
140
  for path in sorted(pathlib.Path("vendor/jess/examples").glob("template_*.qry")):
138
- with path.open() as file:
139
- templates.append(pyjess.Template.load(file, id=path.stem))
141
+ templates.append(pyjess.Template.load(path, id=path.stem))
140
142
  ```
141
143
 
142
- Create a `Jess` instance and use it to query a molecule (a PDB structure)
144
+ Create a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess) instance and use it to query a [`Molecule`](https://pyjess.readthedocs.io/en/latest/api/molecule.html#pyjess.Molecule) (a PDB structure)
143
145
  against the stored templates:
144
146
 
145
147
  ```python
@@ -161,9 +163,11 @@ for hit in query:
161
163
 
162
164
  ## 🧶 Thread-safety
163
165
 
164
- Once a `Jess` instance has been created, the templates cannot be edited anymore,
165
- making the `Jess.query` method re-entrant. This allows querying several
166
- molecules against the same templates in parallel using a thread pool:
166
+ Once a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess)
167
+ instance has been created, the templates cannot be edited anymore,
168
+ making the [`Jess.query`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess.query) method re-entrant and thread-safe. This allows querying
169
+ several molecules against the same templates in parallel using e.g a
170
+ [`ThreadPool`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.ThreadPool):
167
171
 
168
172
  ```python
169
173
  molecules = []
@@ -177,8 +181,22 @@ with multiprocessing.ThreadPool() as pool:
177
181
  *⚠️ Prior to PyJess `v0.2.1`, the Jess code was running some thread-unsafe operations which have now been patched.
178
182
  If running Jess in parallel, make sure to use `v0.2.1` or later to use the code patched with re-entrant functions*.
179
183
 
180
- <!-- ## ⏱️ Benchmarks -->
184
+ ## ⏱️ Benchmarks
181
185
 
186
+ The following table reports the runtime of PyJess to match $n=132$ protein
187
+ structures to the $m=7607$ templates of
188
+ [EnzyMM](https://github.com/RayHackett/enzymm), using $J=12$ threads to parallelize.
189
+
190
+ | Version | Runtime (s) | Match Speed (N * M / (s*J)) | Speedup |
191
+ | ----------- | ----------- | --------------------------- | ----------- |
192
+ | ``v0.4.2`` | 618.1 | 135.4 | N/A |
193
+ | ``v0.5.0`` | 586.3 | 142.7 | x1.05 |
194
+ | ``v0.5.1`` | 365.6 | 228.9 | x1.69 |
195
+ | ``v0.5.2`` | 327.2 | 255.7 | x1.88 |
196
+ | ``v0.6.0`` | 54.5 | 1535.4 | **x11.34** |
197
+
198
+ *Benchmarks were run on a quiet [i7-1255U](https://www.intel.com/content/www/us/en/products/sku/226259/intel-core-i71255u-processor-12m-cache-up-to-4-70-ghz/specifications.html) CPU running @4.70GHz with 10 physical cores / 12 logical
199
+ cores.*
182
200
 
183
201
  ## 💭 Feedback
184
202
 
@@ -211,7 +229,7 @@ This library is provided under the [MIT License](https://choosealicense.com/lice
211
229
  *This project is in no way affiliated, sponsored, or otherwise endorsed
212
230
  by the JESS authors. It was developed
213
231
  by [Martin Larralde](https://github.com/althonos/) during his PhD project
214
- at the [European Molecular Biology Laboratory](https://www.embl.de/) in
232
+ at the [Leiden University Medical Center](https://www.lumc.nl/en/) in
215
233
  the [Zeller team](https://github.com/zellerlab).*
216
234
 
217
235
 
@@ -1,20 +1,20 @@
1
- pyjess-0.5.2.dist-info/RECORD,,
2
- pyjess-0.5.2.dist-info/WHEEL,sha256=xL_I_TleH-YV9h3oMcQi1F0DSqA1tyOenDLO1uzk5E8,114
3
- pyjess-0.5.2.dist-info/METADATA,sha256=tsCMCW4Qh7vkECoA8aybf9sb0I8QTcdifZ7P7SDm1oI,11248
4
- pyjess-0.5.2.dist-info/licenses/COPYING,sha256=gLCfHtBLTrghVX7GkpmZqoozWMNN46502m_OUiYy01Y,1103
5
- pyjess/_jess.pyi,sha256=PhDo0l5LoYyLQPC2l-ZwyXKmyrC7y8T0i-yQYwcx--4,7015
1
+ pyjess-0.6.0.dist-info/RECORD,,
2
+ pyjess-0.6.0.dist-info/WHEEL,sha256=xL_I_TleH-YV9h3oMcQi1F0DSqA1tyOenDLO1uzk5E8,114
3
+ pyjess-0.6.0.dist-info/METADATA,sha256=OG5zbwpmxVLSmlQQ6xvyicRYKB6JrLBkDa-zhmWa6js,12882
4
+ pyjess-0.6.0.dist-info/licenses/COPYING,sha256=gLCfHtBLTrghVX7GkpmZqoozWMNN46502m_OUiYy01Y,1103
5
+ pyjess/_jess.pyi,sha256=3FLL_E4q_fWlThfTwnas56zpdDFoN5z_xYD0aY-0-dk,7045
6
6
  pyjess/CMakeLists.txt,sha256=Oa0pniEQx9jXyFCJGyrswn9ahWSSVuW1madyeP6StoI,35
7
- pyjess/_jess.cpython-313-darwin.so,sha256=-TNSeSLkDrCyuEzaFTNXUuDyywCJRTdSwaolGy01O0U,367736
7
+ pyjess/_jess.cpython-313-darwin.so,sha256=qFT8uG8RvbvbeELpaICQNnQhVaADGJB8UTIeNNMMqaM,368248
8
8
  pyjess/__init__.py,sha256=h4XXLdS4FnyVa-MBs_k3eZMG1jWxeiOJnwfBaJA9gyQ,745
9
9
  pyjess/.gitignore,sha256=uQBOufp4v50qn0aZKv6zbSo00cjfB-v9KySog7rlmIU,19
10
- pyjess/_jess.pyx,sha256=7fZoPrLnUBXmE9eCq1wiBBrbGIcuOxyn9Szbct-nBKw,51879
10
+ pyjess/_jess.pyx,sha256=5qCasXyFa0e2BfhiFfx5Qlt7xiZeb7CFfwzfFgfVfTs,53007
11
11
  pyjess/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  pyjess/tests/__init__.py,sha256=ka1wkfXesesZ3f6p5Dg55P4YYyS3R4gKRmhpQRtWVec,599
13
13
  pyjess/tests/test_molecule.py,sha256=RHBE0yL_j71nQOwXJ6t_PzkZi4BaFPY5Q0VwKWM6elk,5311
14
14
  pyjess/tests/utils.py,sha256=dsaphex7qomJCvSHWnVy79iYDPGiL59xqGAtRoVAeWc,196
15
15
  pyjess/tests/test_hit.py,sha256=qN0qcGWHdvM9PZzBLWwuORhAXaZLp9c-CuZgO3GAbr8,1212
16
16
  pyjess/tests/test_atom.py,sha256=omNznNbRXMDt2j1plAUlfWPGCfmtkYpj2jysEX1zQuY,4631
17
- pyjess/tests/test_jess.py,sha256=tEu0K2B2-zLEMPFgQ1wk7lXhsZuRDyMzm58crT6G_ZY,11363
17
+ pyjess/tests/test_jess.py,sha256=qNXtgQ9IWJ0NH4MRhFPMQ54ocpLaxwJRP-0lZv_N0Ns,14710
18
18
  pyjess/tests/test_template.py,sha256=XMLELYRB4j7xavziZ-ntq15PjhNHNfJJkctUq9BkvEI,4541
19
19
  pyjess/tests/test_template_atom.py,sha256=s9tJ_SAgvKeGwbVjaTWY-EtsUeQp3eu4NF5ja3oO_84,3405
20
20
  pyjess/tests/data/pdb1lnb.pdb,sha256=E9Jjy4qQ75O1UKIXcVyVJHE1XDNx1Rb7ENPVrehW6N8,270054
File without changes