pyjess 0.5.2__cp38-cp38-win_amd64.whl → 0.6.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjess might be problematic. Click here for more details.
- pyjess/_jess.cp38-win_amd64.pyd +0 -0
- pyjess/_jess.pyi +1 -0
- pyjess/_jess.pyx +29 -8
- pyjess/tests/test_jess.py +62 -2
- {pyjess-0.5.2.dist-info → pyjess-0.6.0.dist-info}/METADATA +29 -11
- {pyjess-0.5.2.dist-info → pyjess-0.6.0.dist-info}/RECORD +8 -8
- {pyjess-0.5.2.dist-info → pyjess-0.6.0.dist-info}/WHEEL +0 -0
- {pyjess-0.5.2.dist-info → pyjess-0.6.0.dist-info}/licenses/COPYING +0 -0
pyjess/_jess.cp38-win_amd64.pyd
CHANGED
|
Binary file
|
pyjess/_jess.pyi
CHANGED
pyjess/_jess.pyx
CHANGED
|
@@ -329,8 +329,9 @@ cdef class Atom:
|
|
|
329
329
|
atom metadata from.
|
|
330
330
|
|
|
331
331
|
"""
|
|
332
|
-
cdef
|
|
333
|
-
cdef
|
|
332
|
+
cdef const unsigned char* s
|
|
333
|
+
cdef bytearray b
|
|
334
|
+
cdef Atom atom
|
|
334
335
|
|
|
335
336
|
if isinstance(text, str):
|
|
336
337
|
b = bytearray(text, 'utf-8')
|
|
@@ -339,14 +340,15 @@ cdef class Atom:
|
|
|
339
340
|
if not b.endswith(b'\n'):
|
|
340
341
|
b.append(b'\n')
|
|
341
342
|
b.append(b'\0')
|
|
343
|
+
s = b
|
|
342
344
|
|
|
343
345
|
atom = cls.__new__(cls)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
346
|
+
with nogil:
|
|
347
|
+
atom._atom = <_Atom*> malloc(sizeof(_Atom))
|
|
348
|
+
if atom._atom == NULL:
|
|
349
|
+
raise MemoryError("Failed to allocate atom")
|
|
350
|
+
if not jess.atom.Atom_parse(atom._atom, <const char*> s):
|
|
351
|
+
raise ValueError(f"Failed to parse atom: {text!r}")
|
|
350
352
|
|
|
351
353
|
return atom
|
|
352
354
|
|
|
@@ -1582,6 +1584,7 @@ cdef class Jess:
|
|
|
1582
1584
|
int max_candidates = 1000,
|
|
1583
1585
|
bint ignore_chain = False,
|
|
1584
1586
|
bint best_match = False,
|
|
1587
|
+
bint reorder = True,
|
|
1585
1588
|
):
|
|
1586
1589
|
"""Scan for templates matching the given molecule.
|
|
1587
1590
|
|
|
@@ -1600,10 +1603,27 @@ cdef class Jess:
|
|
|
1600
1603
|
the atoms to match.
|
|
1601
1604
|
best_match (`bool`): Pass `True` to return only the best match
|
|
1602
1605
|
to each template.
|
|
1606
|
+
reorder (`bool`): Whether to enable template atom reordering
|
|
1607
|
+
to accelerate matching in the scanner algorithm. Pass
|
|
1608
|
+
`False` to revert to the original, slower algorithm
|
|
1609
|
+
which matches atoms in the same order as they appear in
|
|
1610
|
+
the template, at the cost of a longer runtime.
|
|
1603
1611
|
|
|
1604
1612
|
Returns:
|
|
1605
1613
|
`~pyjess.Query`: An iterator over the query hits.
|
|
1606
1614
|
|
|
1615
|
+
Caution:
|
|
1616
|
+
Since ``v0.6.0``, this function uses an optimized variant of
|
|
1617
|
+
the Jess scanning algorithm which minimizes the number of steps
|
|
1618
|
+
needed to generate matches, by changing the order in which the
|
|
1619
|
+
template atoms are iterated upon. Because of this change,
|
|
1620
|
+
the query may return *exactly* the same matches but in an order
|
|
1621
|
+
that *differs* from the original Jess version. If you really
|
|
1622
|
+
need results in the original order, set ``reorder`` to `False`.
|
|
1623
|
+
|
|
1624
|
+
.. versionadded:: 0.6.0
|
|
1625
|
+
The ``reorder`` argument, defaulting to `True`.
|
|
1626
|
+
|
|
1607
1627
|
"""
|
|
1608
1628
|
cdef Query query = Query.__new__(Query)
|
|
1609
1629
|
query.ignore_chain = ignore_chain
|
|
@@ -1617,5 +1637,6 @@ cdef class Jess:
|
|
|
1617
1637
|
molecule._mol,
|
|
1618
1638
|
distance_cutoff,
|
|
1619
1639
|
max_dynamic_distance,
|
|
1640
|
+
reorder,
|
|
1620
1641
|
)
|
|
1621
1642
|
return query
|
pyjess/tests/test_jess.py
CHANGED
|
@@ -204,7 +204,7 @@ class TestJess(unittest.TestCase):
|
|
|
204
204
|
self.assertAlmostEqual(hit.log_evalue, -2.04, places=1)
|
|
205
205
|
|
|
206
206
|
@unittest.skipUnless(files, "importlib.resources not available")
|
|
207
|
-
def
|
|
207
|
+
def test_mcsa_query_no_reorder(self):
|
|
208
208
|
with files(data).joinpath("1.3.3.tpl").open() as f:
|
|
209
209
|
template = Template.load(f)
|
|
210
210
|
jess = Jess([template])
|
|
@@ -213,7 +213,7 @@ class TestJess(unittest.TestCase):
|
|
|
213
213
|
with files(data).joinpath("1AMY+1.3.3.txt").open() as f:
|
|
214
214
|
results = list(filter(None, f.read().split("REMARK")))
|
|
215
215
|
|
|
216
|
-
hits = list(jess.query(molecule, 2, 4, 4))
|
|
216
|
+
hits = list(jess.query(molecule, 2, 4, 4, reorder=False))
|
|
217
217
|
self.assertEqual(len(hits), len(results))
|
|
218
218
|
for hit, block in zip(hits, results):
|
|
219
219
|
self.assertIs(hit.template, template)
|
|
@@ -251,3 +251,63 @@ class TestJess(unittest.TestCase):
|
|
|
251
251
|
self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
|
|
252
252
|
self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
|
|
253
253
|
|
|
254
|
+
@unittest.skipUnless(files, "importlib.resources not available")
|
|
255
|
+
def test_mcsa_query_reorder(self):
|
|
256
|
+
with files(data).joinpath("1.3.3.tpl").open() as f:
|
|
257
|
+
template = Template.load(f)
|
|
258
|
+
jess = Jess([template])
|
|
259
|
+
with files(data).joinpath("1AMY.pdb").open() as f:
|
|
260
|
+
molecule = Molecule.load(f)
|
|
261
|
+
with files(data).joinpath("1AMY+1.3.3.txt").open() as f:
|
|
262
|
+
results = list(filter(None, f.read().split("REMARK")))
|
|
263
|
+
|
|
264
|
+
hits = list(jess.query(molecule, 2, 4, 4, reorder=True))
|
|
265
|
+
self.assertEqual(len(hits), len(results))
|
|
266
|
+
|
|
267
|
+
# `reorder=True` means that we may get results in a different order
|
|
268
|
+
# to Jess, so we need to match the hits in the file by atom serials
|
|
269
|
+
# to make sure we compare them consistently.
|
|
270
|
+
|
|
271
|
+
results_by_serials = {}
|
|
272
|
+
for block in results:
|
|
273
|
+
lines = block.strip().splitlines()
|
|
274
|
+
serials = tuple([ int(line.split()[1]) for line in lines[1:-1] ])
|
|
275
|
+
results_by_serials[serials] = block
|
|
276
|
+
|
|
277
|
+
for hit in hits:
|
|
278
|
+
self.assertIs(hit.template, template)
|
|
279
|
+
block = results_by_serials[tuple(atom.serial for atom in hit.atoms(False))]
|
|
280
|
+
|
|
281
|
+
lines = block.strip().splitlines()
|
|
282
|
+
query_id, rmsd, template_id, _, determinant, _, logE = lines[0].split()
|
|
283
|
+
self.assertEqual(query_id, "1AMY")
|
|
284
|
+
self.assertAlmostEqual(float(rmsd), hit.rmsd, places=3)
|
|
285
|
+
self.assertAlmostEqual(float(determinant), hit.determinant, places=1)
|
|
286
|
+
self.assertAlmostEqual(float(logE), hit.log_evalue, places=1)
|
|
287
|
+
|
|
288
|
+
atom_lines = lines[1:-1]
|
|
289
|
+
atoms = hit.atoms()
|
|
290
|
+
self.assertEqual(len(atoms), len(atom_lines))
|
|
291
|
+
for atom, atom_line in zip(atoms, atom_lines):
|
|
292
|
+
self.assertEqual(atom.serial, int(atom_line[7:12]))
|
|
293
|
+
self.assertEqual(atom.name, atom_line[13:17].strip())
|
|
294
|
+
self.assertEqual(atom.residue_name, atom_line[17:21].strip())
|
|
295
|
+
self.assertEqual(atom.chain_id, atom_line[21:23].strip())
|
|
296
|
+
self.assertEqual(atom.residue_number, int(atom_line[23:27]))
|
|
297
|
+
self.assertAlmostEqual(atom.x, float(atom_line[31:39]), places=3)
|
|
298
|
+
self.assertAlmostEqual(atom.y, float(atom_line[39:47]), places=3)
|
|
299
|
+
self.assertAlmostEqual(atom.z, float(atom_line[47:55]), places=3)
|
|
300
|
+
self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
|
|
301
|
+
self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
|
|
302
|
+
|
|
303
|
+
atoms = hit.atoms(transform=False)
|
|
304
|
+
self.assertEqual(len(atoms), len(atom_lines))
|
|
305
|
+
for atom, atom_line in zip(atoms, atom_lines):
|
|
306
|
+
self.assertEqual(atom.serial, int(atom_line[7:12]))
|
|
307
|
+
self.assertEqual(atom.name, atom_line[13:17].strip())
|
|
308
|
+
self.assertEqual(atom.residue_name, atom_line[17:21].strip())
|
|
309
|
+
self.assertEqual(atom.chain_id, atom_line[21:23].strip())
|
|
310
|
+
self.assertEqual(atom.residue_number, int(atom_line[23:27]))
|
|
311
|
+
self.assertAlmostEqual(atom.occupancy, float(atom_line[55:61]), places=3)
|
|
312
|
+
self.assertAlmostEqual(atom.temperature_factor, float(atom_line[61:67]), places=3)
|
|
313
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: pyjess
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Cython bindings and Python interface to JESS, a 3D template matching software.
|
|
5
5
|
Keywords: bioinformatics,structure,template,matching
|
|
6
6
|
Author-Email: Martin Larralde <martin.larralde@embl.de>
|
|
@@ -93,7 +93,9 @@ during his PhD in the [Thornton group](https://www.ebi.ac.uk/research/thornton/)
|
|
|
93
93
|
PyJess is a Python module that provides bindings to Jess using
|
|
94
94
|
[Cython](https://cython.org/). It allows creating templates, querying them
|
|
95
95
|
with protein structures, and retrieving the hits using a Python API without
|
|
96
|
-
performing any external I/O.
|
|
96
|
+
performing any external I/O. It's also more than 10x faster than Jess thanks to
|
|
97
|
+
[algorithmic optimizations](https://pyjess.readthedocs.io/en/latest/guide/optimizations.html)
|
|
98
|
+
added to improve the original Jess code while producing consistent results.
|
|
97
99
|
|
|
98
100
|
|
|
99
101
|
## 🔧 Installing
|
|
@@ -127,7 +129,8 @@ Jess if you are using it in an academic work, for instance as:
|
|
|
127
129
|
|
|
128
130
|
## 💡 Example
|
|
129
131
|
|
|
130
|
-
Load
|
|
132
|
+
Load [`Template`](https://pyjess.readthedocs.io/en/latest/api/template.html#pyjess.Template)
|
|
133
|
+
objects to be used as references from different template files:
|
|
131
134
|
|
|
132
135
|
```python
|
|
133
136
|
import pathlib
|
|
@@ -135,11 +138,10 @@ import pyjess
|
|
|
135
138
|
|
|
136
139
|
templates = []
|
|
137
140
|
for path in sorted(pathlib.Path("vendor/jess/examples").glob("template_*.qry")):
|
|
138
|
-
|
|
139
|
-
templates.append(pyjess.Template.load(file, id=path.stem))
|
|
141
|
+
templates.append(pyjess.Template.load(path, id=path.stem))
|
|
140
142
|
```
|
|
141
143
|
|
|
142
|
-
Create a `Jess` instance and use it to query a molecule (a PDB structure)
|
|
144
|
+
Create a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess) instance and use it to query a [`Molecule`](https://pyjess.readthedocs.io/en/latest/api/molecule.html#pyjess.Molecule) (a PDB structure)
|
|
143
145
|
against the stored templates:
|
|
144
146
|
|
|
145
147
|
```python
|
|
@@ -161,9 +163,11 @@ for hit in query:
|
|
|
161
163
|
|
|
162
164
|
## 🧶 Thread-safety
|
|
163
165
|
|
|
164
|
-
Once a `Jess`
|
|
165
|
-
|
|
166
|
-
|
|
166
|
+
Once a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess)
|
|
167
|
+
instance has been created, the templates cannot be edited anymore,
|
|
168
|
+
making the [`Jess.query`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess.query) method re-entrant and thread-safe. This allows querying
|
|
169
|
+
several molecules against the same templates in parallel using e.g. a
|
|
170
|
+
[`ThreadPool`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.ThreadPool):
|
|
167
171
|
|
|
168
172
|
```python
|
|
169
173
|
molecules = []
|
|
@@ -177,8 +181,22 @@ with multiprocessing.ThreadPool() as pool:
|
|
|
177
181
|
*⚠️ Prior to PyJess `v0.2.1`, the Jess code was running some thread-unsafe operations which have now been patched.
|
|
178
182
|
If running Jess in parallel, make sure to use `v0.2.1` or later to use the code patched with re-entrant functions*.
|
|
179
183
|
|
|
180
|
-
|
|
184
|
+
## ⏱️ Benchmarks
|
|
181
185
|
|
|
186
|
+
The following table reports the runtime of PyJess to match $n=132$ protein
|
|
187
|
+
structures to the $m=7607$ templates of
|
|
188
|
+
[EnzyMM](https://github.com/RayHackett/enzymm), using $J=12$ threads to parallelize.
|
|
189
|
+
|
|
190
|
+
| Version | Runtime (s) | Match Speed (N * M / s * J) | Speedup |
|
|
191
|
+
| ----------- | ----------- | --------------------------- | ----------- |
|
|
192
|
+
| ``v0.4.2`` | 618.1 | 135.4 | N/A |
|
|
193
|
+
| ``v0.5.0`` | 586.3 | 142.7 | x1.05 |
|
|
194
|
+
| ``v0.5.1`` | 365.6 | 228.9 | x1.69 |
|
|
195
|
+
| ``v0.5.2`` | 327.2 | 255.7 | x1.88 |
|
|
196
|
+
| ``v0.6.0`` | 54.5 | 1535.4 | **x11.34** |
|
|
197
|
+
|
|
198
|
+
*Benchmarks were run on a quiet [i7-1255U](https://www.intel.com/content/www/us/en/products/sku/226259/intel-core-i71255u-processor-12m-cache-up-to-4-70-ghz/specifications.html) CPU running @4.70GHz with 10 physical cores / 12 logical
|
|
199
|
+
cores.*
|
|
182
200
|
|
|
183
201
|
## 💭 Feedback
|
|
184
202
|
|
|
@@ -211,7 +229,7 @@ This library is provided under the [MIT License](https://choosealicense.com/lice
|
|
|
211
229
|
*This project is in no way affiliated, sponsored, or otherwise endorsed
|
|
212
230
|
by the JESS authors. It was developed
|
|
213
231
|
by [Martin Larralde](https://github.com/althonos/) during his PhD project
|
|
214
|
-
at the [
|
|
232
|
+
at the [Leiden University Medical Center](https://www.lumc.nl/en/) in
|
|
215
233
|
the [Zeller team](https://github.com/zellerlab).*
|
|
216
234
|
|
|
217
235
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
pyjess/.gitignore,sha256=u14v4OOy8U50Kp9SUKU8DupCG-mQIuel47gdbNDmAwg,21
|
|
2
2
|
pyjess/__init__.py,sha256=Xe9GBQUBm9ik-ty5tcE3UQ9Ip1p-C_IGvTPuGULolng,766
|
|
3
|
-
pyjess/_jess.cp38-win_amd64.pyd,sha256=
|
|
4
|
-
pyjess/_jess.pyi,sha256=
|
|
5
|
-
pyjess/_jess.pyx,sha256=
|
|
3
|
+
pyjess/_jess.cp38-win_amd64.pyd,sha256=BVPvaCEC3lKRCs9RnpKUFqlfis49AZo64PvK4s3Y8X8,320512
|
|
4
|
+
pyjess/_jess.pyi,sha256=DhMqsGNzGUICu08CSTXsDXK8hOWw6Mu3y7KZ_X2wv20,7270
|
|
5
|
+
pyjess/_jess.pyx,sha256=YCma_4lqdwoJ-1nfExAkQWNuisztDeoRiU54AdGiJ8E,54649
|
|
6
6
|
pyjess/CMakeLists.txt,sha256=H9eXbrFcGF2OLP8muQctb4cOb27Qp2uZj5KRjoDAROg,36
|
|
7
7
|
pyjess/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
pyjess/tests/__init__.py,sha256=J7rYASCuQsCu5gktf5XqjNxJoBkBqxbREAiaSOta5xE,617
|
|
@@ -15,12 +15,12 @@ pyjess/tests/data/template_01.qry,sha256=izCIhUUTEk-IvQowhSVLiJaCAhpPbyvrfyoR4Q6
|
|
|
15
15
|
pyjess/tests/data/template_02.qry,sha256=5IYRTqsvO_roB2INLwfFDEaWJW9VRcXdbK4oe8VKMxE,618
|
|
16
16
|
pyjess/tests/test_atom.py,sha256=clLN9IVuivadztGtagDhdPBDGoMkUgs41lEWuTCCmFA,4741
|
|
17
17
|
pyjess/tests/test_hit.py,sha256=3p7MkvZL84e-tSbVlMSXHbO1yCZkaLIMD2e09occw1A,1244
|
|
18
|
-
pyjess/tests/test_jess.py,sha256=
|
|
18
|
+
pyjess/tests/test_jess.py,sha256=s5oBf5s4YvoF15KK5YcUzlx0NiO1vT40u53wBahZH1w,15023
|
|
19
19
|
pyjess/tests/test_molecule.py,sha256=9k6uiTeOWc5NiO7epyxY9lm_GgksPb7-o-ZcNFNxutw,5452
|
|
20
20
|
pyjess/tests/test_template.py,sha256=AIN-ba5-YTnGdT9SGPU4q45AZ03QnPE769WyItSpoPs,4657
|
|
21
21
|
pyjess/tests/test_template_atom.py,sha256=oK8cfKe4_k3Pm1PqoTTxTzAoeUVLiCFsg6QmiTQ-RCQ,3496
|
|
22
22
|
pyjess/tests/utils.py,sha256=Z7rUPC-D8dZlRfHAnLaXHUg6M10D3zFvNiwDvvHA3xc,202
|
|
23
|
-
pyjess-0.
|
|
24
|
-
pyjess-0.
|
|
25
|
-
pyjess-0.
|
|
26
|
-
pyjess-0.
|
|
23
|
+
pyjess-0.6.0.dist-info/METADATA,sha256=OG5zbwpmxVLSmlQQ6xvyicRYKB6JrLBkDa-zhmWa6js,12882
|
|
24
|
+
pyjess-0.6.0.dist-info/WHEEL,sha256=yXMtVL9U8RkqJEJfb-z5X2s1_G1r2eGG-REYk3wgjZ0,104
|
|
25
|
+
pyjess-0.6.0.dist-info/licenses/COPYING,sha256=Iyx2bRDPnLgoEzW2KVanb61cjhW8lnhJNU-mjS-KhIY,1124
|
|
26
|
+
pyjess-0.6.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|