pyjess 0.5.2__tar.gz → 0.7.0a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyjess might be problematic. Click here for more details.

Files changed (139) hide show
  1. {pyjess-0.5.2 → pyjess-0.7.0a1}/.github/workflows/package.yml +7 -2
  2. {pyjess-0.5.2 → pyjess-0.7.0a1}/CHANGELOG.md +25 -1
  3. {pyjess-0.5.2 → pyjess-0.7.0a1}/PKG-INFO +29 -11
  4. {pyjess-0.5.2 → pyjess-0.7.0a1}/README.md +28 -10
  5. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/_static/json/switcher.json +6 -1
  6. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/guide/index.rst +1 -0
  7. pyjess-0.7.0a1/docs/guide/optimizations.rst +258 -0
  8. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/index.rst +6 -5
  9. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/jess.pxd +2 -1
  10. pyjess-0.7.0a1/patches/Annulus.c.patch +149 -0
  11. pyjess-0.7.0a1/patches/Annulus.h.patch +108 -0
  12. pyjess-0.7.0a1/patches/Box.c.patch +57 -0
  13. pyjess-0.7.0a1/patches/Box.h.patch +93 -0
  14. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/CMakeLists.txt.patch +18 -2
  15. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/CandidateSet.c.patch +21 -21
  16. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/CandidateSet.h.patch +26 -9
  17. pyjess-0.7.0a1/patches/Jess.c.patch +215 -0
  18. pyjess-0.7.0a1/patches/Jess.h.patch +45 -0
  19. pyjess-0.7.0a1/patches/Join.c.patch +216 -0
  20. pyjess-0.7.0a1/patches/Join.h.patch +145 -0
  21. pyjess-0.7.0a1/patches/KdTree.c.patch +471 -0
  22. pyjess-0.7.0a1/patches/KdTree.h.patch +40 -0
  23. pyjess-0.7.0a1/patches/Main.c.patch +89 -0
  24. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/Molecule.c.patch +23 -3
  25. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/ResIndex.c.patch +11 -19
  26. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/ResIndex.h.patch +4 -4
  27. pyjess-0.7.0a1/patches/Scanner.c.patch +471 -0
  28. pyjess-0.7.0a1/patches/Scanner.h.patch +50 -0
  29. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/Template.h.patch +6 -4
  30. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/TessAtom.c.patch +40 -28
  31. pyjess-0.7.0a1/patches/TessAtom.h.patch +117 -0
  32. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/TessTemplate.c.patch +44 -14
  33. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/TessTemplate.h.patch +3 -3
  34. pyjess-0.7.0a1/patches/qselect.h.patch +96 -0
  35. {pyjess-0.5.2 → pyjess-0.7.0a1}/pyproject.toml +1 -1
  36. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/jess/CMakeLists.txt +3 -0
  37. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/_jess.pyi +3 -2
  38. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/_jess.pyx +80 -18
  39. pyjess-0.7.0a1/src/pyjess/tests/data/1sur.qry +26 -0
  40. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/test_jess.py +62 -2
  41. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/test_template.py +9 -1
  42. pyjess-0.5.2/patches/Jess.c.patch +0 -69
  43. pyjess-0.5.2/patches/KdTree.c.patch +0 -73
  44. pyjess-0.5.2/patches/Scanner.c.patch +0 -108
  45. pyjess-0.5.2/patches/TessAtom.h.patch +0 -52
  46. {pyjess-0.5.2 → pyjess-0.7.0a1}/.github/workflows/requirements.txt +0 -0
  47. {pyjess-0.5.2 → pyjess-0.7.0a1}/.github/workflows/test.yml +0 -0
  48. {pyjess-0.5.2 → pyjess-0.7.0a1}/.gitignore +0 -0
  49. {pyjess-0.5.2 → pyjess-0.7.0a1}/.gitmodules +0 -0
  50. {pyjess-0.5.2 → pyjess-0.7.0a1}/.readthedocs.yaml +0 -0
  51. {pyjess-0.5.2 → pyjess-0.7.0a1}/CMakeLists.txt +0 -0
  52. {pyjess-0.5.2 → pyjess-0.7.0a1}/CONTRIBUTING.md +0 -0
  53. {pyjess-0.5.2 → pyjess-0.7.0a1}/COPYING +0 -0
  54. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/.gitignore +0 -0
  55. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/Makefile +0 -0
  56. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/_static/css/main.css +0 -0
  57. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/_static/js/custom-icon.js +0 -0
  58. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/api/index.rst +0 -0
  59. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/api/jess.rst +0 -0
  60. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/api/molecule.rst +0 -0
  61. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/api/template.rst +0 -0
  62. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/conf.py +0 -0
  63. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/guide/changes.md +0 -0
  64. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/guide/contributing.md +0 -0
  65. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/guide/copyright.rst +0 -0
  66. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/guide/install.rst +0 -0
  67. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/make.bat +0 -0
  68. {pyjess-0.5.2 → pyjess-0.7.0a1}/docs/requirements.txt +0 -0
  69. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/__init__.pxd +0 -0
  70. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/annulus.pxd +0 -0
  71. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/atom.pxd +0 -0
  72. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/candidate_set.pxd +0 -0
  73. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/join.pxd +0 -0
  74. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/kdtree.pxd +0 -0
  75. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/molecule.pxd +0 -0
  76. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/region.pxd +0 -0
  77. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/res_index.pxd +0 -0
  78. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/scanner.pxd +0 -0
  79. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/super.pxd +0 -0
  80. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/template.pxd +0 -0
  81. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/tess_atom.pxd +0 -0
  82. {pyjess-0.5.2 → pyjess-0.7.0a1}/include/jess/tess_template.pxd +0 -0
  83. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/Atom.h.patch +0 -0
  84. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/Molecule.h.patch +0 -0
  85. {pyjess-0.5.2 → pyjess-0.7.0a1}/patches/Super.c.patch +0 -0
  86. {pyjess-0.5.2 → pyjess-0.7.0a1}/pkg/aur/PKGBUILD.in +0 -0
  87. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/CMakeLists.txt +0 -0
  88. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/.gitignore +0 -0
  89. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/CMakeLists.txt +0 -0
  90. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/__init__.py +0 -0
  91. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/py.typed +0 -0
  92. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/__init__.py +0 -0
  93. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/1.3.3.tpl +0 -0
  94. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/1AMY+1.3.3.txt +0 -0
  95. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/1AMY.pdb +0 -0
  96. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/__init__.py +0 -0
  97. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/pdb1lnb.pdb +0 -0
  98. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/template_01.qry +0 -0
  99. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/data/template_02.qry +0 -0
  100. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/test_atom.py +0 -0
  101. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/test_hit.py +0 -0
  102. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/test_molecule.py +0 -0
  103. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/test_template_atom.py +0 -0
  104. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/pyjess/tests/utils.py +0 -0
  105. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/scripts/apply_patch.py +0 -0
  106. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/scripts/cmake/CythonExtension.cmake +0 -0
  107. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/scripts/cmake/pystate_patch.h +0 -0
  108. {pyjess-0.5.2 → pyjess-0.7.0a1}/src/scripts/generate_patches.py +0 -0
  109. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/.gitignore +0 -0
  110. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/README.md +0 -0
  111. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/examples/template_01.qry +0 -0
  112. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/examples/template_02.qry +0 -0
  113. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/examples/test_pdbs/pdb1lnb.ent +0 -0
  114. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/filter_output.py +0 -0
  115. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Annulus.c +0 -0
  116. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Annulus.h +0 -0
  117. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Atom.c +0 -0
  118. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Atom.h +0 -0
  119. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Jess.c +0 -0
  120. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Jess.h +0 -0
  121. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Join.c +0 -0
  122. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Join.h +0 -0
  123. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/KdTree.c +0 -0
  124. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/KdTree.h +0 -0
  125. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Main.c +0 -0
  126. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Molecule.c +0 -0
  127. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Molecule.h +0 -0
  128. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Region.c +0 -0
  129. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Region.h +0 -0
  130. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Scanner.c +0 -0
  131. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Scanner.h +0 -0
  132. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Super.c +0 -0
  133. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Super.h +0 -0
  134. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/Template.h +0 -0
  135. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/TessAtom.c +0 -0
  136. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/TessAtom.h +0 -0
  137. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/TessTemplate.c +0 -0
  138. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/TessTemplate.h +0 -0
  139. {pyjess-0.5.2 → pyjess-0.7.0a1}/vendor/jess/src/jess +0 -0
@@ -15,6 +15,7 @@ jobs:
15
15
  with:
16
16
  submodules: true
17
17
  - name: Build manylinux wheels
18
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
18
19
  uses: pypa/cibuildwheel@v2.21.3
19
20
  env:
20
21
  CIBW_ARCHS: aarch64
@@ -35,6 +36,7 @@ jobs:
35
36
  with:
36
37
  submodules: true
37
38
  - name: Build manylinux wheels
39
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
38
40
  uses: pypa/cibuildwheel@v2.21.3
39
41
  env:
40
42
  CIBW_ARCHS: x86_64
@@ -55,6 +57,7 @@ jobs:
55
57
  with:
56
58
  submodules: true
57
59
  - name: Build manylinux wheels
60
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
58
61
  uses: pypa/cibuildwheel@v2.21.3
59
62
  env:
60
63
  CIBW_ARCHS: x86_64
@@ -75,6 +78,7 @@ jobs:
75
78
  with:
76
79
  submodules: true
77
80
  - name: Build manylinux wheels
81
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
78
82
  uses: pypa/cibuildwheel@v2.21.3
79
83
  env:
80
84
  CIBW_ARCHS: arm64
@@ -99,6 +103,7 @@ jobs:
99
103
  with:
100
104
  arch: amd64
101
105
  - name: Build manylinux wheels
106
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
102
107
  uses: pypa/cibuildwheel@v2.21.3
103
108
  env:
104
109
  CIBW_ARCHS: AMD64
@@ -187,7 +192,7 @@ jobs:
187
192
  environment: GitHub Releases
188
193
  runs-on: ubuntu-latest
189
194
  permissions: write-all
190
- if: "!contains(github.ref, 'rc')"
195
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
191
196
  name: Release
192
197
  needs: upload
193
198
  steps:
@@ -201,7 +206,7 @@ jobs:
201
206
  aur:
202
207
  environment: Arch User Repository
203
208
  runs-on: ubuntu-latest
204
- if: "!contains(github.ref, 'rc')"
209
+ if: "!contains(github.ref, 'rc') && !contains(github.ref, 'alpha')"
205
210
  name: Update AUR package
206
211
  needs:
207
212
  - sdist
@@ -6,9 +6,33 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
6
6
 
7
7
 
8
8
  ## [Unreleased]
9
- [Unreleased]: https://github.com/althonos/pyjess/compare/v0.5.2...HEAD
9
+ [Unreleased]: https://github.com/althonos/pyjess/compare/v0.7.0-alpha.1...HEAD
10
10
 
11
11
 
12
+ ## [v0.7.0-alpha.1] - 2025-09-02
13
+ [v0.7.0-alpha.1]: https://github.com/althonos/pyjess/compare/v0.6.0...v0.7.0-alpha.1
14
+
15
+ ### Fixed
16
+ - **breaking**: Incorrect handling of `max_candidates` in `Jess.query`, causing PyJess to erroneously ignore some templates.
17
+
18
+ ### Changed
19
+ - **breaking**: Set the `max_candidates` default value to `None` in `Jess.query`, disabling max candidates filtering by default.
20
+
21
+
22
+ ## [v0.6.0] - 2025-09-01
23
+ [v0.6.0]: https://github.com/althonos/pyjess/compare/v0.5.2...v0.6.0
24
+
25
+ ### Added
26
+ - Several [algorithmic optimizations](https://pyjess.readthedocs.io/en/v0.6.0/guide/optimizations.html) to Jess, greatly improving runtime:
27
+ - Use QuickSelect ($O(n)$) instead of QuickSort ($O(n \log n)$) to select the medians used for space partitioning on k-d tree initialization.
28
+ - Use approximate intersection on bounding boxes rather than exact code involving multiple annuli when querying the k-d tree for atoms.
29
+ - **breaking**: Reorder the matching order of template atoms to reduce the amount of backtracking and k-d tree querying performed in the average and worst case.
30
+ - `reorder` argument to `Jess.query` to support disabling template atom reordering if needed for Jess 1-to-1 consistency.
31
+
32
+ ### Changed
33
+ - Hardcode space dimensions to 3 to encourage compilers to unroll loops over dimensions.
34
+ - Recycle memory between templates within a query to reduce total amount of allocation/deallocation in hot paths.
35
+
12
36
  ## [v0.5.2] - 2025-08-26
13
37
  [v0.5.2]: https://github.com/althonos/pyjess/compare/v0.5.1...v0.5.2
14
38
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pyjess
3
- Version: 0.5.2
3
+ Version: 0.7.0a1
4
4
  Summary: Cython bindings and Python interface to JESS, a 3D template matching software.
5
5
  Keywords: bioinformatics,structure,template,matching
6
6
  Author-Email: Martin Larralde <martin.larralde@embl.de>
@@ -93,7 +93,9 @@ during his PhD in the [Thornton group](https://www.ebi.ac.uk/research/thornton/)
93
93
  PyJess is a Python module that provides bindings to Jess using
94
94
  [Cython](https://cython.org/). It allows creating templates, querying them
95
95
  with protein structures, and retrieving the hits using a Python API without
96
- performing any external I/O.
96
+ performing any external I/O. It's also more than 10x faster than Jess thanks to
97
+ [algorithmic optimizations](https://pyjess.readthedocs.io/en/latest/guide/optimizations.html)
98
+ added to improve the original Jess code while producing consistent results.
97
99
 
98
100
 
99
101
  ## 🔧 Installing
@@ -127,7 +129,8 @@ Jess if you are using it in an academic work, for instance as:
127
129
 
128
130
  ## 💡 Example
129
131
 
130
- Load templates to be used as references from different template files:
132
+ Load [`Template`](https://pyjess.readthedocs.io/en/latest/api/template.html#pyjess.Template)
133
+ objects to be used as references from different template files:
131
134
 
132
135
  ```python
133
136
  import pathlib
@@ -135,11 +138,10 @@ import pyjess
135
138
 
136
139
  templates = []
137
140
  for path in sorted(pathlib.Path("vendor/jess/examples").glob("template_*.qry")):
138
- with path.open() as file:
139
- templates.append(pyjess.Template.load(file, id=path.stem))
141
+ templates.append(pyjess.Template.load(path, id=path.stem))
140
142
  ```
141
143
 
142
- Create a `Jess` instance and use it to query a molecule (a PDB structure)
144
+ Create a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess) instance and use it to query a [`Molecule`](https://pyjess.readthedocs.io/en/latest/api/molecule.html#pyjess.Molecule) (a PDB structure)
143
145
  against the stored templates:
144
146
 
145
147
  ```python
@@ -161,9 +163,11 @@ for hit in query:
161
163
 
162
164
  ## 🧶 Thread-safety
163
165
 
164
- Once a `Jess` instance has been created, the templates cannot be edited anymore,
165
- making the `Jess.query` method re-entrant. This allows querying several
166
- molecules against the same templates in parallel using a thread pool:
166
+ Once a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess)
167
+ instance has been created, the templates cannot be edited anymore,
168
+ making the [`Jess.query`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess.query) method re-entrant and thread-safe. This allows querying
169
+ several molecules against the same templates in parallel using e.g. a
170
+ [`ThreadPool`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.ThreadPool):
167
171
 
168
172
  ```python
169
173
  molecules = []
@@ -177,8 +181,22 @@ with multiprocessing.ThreadPool() as pool:
177
181
  *⚠️ Prior to PyJess `v0.2.1`, the Jess code was running some thread-unsafe operations which have now been patched.
178
182
  If running Jess in parallel, make sure to use `v0.2.1` or later to use the code patched with re-entrant functions*.
179
183
 
180
- <!-- ## ⏱️ Benchmarks -->
184
+ ## ⏱️ Benchmarks
181
185
 
186
+ The following table reports the runtime of PyJess to match $n=132$ protein
187
+ structures to the $m=7607$ templates of
188
+ [EnzyMM](https://github.com/RayHackett/enzymm), using $J=12$ threads to parallelize.
189
+
190
+ | Version | Runtime (s) | Match Speed (N * M / (s * J)) | Speedup |
191
+ | ----------- | ----------- | --------------------------- | ----------- |
192
+ | ``v0.4.2`` | 618.1 | 135.4 | N/A |
193
+ | ``v0.5.0`` | 586.3 | 142.7 | x1.05 |
194
+ | ``v0.5.1`` | 365.6 | 228.9 | x1.69 |
195
+ | ``v0.5.2`` | 327.2 | 255.7 | x1.88 |
196
+ | ``v0.6.0`` | 54.5 | 1535.4 | **x11.34** |
197
+
198
+ *Benchmarks were run on a quiet [i7-1255U](https://www.intel.com/content/www/us/en/products/sku/226259/intel-core-i71255u-processor-12m-cache-up-to-4-70-ghz/specifications.html) CPU running @4.70GHz with 10 physical cores / 12 logical
199
+ cores.*
182
200
 
183
201
  ## 💭 Feedback
184
202
 
@@ -211,7 +229,7 @@ This library is provided under the [MIT License](https://choosealicense.com/lice
211
229
  *This project is in no way affiliated, sponsored, or otherwise endorsed
212
230
  by the JESS authors. It was developed
213
231
  by [Martin Larralde](https://github.com/althonos/) during his PhD project
214
- at the [European Molecular Biology Laboratory](https://www.embl.de/) in
232
+ at the [Leiden University Medical Center](https://www.lumc.nl/en/) in
215
233
  the [Zeller team](https://github.com/zellerlab).*
216
234
 
217
235
 
@@ -33,7 +33,9 @@ during his PhD in the [Thornton group](https://www.ebi.ac.uk/research/thornton/)
33
33
  PyJess is a Python module that provides bindings to Jess using
34
34
  [Cython](https://cython.org/). It allows creating templates, querying them
35
35
  with protein structures, and retrieving the hits using a Python API without
36
- performing any external I/O.
36
+ performing any external I/O. It's also more than 10x faster than Jess thanks to
37
+ [algorithmic optimizations](https://pyjess.readthedocs.io/en/latest/guide/optimizations.html)
38
+ added to improve the original Jess code while producing consistent results.
37
39
 
38
40
 
39
41
  ## 🔧 Installing
@@ -67,7 +69,8 @@ Jess if you are using it in an academic work, for instance as:
67
69
 
68
70
  ## 💡 Example
69
71
 
70
- Load templates to be used as references from different template files:
72
+ Load [`Template`](https://pyjess.readthedocs.io/en/latest/api/template.html#pyjess.Template)
73
+ objects to be used as references from different template files:
71
74
 
72
75
  ```python
73
76
  import pathlib
@@ -75,11 +78,10 @@ import pyjess
75
78
 
76
79
  templates = []
77
80
  for path in sorted(pathlib.Path("vendor/jess/examples").glob("template_*.qry")):
78
- with path.open() as file:
79
- templates.append(pyjess.Template.load(file, id=path.stem))
81
+ templates.append(pyjess.Template.load(path, id=path.stem))
80
82
  ```
81
83
 
82
- Create a `Jess` instance and use it to query a molecule (a PDB structure)
84
+ Create a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess) instance and use it to query a [`Molecule`](https://pyjess.readthedocs.io/en/latest/api/molecule.html#pyjess.Molecule) (a PDB structure)
83
85
  against the stored templates:
84
86
 
85
87
  ```python
@@ -101,9 +103,11 @@ for hit in query:
101
103
 
102
104
  ## 🧶 Thread-safety
103
105
 
104
- Once a `Jess` instance has been created, the templates cannot be edited anymore,
105
- making the `Jess.query` method re-entrant. This allows querying several
106
- molecules against the same templates in parallel using a thread pool:
106
+ Once a [`Jess`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess)
107
+ instance has been created, the templates cannot be edited anymore,
108
+ making the [`Jess.query`](https://pyjess.readthedocs.io/en/latest/api/jess.html#pyjess.Jess.query) method re-entrant and thread-safe. This allows querying
109
+ several molecules against the same templates in parallel using e.g. a
110
+ [`ThreadPool`](https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.ThreadPool):
107
111
 
108
112
  ```python
109
113
  molecules = []
@@ -117,8 +121,22 @@ with multiprocessing.ThreadPool() as pool:
117
121
  *⚠️ Prior to PyJess `v0.2.1`, the Jess code was running some thread-unsafe operations which have now been patched.
118
122
  If running Jess in parallel, make sure to use `v0.2.1` or later to use the code patched with re-entrant functions*.
119
123
 
120
- <!-- ## ⏱️ Benchmarks -->
124
+ ## ⏱️ Benchmarks
121
125
 
126
+ The following table reports the runtime of PyJess to match $n=132$ protein
127
+ structures to the $m=7607$ templates of
128
+ [EnzyMM](https://github.com/RayHackett/enzymm), using $J=12$ threads to parallelize.
129
+
130
+ | Version | Runtime (s) | Match Speed (N * M / (s * J)) | Speedup |
131
+ | ----------- | ----------- | --------------------------- | ----------- |
132
+ | ``v0.4.2`` | 618.1 | 135.4 | N/A |
133
+ | ``v0.5.0`` | 586.3 | 142.7 | x1.05 |
134
+ | ``v0.5.1`` | 365.6 | 228.9 | x1.69 |
135
+ | ``v0.5.2`` | 327.2 | 255.7 | x1.88 |
136
+ | ``v0.6.0`` | 54.5 | 1535.4 | **x11.34** |
137
+
138
+ *Benchmarks were run on a quiet [i7-1255U](https://www.intel.com/content/www/us/en/products/sku/226259/intel-core-i71255u-processor-12m-cache-up-to-4-70-ghz/specifications.html) CPU running @4.70GHz with 10 physical cores / 12 logical
139
+ cores.*
122
140
 
123
141
  ## 💭 Feedback
124
142
 
@@ -151,7 +169,7 @@ This library is provided under the [MIT License](https://choosealicense.com/lice
151
169
  *This project is in no way affiliated, sponsored, or otherwise endorsed
152
170
  by the JESS authors. It was developed
153
171
  by [Martin Larralde](https://github.com/althonos/) during his PhD project
154
- at the [European Molecular Biology Laboratory](https://www.embl.de/) in
172
+ at the [Leiden University Medical Center](https://www.lumc.nl/en/) in
155
173
  the [Zeller team](https://github.com/zellerlab).*
156
174
 
157
175
 
@@ -1,6 +1,11 @@
1
1
  [
2
2
  {
3
- "name": "v0.5 (latest)",
3
+ "name": "v0.6 (latest)",
4
+ "version": "0.6.0",
5
+ "url": "https://pyjess.readthedocs.io/en/v0.6.0/"
6
+ },
7
+ {
8
+ "name": "v0.5",
4
9
  "version": "0.5.2",
5
10
  "url": "https://pyjess.readthedocs.io/en/v0.5.2/"
6
11
  },
@@ -13,6 +13,7 @@ This section contains guides and documents about PyJess usage.
13
13
  :maxdepth: 1
14
14
  :caption: Resources
15
15
 
16
+ Optimizations <optimizations>
16
17
  Contribution Guide <contributing>
17
18
  Changelog <changes>
18
19
  Copyright Notice <copyright>
@@ -0,0 +1,258 @@
1
+ Optimizations
2
+ =============
3
+
4
+ .. currentmodule:: pyjess
5
+
6
+ While PyJess started as a Cython wrapper of Jess, it also
7
+ contains several optimizations to make the code run faster
8
+ while maintaining consistency with the original Jess code.
9
+ Some of these optimizations are described below, as well as
10
+ the version where they were introduced.
11
+
12
+
13
+ Residue name index
14
+ ------------------
15
+
16
+ .. versionadded:: 0.5.1
17
+
18
+ Given a `Molecule`, matching a new `Template` to that `Molecule`
19
+ requires generating a set of candidate atoms for each `TemplateAtom`
20
+ of the template according to its `~TemplateAtom.match_mode`.
21
+
22
+ In the original Jess code, this is done by a full-scan on each
23
+ `Atom` of the `Molecule` for each `TemplateAtom` at index ``k``:
24
+
25
+ .. code:: c
26
+
27
+ Atom* A;
28
+ Molecule* M;
29
+ Template* T;
30
+ CandidateSet* S;
31
+ int n, i;
32
+
33
+ n=Molecule_count(M);
34
+ for(i=0; i<n; i++)
35
+ {
36
+ A=(Atom*) Molecule_atom(M,i);
37
+ if(T->match(T,k,A))
38
+ {
39
+ S->atom[S->count]=A;
40
+ S->count++;
41
+ }
42
+ }
43
+
44
+ However, the most common match modes require the candidate `Atom` to
45
+ have a `~Atom.residue_name` equal to one of the `TemplateAtom`
46
+ `~TemplateAtom.residue_names`. To exploit this, and avoid a full-scan,
47
+ we can create an index grouping the atoms of a `Molecule` by their
48
+ `~Atom.residue_name`, and only iterate on atoms with the right
49
+ `~Atom.residue_name` when building the candidates for a `TemplateAtom`:
50
+
51
+ .. code:: c
52
+
53
+ Atom* A;
54
+ Molecule* M;
55
+ Template* T;
56
+ CandidateSet* S;
57
+ Atom** atoms;
58
+
59
+ for(name=T->residueNames(T,k);name!=NULL;name++)
60
+ {
61
+ for(atoms=Molecule_atoms(M,name);*atoms!=NULL;atoms++)
62
+ {
63
+ A=*atoms;
64
+ if(T->match(T,k,A))
65
+ {
66
+ S->atom[S->count]=A;
67
+ S->count++;
68
+ }
69
+ }
70
+ }
71
+
72
+ By doing so we greatly reduce the number of calls to ``T->match``,
73
+ at the cost of computing an index when a new `Molecule` is created.
74
+ This is greatly beneficial for a large number of templates, with an
75
+ additional :math:`O(n)` memory requirement.
76
+
77
+
78
+ k-d tree generation
79
+ -------------------
80
+
81
+ .. versionadded:: 0.6.0
82
+
83
+ Jess uses a `k-d tree <https://en.wikipedia.org/wiki/K-d_tree>`_ data
84
+ structure to partition the molecule into geometric regions and speed-up the
85
+ retrieval of atoms in each region. To generate a :math:`k`-d tree from
86
+ a list of `Atom`, the atoms are recursively partitioned on a single dimension
87
+ around a pivot value, usually the median coordinate for that dimension.
88
+
89
+ The original Jess code uses ``qsort`` (the `QuickSort <https://en.wikipedia.org/wiki/Quicksort>`_
90
+ implementation of the C standard library) to first sort the atoms on a
91
+ single dimension, and then takes the middle point:
92
+
93
+ .. code:: c
94
+
95
+ KdTreeNode* N;
96
+ int* idx;
97
+ int n;
98
+
99
+ qsort(idx,n,sizeof(int),KdTree_compare);
100
+
101
+ split = n/2;
102
+ N->index=idx[split-1];
103
+ N->type=type;
104
+
105
+ type = (type+1)%dim;
106
+ N->left = KdTreeNode_create(idx,split,type,u,dim);
107
+ N->right = KdTreeNode_create(&idx[split],n-split,type,u,dim);
108
+
109
+
110
+ While implemented very efficiently, QuickSort has an average runtime
111
+ complexity of :math:`O(n \log n)`. Given that the algorithm is only used
112
+ here to search for the median, and that the sort order is actually irrelevant,
113
+ we replaced it with the `QuickSelect <https://en.wikipedia.org/wiki/Quickselect>`_ algorithm,
114
+ which can retrieve the median with an average runtime complexity of :math:`O(n)`.
115
+
116
+
117
+ Approximate annulus intersection
118
+ --------------------------------
119
+
120
+ .. versionadded:: 0.6.0
121
+
122
+ During the search for matches in a ``Scanner``, Jess takes into account the
123
+ flexibility of the `Template` by modelling the distance constraints to each
124
+ `TemplateAtom` using an `annulus <https://en.wikipedia.org/wiki/Annulus_(mathematics)>`_.
125
+ Then, it queries the :math:`k`-d tree created on the candidate atoms to find
126
+ candidate atoms that are included in this annulus.
127
+
128
+ In the original Jess, the traversal of the :math:`k`-d tree is done by computing
129
+ for each internal node of the tree whether the box formed by that node intersects
130
+ the query annulus, and for each leaf whether they are contained in that annulus.
131
+
132
+ Computing the intersection between a box and an annulus requires computing
133
+ `Euclidean distances <https://en.wikipedia.org/wiki/Euclidean_distance>`_,
134
+ and therefore products between real numbers, an operation that is among the
135
+ slowest even on modern CPUs:
136
+
137
+ .. code:: c
138
+
139
+ Annulus* A;
140
+ double minBox[d];
141
+ double maxBox[d];
142
+
143
+ double minSum=0.0;
144
+ double maxSum=0.0;
145
+ for(i=0; i<d; i++)
146
+ {
147
+ double t1 = A->centre[i]-minBox[i];
148
+ double t2 = A->centre[i]-maxBox[i];
149
+ t1 *= t1;
150
+ t2 *= t2;
151
+
152
+ if(minBox[i]>A->centre[i] || maxBox[i]<A->centre[i])
153
+ minSum += min(t1,t2);
154
+
155
+ maxSum += max(t1,t2);
156
+ }
157
+
158
+ bool intersects = minSum<=(A->outer*A->outer) && maxSum>=(A->inner*A->inner);
159
+
160
+ To speed-up the querying, we approximate the query annulus as a bounding
161
+ box, and instead compute the intersection of the :math:`k`-d tree box
162
+ with the bounding box, which only requires comparisons:
163
+
164
+ .. code:: c
165
+
166
+ Annulus* A;
167
+ double minBox[d];
168
+ double maxBox[d];
169
+
170
+ bool intersects = true;
171
+ for(i=0; i<d; i++)
172
+ {
173
+ double dmin = A->centre[i]-A->outer;
174
+ double dmax = A->centre[i]+A->outer;
175
+ if( !( dmin <= maxBox[i] && minBox[i] <= dmax ) )
176
+ intersects = false;
177
+ }
178
+
179
+ As this is an approximation, it may wrongly return ``true`` on (literal) corner
180
+ cases, i.e. when the intersection happens in a corner of the bounding box around
181
+ the annulus. However, given that the :math:`k`-d tree later checks that the
182
+ points are actually included in the annulus for the leaf nodes,
183
+ using this implementation will not generate false positives.
184
+
185
+
186
+ Reduced backtracking
187
+ --------------------
188
+
189
+ .. versionadded:: 0.6.0
190
+
191
+ The ``Scanner`` in Jess iterates on the `Template` atoms and then aligns the
192
+ `Molecule` atoms iteratively. When no more candidate for a `TemplateAtom` can
193
+ be found in a `Molecule`, it backtracks to the previous `TemplateAtom`, and
194
+ continues this iteration.
195
+
196
+ Since every backtracking event triggers the querying of the :math:`k`-d tree,
197
+ we want to minimize backtracking as much as possible. To do so, we compute
198
+ an iteration order over the `Template` atoms that minimizes the amount of
199
+ backtracking required to explore all paths. This can be done quickly by
200
+ sorting the `TemplateAtom` using the number of `Atom` in the corresponding
201
+ ``CandidateSet``.
202
+
203
+ Empirically, this approach reduced the amount of :math:`k`-d tree queries by a
204
+ factor of 10. It is nevertheless unclear whether an optimal path can be
205
+ further identified and pre-computed from the candidate `Atom`, possibly by
206
+ filtering distant atoms first.
207
+
208
+ .. warning::
209
+
210
+ Out of all the optimizations in this section, this one is the only one
211
+ to introduce *slight* behavioral changes in PyJess compared to Jess.
212
+ Since the order in which candidate atoms are matched has changed,
213
+ the order in which a PyJess `Query` yields `Hit` objects differs
214
+ from that in which Jess reports a match. This only affects the *order*
215
+ though: all matches are still returned, and are 1-to-1 identical!
216
+ This can be disabled by running with ``Jess.query(..., reorder=False)``
217
+ to use the original matching order, at the cost of a longer runtime.
218
+
219
+
220
+
221
+ Type concretization
222
+ -------------------
223
+
224
+ .. versionadded:: 0.6.0
225
+
226
+ Jess uses generic code to support multiple types of spatial regions
227
+ using function pointers to implement `virtual method tables <https://en.wikipedia.org/wiki/Virtual_method_table>`_.
228
+ While elegant and functional, the code never really makes use of
229
+ the genericity, and therefore the algorithm can be specialized to
230
+ the appropriate `Region` concrete type (either `Join` or `Annulus`)
231
+ to remove the overhead of calling the function pointers.
232
+
233
+ In addition, most of the geometric code is inlined, as it is called in hot paths
234
+ where the compiler can apply auto-vectorization.
235
+
236
+
237
+ Dimension concretization
238
+ ------------------------
239
+
240
+ .. versionadded:: 0.6.0
241
+
242
+ The `Annulus` type is made to be generic over the number of dimensions,
243
+ but in practice it is only used for 3-dimensions. We hardcoded the dimensions
244
+ to encourage the compiler to unroll loops over the `Annulus` dimensions where
245
+ applicable, and to use constant-size arrays rather than dynamic allocation when
246
+ applicable.
247
+
248
+
249
+ Scanner memory recycling
250
+ ------------------------
251
+
252
+ .. versionadded:: 0.6.0
253
+
254
+ The original Jess code performs a lot of allocation/deallocations in hot paths,
255
+ as it creates a new `Scanner` which allocates memory for each `Template` / `Molecule`
256
+ pair to match. Effectively, most of these buffers can actually be reused
257
+ across `Templates` for a given `Molecule`, provided sufficient bookkeeping. Our
258
+ implementation keeps allocation to a minimum across an entire `Query`.
@@ -103,16 +103,17 @@ PyJess is a Python module that provides bindings to Jess using
103
103
  Retrieve results as they become available as dedicated
104
104
  `~pyjess.Hit` objects, and compute statistics on-the-fly.
105
105
 
106
+ .. grid-item-card:: :fas:`gauge-high` Fast
107
+
108
+ Compute matches about 10x faster, thanks to
109
+ several :doc:`algorithmic optimizations <guide/optimizations>`
110
+ made to the original Jess code.
111
+
106
112
  .. grid-item-card:: :fas:`server` Parallel
107
113
 
108
114
  Easily run computations in parallel querying thread-safe
109
115
  `~pyjess.Jess` with several `~pyjess.Molecule` in parallel.
110
116
 
111
- .. grid-item-card:: :fas:`check` Consistent
112
-
113
- Get the same results as Jess, with some additional bug fixes for
114
- edge cases of the original implementation.
115
-
116
117
  .. grid-item-card:: :fas:`toolbox` Feature-complete
117
118
 
118
119
  Access all the features of the original CLI through the
@@ -18,10 +18,11 @@ cdef extern from "Jess.h" nogil:
18
18
  Jess* Jess_create()
19
19
  void Jess_free(Jess*)
20
20
  void Jess_addTemplate(Jess*, Template*)
21
- JessQuery* Jess_query(Jess*, Molecule*, double, double)
21
+ JessQuery* Jess_query(Jess*, Molecule*, double, double, bint)
22
22
 
23
23
  void JessQuery_free(JessQuery*)
24
24
  int JessQuery_next(JessQuery*, int)
25
+ int JessQuery_nextTemplate(JessQuery*)
25
26
  Template* JessQuery_template(JessQuery*)
26
27
  const Molecule* JessQuery_molecule(JessQuery*)
27
28
  Atom** JessQuery_atoms(JessQuery*)