pyjess 0.6.0__pp38-pypy38_pp73-win_amd64.whl → 0.7.0__pp38-pypy38_pp73-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyjess might be problematic. Click here for more details.
- pyjess/__main__.py +4 -0
- pyjess/_jess.pyi +52 -9
- pyjess/_jess.pypy38-pp73-win_amd64.pyd +0 -0
- pyjess/_jess.pyx +830 -101
- pyjess/cli.py +281 -0
- pyjess/tests/__init__.py +2 -0
- pyjess/tests/data/1AMY.cif +6259 -0
- pyjess/tests/data/1sur.qry +26 -0
- pyjess/tests/data/4.1.2.tpl +23 -0
- pyjess/tests/data/5ayx.EF.pdb +63 -0
- pyjess/tests/test_doctest.py +78 -0
- pyjess/tests/test_hit.py +26 -2
- pyjess/tests/test_jess.py +62 -1
- pyjess/tests/test_molecule.py +146 -0
- pyjess/tests/test_template.py +10 -1
- {pyjess-0.6.0.dist-info → pyjess-0.7.0.dist-info}/METADATA +59 -16
- pyjess-0.7.0.dist-info/RECORD +34 -0
- pyjess-0.7.0.dist-info/entry_points.txt +3 -0
- pyjess-0.6.0.dist-info/RECORD +0 -26
- {pyjess-0.6.0.dist-info → pyjess-0.7.0.dist-info}/WHEEL +0 -0
- {pyjess-0.6.0.dist-info → pyjess-0.7.0.dist-info}/licenses/COPYING +0 -0
pyjess/cli.py
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
"""The PyJess CLI.
|
|
4
|
+
"""
|
|
5
|
+
import argparse
|
|
6
|
+
import contextlib
|
|
7
|
+
import functools
|
|
8
|
+
import io
|
|
9
|
+
import itertools
|
|
10
|
+
import os
|
|
11
|
+
import operator
|
|
12
|
+
import sys
|
|
13
|
+
import warnings
|
|
14
|
+
import typing
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
import multiprocessing.pool
|
|
19
|
+
except ImportError: # multiprocessing.pool may be missing, e.g. on AWS
|
|
20
|
+
multiprocessing = None
|
|
21
|
+
|
|
22
|
+
from . import __name__ as prog, __author__, __version__
|
|
23
|
+
from ._jess import Template, Molecule, Jess
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Leading bytes ("magic numbers") used to recognise the compression
# container of an input file before picking a decompressor.

# Formats handled with the standard library.
_GZIP_MAGIC = b"\x1f\x8b"
_BZ2_MAGIC = b"BZh"
_XZ_MAGIC = b"\xfd7zXZ"

# Formats that need an optional third-party package to be decoded.
_LZ4_MAGIC = b"\x04\x22\x4d\x18"
_ZSTD_MAGIC = b"\x28\xb5\x2f\xfd"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SmartFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that emits free text (e.g. the description) verbatim.

    Argument help strings are still handled by the parent class; only the
    formatting of free text blocks is overridden.
    """

    def _format_text(self, text):
        """Return *text* unchanged, followed by one blank line.

        The text is neither interpolated with ``%(prog)s`` nor re-filled to
        the terminal width, so the line breaks written by the caller are
        kept as they are.
        """
        blank_line = "\n" * 2
        return text + blank_line
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@contextlib.contextmanager
def zopen(path, mode='r', encoding=None, errors=None, newline=None) -> typing.Iterator[typing.IO]:
    """Open a possibly compressed file for reading.

    The compression format is detected from the first bytes of the file
    (gzip, bzip2, xz, LZ4 or Zstandard); files matching none of the known
    signatures are read as-is.

    Arguments:
        path: The path of the file to open.
        mode: ``"r"`` to get a text stream; any other value yields the
            (decompressed) binary stream.
        encoding: Passed to `io.TextIOWrapper` in text mode.
        errors: Passed to `io.TextIOWrapper` in text mode.
        newline: Passed to `io.TextIOWrapper` in text mode.

    Yields:
        A readable file object, closed when the context exits.

    Raises:
        RuntimeError: When the file is LZ4- or Zstandard-compressed but the
            matching optional package (``lz4`` / ``zstandard``) is missing.

    """
    with contextlib.ExitStack() as ctx:
        # Every layer is registered on the stack so that all of them are
        # closed, in reverse order, when the caller leaves the context.
        file = ctx.enter_context(open(path, "rb"))
        # `peek` returns buffered bytes without advancing the stream, so the
        # decompressor still sees the file from its first byte.
        peek = file.peek()
        if peek.startswith(_GZIP_MAGIC):
            import gzip
            file = ctx.enter_context(gzip.open(file, mode="rb"))
        elif peek.startswith(_BZ2_MAGIC):
            import bz2
            file = ctx.enter_context(bz2.open(file, mode="rb"))
        elif peek.startswith(_XZ_MAGIC):
            import lzma
            file = ctx.enter_context(lzma.open(file, mode="rb"))
        elif peek.startswith(_LZ4_MAGIC):
            try:
                import lz4.frame
            except ImportError as err:
                raise RuntimeError("File compression is LZ4 but lz4 is not installed") from err
            file = ctx.enter_context(lz4.frame.open(file))
        elif peek.startswith(_ZSTD_MAGIC):
            try:
                import zstandard
            except ImportError as err:
                raise RuntimeError("File compression is ZSTD but zstandard is not installed") from err
            decompressor = zstandard.ZstdDecompressor()
            # Registered like the other readers so that the decompression
            # stream is released deterministically.
            file = ctx.enter_context(decompressor.stream_reader(file))
        if mode == "r":
            file = ctx.enter_context(
                io.TextIOWrapper(file, encoding=encoding, errors=errors, newline=newline)
            )
        yield file
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def argument_parser(
    prog: str = prog,
    version: str = __version__,
    # formatter_class: argparse.HelpFormatter = argparse.ArgumentDefaultsHelpFormatter,
) -> argparse.ArgumentParser:
    """Build the command line parser of the PyJess CLI.

    Arguments:
        prog: The program name shown in the usage and help messages.
        version: The version string reported by ``-V`` / ``--version``.

    Returns:
        An `argparse.ArgumentParser` with the general options, the
        mandatory parameters (``-T``, ``-Q``, ``-R``, ``-D``, ``-M``) and
        the optional flags registered.

    """
    parser = argparse.ArgumentParser(
        prog=prog,
        # The help option is registered by hand below.
        add_help=False,
        formatter_class=SmartFormatter,
        description=(
            "PyJess - Optimized Python bindings to Jess, a 3D template matching software.\n\n"
            "MIT License\n\n"
            "Copyright (c) 2025 Martin Larralde <martin.larralde@embl.de>\n"
            "Copyright (c) 2002 Jonathan Barker <jbarker@ebi.ac.uk>\n\n"
        ),
    )
    parser.add_argument(
        "-h",
        "--help",
        action="help",
        help="Show this help message and exit."
    )
    parser.add_argument(
        "-V",
        "--version",
        help="Show the version number and exit.",
        action="version",
        # Use the `version` argument rather than the module-level constant,
        # so that a caller-supplied version is actually reported.
        version=f"PyJess {version}",
    )
    parser.add_argument(
        "-j",
        "--jobs",
        help="The number of jobs to use for multithreading.",
        type=int,
        # `os.cpu_count()` may return None; fall back to a single job.
        default=os.cpu_count() or 1,
    )

    group = parser.add_argument_group("Mandatory Parameters")
    group.add_argument(
        "-T",
        "--templates",
        help="The path to the template list file.",
        type=Path,
        required=True,
    )
    group.add_argument(
        "-Q",
        "--queries",
        help="The path to the query list file.",
        type=Path,
        required=True,
    )
    group.add_argument(
        "-R",
        "--rmsd",
        help="The RMSD threshold.",
        type=float,
        required=True,
    )
    group.add_argument(
        "-D",
        "--distance-cutoff",
        help="The distance-cutoff.",
        type=float,
        required=True,
    )
    group.add_argument(
        "-M",
        "--maximum-distance",
        help=(
            "The maximum allowed template/query atom distance after adding the "
            "global distance cutoff and the individual atom distance cutoff "
            "defined in the temperature field of the ATOM record in the "
            "template file."
        ),
        type=float,
        required=True,
    )

    group = parser.add_argument_group("Flags")
    group.add_argument(
        "-n",
        "--no-transform",
        help="Do not transform coordinates of hit into the template coordinate frame",
        # Negative flag: `args.transform` is True unless the flag is given.
        action="store_false",
        dest="transform"
    )
    group.add_argument(
        "-f",
        "--filenames",
        help="Show PDB filenames in progress on stderr",
        action="store_true"
    )
    group.add_argument(
        "-i",
        "--ignore-chain",
        help=(
            "Include matches composed of residues belonging to multiple chains "
            "(if template is single-chain) or matches with residues from a single "
            "chain (if template has residues from multiple chains)."
        ),
        action="store_true",
    )
    group.add_argument(
        "--ignore-res-chain",
        help=(
            "Include matches composed of residues belonging to multiple chains "
            "but still enforce all atoms of a residue to be part of the same chain."
        ),
        action="store_true",
    )
    group.add_argument(
        "-q",
        "--query-filename",
        help="Write filename of query instead of PDB ID from HEADER",
        action="store_true",
    )
    group.add_argument(
        "-e",
        "--ignore-endmdl",
        help="Parse atoms from all models separated by ENDMDL (use with care).",
        action="store_true",
    )
    group.add_argument(
        "-c",
        "--max-candidates",
        help="Set a maximum number of candidates to return by template.",
        type=int,
        default=None,
    )
    group.add_argument(
        "--no-reorder",
        help=(
            "Disable template atom reordering in the matching process, useful "
            "to enforce results to be returned exactly in the same order as "
            "the original Jess, at the cost of longer runtimes."
        ),
        # Negative flag: `args.reorder` is True unless the flag is given.
        action="store_false",
        dest="reorder",
    )
    group.add_argument(
        "-b",
        "--best-match",
        help="Return only the best match for each template/query pair.",
        action="store_true",
    )

    return parser
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _process(gene_finder, sequence):
    """Run ``gene_finder.find_genes`` on one sequence record.

    A warning is emitted (attributed to the caller) when the record has an
    empty identifier; the record is processed regardless.

    Returns:
        A ``(identifier, result)`` pair, where ``result`` is whatever
        ``gene_finder.find_genes(sequence.seq)`` returned.

    """
    # NOTE(review): nothing in the visible part of this module calls this
    # helper -- confirm whether it is still needed.
    identifier = sequence.id
    if not identifier:
        warnings.warn("Input file contains a sequence without identifier", stacklevel=2)
    genes = gene_finder.find_genes(sequence.seq)
    return identifier, genes
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def main(
    argv: typing.Optional[typing.List[str]] = None,
    stdout: typing.TextIO = sys.stdout,
    stderr: typing.TextIO = sys.stderr,
) -> int:
    """Run the PyJess command line interface.

    The templates named in the template list file are loaded into a `Jess`
    object, then every molecule named in the query list file is matched
    against it and each hit is written to ``stdout`` in PDB format.

    Arguments:
        argv: The command line arguments, or `None` to let `argparse`
            read them from ``sys.argv``.
        stdout: The text stream receiving the hits.
        stderr: The text stream receiving progress and error messages.

    Returns:
        ``0`` on success. On failure, the non-zero integer ``errno`` of the
        exception when it has one, ``1`` otherwise.

    """
    parser = argument_parser()
    args = parser.parse_args(argv)
    # NOTE(review): `args.jobs` is parsed but not used in this function.

    # `--ignore-chain` takes precedence over `--ignore-res-chain`.
    if args.ignore_chain:
        ignore_chain = "all"
    elif args.ignore_res_chain:
        ignore_chain = "residues"
    else:
        ignore_chain = None

    try:
        # One template path per line; blank lines (e.g. a trailing empty
        # line) are skipped instead of being loaded as an empty filename.
        with args.templates.open() as f:
            templates = [
                Template.load(name, id=name)
                for name in map(str.strip, f)
                if name
            ]
        jess = Jess(templates)

        with args.queries.open() as f:
            for filename in map(str.strip, f):
                if not filename:
                    continue
                id_ = filename if args.query_filename else None
                mol = Molecule.load(filename, id=id_, ignore_endmdl=args.ignore_endmdl)
                if args.filenames:
                    print(filename, file=stderr)
                query = jess.query(
                    mol,
                    args.rmsd,
                    args.distance_cutoff,
                    args.maximum_distance,
                    max_candidates=args.max_candidates,
                    ignore_chain=ignore_chain,
                    reorder=args.reorder,
                    best_match=args.best_match,
                )
                for hit in query:
                    hit.dump(stdout, format="pdb", transform=args.transform)
                    stdout.write("\n")
    except Exception as err:
        # Top-level boundary of the CLI: report the error and turn it into
        # an exit status instead of a traceback.
        print("Error: {}".format(err), file=stderr)
        # `errno` may be missing, None (e.g. `OSError("message")`) or zero;
        # none of these may be returned, as they would read as success.
        code = getattr(err, "errno", None)
        if isinstance(code, int) and code != 0:
            return code
        return 1
    return 0
|
pyjess/tests/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from . import (
|
|
2
2
|
test_atom,
|
|
3
|
+
test_doctest,
|
|
3
4
|
test_hit,
|
|
4
5
|
test_jess,
|
|
5
6
|
test_molecule,
|
|
@@ -8,6 +9,7 @@ from . import (
|
|
|
8
9
|
)
|
|
9
10
|
|
|
10
11
|
def load_tests(loader, suite, pattern):
|
|
12
|
+
suite.addTests(loader.loadTestsFromModule(test_doctest))
|
|
11
13
|
suite.addTests(loader.loadTestsFromModule(test_atom))
|
|
12
14
|
suite.addTests(loader.loadTestsFromModule(test_hit))
|
|
13
15
|
suite.addTests(loader.loadTestsFromModule(test_jess))
|