pyjess 0.7.0__pp39-pypy39_pp73-win_amd64.whl → 0.8.0__pp39-pypy39_pp73-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyjess/_jess.pyi +4 -0
- pyjess/_jess.pypy39-pp73-win_amd64.pyd +0 -0
- pyjess/_jess.pyx +171 -31
- pyjess/_peekable.py +53 -0
- pyjess/tests/test_molecule.py +43 -0
- pyjess/tests/test_template.py +6 -0
- pyjess/tests/test_template_atom.py +29 -1
- {pyjess-0.7.0.dist-info → pyjess-0.8.0.dist-info}/METADATA +3 -2
- {pyjess-0.7.0.dist-info → pyjess-0.8.0.dist-info}/RECORD +12 -11
- {pyjess-0.7.0.dist-info → pyjess-0.8.0.dist-info}/WHEEL +0 -0
- {pyjess-0.7.0.dist-info → pyjess-0.8.0.dist-info}/entry_points.txt +0 -0
- {pyjess-0.7.0.dist-info → pyjess-0.8.0.dist-info}/licenses/COPYING +0 -0
pyjess/_jess.pyi
CHANGED
|
@@ -181,6 +181,8 @@ class TemplateAtom:
|
|
|
181
181
|
@property
|
|
182
182
|
def distance_weight(self) -> float: ...
|
|
183
183
|
def copy(self: _SELF) -> _SELF: ...
|
|
184
|
+
def dump(self, file: TextIO) -> None: ...
|
|
185
|
+
def dumps(self) -> str: ...
|
|
184
186
|
|
|
185
187
|
class Template(Sequence[TemplateAtom]):
|
|
186
188
|
@classmethod
|
|
@@ -204,6 +206,8 @@ class Template(Sequence[TemplateAtom]):
|
|
|
204
206
|
@property
|
|
205
207
|
def dimension(self) -> int: ...
|
|
206
208
|
def copy(self) -> Template: ...
|
|
209
|
+
def dump(self, file: TextIO) -> None: ...
|
|
210
|
+
def dumps(self) -> str: ...
|
|
207
211
|
|
|
208
212
|
_T = TypeVar("_T", bound=Template)
|
|
209
213
|
|
|
Binary file
|
pyjess/_jess.pyx
CHANGED
|
@@ -4,20 +4,20 @@
|
|
|
4
4
|
|
|
5
5
|
Jess is an algorithm for constraint-based structural template matching
|
|
6
6
|
proposed by Jonathan Barker *et al.*. It can be used to identify
|
|
7
|
-
catalytic residues from a known template inside a protein structure.
|
|
7
|
+
catalytic residues from a known template inside a protein structure.
|
|
8
8
|
Jess is an evolution of TESS, a geometric hashing algorithm developed by
|
|
9
9
|
Andrew Wallace *et al.*, removing some pre-computation and
|
|
10
|
-
structural requirements from the original algorithm.
|
|
10
|
+
structural requirements from the original algorithm.
|
|
11
11
|
|
|
12
12
|
PyJess is a Python module that provides bindings to Jess using
|
|
13
|
-
`Cython <https://cython.org/>`_. It allows creating templates, querying
|
|
14
|
-
them with protein structures, and retrieving the hits using a Python API
|
|
15
|
-
without performing any external I/O. It's also more than 10x faster than
|
|
16
|
-
Jess thanks to algorithmic optimizations added to improve the original Jess
|
|
13
|
+
`Cython <https://cython.org/>`_. It allows creating templates, querying
|
|
14
|
+
them with protein structures, and retrieving the hits using a Python API
|
|
15
|
+
without performing any external I/O. It's also more than 10x faster than
|
|
16
|
+
Jess thanks to algorithmic optimizations added to improve the original Jess
|
|
17
17
|
code while producing consistent results.
|
|
18
18
|
|
|
19
19
|
Example:
|
|
20
|
-
Load templates from a file, either as a file-like object or
|
|
20
|
+
Load templates from a file, either as a file-like object or
|
|
21
21
|
given a filename::
|
|
22
22
|
|
|
23
23
|
>>> t1 = pyjess.Template.load("1.3.3.tpl") # load from filename
|
|
@@ -31,15 +31,15 @@ Example:
|
|
|
31
31
|
>>> mol[0]
|
|
32
32
|
Atom(serial=1, name='N', altloc=' ', residue_name='GLN', ...)
|
|
33
33
|
|
|
34
|
-
Create a `Jess` object storing the templates to support running
|
|
35
|
-
queries on them. The individual templates can still be accessed by
|
|
34
|
+
Create a `Jess` object storing the templates to support running
|
|
35
|
+
queries on them. The individual templates can still be accessed by
|
|
36
36
|
index::
|
|
37
37
|
|
|
38
38
|
>>> jess = pyjess.Jess([t1, t2])
|
|
39
39
|
>>> jess[0].id
|
|
40
40
|
'3r6v'
|
|
41
41
|
|
|
42
|
-
Run a query on the Jess object to retrieve all templates matching
|
|
42
|
+
Run a query on the Jess object to retrieve all templates matching
|
|
43
43
|
a `Molecule`, *in no particular order*::
|
|
44
44
|
|
|
45
45
|
>>> hits = jess.query(mol, 2, 2, 2)
|
|
@@ -59,7 +59,7 @@ Example:
|
|
|
59
59
|
2om2 1.0711...
|
|
60
60
|
2om2 1.1494...
|
|
61
61
|
|
|
62
|
-
By default, a template can match a molecule in more than one way,
|
|
62
|
+
By default, a template can match a molecule in more than one way,
|
|
63
63
|
if several sets of atoms match the geometric constraints. Use the
|
|
64
64
|
``best_match`` argument of `~Jess.query` to only retrieve the
|
|
65
65
|
best match per template::
|
|
@@ -86,6 +86,7 @@ References:
|
|
|
86
86
|
|
|
87
87
|
cimport cython
|
|
88
88
|
from cpython.exc cimport PyErr_WarnEx
|
|
89
|
+
from cpython.bytes cimport PyBytes_FromStringAndSize
|
|
89
90
|
from cpython.unicode cimport (
|
|
90
91
|
PyUnicode_FromStringAndSize,
|
|
91
92
|
PyUnicode_FromFormat,
|
|
@@ -119,6 +120,8 @@ from jess.tess_atom cimport TessAtom as _TessAtom
|
|
|
119
120
|
import functools
|
|
120
121
|
import io
|
|
121
122
|
|
|
123
|
+
from ._peekable import PeekableFile
|
|
124
|
+
|
|
122
125
|
__version__ = PROJECT_VERSION
|
|
123
126
|
|
|
124
127
|
# --- Utils ------------------------------------------------------------------
|
|
@@ -149,6 +152,30 @@ class nullcontext:
|
|
|
149
152
|
def __exit__(self, exc_type, exc_value, traceback):
|
|
150
153
|
return False
|
|
151
154
|
|
|
155
|
+
cdef extern from * nogil:
|
|
156
|
+
"""
|
|
157
|
+
const char AA3[21][3] = {
|
|
158
|
+
"ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS",
|
|
159
|
+
"ILE", "LYS", "LEU", "MET", "ASN", "PRO", "GLN",
|
|
160
|
+
"ARG", "SER", "THR", "VAL", "TRP", "TYR", "XXX"
|
|
161
|
+
};
|
|
162
|
+
|
|
163
|
+
const char AA1[21] = {
|
|
164
|
+
'A', 'C', 'D', 'E', 'F', 'G', 'H',
|
|
165
|
+
'I', 'K', 'L', 'M', 'N', 'P', 'Q',
|
|
166
|
+
'R', 'S', 'T', 'V', 'W', 'Y', 'X'
|
|
167
|
+
};
|
|
168
|
+
"""
|
|
169
|
+
const char[21][3] AA3
|
|
170
|
+
const char[21] AA1
|
|
171
|
+
|
|
172
|
+
cdef inline char encode_resname(const char* src) noexcept nogil:
|
|
173
|
+
cdef size_t i
|
|
174
|
+
for i in range(21):
|
|
175
|
+
if AA3[i][0] == src[0] and AA3[i][1] == src[1] and AA3[i][2] == src[2]:
|
|
176
|
+
return AA1[i]
|
|
177
|
+
return ord('X')
|
|
178
|
+
|
|
152
179
|
# --- Classes ----------------------------------------------------------------
|
|
153
180
|
|
|
154
181
|
cdef class _MoleculeParser:
|
|
@@ -198,26 +225,34 @@ cdef class _CIFMoleculeParser(_MoleculeParser):
|
|
|
198
225
|
cdef object gemmi
|
|
199
226
|
cdef bint use_author
|
|
200
227
|
cdef bint skip_hetatm
|
|
228
|
+
cdef bint ignore_endmdl
|
|
201
229
|
|
|
202
230
|
_PRIMARY_COLUMNS = [
|
|
203
231
|
'id', 'type_symbol', 'label_atom_id', 'label_alt_id', 'label_comp_id',
|
|
204
232
|
'label_asym_id', 'label_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
|
|
205
233
|
'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
|
|
206
|
-
'?pdbx_formal_charge', '?group_PDB',
|
|
234
|
+
'?pdbx_formal_charge', '?group_PDB', 'pdbx_PDB_model_num',
|
|
207
235
|
]
|
|
208
236
|
|
|
209
237
|
_AUTH_COLUMNS = [
|
|
210
238
|
'id', 'type_symbol', 'auth_atom_id', 'label_alt_id', 'auth_comp_id',
|
|
211
239
|
'auth_asym_id', 'auth_seq_id', '?pdbx_PDB_ins_code', 'Cartn_x',
|
|
212
240
|
'Cartn_y', 'Cartn_z', 'occupancy', 'B_iso_or_equiv',
|
|
213
|
-
'?pdbx_formal_charge', '?group_PDB',
|
|
241
|
+
'?pdbx_formal_charge', '?group_PDB', 'pdbx_PDB_model_num',
|
|
214
242
|
]
|
|
215
243
|
|
|
216
|
-
def __init__(
|
|
244
|
+
def __init__(
|
|
245
|
+
self,
|
|
246
|
+
str id = None,
|
|
247
|
+
bint use_author = False,
|
|
248
|
+
bint skip_hetatm = False,
|
|
249
|
+
bint ignore_endmdl = False,
|
|
250
|
+
):
|
|
217
251
|
super().__init__(id=id)
|
|
218
252
|
self.gemmi = __import__('gemmi')
|
|
219
253
|
self.use_author = use_author
|
|
220
254
|
self.skip_hetatm = skip_hetatm
|
|
255
|
+
self.ignore_endmdl = ignore_endmdl
|
|
221
256
|
|
|
222
257
|
def _load_block(self, document, molecule_type):
|
|
223
258
|
block = document.sole_block()
|
|
@@ -230,6 +265,12 @@ cdef class _CIFMoleculeParser(_MoleculeParser):
|
|
|
230
265
|
|
|
231
266
|
atoms = []
|
|
232
267
|
for row in table:
|
|
268
|
+
|
|
269
|
+
# row[15] contains _atom_site.pdbx_PDB_model_num
|
|
270
|
+
# by default (if ignore_endmdl is False), stop at the first row whose model number is not 1
|
|
271
|
+
if not self.ignore_endmdl and row[15] != '1':
|
|
272
|
+
break
|
|
273
|
+
|
|
233
274
|
if row[14] != "ATOM" and (row[14] != "HETATM" or self.skip_hetatm):
|
|
234
275
|
continue
|
|
235
276
|
|
|
@@ -251,7 +292,7 @@ cdef class _CIFMoleculeParser(_MoleculeParser):
|
|
|
251
292
|
atom = Atom(
|
|
252
293
|
serial=int(row[0]),
|
|
253
294
|
element=row[1],
|
|
254
|
-
name=row[2],
|
|
295
|
+
name=row[2].strip('"'),
|
|
255
296
|
altloc=' ' if row[3] == "." else row[3], # FIXME: replace with None?
|
|
256
297
|
residue_name=row[4],
|
|
257
298
|
chain_id=row[5],
|
|
@@ -261,7 +302,7 @@ cdef class _CIFMoleculeParser(_MoleculeParser):
|
|
|
261
302
|
y=float(row[9]),
|
|
262
303
|
z=float(row[10]),
|
|
263
304
|
occupancy=0.0 if row[11] == '.' else float(row[11]),
|
|
264
|
-
temperature_factor=float(row[12]),
|
|
305
|
+
temperature_factor=0.0 if row[12] == '.' else float(row[12]),
|
|
265
306
|
charge=0 if not row.has(13) or row[13] == "?" else int(row[13]),
|
|
266
307
|
)
|
|
267
308
|
atoms.append(atom)
|
|
@@ -324,7 +365,7 @@ cdef class Molecule:
|
|
|
324
365
|
ignore_endmdl (`bool`): Pass `True` to make the parser read all
|
|
325
366
|
the atoms from the PDB file. By default, the parser only
|
|
326
367
|
reads the atoms of the first model, and stops at the first
|
|
327
|
-
``ENDMDL`` line
|
|
368
|
+
``ENDMDL`` line; for CIF files, it stops at the first atom whose ``_atom_site.pdbx_PDB_model_num`` is not 1.
|
|
328
369
|
use_author (`bool`): Pass `True` to use the author-defined
|
|
329
370
|
labels while parsing CIF files, e.g. read the chain name
|
|
330
371
|
from ``_atom_site.auth_asym_id`` rather than
|
|
@@ -348,8 +389,6 @@ cdef class Molecule:
|
|
|
348
389
|
The ``format`` argument, and support for CIF parsing.
|
|
349
390
|
|
|
350
391
|
"""
|
|
351
|
-
if format == "detect":
|
|
352
|
-
format = "cif" if text.lstrip().startswith(("data_", "loop_")) else "pdb"
|
|
353
392
|
return cls.load(
|
|
354
393
|
io.StringIO(text),
|
|
355
394
|
format=format,
|
|
@@ -389,7 +428,7 @@ cdef class Molecule:
|
|
|
389
428
|
ignore_endmdl (`bool`): Pass `True` to make the parser read all
|
|
390
429
|
the atoms from the PDB file. By default, the parser only
|
|
391
430
|
reads the atoms of the first model, and stops at the first
|
|
392
|
-
``ENDMDL`` line
|
|
431
|
+
``ENDMDL`` line; for CIF files, it stops at the first atom whose ``_atom_site.pdbx_PDB_model_num`` is not 1.
|
|
393
432
|
use_author (`bool`): Pass `True` to use the author-defined
|
|
394
433
|
labels while parsing CIF files, e.g. read the chain name
|
|
395
434
|
from ``_atom_site.auth_asym_id`` rather than
|
|
@@ -423,17 +462,17 @@ cdef class Molecule:
|
|
|
423
462
|
except TypeError:
|
|
424
463
|
handle = nullcontext(file)
|
|
425
464
|
with handle as f:
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
peek = f[5:]
|
|
465
|
+
f = PeekableFile(f)
|
|
466
|
+
peek = f.peek(10)
|
|
467
|
+
while peek.startswith("#"):
|
|
468
|
+
f.readline()
|
|
469
|
+
peek = f.peek(5)
|
|
432
470
|
if peek.startswith(("data_", "loop_")):
|
|
433
471
|
parser = _CIFMoleculeParser(
|
|
434
472
|
id=id,
|
|
435
473
|
use_author=use_author,
|
|
436
474
|
skip_hetatm=skip_hetatm,
|
|
475
|
+
ignore_endmdl=ignore_endmdl,
|
|
437
476
|
)
|
|
438
477
|
else:
|
|
439
478
|
parser = _PDBMoleculeParser(
|
|
@@ -441,10 +480,8 @@ cdef class Molecule:
|
|
|
441
480
|
ignore_endmdl=ignore_endmdl,
|
|
442
481
|
skip_hetatm=skip_hetatm,
|
|
443
482
|
)
|
|
444
|
-
if isinstance(f, str):
|
|
445
|
-
return parser.loads(f, molecule_type=cls)
|
|
446
483
|
return parser.load(f, molecule_type=cls)
|
|
447
|
-
|
|
484
|
+
elif format == "pdb":
|
|
448
485
|
parser = _PDBMoleculeParser(
|
|
449
486
|
id=id,
|
|
450
487
|
ignore_endmdl=ignore_endmdl,
|
|
@@ -455,6 +492,7 @@ cdef class Molecule:
|
|
|
455
492
|
id=id,
|
|
456
493
|
use_author=use_author,
|
|
457
494
|
skip_hetatm=skip_hetatm,
|
|
495
|
+
ignore_endmdl=ignore_endmdl,
|
|
458
496
|
)
|
|
459
497
|
else:
|
|
460
498
|
raise ValueError(f"invalid value for `format` argument: {format!r}")
|
|
@@ -1193,11 +1231,21 @@ cdef class TemplateAtom:
|
|
|
1193
1231
|
self._atom.pos[0] = x
|
|
1194
1232
|
self._atom.pos[1] = y
|
|
1195
1233
|
self._atom.pos[2] = z
|
|
1196
|
-
self._atom.chainID1, self._atom.chainID2 = map(ord, chain_id.ljust(2))
|
|
1197
1234
|
self._atom.nameCount = ac
|
|
1198
1235
|
self._atom.resNameCount = rc
|
|
1199
1236
|
self._atom.distWeight = distance_weight
|
|
1200
1237
|
|
|
1238
|
+
# copy chain ID
|
|
1239
|
+
if len(chain_id) == 2:
|
|
1240
|
+
self._atom.chainID1 = ord(chain_id[0])
|
|
1241
|
+
self._atom.chainID2 = ord(chain_id[1])
|
|
1242
|
+
elif len(chain_id) == 1:
|
|
1243
|
+
self._atom.chainID1 = ord(' ')
|
|
1244
|
+
self._atom.chainID2 = ord(chain_id[0])
|
|
1245
|
+
else:
|
|
1246
|
+
self._atom.chainID1 = ord(' ')
|
|
1247
|
+
self._atom.chainID2 = ord('0')
|
|
1248
|
+
|
|
1201
1249
|
# setup string pointers
|
|
1202
1250
|
p = <char*> &self._atom[1]
|
|
1203
1251
|
self._atom.name = <char**> p
|
|
@@ -1303,6 +1351,8 @@ cdef class TemplateAtom:
|
|
|
1303
1351
|
assert self._atom is not NULL
|
|
1304
1352
|
cdef char c1 = jess.tess_atom.TessAtom_chainID1(self._atom)
|
|
1305
1353
|
cdef char c2 = jess.tess_atom.TessAtom_chainID2(self._atom)
|
|
1354
|
+
if c1 == ord(' '):
|
|
1355
|
+
return chr(c2)
|
|
1306
1356
|
return PyUnicode_FromFormat("%c%c", c1, c2).strip()
|
|
1307
1357
|
|
|
1308
1358
|
@property
|
|
@@ -1382,6 +1432,63 @@ cdef class TemplateAtom:
|
|
|
1382
1432
|
atom._atom = jess.tess_atom.TessAtom_copy(self._atom)
|
|
1383
1433
|
return atom
|
|
1384
1434
|
|
|
1435
|
+
cpdef str dumps(self):
|
|
1436
|
+
"""Write the template atom to a string.
|
|
1437
|
+
|
|
1438
|
+
Returns:
|
|
1439
|
+
`str`: The serialized template atom.
|
|
1440
|
+
|
|
1441
|
+
.. versionadded:: 0.8.0
|
|
1442
|
+
|
|
1443
|
+
"""
|
|
1444
|
+
file = io.StringIO()
|
|
1445
|
+
self.dump(file)
|
|
1446
|
+
return file.getvalue()
|
|
1447
|
+
|
|
1448
|
+
cpdef void dump(self, object file):
|
|
1449
|
+
"""Write the template atom to a file.
|
|
1450
|
+
|
|
1451
|
+
Arguments:
|
|
1452
|
+
file (file-like object): A file opened in *text* mode where the
|
|
1453
|
+
template atom will be written.
|
|
1454
|
+
|
|
1455
|
+
.. versionadded:: 0.8.0
|
|
1456
|
+
|
|
1457
|
+
"""
|
|
1458
|
+
assert self._atom is not NULL
|
|
1459
|
+
|
|
1460
|
+
cdef size_t k
|
|
1461
|
+
cdef char[80] buffer
|
|
1462
|
+
cdef char[5] name
|
|
1463
|
+
cdef char[5] resname
|
|
1464
|
+
cdef _TessAtom* atom = self._atom
|
|
1465
|
+
|
|
1466
|
+
decode_token(name, atom.name[0], 4)
|
|
1467
|
+
decode_token(resname, atom.resName[0], 3)
|
|
1468
|
+
|
|
1469
|
+
n = sprintf(
|
|
1470
|
+
buffer,
|
|
1471
|
+
"ATOM %5i %4s %3s%c%c%4i %8.3f%8.3f%8.3f",
|
|
1472
|
+
atom.code,
|
|
1473
|
+
name,
|
|
1474
|
+
resname,
|
|
1475
|
+
jess.tess_atom.TessAtom_chainID1(self._atom),
|
|
1476
|
+
jess.tess_atom.TessAtom_chainID2(self._atom),
|
|
1477
|
+
atom.resSeq,
|
|
1478
|
+
atom.pos[0],
|
|
1479
|
+
atom.pos[1],
|
|
1480
|
+
atom.pos[2],
|
|
1481
|
+
)
|
|
1482
|
+
|
|
1483
|
+
memset(&buffer[n], ord(' '), 7*sizeof(char))
|
|
1484
|
+
for k in range(1, atom.resNameCount):
|
|
1485
|
+
buffer[n + k] = encode_resname(atom.resName[k])
|
|
1486
|
+
|
|
1487
|
+
n += 7
|
|
1488
|
+
n += sprintf(&buffer[n], "%4.2f", atom.distWeight)
|
|
1489
|
+
|
|
1490
|
+
file.write(PyUnicode_FromStringAndSize(buffer, n))
|
|
1491
|
+
|
|
1385
1492
|
|
|
1386
1493
|
cdef class Template:
|
|
1387
1494
|
"""A template, as a sequence of `TemplateAtom` objects.
|
|
@@ -1656,6 +1763,40 @@ cdef class Template:
|
|
|
1656
1763
|
tpl._tess = <_TessTemplate*> &tpl._tpl[1]
|
|
1657
1764
|
return tpl
|
|
1658
1765
|
|
|
1766
|
+
cpdef str dumps(self):
|
|
1767
|
+
"""Write the template to a string.
|
|
1768
|
+
|
|
1769
|
+
Returns:
|
|
1770
|
+
`str`: The serialized template.
|
|
1771
|
+
|
|
1772
|
+
.. versionadded:: 0.8.0
|
|
1773
|
+
|
|
1774
|
+
"""
|
|
1775
|
+
file = io.StringIO()
|
|
1776
|
+
self.dump(file)
|
|
1777
|
+
return file.getvalue()
|
|
1778
|
+
|
|
1779
|
+
cpdef void dump(self, object file, bint write_id=True):
|
|
1780
|
+
"""Write the template to a file.
|
|
1781
|
+
|
|
1782
|
+
Arguments:
|
|
1783
|
+
file (file-like object): A file opened in *text* mode where the
|
|
1784
|
+
template will be written.
|
|
1785
|
+
write_id (`bool`): Whether to write the identifier of the
|
|
1786
|
+
template as a ``REMARK`` line.
|
|
1787
|
+
|
|
1788
|
+
.. versionadded:: 0.8.0
|
|
1789
|
+
|
|
1790
|
+
"""
|
|
1791
|
+
cdef str id_ = self.id
|
|
1792
|
+
if write_id and id_ is not None:
|
|
1793
|
+
file.write(f"REMARK PDB_ID {id_}\n")
|
|
1794
|
+
for template_atom in self:
|
|
1795
|
+
template_atom.dump(file)
|
|
1796
|
+
file.write("\n")
|
|
1797
|
+
file.write("END\n")
|
|
1798
|
+
|
|
1799
|
+
|
|
1659
1800
|
cdef class Query:
|
|
1660
1801
|
"""A query over templates with a given molecule.
|
|
1661
1802
|
|
|
@@ -1860,7 +2001,6 @@ cdef class Hit:
|
|
|
1860
2001
|
rmsd (`float`): The RMSD between the aligned structures.
|
|
1861
2002
|
template (`~pyjess.Template`): The template that matched the
|
|
1862
2003
|
query molecule.
|
|
1863
|
-
molecule (`~pyjess.Molecule`): The query molecule.
|
|
1864
2004
|
|
|
1865
2005
|
"""
|
|
1866
2006
|
cdef double[9] _rotation
|
pyjess/_peekable.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PeekableFile(typing.TextIO):
    """A text-file wrapper that supports peeking at upcoming characters.

    Characters obtained through `peek` are kept in an internal buffer and
    are handed back, in order, by later `read`, `readline` or iteration
    calls, so nothing read from the wrapped file is lost.
    """

    def __init__(self, file):
        """Wrap ``file``, a file-like object opened in text mode."""
        self.file = file
        # Characters already pulled from `file` by `peek` but not yet
        # returned to the caller.
        self._buffer = ""

    def __enter__(self):
        # `typing.TextIO` only provides an abstract stub returning `None`;
        # return the wrapper so that ``with PeekableFile(f) as g`` is usable.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def __iter__(self):
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def read(self, n=-1):
        """Read and return at most ``n`` characters.

        Arguments:
            n (`int` or `None`): The maximum number of characters to
                return. `None` or any negative value reads until EOF,
                like the standard text I/O classes.

        Returns:
            `str`: The buffered characters (if any) followed by the
            characters read from the wrapped file.

        """
        if n is None or n < 0:
            # Any negative size means "everything": the buffer must never
            # be sliced with a negative index.
            output = self._buffer + self.file.read()
            self._buffer = ""
        elif n <= len(self._buffer):
            output, self._buffer = self._buffer[:n], self._buffer[n:]
        else:
            output = self._buffer + self.file.read(n - len(self._buffer))
            self._buffer = ""
        return output

    def readline(self, size=-1):
        """Read and return one line, including its trailing newline.

        Arguments:
            size (`int` or `None`): If non-negative, return at most
                ``size`` characters. `None` or a negative value means
                no limit.

        Returns:
            `str`: The next line, or an empty string at EOF.

        """
        if size is None:
            size = -1
        i = self._buffer.find("\n")
        if i != -1:
            # A complete line is already buffered.
            end = i + 1
            if 0 <= size < end:
                end = size
        elif size < 0:
            # No newline buffered and no limit: finish the line from the file.
            line = self._buffer + self.file.readline()
            self._buffer = ""
            return line
        elif size <= len(self._buffer):
            end = size
        else:
            line = self._buffer + self.file.readline(size - len(self._buffer))
            self._buffer = ""
            return line
        line, self._buffer = self._buffer[:end], self._buffer[end:]
        return line

    def peek(self, n):
        """Return up to ``n`` upcoming characters without consuming them.

        Fewer than ``n`` characters are returned when the wrapped file
        reaches EOF first. A non-positive ``n`` returns an empty string.
        """
        if n <= 0:
            return ""
        missing = n - len(self._buffer)
        if missing > 0:
            self._buffer += self.file.read(missing)
        return self._buffer[:n]

    def close(self):
        """Close the wrapped file."""
        self.file.close()
|
pyjess/tests/test_molecule.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import itertools
|
|
2
|
+
import io
|
|
2
3
|
import os
|
|
3
4
|
import pickle
|
|
4
5
|
import unittest
|
|
5
6
|
import tempfile
|
|
6
7
|
import textwrap
|
|
7
8
|
import sys
|
|
9
|
+
import shutil
|
|
8
10
|
import warnings
|
|
9
11
|
|
|
10
12
|
from .utils import files
|
|
@@ -191,6 +193,47 @@ class TestMolecule(unittest.TestCase):
|
|
|
191
193
|
self.assertEqual(mol1.id, mol2.id)
|
|
192
194
|
self.assertEqual(mol1, mol2)
|
|
193
195
|
|
|
196
|
+
@unittest.skipUnless(files, "importlib.resources not available")
|
|
197
|
+
@unittest.skipUnless(gemmi, "gemmi not available")
|
|
198
|
+
def test_load_mmcif_comment(self):
|
|
199
|
+
# load mmCIF file into a buffer and add a comment at the top
|
|
200
|
+
buffer = io.StringIO()
|
|
201
|
+
buffer.write("# this is a comment line \n")
|
|
202
|
+
buffer.write("# and a second comment line \n")
|
|
203
|
+
with files(data).joinpath("1AMY.cif").open() as f:
|
|
204
|
+
shutil.copyfileobj(f, buffer)
|
|
205
|
+
|
|
206
|
+
with warnings.catch_warnings():
|
|
207
|
+
warnings.simplefilter('ignore')
|
|
208
|
+
# should work when explicitly given "CIF" format
|
|
209
|
+
buffer.seek(0)
|
|
210
|
+
cif_molecule = Molecule.load(buffer, format="cif")
|
|
211
|
+
self.assertEqual(len(cif_molecule), 3339)
|
|
212
|
+
# should work when not given format
|
|
213
|
+
buffer.seek(0)
|
|
214
|
+
cif_molecule = Molecule.load(buffer, format="detect")
|
|
215
|
+
self.assertEqual(len(cif_molecule), 3339)
|
|
216
|
+
|
|
217
|
+
@unittest.skipUnless(files, "importlib.resources not available")
|
|
218
|
+
@unittest.skipUnless(gemmi, "gemmi not available")
|
|
219
|
+
def test_loads_mmcif_comment(self):
|
|
220
|
+
# load mmCIF file into a buffer and add a comment at the top
|
|
221
|
+
buffer = io.StringIO()
|
|
222
|
+
buffer.write("# this is a comment line \n")
|
|
223
|
+
buffer.write("# and a second comment line \n")
|
|
224
|
+
with files(data).joinpath("1AMY.cif").open() as f:
|
|
225
|
+
shutil.copyfileobj(f, buffer)
|
|
226
|
+
text = buffer.getvalue()
|
|
227
|
+
|
|
228
|
+
with warnings.catch_warnings():
|
|
229
|
+
warnings.simplefilter('ignore')
|
|
230
|
+
# should work when explicitly given "CIF" format
|
|
231
|
+
cif_molecule = Molecule.loads(text, format="cif")
|
|
232
|
+
self.assertEqual(len(cif_molecule), 3339)
|
|
233
|
+
# should work when not given format
|
|
234
|
+
cif_molecule = Molecule.loads(text, format="detect")
|
|
235
|
+
self.assertEqual(len(cif_molecule), 3339)
|
|
236
|
+
|
|
194
237
|
@unittest.skipUnless(files, "importlib.resources not available")
|
|
195
238
|
@unittest.skipUnless(gemmi, "gemmi not available")
|
|
196
239
|
def test_load_consistency_no_skip_hetatm(self):
|
pyjess/tests/test_template.py
CHANGED
|
@@ -119,6 +119,12 @@ class TestTemplate(unittest.TestCase):
|
|
|
119
119
|
self.assertEqual(len(tpl1), len(tpl2))
|
|
120
120
|
self.assertEqual(tpl1, tpl2)
|
|
121
121
|
|
|
122
|
+
def test_dumps_roundtrip(self):
|
|
123
|
+
tpl1 = Template.loads(TEMPLATE, id="tpl1")
|
|
124
|
+
tpl2 = Template.loads(tpl1.dumps())
|
|
125
|
+
self.assertEqual(len(tpl1), len(tpl2))
|
|
126
|
+
self.assertEqual(tpl1, tpl2)
|
|
127
|
+
|
|
122
128
|
@unittest.skipUnless(files, "importlib.resources not available")
|
|
123
129
|
def test_dimension_multiple_chains(self):
|
|
124
130
|
with files(data).joinpath("1sur.qry").open() as f:
|
|
@@ -2,7 +2,7 @@ import unittest
|
|
|
2
2
|
import sys
|
|
3
3
|
import pickle
|
|
4
4
|
|
|
5
|
-
from .._jess import TemplateAtom
|
|
5
|
+
from .._jess import TemplateAtom, Atom
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class TestTemplateAtom(unittest.TestCase):
|
|
@@ -89,4 +89,32 @@ class TestTemplateAtom(unittest.TestCase):
|
|
|
89
89
|
copy = pickle.loads(pickle.dumps(atom))
|
|
90
90
|
for attribute in ("atom_names", "residue_names", "chain_id", "x", "y", "z", "match_mode"):
|
|
91
91
|
self.assertEqual(getattr(copy, attribute), getattr(atom, attribute))
|
|
92
|
+
self.assertEqual(atom, copy)
|
|
93
|
+
|
|
94
|
+
def test_dumps(self):
|
|
95
|
+
atom = TemplateAtom(
|
|
96
|
+
chain_id="A",
|
|
97
|
+
residue_number=1136,
|
|
98
|
+
x=3.953,
|
|
99
|
+
y=0.597,
|
|
100
|
+
z=-1.721,
|
|
101
|
+
residue_names=["ARG", "LYS"],
|
|
102
|
+
atom_names=["NE"],
|
|
103
|
+
distance_weight=0.00,
|
|
104
|
+
match_mode=1,
|
|
105
|
+
)
|
|
106
|
+
t = atom.dumps()
|
|
107
|
+
self.assertEqual(
|
|
108
|
+
t,
|
|
109
|
+
"ATOM 1 NE ARG A1136 3.953 0.597 -1.721 K 0.00"
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
def test_dumps_roundtrip(self):
|
|
113
|
+
atom = self._create_atom()
|
|
114
|
+
self.assertEqual(atom.chain_id, 'A')
|
|
115
|
+
t = atom.dumps()
|
|
116
|
+
self.assertEqual(t[20:22], ' A')
|
|
117
|
+
copy = TemplateAtom.loads(atom.dumps())
|
|
118
|
+
for attribute in ("atom_names", "residue_names", "chain_id", "x", "y", "z", "match_mode"):
|
|
119
|
+
self.assertEqual(getattr(copy, attribute), getattr(atom, attribute), attribute)
|
|
92
120
|
self.assertEqual(atom, copy)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: pyjess
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Cython bindings and Python interface to JESS, a 3D template matching software.
|
|
5
5
|
Keywords: bioinformatics,structure,template,matching
|
|
6
6
|
Author-Email: Martin Larralde <martin.larralde@embl.de>
|
|
@@ -40,6 +40,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
40
40
|
Classifier: Programming Language :: Python :: 3.11
|
|
41
41
|
Classifier: Programming Language :: Python :: 3.12
|
|
42
42
|
Classifier: Programming Language :: Python :: 3.13
|
|
43
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
43
44
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
44
45
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
45
46
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
@@ -190,7 +191,7 @@ computed on-the-fly when requested:
|
|
|
190
191
|
|
|
191
192
|
```python
|
|
192
193
|
for hit in query:
|
|
193
|
-
print(hit.molecule.id, hit.template.id, hit.rmsd, hit.log_evalue)
|
|
194
|
+
print(hit.molecule().id, hit.template.id, hit.rmsd, hit.log_evalue)
|
|
194
195
|
for atom in hit.atoms():
|
|
195
196
|
print(atom.name, atom.x, atom.y, atom.z)
|
|
196
197
|
```
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
pyjess/.gitignore,sha256=u14v4OOy8U50Kp9SUKU8DupCG-mQIuel47gdbNDmAwg,21
|
|
2
2
|
pyjess/__init__.py,sha256=Xe9GBQUBm9ik-ty5tcE3UQ9Ip1p-C_IGvTPuGULolng,766
|
|
3
3
|
pyjess/__main__.py,sha256=Kc823UjDqgAMU6YJdDwfNlEPWjpX_94QXgCBlLMnUMo,53
|
|
4
|
-
pyjess/_jess.pyi,sha256=
|
|
5
|
-
pyjess/_jess.pypy39-pp73-win_amd64.pyd,sha256
|
|
6
|
-
pyjess/_jess.pyx,sha256=
|
|
4
|
+
pyjess/_jess.pyi,sha256=soIsXPg2INGEqZ7AXQ3aZWKsr-piSzVXY-FSfOFXURQ,8748
|
|
5
|
+
pyjess/_jess.pypy39-pp73-win_amd64.pyd,sha256=-ES2-Zm4HVnonjfzKN23w-tY353iY9IEUsx0wJlWBns,349696
|
|
6
|
+
pyjess/_jess.pyx,sha256=PSr3lGPKQ6est9bZJZWq0Z_dPw-hIAYkbxOpC6XIpqo,86976
|
|
7
|
+
pyjess/_peekable.py,sha256=Blu35yPXFp85gMxLRGl5qmZfuUybM1ttM448o1LUlsc,1357
|
|
7
8
|
pyjess/cli.py,sha256=82qa2vDMWVqmAScPYxGS3-5bEFuqBDAmuQ6M-9WQsLA,9211
|
|
8
9
|
pyjess/CMakeLists.txt,sha256=H9eXbrFcGF2OLP8muQctb4cOb27Qp2uZj5KRjoDAROg,36
|
|
9
10
|
pyjess/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -23,12 +24,12 @@ pyjess/tests/test_atom.py,sha256=clLN9IVuivadztGtagDhdPBDGoMkUgs41lEWuTCCmFA,474
|
|
|
23
24
|
pyjess/tests/test_doctest.py,sha256=Z46WI6d2rvRoShOGQFZM_9zAzzgBqPlOaozpFu8bvDM,2632
|
|
24
25
|
pyjess/tests/test_hit.py,sha256=3z1JgGI87w_77Rdk_zrG2zA9M1n8u9L-XtTU3HtpSaY,2468
|
|
25
26
|
pyjess/tests/test_jess.py,sha256=E6j27Y3N9Da19YL30QFVCwREUO9fID2HH75zWMn2V5Y,17669
|
|
26
|
-
pyjess/tests/test_molecule.py,sha256=
|
|
27
|
-
pyjess/tests/test_template.py,sha256=
|
|
28
|
-
pyjess/tests/test_template_atom.py,sha256=
|
|
27
|
+
pyjess/tests/test_molecule.py,sha256=MPDHFGbcw83E1vm4Z1_3PuPADW0qT7qzBoehaJyMURE,14818
|
|
28
|
+
pyjess/tests/test_template.py,sha256=3lbN_jzWjsEcw00r0Y4PxYBuiWlq47Wh1bba189jjiM,5248
|
|
29
|
+
pyjess/tests/test_template_atom.py,sha256=vHT1I2nmaP34LUg-7SuqbpUYAJnL2iyRGd3k3yDNQdU,4467
|
|
29
30
|
pyjess/tests/utils.py,sha256=Z7rUPC-D8dZlRfHAnLaXHUg6M10D3zFvNiwDvvHA3xc,202
|
|
30
|
-
pyjess-0.
|
|
31
|
-
pyjess-0.
|
|
32
|
-
pyjess-0.
|
|
33
|
-
pyjess-0.
|
|
34
|
-
pyjess-0.
|
|
31
|
+
pyjess-0.8.0.dist-info/METADATA,sha256=Zxnk5McI0xVW_Y_WIYKRLIVTioGlNE7LGtre5KnzWvo,14329
|
|
32
|
+
pyjess-0.8.0.dist-info/WHEEL,sha256=k0WUL95hfPxp-oNxWWxpwaSfyWqqHRsPPz7teFk1VMg,111
|
|
33
|
+
pyjess-0.8.0.dist-info/entry_points.txt,sha256=5dgYfglg8P5hPTIyrKAnOBmYqs2GRR0kb6x0BncaHbA,44
|
|
34
|
+
pyjess-0.8.0.dist-info/licenses/COPYING,sha256=Iyx2bRDPnLgoEzW2KVanb61cjhW8lnhJNU-mjS-KhIY,1124
|
|
35
|
+
pyjess-0.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|