RNApolis 0.4.4__py3-none-any.whl → 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.4
3
+ Version: 0.4.6
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -4,14 +4,14 @@ rnapolis/common.py,sha256=PUYF01P2vevhyImhZjGYE0jJlsxWHX6GQmsxI4W7S-E,30255
4
4
  rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
5
5
  rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
6
6
  rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
7
- rnapolis/parser.py,sha256=wCA9rXqt51iLECgeBqOShFpuT8JwanNkHYD5uXYvLzU,13988
7
+ rnapolis/parser.py,sha256=2pQYy0sh8TCpeluMmmSJ7C5dudK_bsfstTWCdpwwpNU,15193
8
8
  rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
9
9
  rnapolis/tertiary.py,sha256=SQyiYWA0RJhAK70f88CKZvS4EzGKHQ2RoL1s4MueEDQ,21657
10
10
  rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
11
11
  rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
12
- RNApolis-0.4.4.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
- RNApolis-0.4.4.dist-info/METADATA,sha256=irtWJbeg1LWun2r3WtnsnDDSHlLvru0hO9wz1e67cIE,54322
14
- RNApolis-0.4.4.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
15
- RNApolis-0.4.4.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
- RNApolis-0.4.4.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
- RNApolis-0.4.4.dist-info/RECORD,,
12
+ RNApolis-0.4.6.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
13
+ RNApolis-0.4.6.dist-info/METADATA,sha256=1Zd34oJFXeLnKDzG7HG-6jSjsAnIimsiWPbTlvDerO4,54322
14
+ RNApolis-0.4.6.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
15
+ RNApolis-0.4.6.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
16
+ RNApolis-0.4.6.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
17
+ RNApolis-0.4.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.1.2)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
rnapolis/parser.py CHANGED
@@ -1,7 +1,10 @@
1
1
  import logging
2
2
  from typing import IO, Dict, List, Optional, Tuple, Union
3
3
 
4
+ import numpy as np
4
5
  from mmcif.io.IoAdapterPy import IoAdapterPy
6
+ from scipy.spatial import KDTree
7
+
5
8
  from rnapolis.common import ResidueAuth, ResidueLabel
6
9
  from rnapolis.tertiary import BASE_ATOMS, Atom, Residue3D, Structure3D
7
10
 
@@ -53,10 +56,10 @@ def parse_cif(
53
56
 
54
57
  io_adapter = IoAdapterPy()
55
58
  data = io_adapter.readFile(cif.name)
56
- atoms: List[Atom] = []
59
+ atoms_to_process: List[Atom] = []
57
60
  modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
58
- sequence_by_entity = {}
59
- is_nucleic_acid_by_entity = {}
61
+ sequence_by_entity: Dict[str, str] = {}
62
+ is_nucleic_acid_by_entity: Dict[str, bool] = {}
60
63
 
61
64
  if data:
62
65
  atom_site = data[0].getObj("atom_site")
@@ -136,7 +139,7 @@ def parse_cif(
136
139
  else None
137
140
  )
138
141
 
139
- atoms.append(
142
+ atoms_to_process.append(
140
143
  Atom(
141
144
  label_entity_id,
142
145
  label,
@@ -216,6 +219,7 @@ def parse_cif(
216
219
  if entity_id and pdbx_seq_one_letter_code_can:
217
220
  sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
218
221
 
222
+ atoms = filter_clashing_atoms(atoms_to_process)
219
223
  return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
220
224
 
221
225
 
@@ -228,7 +232,7 @@ def parse_pdb(
228
232
  Dict[str, bool],
229
233
  ]:
230
234
  pdb.seek(0)
231
- atoms: List[Atom] = []
235
+ atoms_to_process: List[Atom] = []
232
236
  modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
233
237
  model = 1
234
238
 
@@ -236,9 +240,6 @@ def parse_pdb(
236
240
  if line.startswith("MODEL"):
237
241
  model = int(line[10:14].strip())
238
242
  elif line.startswith("ATOM") or line.startswith("HETATM"):
239
- alternate_location = line[16]
240
- if alternate_location != " ":
241
- continue
242
243
  atom_name = line[12:16].strip()
243
244
  residue_name = line[17:20].strip()
244
245
  chain_identifier = line[21]
@@ -251,7 +252,10 @@ def parse_pdb(
251
252
  auth = ResidueAuth(
252
253
  chain_identifier, residue_number, insertion_code, residue_name
253
254
  )
254
- atoms.append(Atom(None, None, auth, model, atom_name, x, y, z, occupancy))
255
+
256
+ atoms_to_process.append(
257
+ Atom(None, None, auth, model, atom_name, x, y, z, occupancy)
258
+ )
255
259
  elif line.startswith("MODRES"):
256
260
  original_name = line[12:15]
257
261
  chain_identifier = line[16]
@@ -263,6 +267,7 @@ def parse_pdb(
263
267
  )
264
268
  modified[auth] = standard_residue_name
265
269
 
270
+ atoms = filter_clashing_atoms(atoms_to_process)
266
271
  return atoms, modified, {}, {}
267
272
 
268
273
 
@@ -392,3 +397,36 @@ def try_parse_int(s: str) -> Optional[int]:
392
397
  return int(s)
393
398
  except ValueError:
394
399
  return None
400
+
401
+
402
def filter_clashing_atoms(
    atoms: List["Atom"], clash_distance: float = 0.5
) -> List["Atom"]:
    """
    Drop duplicated and spatially clashing atoms, preferring higher occupancy.

    Two passes are made:

    1. Atoms sharing the same model, residue (``label`` and ``auth``) and
       atom name are collapsed to the one with the highest occupancy; on a
       tie the first one seen wins. An unknown (``None``) occupancy never
       beats a known one.
    2. Among the survivors, every pair from the same model lying within
       ``clash_distance`` of each other loses its lower-occupancy member
       (on a tie, the one that came earlier). Pairs in which either
       occupancy is unknown are left untouched.

    Args:
        atoms: Atoms to filter; the list itself is not modified.
        clash_distance: Radius passed to ``KDTree.query_pairs``; expressed
            in the same unit as the atom coordinates.

    Returns:
        The retained atoms, ordered by the first appearance of their
        (model, residue, atom name) key in ``atoms``.
    """

    def rank(atom) -> float:
        # An unknown occupancy must never win against a known one, and
        # comparing None with a float directly would raise TypeError.
        return float("-inf") if atom.occupancy is None else atom.occupancy

    def model_of(atom):
        # NOTE(review): the Atom definition is not visible from here; the
        # model number is assumed to be exposed as ``model`` -- confirm.
        return getattr(atom, "model", None)

    # Pass 1: keep a single atom per (model, residue, atom name). The model
    # is part of the key so that multi-model files keep every model.
    best_by_key = {}
    for atom in atoms:
        key = (model_of(atom), atom.label, atom.auth, atom.name)
        current = best_by_key.get(key)
        if current is None or rank(atom) > rank(current):
            best_by_key[key] = atom
    unique_atoms = list(best_by_key.values())

    # Fewer than two atoms cannot clash, and KDTree rejects an empty array.
    if len(unique_atoms) < 2:
        return unique_atoms

    # Pass 2: find all pairs closer than clash_distance.
    coords = np.array([(a.x, a.y, a.z) for a in unique_atoms], dtype=float)
    pairs = KDTree(coords).query_pairs(r=clash_distance)

    discarded = set()
    for i, j in pairs:
        first, second = unique_atoms[i], unique_atoms[j]
        # Atoms of different models legitimately overlap in space.
        if model_of(first) != model_of(second):
            continue
        if first.occupancy is None or second.occupancy is None:
            continue
        discarded.add(j if first.occupancy > second.occupancy else i)

    # Filter by index instead of iterating a set so the order is stable.
    return [a for idx, a in enumerate(unique_atoms) if idx not in discarded]