RNApolis 0.4.3__tar.gz → 0.4.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. {rnapolis-0.4.3/src/RNApolis.egg-info → rnapolis-0.4.6}/PKG-INFO +1 -1
  2. {rnapolis-0.4.3 → rnapolis-0.4.6}/setup.py +1 -1
  3. {rnapolis-0.4.3 → rnapolis-0.4.6/src/RNApolis.egg-info}/PKG-INFO +1 -1
  4. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/parser.py +47 -9
  5. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/tertiary.py +43 -7
  6. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_annotator.py +11 -0
  7. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_common.py +44 -0
  8. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_parser.py +15 -0
  9. {rnapolis-0.4.3 → rnapolis-0.4.6}/LICENSE +0 -0
  10. {rnapolis-0.4.3 → rnapolis-0.4.6}/README.md +0 -0
  11. {rnapolis-0.4.3 → rnapolis-0.4.6}/pyproject.toml +0 -0
  12. {rnapolis-0.4.3 → rnapolis-0.4.6}/setup.cfg +0 -0
  13. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/SOURCES.txt +0 -0
  14. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/dependency_links.txt +0 -0
  15. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/entry_points.txt +0 -0
  16. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/requires.txt +0 -0
  17. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/RNApolis.egg-info/top_level.txt +0 -0
  18. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/annotator.py +0 -0
  19. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/clashfinder.py +0 -0
  20. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/common.py +0 -0
  21. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/metareader.py +0 -0
  22. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/molecule_filter.py +0 -0
  23. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/motif_extractor.py +0 -0
  24. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/rfam_folder.py +0 -0
  25. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/transformer.py +0 -0
  26. {rnapolis-0.4.3 → rnapolis-0.4.6}/src/rnapolis/util.py +0 -0
  27. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_bugfixes.py +0 -0
  28. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_metareader.py +0 -0
  29. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_quadruplexes.py +0 -0
  30. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_rfam_folder.py +0 -0
  31. {rnapolis-0.4.3 → rnapolis-0.4.6}/tests/test_tertiary.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.3
3
+ Version: 0.4.6
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -5,7 +5,7 @@ with open("README.md") as f:
5
5
 
6
6
  setup(
7
7
  name="RNApolis",
8
- version="0.4.3",
8
+ version="0.4.6",
9
9
  packages=["rnapolis"],
10
10
  package_dir={"": "src"},
11
11
  author="Tomasz Zok",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: RNApolis
3
- Version: 0.4.3
3
+ Version: 0.4.6
4
4
  Summary: A Python library containing RNA-related bioinformatics functions and classes
5
5
  Home-page: https://github.com/tzok/rnapolis-py
6
6
  Author: Tomasz Zok
@@ -1,7 +1,10 @@
1
1
  import logging
2
2
  from typing import IO, Dict, List, Optional, Tuple, Union
3
3
 
4
+ import numpy as np
4
5
  from mmcif.io.IoAdapterPy import IoAdapterPy
6
+ from scipy.spatial import KDTree
7
+
5
8
  from rnapolis.common import ResidueAuth, ResidueLabel
6
9
  from rnapolis.tertiary import BASE_ATOMS, Atom, Residue3D, Structure3D
7
10
 
@@ -53,10 +56,10 @@ def parse_cif(
53
56
 
54
57
  io_adapter = IoAdapterPy()
55
58
  data = io_adapter.readFile(cif.name)
56
- atoms: List[Atom] = []
59
+ atoms_to_process: List[Atom] = []
57
60
  modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
58
- sequence_by_entity = {}
59
- is_nucleic_acid_by_entity = {}
61
+ sequence_by_entity: Dict[str, str] = {}
62
+ is_nucleic_acid_by_entity: Dict[str, bool] = {}
60
63
 
61
64
  if data:
62
65
  atom_site = data[0].getObj("atom_site")
@@ -136,7 +139,7 @@ def parse_cif(
136
139
  else None
137
140
  )
138
141
 
139
- atoms.append(
142
+ atoms_to_process.append(
140
143
  Atom(
141
144
  label_entity_id,
142
145
  label,
@@ -216,6 +219,7 @@ def parse_cif(
216
219
  if entity_id and pdbx_seq_one_letter_code_can:
217
220
  sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
218
221
 
222
+ atoms = filter_clashing_atoms(atoms_to_process)
219
223
  return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
220
224
 
221
225
 
@@ -228,7 +232,7 @@ def parse_pdb(
228
232
  Dict[str, bool],
229
233
  ]:
230
234
  pdb.seek(0)
231
- atoms: List[Atom] = []
235
+ atoms_to_process: List[Atom] = []
232
236
  modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
233
237
  model = 1
234
238
 
@@ -236,9 +240,6 @@ def parse_pdb(
236
240
  if line.startswith("MODEL"):
237
241
  model = int(line[10:14].strip())
238
242
  elif line.startswith("ATOM") or line.startswith("HETATM"):
239
- alternate_location = line[16]
240
- if alternate_location != " ":
241
- continue
242
243
  atom_name = line[12:16].strip()
243
244
  residue_name = line[17:20].strip()
244
245
  chain_identifier = line[21]
@@ -251,7 +252,10 @@ def parse_pdb(
251
252
  auth = ResidueAuth(
252
253
  chain_identifier, residue_number, insertion_code, residue_name
253
254
  )
254
- atoms.append(Atom(None, None, auth, model, atom_name, x, y, z, occupancy))
255
+
256
+ atoms_to_process.append(
257
+ Atom(None, None, auth, model, atom_name, x, y, z, occupancy)
258
+ )
255
259
  elif line.startswith("MODRES"):
256
260
  original_name = line[12:15]
257
261
  chain_identifier = line[16]
@@ -263,6 +267,7 @@ def parse_pdb(
263
267
  )
264
268
  modified[auth] = standard_residue_name
265
269
 
270
+ atoms = filter_clashing_atoms(atoms_to_process)
266
271
  return atoms, modified, {}, {}
267
272
 
268
273
 
@@ -392,3 +397,36 @@ def try_parse_int(s: str) -> Optional[int]:
392
397
  return int(s)
393
398
  except ValueError:
394
399
  return None
400
+
401
+
402
def filter_clashing_atoms(atoms: List[Atom], clash_distance: float = 0.5) -> List[Atom]:
    """
    Drop duplicated atoms and atoms that clash with a better-occupied one.

    Processing happens in two steps:

    1. Atoms sharing the same (label, auth, model, name) are treated as
       duplicates (e.g. alternate locations) and only the one with the
       highest occupancy is kept.
    2. Among the remaining atoms of the same model, whenever two atoms lie
       within ``clash_distance`` of each other, the one with the lower
       occupancy is removed. Pairs in which either occupancy is unknown
       (``None``) are left untouched.

    Args:
        atoms: Atoms to filter; every atom must expose ``label``, ``auth``,
            ``model``, ``name``, ``x``, ``y``, ``z`` and ``occupancy``.
        clash_distance: Maximum distance at which two atoms are considered
            to clash.

    Returns:
        The surviving atoms, in the order of their first occurrence in
        ``atoms``.
    """

    def occupancy_rank(atom) -> float:
        # An unknown occupancy must never win against a known one, and must
        # not break the comparison either.
        return float("-inf") if atom.occupancy is None else atom.occupancy

    # Step 1: remove duplicates. The model is part of the key, otherwise the
    # same atom repeated in every model of an ensemble would be collapsed
    # into a single entry.
    unique_atoms = {}
    for atom in atoms:
        key = (atom.label, atom.auth, atom.model, atom.name)
        current = unique_atoms.get(key)
        if current is None or occupancy_rank(atom) > occupancy_rank(current):
            unique_atoms[key] = atom

    unique_atoms_list = list(unique_atoms.values())

    # Nothing can clash with fewer than two atoms; this also avoids building
    # a KDTree from an empty coordinate array.
    if len(unique_atoms_list) < 2:
        return unique_atoms_list

    # Step 2: resolve clashes between atoms that are close in space
    coords = np.array([(atom.x, atom.y, atom.z) for atom in unique_atoms_list])
    tree = KDTree(coords)

    discarded = set()
    for i, j in tree.query_pairs(r=clash_distance):
        first, second = unique_atoms_list[i], unique_atoms_list[j]
        # Atoms from different models do not coexist, so they cannot clash
        if first.model != second.model:
            continue
        if first.occupancy is None or second.occupancy is None:
            continue
        if first.occupancy > second.occupancy:
            discarded.add(j)
        else:
            discarded.add(i)

    # Rebuild from the list rather than from a set of indices so that the
    # output order is deterministic.
    return [
        atom
        for index, atom in enumerate(unique_atoms_list)
        if index not in discarded
    ]
@@ -7,6 +7,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union
7
7
 
8
8
  import numpy
9
9
  import numpy.typing
10
+
10
11
  from rnapolis.common import (
11
12
  BasePair,
12
13
  BpSeq,
@@ -16,6 +17,7 @@ from rnapolis.common import (
16
17
  Residue,
17
18
  ResidueAuth,
18
19
  ResidueLabel,
20
+ Saenger,
19
21
  Stacking,
20
22
  )
21
23
 
@@ -521,13 +523,47 @@ class Mapping2D3D:
521
523
 
522
524
  @cached_property
523
525
  def bpseq(self) -> BpSeq:
524
- return self.__generate_bpseq(
525
- [
526
- base_pair
527
- for base_pair in self.base_pairs
528
- if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
529
- ]
530
- )
526
+ def pair_scoring_function(pair: BasePair3D) -> int:
527
+ if pair.saenger is not None:
528
+ if pair.saenger in (Saenger.XIX, Saenger.XX):
529
+ return 0, pair.nt1, pair.nt2
530
+ else:
531
+ return 1, pair.nt1, pair.nt2
532
+
533
+ sequence = "".join(
534
+ sorted(
535
+ [
536
+ pair.nt1_3d.one_letter_name.upper(),
537
+ pair.nt2_3d.one_letter_name.upper(),
538
+ ]
539
+ )
540
+ )
541
+ if sequence in ("AU", "AT", "CG"):
542
+ return 0, pair.nt1, pair.nt2
543
+ return 1, pair.nt1, pair.nt2
544
+
545
+ canonical = [
546
+ base_pair
547
+ for base_pair in self.base_pairs
548
+ if base_pair.is_canonical and base_pair.nt1 < base_pair.nt2
549
+ ]
550
+
551
+ while True:
552
+ matches = defaultdict(set)
553
+
554
+ for base_pair in canonical:
555
+ matches[base_pair.nt1_3d].add(base_pair)
556
+ matches[base_pair.nt2_3d].add(base_pair)
557
+
558
+ for pairs in matches.values():
559
+ if len(pairs) > 1:
560
+ pairs = sorted(pairs, key=pair_scoring_function)
561
+ canonical.remove(pairs[-1])
562
+ break
563
+ else:
564
+ break
565
+
566
+ return self.__generate_bpseq(canonical)
531
567
 
532
568
  def __generate_bpseq(self, base_pairs):
533
569
  result: Dict[int, List] = {}
@@ -43,3 +43,14 @@ def test_8btk():
43
43
  with open("tests/8btk_B7.cif") as f:
44
44
  structure3d = read_3d_structure(f, 1)
45
45
  assert extract_secondary_structure(structure3d, 1) is not None
46
+
47
+
48
def test_488d():
    """
    Residues 151 of chains B and D clash with each other, which is caused by
    occupancy factors lower than 1. Base interactions must still be extracted.
    """
    with open("tests/488d.pdb") as pdb_file:
        structure = read_3d_structure(pdb_file)

    assert extract_base_interactions(structure) is not None
@@ -1,7 +1,9 @@
1
1
  from collections import Counter
2
2
 
3
+ import orjson
3
4
  from hypothesis import given, settings
4
5
  from hypothesis import strategies as st
6
+
5
7
  from rnapolis.common import (
6
8
  BaseInteractions,
7
9
  BasePair,
@@ -10,13 +12,17 @@ from rnapolis.common import (
10
12
  BpSeq,
11
13
  DotBracket,
12
14
  Interaction,
15
+ LeontisWesthof,
13
16
  MultiStrandDotBracket,
14
17
  OtherInteraction,
15
18
  Residue,
16
19
  ResidueAuth,
17
20
  ResidueLabel,
21
+ Saenger,
18
22
  Stacking,
19
23
  )
24
+ from rnapolis.parser import read_3d_structure
25
+ from rnapolis.tertiary import Mapping2D3D
20
26
 
21
27
 
22
28
  @given(st.from_type(ResidueLabel))
@@ -136,3 +142,41 @@ def test_multi_strand_dot_bracket():
136
142
  assert dot_bracket.strands[1].structure == (
137
143
  ")))))))))))..(((...[[[[[[...)))......)))))...]]]]]][[[[[.((((((]]]]].....((((((......((((((....)))))).......))))))..))))))."
138
144
  )
145
+
146
+
147
def test_conflicted_base_pairs():
    """
    Load base pairs from the RNAView JSON fixture, map them onto the 3D
    structure and check the resulting dot-bracket.
    """

    def to_residue(entry):
        # Only the author-assigned identifiers are present in the fixture
        auth = entry["auth"]
        return Residue(
            None,
            ResidueAuth(auth["chain"], auth["number"], auth["icode"], auth["name"]),
        )

    with open("tests/1A1T_1_B-rnaview.json", "rb") as json_file:
        data = orjson.loads(json_file.read())

    base_pairs = [
        BasePair(
            to_residue(obj["nt1"]),
            to_residue(obj["nt2"]),
            LeontisWesthof(obj["lw"]),
            Saenger(obj["saenger"]) if obj["saenger"] else None,
        )
        for obj in data.get("basePairs", [])
    ]

    with open("tests/1A1T_1_B.cif") as cif_file:
        structure3d = read_3d_structure(cif_file)

    expected = ">strand_B\nGGACUAGCGGAGGCUAGUCC\n((((((((....))))))))"
    mapping = Mapping2D3D(structure3d, base_pairs, [], True)
    assert mapping.dot_bracket == expected
@@ -16,3 +16,18 @@ def test_1ato():
16
16
  structure3d = read_3d_structure(f)
17
17
  sequence = "".join([residue.one_letter_name for residue in structure3d.residues])
18
18
  assert sequence == "GGCACCUCCUCGCGGUGCC"
19
+
20
+
21
def test_4qln_no_duplicate_atoms():
    """
    Read 4qln from both PDB and mmCIF and check that residues 18-20 of chain A
    contain every atom name at most once.
    """
    for ext in (".pdb", ".cif"):
        with open(f"tests/4qln{ext}") as structure_file:
            structure3d = read_3d_structure(structure_file)

        for residue in structure3d.residues:
            if residue.auth.chain != "A":
                continue
            if residue.auth.number not in (18, 19, 20):
                continue

            names = [atom.name for atom in residue.atoms]
            assert len(names) == len(
                set(names)
            ), f"Duplicate atoms found in residue {residue.auth}"
File without changes
File without changes
File without changes
File without changes
File without changes