RNApolis-0.3.18-py3-none-any.whl → RNApolis-0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/METADATA +1 -1
- {RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/RECORD +8 -8
- rnapolis/parser.py +82 -32
- rnapolis/tertiary.py +42 -5
- {RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/LICENSE +0 -0
- {RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/WHEEL +0 -0
- {RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.3.18.dist-info → RNApolis-0.4.1.dist-info}/top_level.txt +0 -0
@@ -4,14 +4,14 @@ rnapolis/common.py,sha256=PUYF01P2vevhyImhZjGYE0jJlsxWHX6GQmsxI4W7S-E,30255
|
|
4
4
|
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
5
5
|
rnapolis/molecule_filter.py,sha256=hB6-nXgjmw7FAsQ3bj0cZ2FvuW2I1PXunEfcdwEUB1o,7389
|
6
6
|
rnapolis/motif_extractor.py,sha256=duHvpi9Ulcny9K60E6VBpz5RpJZw-KdTB4_Ph0iP478,774
|
7
|
-
rnapolis/parser.py,sha256=
|
7
|
+
rnapolis/parser.py,sha256=wCA9rXqt51iLECgeBqOShFpuT8JwanNkHYD5uXYvLzU,13988
|
8
8
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
9
|
-
rnapolis/tertiary.py,sha256=
|
9
|
+
rnapolis/tertiary.py,sha256=qk1te8GPDuvQsnm4rTiw96VDYyNoO5x4IPf98zDzxPw,20824
|
10
10
|
rnapolis/transformer.py,sha256=V9nOQvdq4-p7yUWo0vQg0CDQMpmyxz9t4TMSRVEKHnw,1817
|
11
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.
|
13
|
-
RNApolis-0.
|
14
|
-
RNApolis-0.
|
15
|
-
RNApolis-0.
|
16
|
-
RNApolis-0.
|
17
|
-
RNApolis-0.
|
12
|
+
RNApolis-0.4.1.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.4.1.dist-info/METADATA,sha256=EayMQbE4Y5raff-7pFMmeh4EB81JaLEEMrbvEA4mbAk,54322
|
14
|
+
RNApolis-0.4.1.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
15
|
+
RNApolis-0.4.1.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.4.1.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.4.1.dist-info/RECORD,,
|
rnapolis/parser.py
CHANGED
@@ -12,7 +12,7 @@ logger = logging.getLogger(__name__)
|
|
12
12
|
def read_3d_structure(
|
13
13
|
cif_or_pdb: IO[str], model: Optional[int] = None, nucleic_acid_only: bool = False
|
14
14
|
) -> Structure3D:
|
15
|
-
atoms, modified,
|
15
|
+
atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity = (
|
16
16
|
parse_cif(cif_or_pdb) if is_cif(cif_or_pdb) else parse_pdb(cif_or_pdb)
|
17
17
|
)
|
18
18
|
available_models = {atom.model: None for atom in atoms}
|
@@ -24,7 +24,13 @@ def read_3d_structure(
|
|
24
24
|
atoms = atoms_by_model[model]
|
25
25
|
else:
|
26
26
|
atoms = atoms_by_model[list(available_models.keys())[0]]
|
27
|
-
return group_atoms(
|
27
|
+
return group_atoms(
|
28
|
+
atoms,
|
29
|
+
modified,
|
30
|
+
sequence_by_entity,
|
31
|
+
is_nucleic_acid_by_entity,
|
32
|
+
nucleic_acid_only,
|
33
|
+
)
|
28
34
|
|
29
35
|
|
30
36
|
def is_cif(cif_or_pdb: IO[str]) -> bool:
|
@@ -40,7 +46,8 @@ def parse_cif(
|
|
40
46
|
) -> Tuple[
|
41
47
|
List[Atom],
|
42
48
|
Dict[Union[ResidueLabel, ResidueAuth], str],
|
43
|
-
Dict[
|
49
|
+
Dict[str, str],
|
50
|
+
Dict[str, bool],
|
44
51
|
]:
|
45
52
|
cif.seek(0)
|
46
53
|
|
@@ -48,7 +55,8 @@ def parse_cif(
|
|
48
55
|
data = io_adapter.readFile(cif.name)
|
49
56
|
atoms: List[Atom] = []
|
50
57
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str] = {}
|
51
|
-
|
58
|
+
sequence_by_entity = {}
|
59
|
+
is_nucleic_acid_by_entity = {}
|
52
60
|
|
53
61
|
if data:
|
54
62
|
atom_site = data[0].getObj("atom_site")
|
@@ -59,6 +67,7 @@ def parse_cif(
|
|
59
67
|
for row in atom_site.getRowList():
|
60
68
|
row_dict = dict(zip(atom_site.getAttributeList(), row))
|
61
69
|
|
70
|
+
label_entity_id = row_dict.get("label_entity_id", None)
|
62
71
|
label_chain_name = row_dict.get("label_asym_id", None)
|
63
72
|
label_residue_number = try_parse_int(row_dict.get("label_seq_id", None))
|
64
73
|
label_residue_name = row_dict.get("label_comp_id", None)
|
@@ -127,7 +136,19 @@ def parse_cif(
|
|
127
136
|
else None
|
128
137
|
)
|
129
138
|
|
130
|
-
atoms.append(
|
139
|
+
atoms.append(
|
140
|
+
Atom(
|
141
|
+
label_entity_id,
|
142
|
+
label,
|
143
|
+
auth,
|
144
|
+
model,
|
145
|
+
atom_name,
|
146
|
+
x,
|
147
|
+
y,
|
148
|
+
z,
|
149
|
+
occupancy,
|
150
|
+
)
|
151
|
+
)
|
131
152
|
|
132
153
|
if mod_residue:
|
133
154
|
for row in mod_residue.getRowList():
|
@@ -178,17 +199,24 @@ def parse_cif(
|
|
178
199
|
for row in entity_poly.getRowList():
|
179
200
|
row_dict = dict(zip(entity_poly.getAttributeList(), row))
|
180
201
|
|
181
|
-
|
202
|
+
entity_id = row_dict.get("entity_id", None)
|
203
|
+
type_ = row_dict.get("type", None)
|
182
204
|
pdbx_seq_one_letter_code_can = row_dict.get(
|
183
205
|
"pdbx_seq_one_letter_code_can", None
|
184
206
|
)
|
185
207
|
|
186
|
-
if
|
187
|
-
|
188
|
-
|
189
|
-
|
208
|
+
if entity_id and type_:
|
209
|
+
is_nucleic_acid_by_entity[entity_id] = type_ in (
|
210
|
+
"peptide nucleic acid",
|
211
|
+
"polydeoxyribonucleotide",
|
212
|
+
"polydeoxyribonucleotide/polyribonucleotide hybrid",
|
213
|
+
"polyribonucleotide",
|
214
|
+
)
|
215
|
+
|
216
|
+
if entity_id and pdbx_seq_one_letter_code_can:
|
217
|
+
sequence_by_entity[entity_id] = pdbx_seq_one_letter_code_can
|
190
218
|
|
191
|
-
return atoms, modified,
|
219
|
+
return atoms, modified, sequence_by_entity, is_nucleic_acid_by_entity
|
192
220
|
|
193
221
|
|
194
222
|
def parse_pdb(
|
@@ -196,7 +224,8 @@ def parse_pdb(
|
|
196
224
|
) -> Tuple[
|
197
225
|
List[Atom],
|
198
226
|
Dict[Union[ResidueLabel, ResidueAuth], str],
|
199
|
-
Dict[
|
227
|
+
Dict[str, str],
|
228
|
+
Dict[str, bool],
|
200
229
|
]:
|
201
230
|
pdb.seek(0)
|
202
231
|
atoms: List[Atom] = []
|
@@ -222,7 +251,7 @@ def parse_pdb(
|
|
222
251
|
auth = ResidueAuth(
|
223
252
|
chain_identifier, residue_number, insertion_code, residue_name
|
224
253
|
)
|
225
|
-
atoms.append(Atom(None, auth, model, atom_name, x, y, z, occupancy))
|
254
|
+
atoms.append(Atom(None, None, auth, model, atom_name, x, y, z, occupancy))
|
226
255
|
elif line.startswith("MODRES"):
|
227
256
|
original_name = line[12:15]
|
228
257
|
chain_identifier = line[16]
|
@@ -234,13 +263,14 @@ def parse_pdb(
|
|
234
263
|
)
|
235
264
|
modified[auth] = standard_residue_name
|
236
265
|
|
237
|
-
return atoms, modified, {}
|
266
|
+
return atoms, modified, {}, {}
|
238
267
|
|
239
268
|
|
240
269
|
def group_atoms(
|
241
270
|
atoms: List[Atom],
|
242
271
|
modified: Dict[Union[ResidueLabel, ResidueAuth], str],
|
243
|
-
|
272
|
+
sequence_by_entity: Dict[str, str],
|
273
|
+
is_nucleic_acid_by_entity: Dict[str, bool],
|
244
274
|
nucleic_acid_only: bool,
|
245
275
|
) -> Structure3D:
|
246
276
|
if not atoms:
|
@@ -258,28 +288,45 @@ def group_atoms(
|
|
258
288
|
label = key_previous[0]
|
259
289
|
auth = key_previous[1]
|
260
290
|
model = key_previous[2]
|
291
|
+
entity_id = residue_atoms[-1].entity_id
|
261
292
|
name = get_residue_name(auth, label, modified)
|
262
|
-
one_letter_name = get_one_letter_name(
|
263
|
-
|
293
|
+
one_letter_name = get_one_letter_name(
|
294
|
+
entity_id, label, sequence_by_entity, name
|
295
|
+
)
|
296
|
+
|
297
|
+
if one_letter_name not in "ACGUTN":
|
264
298
|
one_letter_name = detect_one_letter_name(residue_atoms)
|
265
|
-
|
266
|
-
|
299
|
+
|
300
|
+
residues.append(
|
301
|
+
Residue3D(label, auth, model, one_letter_name, tuple(residue_atoms))
|
267
302
|
)
|
268
|
-
|
269
|
-
residues.append(residue)
|
303
|
+
|
270
304
|
key_previous = key
|
271
305
|
residue_atoms = [atom]
|
272
306
|
|
273
307
|
label = key_previous[0]
|
274
308
|
auth = key_previous[1]
|
275
309
|
model = key_previous[2]
|
310
|
+
entity_id = residue_atoms[-1].entity_id
|
276
311
|
name = get_residue_name(auth, label, modified)
|
277
|
-
one_letter_name = get_one_letter_name(label,
|
278
|
-
|
312
|
+
one_letter_name = get_one_letter_name(entity_id, label, sequence_by_entity, name)
|
313
|
+
|
314
|
+
if one_letter_name not in "ACGUTN":
|
279
315
|
one_letter_name = detect_one_letter_name(residue_atoms)
|
280
|
-
|
281
|
-
|
282
|
-
|
316
|
+
|
317
|
+
residues.append(
|
318
|
+
Residue3D(label, auth, model, one_letter_name, tuple(residue_atoms))
|
319
|
+
)
|
320
|
+
|
321
|
+
if nucleic_acid_only:
|
322
|
+
if is_nucleic_acid_by_entity:
|
323
|
+
residues = [
|
324
|
+
residue
|
325
|
+
for residue in residues
|
326
|
+
if is_nucleic_acid_by_entity[residue.atoms[0].entity_id]
|
327
|
+
]
|
328
|
+
else:
|
329
|
+
residues = [residue for residue in residues if residue.is_nucleotide]
|
283
330
|
|
284
331
|
return Structure3D(residues)
|
285
332
|
|
@@ -304,13 +351,14 @@ def get_residue_name(
|
|
304
351
|
|
305
352
|
|
306
353
|
def get_one_letter_name(
|
307
|
-
|
354
|
+
entity_id: Optional[str],
|
355
|
+
label: Optional[ResidueLabel],
|
356
|
+
sequence_by_entity: Dict[str, str],
|
357
|
+
name: str,
|
308
358
|
) -> str:
|
309
359
|
# try getting the value from _entity_poly first
|
310
|
-
if label is not None:
|
311
|
-
|
312
|
-
if key in sequence:
|
313
|
-
return sequence[key]
|
360
|
+
if entity_id is not None and label is not None and entity_id in sequence_by_entity:
|
361
|
+
return sequence_by_entity[entity_id][label.number - 1]
|
314
362
|
# RNA
|
315
363
|
if len(name) == 1:
|
316
364
|
return name
|
@@ -334,11 +382,13 @@ def detect_one_letter_name(atoms: List[Atom]) -> str:
|
|
334
382
|
) / len(atom_names_expected)
|
335
383
|
score[candidate] = count
|
336
384
|
items = sorted(score.items(), key=lambda kv: kv[1], reverse=True)
|
385
|
+
if items[0][1] == 0:
|
386
|
+
return "?"
|
337
387
|
return items[0][0]
|
338
388
|
|
339
389
|
|
340
390
|
def try_parse_int(s: str) -> Optional[int]:
    """Convert ``s`` to an ``int``, returning ``None`` when that is not possible.

    ``parse_cif`` passes the result of ``row_dict.get("label_seq_id", None)``
    here, so ``s`` may actually be ``None`` when the attribute is missing from
    the row. ``int(None)`` raises ``TypeError`` (not ``ValueError``), so both
    exception types are treated as "no integer available".

    Args:
        s: Text expected to hold a base-10 integer; ``None`` is tolerated.

    Returns:
        The parsed integer, or ``None`` if ``s`` is ``None`` or not a valid
        integer literal.
    """
    try:
        return int(s)
    except (TypeError, ValueError):
        return None
|
rnapolis/tertiary.py
CHANGED
@@ -96,6 +96,7 @@ AVERAGE_OXYGEN_PHOSPHORUS_DISTANCE_COVALENT = 1.6
|
|
96
96
|
|
97
97
|
@dataclass(frozen=True, order=True)
|
98
98
|
class Atom:
|
99
|
+
entity_id: Optional[str]
|
99
100
|
label: Optional[ResidueLabel]
|
100
101
|
auth: Optional[ResidueAuth]
|
101
102
|
model: int
|
@@ -128,6 +129,29 @@ class Residue3D(Residue):
|
|
128
129
|
"C": set(["N1", "C2", "O2", "N3", "C4", "N4", "C5", "C6"]),
|
129
130
|
"U": set(["N1", "C2", "O2", "N3", "C4", "O4", "C5", "C6"]),
|
130
131
|
}
|
132
|
+
# Heavy atoms in nucleotide
|
133
|
+
nucleotide_heavy_atoms = (
|
134
|
+
set(
|
135
|
+
[
|
136
|
+
"P",
|
137
|
+
"OP1",
|
138
|
+
"OP2",
|
139
|
+
"O5'",
|
140
|
+
"C5'",
|
141
|
+
"C4'",
|
142
|
+
"O4'",
|
143
|
+
"C3'",
|
144
|
+
"O3'",
|
145
|
+
"C2'",
|
146
|
+
"O2'",
|
147
|
+
"C1'",
|
148
|
+
]
|
149
|
+
)
|
150
|
+
.union(nucleobase_heavy_atoms["A"])
|
151
|
+
.union(nucleobase_heavy_atoms["G"])
|
152
|
+
.union(nucleobase_heavy_atoms["C"])
|
153
|
+
.union(nucleobase_heavy_atoms["U"])
|
154
|
+
)
|
131
155
|
|
132
156
|
def __lt__(self, other):
|
133
157
|
return (self.model, self.chain, self.number, self.icode or " ") < (
|
@@ -176,8 +200,8 @@ class Residue3D(Residue):
|
|
176
200
|
|
177
201
|
@cached_property
def is_nucleotide(self) -> bool:
    """Tell whether this residue has any heavy atom expected in a nucleotide.

    Returns:
        ``True`` when at least one atom name in ``self.atoms`` is a member of
        ``self.nucleotide_heavy_atoms``, ``False`` otherwise. A real ``bool``
        is returned (matching the annotation) rather than the intersection
        set itself; truthiness for existing callers is unchanged.
    """
    # isdisjoint() accepts any iterable and stops at the first shared name,
    # so no intermediate list/set of atom names has to be built.
    return not self.nucleotide_heavy_atoms.isdisjoint(
        atom.name for atom in self.atoms
    )
|
182
206
|
|
183
207
|
@cached_property
|
@@ -268,7 +292,7 @@ class Residue3D(Residue):
|
|
268
292
|
logging.error(
|
269
293
|
f"Failed to determine the outermost atom for nucleotide {self}, so an arbitrary atom will be used"
|
270
294
|
)
|
271
|
-
yield Atom(self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
|
295
|
+
yield Atom(None, self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
|
272
296
|
|
273
297
|
def __inner_generator(self):
|
274
298
|
# try to find expected atom name
|
@@ -296,7 +320,7 @@ class Residue3D(Residue):
|
|
296
320
|
logging.error(
|
297
321
|
f"Failed to determine the innermost atom for nucleotide {self}, so an arbitrary atom will be used"
|
298
322
|
)
|
299
|
-
yield Atom(self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
|
323
|
+
yield Atom(None, self.label, self.auth, self.model, "UNK", 0.0, 0.0, 0.0, None)
|
300
324
|
|
301
325
|
|
302
326
|
@dataclass(frozen=True, order=True)
|
@@ -524,7 +548,20 @@ class Mapping2D3D:
|
|
524
548
|
result: Dict[int, List] = {}
|
525
549
|
residue_map: Dict[Residue3D, int] = {}
|
526
550
|
i = 1
|
527
|
-
|
551
|
+
|
552
|
+
for j, residue in enumerate(self.structure3d.residues):
|
553
|
+
if self.find_gaps and j > 0:
|
554
|
+
previous = self.structure3d.residues[j - 1]
|
555
|
+
if (
|
556
|
+
previous.is_nucleotide
|
557
|
+
and residue.is_nucleotide
|
558
|
+
and previous.label
|
559
|
+
and residue.label
|
560
|
+
and previous.label.chain == residue.label.chain
|
561
|
+
):
|
562
|
+
for k in range(residue.label.number - previous.label.number - 1):
|
563
|
+
result[i] = [i, "?", 0]
|
564
|
+
i += 1
|
528
565
|
if residue.is_nucleotide:
|
529
566
|
result[i] = [i, residue.one_letter_name, 0]
|
530
567
|
residue_map[residue] = i
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|