RNApolis 0.4.14__py3-none-any.whl → 0.4.16__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- {RNApolis-0.4.14.dist-info → RNApolis-0.4.16.dist-info}/METADATA +11 -2
- {RNApolis-0.4.14.dist-info → RNApolis-0.4.16.dist-info}/RECORD +9 -9
- {RNApolis-0.4.14.dist-info → RNApolis-0.4.16.dist-info}/WHEEL +1 -1
- rnapolis/annotator.py +2 -1
- rnapolis/molecule_filter.py +130 -181
- rnapolis/parser.py +13 -2
- {RNApolis-0.4.14.dist-info → RNApolis-0.4.16.dist-info}/LICENSE +0 -0
- {RNApolis-0.4.14.dist-info → RNApolis-0.4.16.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.4.14.dist-info → RNApolis-0.4.16.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.2
|
2
2
|
Name: RNApolis
|
3
|
-
Version: 0.4.
|
3
|
+
Version: 0.4.16
|
4
4
|
Summary: A Python library containing RNA-related bioinformatics functions and classes
|
5
5
|
Home-page: https://github.com/tzok/rnapolis-py
|
6
6
|
Author: Tomasz Zok
|
@@ -26,6 +26,15 @@ Requires-Dist: pulp
|
|
26
26
|
Requires-Dist: requests
|
27
27
|
Requires-Dist: scipy
|
28
28
|
Requires-Dist: viennarna
|
29
|
+
Dynamic: author
|
30
|
+
Dynamic: author-email
|
31
|
+
Dynamic: classifier
|
32
|
+
Dynamic: description
|
33
|
+
Dynamic: description-content-type
|
34
|
+
Dynamic: home-page
|
35
|
+
Dynamic: project-url
|
36
|
+
Dynamic: requires-dist
|
37
|
+
Dynamic: summary
|
29
38
|
|
30
39
|
# RNApolis
|
31
40
|
|
@@ -1,17 +1,17 @@
|
|
1
|
-
rnapolis/annotator.py,sha256=
|
1
|
+
rnapolis/annotator.py,sha256=hRRzRmneYxbg2tvwVHMWLfzmJb4szV0JL_6EOC09Gwg,22101
|
2
2
|
rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
|
3
3
|
rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
|
4
4
|
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
5
|
-
rnapolis/molecule_filter.py,sha256=
|
5
|
+
rnapolis/molecule_filter.py,sha256=jgcpJxx_oXEBX0d30v4k_FdwRouRUPUsEtCYWgLGpD4,7310
|
6
6
|
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
7
|
-
rnapolis/parser.py,sha256=
|
7
|
+
rnapolis/parser.py,sha256=3g4mtFvpiEENFcSBBtx_E_x1vJPF9BujWnts0kb9XjE,16340
|
8
8
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
9
9
|
rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
|
10
10
|
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
11
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.4.
|
13
|
-
RNApolis-0.4.
|
14
|
-
RNApolis-0.4.
|
15
|
-
RNApolis-0.4.
|
16
|
-
RNApolis-0.4.
|
17
|
-
RNApolis-0.4.
|
12
|
+
RNApolis-0.4.16.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.4.16.dist-info/METADATA,sha256=Ouh1NQ3gFk7NrpInnQmBWJWRn_1JML9qi5c8MVk8_Q8,54516
|
14
|
+
RNApolis-0.4.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
15
|
+
RNApolis-0.4.16.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.4.16.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.4.16.dist-info/RECORD,,
|
rnapolis/annotator.py
CHANGED
@@ -11,6 +11,8 @@ import numpy
|
|
11
11
|
import numpy.typing
|
12
12
|
import orjson
|
13
13
|
from ordered_set import OrderedSet
|
14
|
+
from scipy.spatial import KDTree
|
15
|
+
|
14
16
|
from rnapolis.common import (
|
15
17
|
BR,
|
16
18
|
BaseInteractions,
|
@@ -42,7 +44,6 @@ from rnapolis.tertiary import (
|
|
42
44
|
torsion_angle,
|
43
45
|
)
|
44
46
|
from rnapolis.util import handle_input_file
|
45
|
-
from scipy.spatial import KDTree
|
46
47
|
|
47
48
|
HYDROGEN_BOND_MAX_DISTANCE = 4.0
|
48
49
|
HYDROGEN_BOND_ANGLE_RANGE = (50.0, 130.0) # 90 degrees is ideal, so allow +- 40 degrees
|
rnapolis/molecule_filter.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#! /usr/bin/env python
|
2
2
|
import argparse
|
3
|
+
import os
|
3
4
|
import tempfile
|
5
|
+
from collections import defaultdict, namedtuple
|
4
6
|
from typing import Iterable, List, Set, Tuple
|
5
7
|
|
6
8
|
from mmcif.io.IoAdapterPy import IoAdapterPy
|
@@ -20,167 +22,104 @@ ENTITY_POLY_TYPES = [
|
|
20
22
|
"polyribonucleotide",
|
21
23
|
]
|
22
24
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
("entity_keywords", "entity_id"),
|
27
|
-
("entity_name_com", "entity_id"),
|
28
|
-
("entity_name_sys", "entity_id"),
|
29
|
-
("entity_poly", "entity_id"),
|
30
|
-
("entity_src_gen", "entity_id"),
|
31
|
-
("entity_src_nat", "entity_id"),
|
32
|
-
("pdbx_branch_scheme", "entity_id"),
|
33
|
-
("pdbx_chain_remapping", "entity_id"),
|
34
|
-
("pdbx_construct", "entity_id"),
|
35
|
-
("pdbx_entity_assembly", "entity_id"),
|
36
|
-
("pdbx_entity_branch", "entity_id"),
|
37
|
-
("pdbx_entity_branch_descriptor", "entity_id"),
|
38
|
-
("pdbx_entity_branch_list", "entity_id"),
|
39
|
-
("pdbx_entity_func_bind_mode", "entity_id"),
|
40
|
-
("pdbx_entity_name", "entity_id"),
|
41
|
-
("pdbx_entity_nonpoly", "entity_id"),
|
42
|
-
("pdbx_entity_poly_domain", "entity_id"),
|
43
|
-
("pdbx_entity_poly_na_nonstandard", "entity_id"),
|
44
|
-
("pdbx_entity_poly_na_type", "entity_id"),
|
45
|
-
("pdbx_entity_poly_protein_class", "entity_id"),
|
46
|
-
("pdbx_entity_prod_protocol", "entity_id"),
|
47
|
-
("pdbx_entity_remapping", "entity_id"),
|
48
|
-
("pdbx_entity_src_gen_character", "entity_id"),
|
49
|
-
("pdbx_entity_src_gen_chrom", "entity_id"),
|
50
|
-
("pdbx_entity_src_gen_clone", "entity_id"),
|
51
|
-
("pdbx_entity_src_gen_express", "entity_id"),
|
52
|
-
("pdbx_entity_src_gen_fract", "entity_id"),
|
53
|
-
("pdbx_entity_src_gen_lysis", "entity_id"),
|
54
|
-
("pdbx_entity_src_gen_prod_digest", "entity_id"),
|
55
|
-
("pdbx_entity_src_gen_prod_other", "entity_id"),
|
56
|
-
("pdbx_entity_src_gen_prod_pcr", "entity_id"),
|
57
|
-
("pdbx_entity_src_gen_proteolysis", "entity_id"),
|
58
|
-
("pdbx_entity_src_gen_pure", "entity_id"),
|
59
|
-
("pdbx_entity_src_gen_refold", "entity_id"),
|
60
|
-
("pdbx_entity_src_syn", "entity_id"),
|
61
|
-
("pdbx_linked_entity_list", "entity_id"),
|
62
|
-
("pdbx_prerelease_seq", "entity_id"),
|
63
|
-
("pdbx_sifts_xref_db", "entity_id"),
|
64
|
-
("pdbx_sifts_xref_db_segments", "entity_id"),
|
65
|
-
("pdbx_struct_entity_inst", "entity_id"),
|
66
|
-
("struct_asym", "entity_id"),
|
67
|
-
("struct_ref", "entity_id"),
|
68
|
-
]
|
69
|
-
|
70
|
-
CATEGORIES_WITH_ASYM_ID = [
|
71
|
-
("pdbx_coordinate_model", "asym_id"),
|
72
|
-
("pdbx_distant_solvent_atoms", "label_asym_id"),
|
73
|
-
("pdbx_linked_entity_instance_list", "asym_id"),
|
74
|
-
("pdbx_poly_seq_scheme", "asym_id"),
|
75
|
-
("pdbx_sifts_unp_segments", "asym_id"),
|
76
|
-
("pdbx_struct_asym_gen", "asym_id"),
|
77
|
-
("pdbx_struct_ncs_virus_gen", "asym_id"),
|
78
|
-
("pdbx_struct_special_symmetry", "label_asym_id"),
|
79
|
-
("pdbx_unobs_or_zero_occ_atoms", "label_asym_id"),
|
80
|
-
("pdbx_unobs_or_zero_occ_residues", "label_asym_id"),
|
81
|
-
("refine_ls_restr_ncs", "pdbx_asym_id"),
|
82
|
-
("struct_biol_gen", "asym_id"),
|
83
|
-
]
|
84
|
-
|
85
|
-
CATEGORIES_WITH_AUTH_ASYM_ID = [
|
86
|
-
("atom_site_anisotrop", "pdbx_auth_asym_id"),
|
87
|
-
("pdbx_atom_site_aniso_tls", "auth_asym_id"),
|
88
|
-
("pdbx_entity_instance_feature", "auth_asym_id"),
|
89
|
-
("pdbx_feature_monomer", "auth_asym_id"),
|
90
|
-
("pdbx_missing_atom_nonpoly", "auth_asym_id"),
|
91
|
-
("pdbx_missing_atom_poly", "auth_asym_id"),
|
92
|
-
("pdbx_modification_feature", "auth_asym_id"),
|
93
|
-
("pdbx_refine_component", "auth_asym_id"),
|
94
|
-
("pdbx_remediation_atom_site_mapping", "auth_asym_id"),
|
95
|
-
("pdbx_rmch_outlier", "auth_asym_id"),
|
96
|
-
("pdbx_rms_devs_cov_by_monomer", "auth_asym_id"),
|
97
|
-
("pdbx_sequence_pattern", "auth_asym_id"),
|
98
|
-
("pdbx_solvent_atom_site_mapping", "auth_asym_id"),
|
99
|
-
("pdbx_stereochemistry", "auth_asym_id"),
|
100
|
-
("pdbx_struct_chem_comp_diagnostics", "pdb_strand_id"),
|
101
|
-
("pdbx_struct_chem_comp_feature", "pdb_strand_id"),
|
102
|
-
("pdbx_struct_group_components", "auth_asym_id"),
|
103
|
-
("pdbx_struct_mod_residue", "auth_asym_id"),
|
104
|
-
("pdbx_sugar_phosphate_geometry", "auth_asym_id"),
|
105
|
-
("pdbx_validate_chiral", "auth_asym_id"),
|
106
|
-
("pdbx_validate_main_chain_plane", "auth_asym_id"),
|
107
|
-
("pdbx_validate_planes", "auth_asym_id"),
|
108
|
-
("pdbx_validate_planes_atom", "auth_asym_id"),
|
109
|
-
("pdbx_validate_torsion", "auth_asym_id"),
|
110
|
-
("struct_mon_nucl", "auth_asym_id"),
|
111
|
-
("struct_mon_prot", "auth_asym_id"),
|
112
|
-
("struct_site_gen", "auth_asym_id"),
|
113
|
-
]
|
25
|
+
Link = namedtuple(
|
26
|
+
"Link", ["parent_category_id", "parent_name", "child_category_id", "child_name"]
|
27
|
+
)
|
114
28
|
|
115
29
|
|
116
|
-
def
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
ids = set()
|
30
|
+
def load_pdbx_item_linked_group_list():
|
31
|
+
dictionary = os.path.join(
|
32
|
+
os.path.abspath(os.path.dirname(__file__)), "mmcif_pdbx_v50.dic"
|
33
|
+
)
|
34
|
+
adapter = IoAdapterPy()
|
35
|
+
data = adapter.readFile(dictionary)
|
36
|
+
obj = data[0].getObj("pdbx_item_linked_group_list")
|
37
|
+
links = defaultdict(set)
|
125
38
|
|
126
39
|
if obj:
|
127
40
|
for row in obj.getRowList():
|
128
41
|
row_dict = dict(zip(obj.getAttributeList(), row))
|
42
|
+
child_category_id = row_dict["child_category_id"]
|
43
|
+
child_name = row_dict["child_name"].split(".")[1]
|
44
|
+
parent_name = row_dict["parent_name"].split(".")[1]
|
45
|
+
parent_category_id = row_dict["parent_category_id"]
|
46
|
+
links[parent_category_id].add(
|
47
|
+
Link(parent_category_id, parent_name, child_category_id, child_name)
|
48
|
+
)
|
129
49
|
|
130
|
-
|
131
|
-
ids.add(row_dict[extracted_field_name])
|
50
|
+
return links
|
132
51
|
|
133
|
-
|
52
|
+
|
53
|
+
def select_ids(
|
54
|
+
data: List[DataContainer],
|
55
|
+
category: str,
|
56
|
+
field_name_to_extract: str,
|
57
|
+
field_name_to_check: str,
|
58
|
+
accepted_values: Iterable[str],
|
59
|
+
) -> Set[str]:
|
60
|
+
obj = data[0].getObj(category)
|
61
|
+
if not obj:
|
62
|
+
return set()
|
63
|
+
attributes = obj.getAttributeList()
|
64
|
+
if field_name_to_check not in attributes or field_name_to_extract not in attributes:
|
65
|
+
return set()
|
66
|
+
index_to_check = attributes.index(field_name_to_check)
|
67
|
+
index_to_extract = attributes.index(field_name_to_extract)
|
68
|
+
return {
|
69
|
+
row[index_to_extract]
|
70
|
+
for row in obj.getRowList()
|
71
|
+
if row[index_to_check] in accepted_values
|
72
|
+
}
|
134
73
|
|
135
74
|
|
136
75
|
def select_category_by_id(
|
137
76
|
data: List[DataContainer],
|
138
77
|
category: str,
|
139
78
|
field_name: str,
|
140
|
-
ids:
|
79
|
+
ids: Iterable[str],
|
141
80
|
) -> Tuple[List[str], List[List[str]]]:
|
142
81
|
obj = data[0].getObj(category)
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
if
|
147
|
-
attributes
|
148
|
-
|
149
|
-
|
150
|
-
row_dict = dict(zip(obj.getAttributeList(), row))
|
151
|
-
|
152
|
-
if row_dict.get(field_name, None) in ids:
|
153
|
-
rows.append(row)
|
154
|
-
|
155
|
-
return attributes, rows
|
156
|
-
|
82
|
+
if not obj:
|
83
|
+
return [], []
|
84
|
+
attributes = obj.getAttributeList()
|
85
|
+
if field_name not in attributes:
|
86
|
+
return attributes, []
|
87
|
+
index = attributes.index(field_name)
|
88
|
+
return attributes, [row for row in obj.getRowList() if row[index] in ids]
|
157
89
|
|
158
|
-
def filter_by_poly_types(
|
159
|
-
file_content: str,
|
160
|
-
entity_poly_types: Iterable[str] = ["polyribonucleotide"],
|
161
|
-
retain_categories: Iterable[str] = [],
|
162
|
-
) -> str:
|
163
|
-
adapter = IoAdapterPy()
|
164
90
|
|
91
|
+
def read_cif(file_content: str) -> DataContainer:
|
165
92
|
with tempfile.NamedTemporaryFile("rt+") as f:
|
93
|
+
adapter = IoAdapterPy()
|
166
94
|
f.write(file_content)
|
167
95
|
f.seek(0)
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
)
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
96
|
+
return adapter.readFile(f.name)
|
97
|
+
|
98
|
+
|
99
|
+
def filter_cif(data, entity_ids, asym_ids, auth_asym_ids, retain_categories):
|
100
|
+
links = load_pdbx_item_linked_group_list()
|
101
|
+
categories_with_entity_id = [("entity", "id")] + [
|
102
|
+
(link.child_category_id, link.child_name)
|
103
|
+
for link in links["entity"]
|
104
|
+
if link.parent_name == "id"
|
105
|
+
]
|
106
|
+
categories_with_asym_id = [("struct_asym", "id")] + [
|
107
|
+
(link.child_category_id, link.child_name)
|
108
|
+
for link in links["struct_asym"]
|
109
|
+
if link.parent_name == "id"
|
110
|
+
]
|
111
|
+
categories_with_auth_asym_id = [("atom_site", "auth_asym_id")] + [
|
112
|
+
(link.child_category_id, link.child_name)
|
113
|
+
for link in links["atom_site"]
|
114
|
+
if link.parent_name == "auth_asym_id"
|
115
|
+
]
|
177
116
|
|
178
117
|
output = DataContainer("rnapolis")
|
179
118
|
|
180
119
|
for table, ids in (
|
181
|
-
(
|
182
|
-
(
|
183
|
-
(
|
120
|
+
(categories_with_entity_id, entity_ids),
|
121
|
+
(categories_with_asym_id, asym_ids),
|
122
|
+
(categories_with_auth_asym_id, auth_asym_ids),
|
184
123
|
):
|
185
124
|
for category, field_name in table:
|
186
125
|
attributes, rows = select_category_by_id(data, category, field_name, ids)
|
@@ -195,13 +134,36 @@ def filter_by_poly_types(
|
|
195
134
|
output.append(obj)
|
196
135
|
|
197
136
|
with tempfile.NamedTemporaryFile("rt+") as tmp:
|
137
|
+
adapter = IoAdapterPy()
|
198
138
|
adapter.writeFile(tmp.name, [output])
|
199
139
|
tmp.seek(0)
|
200
140
|
return tmp.read()
|
201
141
|
|
202
142
|
|
143
|
+
def filter_by_poly_types(
|
144
|
+
file_content: str,
|
145
|
+
entity_poly_types: Iterable[str] = [
|
146
|
+
"polyribonucleotide",
|
147
|
+
"polydeoxyribonucleotide",
|
148
|
+
"polydeoxyribonucleotide/polyribonucleotide hybrid",
|
149
|
+
],
|
150
|
+
retain_categories: Iterable[str] = ["chem_comp"],
|
151
|
+
) -> str:
|
152
|
+
data = read_cif(file_content)
|
153
|
+
entity_ids = select_ids(
|
154
|
+
data, "entity_poly", "entity_id", "type", set(entity_poly_types)
|
155
|
+
)
|
156
|
+
asym_ids = select_ids(data, "struct_asym", "id", "entity_id", entity_ids)
|
157
|
+
auth_asym_ids = select_ids(
|
158
|
+
data, "atom_site", "auth_asym_id", "label_asym_id", asym_ids
|
159
|
+
)
|
160
|
+
return filter_cif(data, entity_ids, asym_ids, auth_asym_ids, retain_categories)
|
161
|
+
|
162
|
+
|
203
163
|
def filter_by_chains(
|
204
|
-
file_content: str,
|
164
|
+
file_content: str,
|
165
|
+
chains: Iterable[str],
|
166
|
+
retain_categories: Iterable[str] = ["chem_comp"],
|
205
167
|
) -> str:
|
206
168
|
"""
|
207
169
|
Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
|
@@ -210,68 +172,55 @@ def filter_by_chains(
|
|
210
172
|
This is because the function filters by entity, so if you ask for chain "A",
|
211
173
|
which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
|
212
174
|
"""
|
213
|
-
|
214
|
-
|
215
|
-
with tempfile.NamedTemporaryFile("rt+") as f:
|
216
|
-
f.write(file_content)
|
217
|
-
f.seek(0)
|
218
|
-
data = adapter.readFile(f.name)
|
219
|
-
|
220
|
-
output = DataContainer("rnapolis")
|
221
|
-
|
222
|
-
entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
|
175
|
+
data = read_cif(file_content)
|
223
176
|
asym_ids = set(chains)
|
177
|
+
entity_ids = select_ids(data, "struct_asym", "entity_id", "id", asym_ids)
|
224
178
|
auth_asym_ids = select_ids(
|
225
|
-
data, "atom_site", "
|
179
|
+
data, "atom_site", "auth_asym_id", "label_asym_id", asym_ids
|
226
180
|
)
|
227
|
-
|
228
|
-
for table, ids in (
|
229
|
-
(CATEGORIES_WITH_ENTITY_ID, entity_ids),
|
230
|
-
(CATEGORIES_WITH_ASYM_ID, asym_ids),
|
231
|
-
(CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
|
232
|
-
):
|
233
|
-
for category, field_name in table:
|
234
|
-
attributes, rows = select_category_by_id(data, category, field_name, ids)
|
235
|
-
|
236
|
-
if attributes and rows:
|
237
|
-
obj = DataCategory(category, attributes, rows)
|
238
|
-
output.append(obj)
|
239
|
-
|
240
|
-
for category in retain_categories:
|
241
|
-
obj = data[0].getObj(category)
|
242
|
-
if obj:
|
243
|
-
output.append(obj)
|
244
|
-
|
245
|
-
with tempfile.NamedTemporaryFile("rt+") as tmp:
|
246
|
-
adapter.writeFile(tmp.name, [output])
|
247
|
-
tmp.seek(0)
|
248
|
-
return tmp.read()
|
181
|
+
return filter_cif(data, entity_ids, asym_ids, auth_asym_ids, retain_categories)
|
249
182
|
|
250
183
|
|
251
184
|
def main():
|
252
185
|
parser = argparse.ArgumentParser()
|
253
186
|
parser.add_argument(
|
254
|
-
"--
|
255
|
-
help="
|
187
|
+
"--filter-by-poly-types",
|
188
|
+
help=f"filter by entity poly types, possible values: {', '.join(ENTITY_POLY_TYPES)}",
|
256
189
|
action="append",
|
257
|
-
default=[
|
258
|
-
choices=ENTITY_POLY_TYPES,
|
190
|
+
default=[],
|
259
191
|
)
|
260
192
|
parser.add_argument(
|
261
|
-
"--
|
262
|
-
help="
|
193
|
+
"--filter-by-chains",
|
194
|
+
help="filter by chain IDs (label_asym_id), e.g. A, B, C",
|
263
195
|
action="append",
|
264
196
|
default=[],
|
265
197
|
)
|
198
|
+
parser.add_argument(
|
199
|
+
"--retain-categories",
|
200
|
+
help="categories to retain in the output file default: chem_comp",
|
201
|
+
action="append",
|
202
|
+
default=["chem_comp"],
|
203
|
+
)
|
266
204
|
parser.add_argument("path", help="path to a PDBx/mmCIF file")
|
267
205
|
args = parser.parse_args()
|
268
206
|
|
269
207
|
file = handle_input_file(args.path)
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
208
|
+
if args.filter_by_poly_types:
|
209
|
+
print(
|
210
|
+
filter_by_poly_types(
|
211
|
+
file.read(),
|
212
|
+
entity_poly_types=args.filter_by_poly_types,
|
213
|
+
retain_categories=args.retain_categories,
|
214
|
+
)
|
215
|
+
)
|
216
|
+
elif args.filter_by_chains:
|
217
|
+
print(
|
218
|
+
filter_by_chains(
|
219
|
+
file.read(),
|
220
|
+
chains=args.filter_by_chains,
|
221
|
+
retain_categories=args.retain_categories,
|
222
|
+
)
|
223
|
+
)
|
275
224
|
else:
|
276
225
|
parser.print_help()
|
277
226
|
|
rnapolis/parser.py
CHANGED
@@ -218,7 +218,9 @@ def parse_cif(
|
|
218
218
|
)
|
219
219
|
|
220
220
|
if entity_id and pdbx_seq_one_letter_code_can:
|
221
|
-
sequence_by_entity[entity_id] =
|
221
|
+
sequence_by_entity[entity_id] = (
|
222
|
+
pdbx_seq_one_letter_code_can.replace("\n", "")
|
223
|
+
)
|
222
224
|
|
223
225
|
if entity:
|
224
226
|
for row in entity.getRowList():
|
@@ -234,7 +236,16 @@ def parse_cif(
|
|
234
236
|
|
235
237
|
if type_:
|
236
238
|
is_nucleic_acid_by_entity[entity_id] = (
|
237
|
-
is_nucleic_acid_by_entity.get(
|
239
|
+
is_nucleic_acid_by_entity.get(
|
240
|
+
entity_id,
|
241
|
+
type_
|
242
|
+
in (
|
243
|
+
"peptide nucleic acid",
|
244
|
+
"polydeoxyribonucleotide",
|
245
|
+
"polydeoxyribonucleotide/polyribonucleotide hybrid",
|
246
|
+
"polyribonucleotide",
|
247
|
+
),
|
248
|
+
)
|
238
249
|
)
|
239
250
|
|
240
251
|
atoms = filter_clashing_atoms(atoms_to_process)
|
File without changes
|
File without changes
|
File without changes
|