RNApolis 0.4.15__tar.gz → 0.4.16__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {rnapolis-0.4.15/src/RNApolis.egg-info → rnapolis-0.4.16}/PKG-INFO +1 -1
- {rnapolis-0.4.15 → rnapolis-0.4.16}/setup.py +1 -1
- {rnapolis-0.4.15 → rnapolis-0.4.16/src/RNApolis.egg-info}/PKG-INFO +1 -1
- rnapolis-0.4.16/src/rnapolis/molecule_filter.py +229 -0
- rnapolis-0.4.15/src/rnapolis/molecule_filter.py +0 -280
- {rnapolis-0.4.15 → rnapolis-0.4.16}/LICENSE +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/README.md +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/pyproject.toml +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/setup.cfg +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/RNApolis.egg-info/SOURCES.txt +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/RNApolis.egg-info/dependency_links.txt +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/RNApolis.egg-info/entry_points.txt +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/RNApolis.egg-info/requires.txt +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/RNApolis.egg-info/top_level.txt +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/annotator.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/clashfinder.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/common.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/metareader.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/motif_extractor.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/parser.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/rfam_folder.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/tertiary.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/transformer.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/src/rnapolis/util.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_annotator.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_bugfixes.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_common.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_metareader.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_molecule_filter.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_parser.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_quadruplexes.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_rfam_folder.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_tertiary.py +0 -0
- {rnapolis-0.4.15 → rnapolis-0.4.16}/tests/test_transformer.py +0 -0
@@ -0,0 +1,229 @@
|
|
1
|
+
#! /usr/bin/env python
|
2
|
+
import argparse
|
3
|
+
import os
|
4
|
+
import tempfile
|
5
|
+
from collections import defaultdict, namedtuple
|
6
|
+
from typing import Iterable, List, Set, Tuple
|
7
|
+
|
8
|
+
from mmcif.io.IoAdapterPy import IoAdapterPy
|
9
|
+
from mmcif.io.PdbxReader import DataCategory, DataContainer
|
10
|
+
|
11
|
+
from rnapolis.util import handle_input_file
|
12
|
+
|
13
|
+
# Values accepted for the `_entity_poly.type` item, copied from the PDBx/mmCIF dictionary:
# https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
ENTITY_POLY_TYPES = [
    "cyclic-pseudo-peptide",
    "other",
    "peptide nucleic acid",
    "polydeoxyribonucleotide",
    "polydeoxyribonucleotide/polyribonucleotide hybrid",
    "polypeptide(D)",
    "polypeptide(L)",
    "polyribonucleotide",
]
|
24
|
+
|
25
|
+
# One parent -> child item relation: the category and item name on the parent
# side, followed by the category and item name on the child side.
Link = namedtuple(
    "Link",
    "parent_category_id parent_name child_category_id child_name",
)
|
28
|
+
|
29
|
+
|
30
|
+
def load_pdbx_item_linked_group_list():
    """
    Load parent-child item links from the bundled PDBx/mmCIF dictionary.

    The dictionary file ``mmcif_pdbx_v50.dic`` is expected next to this module.
    Its ``pdbx_item_linked_group_list`` category is read row by row.

    Returns:
        A ``defaultdict(set)`` mapping a parent category id to the set of
        ``Link`` tuples whose parent is that category. ``parent_name`` and
        ``child_name`` are stored as the second dot-separated component of the
        dictionary value (the part following the category prefix). The mapping
        is empty when no container was read or the category is absent.
    """
    dictionary = os.path.join(
        os.path.abspath(os.path.dirname(__file__)), "mmcif_pdbx_v50.dic"
    )
    adapter = IoAdapterPy()
    data = adapter.readFile(dictionary)
    links = defaultdict(set)

    # Nothing was read, so there is no container to index into
    if not data:
        return links

    obj = data[0].getObj("pdbx_item_linked_group_list")
    if not obj:
        return links

    # The attribute list does not depend on the row, so fetch it only once
    attributes = obj.getAttributeList()

    for row in obj.getRowList():
        row_dict = dict(zip(attributes, row))
        child_category_id = row_dict["child_category_id"]
        child_name = row_dict["child_name"].split(".")[1]
        parent_name = row_dict["parent_name"].split(".")[1]
        parent_category_id = row_dict["parent_category_id"]
        links[parent_category_id].add(
            Link(parent_category_id, parent_name, child_category_id, child_name)
        )

    return links
|
51
|
+
|
52
|
+
|
53
|
+
def select_ids(
    data: List[DataContainer],
    category: str,
    field_name_to_extract: str,
    field_name_to_check: str,
    accepted_values: Iterable[str],
) -> Set[str]:
    """
    Collect values of one field from rows whose other field has an accepted value.

    Only the first container in ``data`` is inspected.

    Args:
        data: parsed containers, as returned by the mmCIF reader.
        category: name of the category to scan.
        field_name_to_extract: attribute whose values are collected.
        field_name_to_check: attribute compared against ``accepted_values``.
        accepted_values: values of ``field_name_to_check`` that select a row;
            any iterable is fine, including one-shot iterators.

    Returns:
        The set of extracted values. It is empty when ``data`` is empty, the
        category is missing or empty, or either attribute is not present.
    """
    if not data:
        return set()

    obj = data[0].getObj(category)
    if not obj:
        return set()

    attributes = obj.getAttributeList()
    if field_name_to_check not in attributes or field_name_to_extract not in attributes:
        return set()

    # Materialise once: a generator would otherwise be exhausted by the first
    # rows, and a list would be scanned linearly for every row
    accepted = set(accepted_values)
    index_to_check = attributes.index(field_name_to_check)
    index_to_extract = attributes.index(field_name_to_extract)
    return {
        row[index_to_extract]
        for row in obj.getRowList()
        if row[index_to_check] in accepted
    }
|
73
|
+
|
74
|
+
|
75
|
+
def select_category_by_id(
    data: List[DataContainer],
    category: str,
    field_name: str,
    ids: Iterable[str],
) -> Tuple[List[str], List[List[str]]]:
    """
    Select rows of a category whose ``field_name`` value is one of ``ids``.

    Only the first container in ``data`` is inspected.

    Args:
        data: parsed containers, as returned by the mmCIF reader.
        category: name of the category to filter.
        field_name: attribute compared against ``ids``.
        ids: accepted values; any iterable is fine, including one-shot iterators.

    Returns:
        A pair ``(attributes, rows)``. Both are empty when ``data`` is empty or
        the category is missing or empty. ``rows`` alone is empty when the
        category has no ``field_name`` attribute. Matching rows keep their
        original order.
    """
    if not data:
        return [], []

    obj = data[0].getObj(category)
    if not obj:
        return [], []

    attributes = obj.getAttributeList()
    if field_name not in attributes:
        return attributes, []

    # Materialise once: a generator would otherwise be exhausted by the first
    # rows, and a list would be scanned linearly for every row
    accepted = set(ids)
    index = attributes.index(field_name)
    return attributes, [row for row in obj.getRowList() if row[index] in accepted]
|
89
|
+
|
90
|
+
|
91
|
+
def read_cif(file_content: str) -> List[DataContainer]:
    """
    Parse PDBx/mmCIF text into data containers.

    The reader works on file paths, so the content is first written to a
    temporary file which is removed before the function returns.

    Args:
        file_content: text of a PDBx/mmCIF file.

    Returns:
        Whatever ``IoAdapterPy.readFile`` yields for that file; callers in this
        module index it as a list of containers.
    """
    # A closed file inside a temporary directory is used instead of an open
    # NamedTemporaryFile, because reopening such a file by name while it is
    # still open is not possible on every platform (notably Windows)
    with tempfile.TemporaryDirectory() as directory:
        path = os.path.join(directory, "input.cif")
        with open(path, "w") as f:
            f.write(file_content)
        adapter = IoAdapterPy()
        return adapter.readFile(path)
|
97
|
+
|
98
|
+
|
99
|
+
def _linked_fields(links, category, field_name):
    """Return ``(category, field)`` pairs: the parent item first, then its linked child items."""
    return [(category, field_name)] + [
        (link.child_category_id, link.child_name)
        for link in links[category]
        if link.parent_name == field_name
    ]


def filter_cif(data, entity_ids, asym_ids, auth_asym_ids, retain_categories):
    """
    Build a new PDBx/mmCIF text keeping only rows related to the given ids.

    Categories linked to ``entity.id``, ``struct_asym.id`` and
    ``atom_site.auth_asym_id`` (according to the dictionary links) are filtered
    by ``entity_ids``, ``asym_ids`` and ``auth_asym_ids`` respectively. A
    category is written only when it has at least one matching row.

    Args:
        data: parsed containers; only the first one is used.
        entity_ids: accepted entity ids.
        asym_ids: accepted ``label_asym_id`` values.
        auth_asym_ids: accepted ``auth_asym_id`` values.
        retain_categories: names of categories copied to the output unfiltered
            (when present and non-empty in the input).

    Returns:
        The content of the written PDBx/mmCIF file as a string.
    """
    links = load_pdbx_item_linked_group_list()
    output = DataContainer("rnapolis")

    for table, ids in (
        (_linked_fields(links, "entity", "id"), entity_ids),
        (_linked_fields(links, "struct_asym", "id"), asym_ids),
        (_linked_fields(links, "atom_site", "auth_asym_id"), auth_asym_ids),
    ):
        for category, field_name in table:
            attributes, rows = select_category_by_id(data, category, field_name, ids)

            if attributes and rows:
                output.append(DataCategory(category, attributes, rows))

    for category in retain_categories:
        obj = data[0].getObj(category)
        if obj:
            output.append(obj)

    # The writer works on file paths. A path inside a temporary directory is
    # used instead of an open NamedTemporaryFile, because reopening such a file
    # by name while it is still open is not possible on every platform
    with tempfile.TemporaryDirectory() as directory:
        path = os.path.join(directory, "output.cif")
        adapter = IoAdapterPy()
        adapter.writeFile(path, [output])

        # Nothing was written, so there is no content to return
        if not os.path.isfile(path):
            return ""

        with open(path) as f:
            return f.read()
|
141
|
+
|
142
|
+
|
143
|
+
def filter_by_poly_types(
    file_content: str,
    entity_poly_types: Iterable[str] = (
        "polyribonucleotide",
        "polydeoxyribonucleotide",
        "polydeoxyribonucleotide/polyribonucleotide hybrid",
    ),
    retain_categories: Iterable[str] = ("chem_comp",),
) -> str:
    """
    Filter a PDBx/mmCIF file by entity polymer types. The function returns a new PDBx/mmCIF file.

    Entities are selected by their ``entity_poly.type`` value. The selection is
    then propagated to ``struct_asym`` ids of those entities and further to
    ``auth_asym_id`` values found in ``atom_site`` for those ``label_asym_id``.

    Args:
        file_content: text of the input PDBx/mmCIF file.
        entity_poly_types: accepted ``entity_poly.type`` values.
        retain_categories: names of categories copied to the output unfiltered.

    Returns:
        The text of the filtered PDBx/mmCIF file.
    """
    data = read_cif(file_content)
    entity_ids = select_ids(
        data, "entity_poly", "entity_id", "type", set(entity_poly_types)
    )
    asym_ids = select_ids(data, "struct_asym", "id", "entity_id", entity_ids)
    auth_asym_ids = select_ids(
        data, "atom_site", "auth_asym_id", "label_asym_id", asym_ids
    )
    return filter_cif(data, entity_ids, asym_ids, auth_asym_ids, retain_categories)
|
161
|
+
|
162
|
+
|
163
|
+
def filter_by_chains(
    file_content: str,
    chains: Iterable[str],
    retain_categories: Iterable[str] = ("chem_comp",),
) -> str:
    """
    Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.

    Warning! The new file might contain more chains than provided in the `chains` argument.
    This is because the function filters by entity, so if you ask for chain "A",
    which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.

    Args:
        file_content: text of the input PDBx/mmCIF file.
        chains: chain IDs matched against ``struct_asym.id`` and
            ``atom_site.label_asym_id``.
        retain_categories: names of categories copied to the output unfiltered.

    Returns:
        The text of the filtered PDBx/mmCIF file.
    """
    data = read_cif(file_content)
    asym_ids = set(chains)
    entity_ids = select_ids(data, "struct_asym", "entity_id", "id", asym_ids)
    auth_asym_ids = select_ids(
        data, "atom_site", "auth_asym_id", "label_asym_id", asym_ids
    )
    return filter_cif(data, entity_ids, asym_ids, auth_asym_ids, retain_categories)
|
182
|
+
|
183
|
+
|
184
|
+
def main():
    """
    Command-line entry point: filter a PDBx/mmCIF file and print the result.

    When ``--filter-by-poly-types`` is given it takes precedence over
    ``--filter-by-chains``. When neither is given, the help message is printed
    and the input file is not opened.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filter-by-poly-types",
        help=f"filter by entity poly types, possible values: {', '.join(ENTITY_POLY_TYPES)}",
        action="append",
        default=[],
    )
    parser.add_argument(
        "--filter-by-chains",
        help="filter by chain IDs (label_asym_id), e.g. A, B, C",
        action="append",
        default=[],
    )
    parser.add_argument(
        "--retain-categories",
        # With action="append" and a non-empty default, values given on the
        # command line are added after the default ones, not instead of them
        help="categories to retain in the output file in addition to the default (default: chem_comp)",
        action="append",
        default=["chem_comp"],
    )
    parser.add_argument("path", help="path to a PDBx/mmCIF file")
    args = parser.parse_args()

    # Decide before touching the input, so that no file is opened needlessly
    if not args.filter_by_poly_types and not args.filter_by_chains:
        parser.print_help()
        return

    file = handle_input_file(args.path)

    if args.filter_by_poly_types:
        print(
            filter_by_poly_types(
                file.read(),
                entity_poly_types=args.filter_by_poly_types,
                retain_categories=args.retain_categories,
            )
        )
    else:
        print(
            filter_by_chains(
                file.read(),
                chains=args.filter_by_chains,
                retain_categories=args.retain_categories,
            )
        )
|
226
|
+
|
227
|
+
|
228
|
+
if __name__ == "__main__":
|
229
|
+
main()
|
@@ -1,280 +0,0 @@
|
|
1
|
-
#! /usr/bin/env python
|
2
|
-
import argparse
|
3
|
-
import tempfile
|
4
|
-
from typing import Iterable, List, Set, Tuple
|
5
|
-
|
6
|
-
from mmcif.io.IoAdapterPy import IoAdapterPy
|
7
|
-
from mmcif.io.PdbxReader import DataCategory, DataContainer
|
8
|
-
|
9
|
-
from rnapolis.util import handle_input_file
|
10
|
-
|
11
|
-
# Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
|
12
|
-
ENTITY_POLY_TYPES = [
|
13
|
-
"cyclic-pseudo-peptide",
|
14
|
-
"other",
|
15
|
-
"peptide nucleic acid",
|
16
|
-
"polydeoxyribonucleotide",
|
17
|
-
"polydeoxyribonucleotide/polyribonucleotide hybrid",
|
18
|
-
"polypeptide(D)",
|
19
|
-
"polypeptide(L)",
|
20
|
-
"polyribonucleotide",
|
21
|
-
]
|
22
|
-
|
23
|
-
CATEGORIES_WITH_ENTITY_ID = [
|
24
|
-
("entity", "id"),
|
25
|
-
("atom_site", "label_entity_id"),
|
26
|
-
("entity_keywords", "entity_id"),
|
27
|
-
("entity_name_com", "entity_id"),
|
28
|
-
("entity_name_sys", "entity_id"),
|
29
|
-
("entity_poly", "entity_id"),
|
30
|
-
("entity_src_gen", "entity_id"),
|
31
|
-
("entity_src_nat", "entity_id"),
|
32
|
-
("pdbx_branch_scheme", "entity_id"),
|
33
|
-
("pdbx_chain_remapping", "entity_id"),
|
34
|
-
("pdbx_construct", "entity_id"),
|
35
|
-
("pdbx_entity_assembly", "entity_id"),
|
36
|
-
("pdbx_entity_branch", "entity_id"),
|
37
|
-
("pdbx_entity_branch_descriptor", "entity_id"),
|
38
|
-
("pdbx_entity_branch_list", "entity_id"),
|
39
|
-
("pdbx_entity_func_bind_mode", "entity_id"),
|
40
|
-
("pdbx_entity_name", "entity_id"),
|
41
|
-
("pdbx_entity_nonpoly", "entity_id"),
|
42
|
-
("pdbx_entity_poly_domain", "entity_id"),
|
43
|
-
("pdbx_entity_poly_na_nonstandard", "entity_id"),
|
44
|
-
("pdbx_entity_poly_na_type", "entity_id"),
|
45
|
-
("pdbx_entity_poly_protein_class", "entity_id"),
|
46
|
-
("pdbx_entity_prod_protocol", "entity_id"),
|
47
|
-
("pdbx_entity_remapping", "entity_id"),
|
48
|
-
("pdbx_entity_src_gen_character", "entity_id"),
|
49
|
-
("pdbx_entity_src_gen_chrom", "entity_id"),
|
50
|
-
("pdbx_entity_src_gen_clone", "entity_id"),
|
51
|
-
("pdbx_entity_src_gen_express", "entity_id"),
|
52
|
-
("pdbx_entity_src_gen_fract", "entity_id"),
|
53
|
-
("pdbx_entity_src_gen_lysis", "entity_id"),
|
54
|
-
("pdbx_entity_src_gen_prod_digest", "entity_id"),
|
55
|
-
("pdbx_entity_src_gen_prod_other", "entity_id"),
|
56
|
-
("pdbx_entity_src_gen_prod_pcr", "entity_id"),
|
57
|
-
("pdbx_entity_src_gen_proteolysis", "entity_id"),
|
58
|
-
("pdbx_entity_src_gen_pure", "entity_id"),
|
59
|
-
("pdbx_entity_src_gen_refold", "entity_id"),
|
60
|
-
("pdbx_entity_src_syn", "entity_id"),
|
61
|
-
("pdbx_linked_entity_list", "entity_id"),
|
62
|
-
("pdbx_prerelease_seq", "entity_id"),
|
63
|
-
("pdbx_sifts_xref_db", "entity_id"),
|
64
|
-
("pdbx_sifts_xref_db_segments", "entity_id"),
|
65
|
-
("pdbx_struct_entity_inst", "entity_id"),
|
66
|
-
("struct_asym", "entity_id"),
|
67
|
-
("struct_ref", "entity_id"),
|
68
|
-
]
|
69
|
-
|
70
|
-
CATEGORIES_WITH_ASYM_ID = [
|
71
|
-
("pdbx_coordinate_model", "asym_id"),
|
72
|
-
("pdbx_distant_solvent_atoms", "label_asym_id"),
|
73
|
-
("pdbx_linked_entity_instance_list", "asym_id"),
|
74
|
-
("pdbx_poly_seq_scheme", "asym_id"),
|
75
|
-
("pdbx_sifts_unp_segments", "asym_id"),
|
76
|
-
("pdbx_struct_asym_gen", "asym_id"),
|
77
|
-
("pdbx_struct_ncs_virus_gen", "asym_id"),
|
78
|
-
("pdbx_struct_special_symmetry", "label_asym_id"),
|
79
|
-
("pdbx_unobs_or_zero_occ_atoms", "label_asym_id"),
|
80
|
-
("pdbx_unobs_or_zero_occ_residues", "label_asym_id"),
|
81
|
-
("refine_ls_restr_ncs", "pdbx_asym_id"),
|
82
|
-
("struct_biol_gen", "asym_id"),
|
83
|
-
]
|
84
|
-
|
85
|
-
CATEGORIES_WITH_AUTH_ASYM_ID = [
|
86
|
-
("atom_site_anisotrop", "pdbx_auth_asym_id"),
|
87
|
-
("pdbx_atom_site_aniso_tls", "auth_asym_id"),
|
88
|
-
("pdbx_entity_instance_feature", "auth_asym_id"),
|
89
|
-
("pdbx_feature_monomer", "auth_asym_id"),
|
90
|
-
("pdbx_missing_atom_nonpoly", "auth_asym_id"),
|
91
|
-
("pdbx_missing_atom_poly", "auth_asym_id"),
|
92
|
-
("pdbx_modification_feature", "auth_asym_id"),
|
93
|
-
("pdbx_refine_component", "auth_asym_id"),
|
94
|
-
("pdbx_remediation_atom_site_mapping", "auth_asym_id"),
|
95
|
-
("pdbx_rmch_outlier", "auth_asym_id"),
|
96
|
-
("pdbx_rms_devs_cov_by_monomer", "auth_asym_id"),
|
97
|
-
("pdbx_sequence_pattern", "auth_asym_id"),
|
98
|
-
("pdbx_solvent_atom_site_mapping", "auth_asym_id"),
|
99
|
-
("pdbx_stereochemistry", "auth_asym_id"),
|
100
|
-
("pdbx_struct_chem_comp_diagnostics", "pdb_strand_id"),
|
101
|
-
("pdbx_struct_chem_comp_feature", "pdb_strand_id"),
|
102
|
-
("pdbx_struct_group_components", "auth_asym_id"),
|
103
|
-
("pdbx_struct_mod_residue", "auth_asym_id"),
|
104
|
-
("pdbx_sugar_phosphate_geometry", "auth_asym_id"),
|
105
|
-
("pdbx_validate_chiral", "auth_asym_id"),
|
106
|
-
("pdbx_validate_main_chain_plane", "auth_asym_id"),
|
107
|
-
("pdbx_validate_planes", "auth_asym_id"),
|
108
|
-
("pdbx_validate_planes_atom", "auth_asym_id"),
|
109
|
-
("pdbx_validate_torsion", "auth_asym_id"),
|
110
|
-
("struct_mon_nucl", "auth_asym_id"),
|
111
|
-
("struct_mon_prot", "auth_asym_id"),
|
112
|
-
("struct_site_gen", "auth_asym_id"),
|
113
|
-
]
|
114
|
-
|
115
|
-
|
116
|
-
def select_ids(
|
117
|
-
data: List[DataContainer],
|
118
|
-
obj_name: str,
|
119
|
-
tested_field_name: str,
|
120
|
-
extracted_field_name: str,
|
121
|
-
accepted_values: Set[str],
|
122
|
-
) -> Set[str]:
|
123
|
-
obj = data[0].getObj(obj_name)
|
124
|
-
ids = set()
|
125
|
-
|
126
|
-
if obj:
|
127
|
-
for row in obj.getRowList():
|
128
|
-
row_dict = dict(zip(obj.getAttributeList(), row))
|
129
|
-
|
130
|
-
if row_dict.get(tested_field_name, None) in accepted_values:
|
131
|
-
ids.add(row_dict[extracted_field_name])
|
132
|
-
|
133
|
-
return ids
|
134
|
-
|
135
|
-
|
136
|
-
def select_category_by_id(
|
137
|
-
data: List[DataContainer],
|
138
|
-
category: str,
|
139
|
-
field_name: str,
|
140
|
-
ids: List[str],
|
141
|
-
) -> Tuple[List[str], List[List[str]]]:
|
142
|
-
obj = data[0].getObj(category)
|
143
|
-
attributes = []
|
144
|
-
rows = []
|
145
|
-
|
146
|
-
if obj:
|
147
|
-
attributes = obj.getAttributeList()
|
148
|
-
|
149
|
-
for row in obj.getRowList():
|
150
|
-
row_dict = dict(zip(obj.getAttributeList(), row))
|
151
|
-
|
152
|
-
if row_dict.get(field_name, None) in ids:
|
153
|
-
rows.append(row)
|
154
|
-
|
155
|
-
return attributes, rows
|
156
|
-
|
157
|
-
|
158
|
-
def filter_by_poly_types(
|
159
|
-
file_content: str,
|
160
|
-
entity_poly_types: Iterable[str] = ["polyribonucleotide"],
|
161
|
-
retain_categories: Iterable[str] = [],
|
162
|
-
) -> str:
|
163
|
-
adapter = IoAdapterPy()
|
164
|
-
|
165
|
-
with tempfile.NamedTemporaryFile("rt+") as f:
|
166
|
-
f.write(file_content)
|
167
|
-
f.seek(0)
|
168
|
-
data = adapter.readFile(f.name)
|
169
|
-
|
170
|
-
entity_ids = select_ids(
|
171
|
-
data, "entity_poly", "type", "entity_id", set(entity_poly_types)
|
172
|
-
)
|
173
|
-
asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
|
174
|
-
auth_asym_ids = select_ids(
|
175
|
-
data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
|
176
|
-
)
|
177
|
-
|
178
|
-
output = DataContainer("rnapolis")
|
179
|
-
|
180
|
-
for table, ids in (
|
181
|
-
(CATEGORIES_WITH_ENTITY_ID, entity_ids),
|
182
|
-
(CATEGORIES_WITH_ASYM_ID, asym_ids),
|
183
|
-
(CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
|
184
|
-
):
|
185
|
-
for category, field_name in table:
|
186
|
-
attributes, rows = select_category_by_id(data, category, field_name, ids)
|
187
|
-
|
188
|
-
if attributes and rows:
|
189
|
-
obj = DataCategory(category, attributes, rows)
|
190
|
-
output.append(obj)
|
191
|
-
|
192
|
-
for category in retain_categories:
|
193
|
-
obj = data[0].getObj(category)
|
194
|
-
if obj:
|
195
|
-
output.append(obj)
|
196
|
-
|
197
|
-
with tempfile.NamedTemporaryFile("rt+") as tmp:
|
198
|
-
adapter.writeFile(tmp.name, [output])
|
199
|
-
tmp.seek(0)
|
200
|
-
return tmp.read()
|
201
|
-
|
202
|
-
|
203
|
-
def filter_by_chains(
|
204
|
-
file_content: str, chains: Iterable[str], retain_categories: Iterable[str] = []
|
205
|
-
) -> str:
|
206
|
-
"""
|
207
|
-
Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
|
208
|
-
|
209
|
-
Warning! The new file might contain more chains than provided in the `chains` argument.
|
210
|
-
This is because the function filters by entity, so if you ask for chain "A",
|
211
|
-
which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
|
212
|
-
"""
|
213
|
-
adapter = IoAdapterPy()
|
214
|
-
|
215
|
-
with tempfile.NamedTemporaryFile("rt+") as f:
|
216
|
-
f.write(file_content)
|
217
|
-
f.seek(0)
|
218
|
-
data = adapter.readFile(f.name)
|
219
|
-
|
220
|
-
output = DataContainer("rnapolis")
|
221
|
-
|
222
|
-
entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
|
223
|
-
asym_ids = set(chains)
|
224
|
-
auth_asym_ids = select_ids(
|
225
|
-
data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
|
226
|
-
)
|
227
|
-
|
228
|
-
for table, ids in (
|
229
|
-
(CATEGORIES_WITH_ENTITY_ID, entity_ids),
|
230
|
-
(CATEGORIES_WITH_ASYM_ID, asym_ids),
|
231
|
-
(CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
|
232
|
-
):
|
233
|
-
for category, field_name in table:
|
234
|
-
attributes, rows = select_category_by_id(data, category, field_name, ids)
|
235
|
-
|
236
|
-
if attributes and rows:
|
237
|
-
obj = DataCategory(category, attributes, rows)
|
238
|
-
output.append(obj)
|
239
|
-
|
240
|
-
for category in retain_categories:
|
241
|
-
obj = data[0].getObj(category)
|
242
|
-
if obj:
|
243
|
-
output.append(obj)
|
244
|
-
|
245
|
-
with tempfile.NamedTemporaryFile("rt+") as tmp:
|
246
|
-
adapter.writeFile(tmp.name, [output])
|
247
|
-
tmp.seek(0)
|
248
|
-
return tmp.read()
|
249
|
-
|
250
|
-
|
251
|
-
def main():
|
252
|
-
parser = argparse.ArgumentParser()
|
253
|
-
parser.add_argument(
|
254
|
-
"--type",
|
255
|
-
help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
|
256
|
-
action="append",
|
257
|
-
default=["polyribonucleotide"],
|
258
|
-
choices=ENTITY_POLY_TYPES,
|
259
|
-
)
|
260
|
-
parser.add_argument(
|
261
|
-
"--chain",
|
262
|
-
help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
|
263
|
-
action="append",
|
264
|
-
default=[],
|
265
|
-
)
|
266
|
-
parser.add_argument("path", help="path to a PDBx/mmCIF file")
|
267
|
-
args = parser.parse_args()
|
268
|
-
|
269
|
-
file = handle_input_file(args.path)
|
270
|
-
|
271
|
-
if args.chain:
|
272
|
-
print(filter_by_chains(file.read(), args.chain))
|
273
|
-
elif args.type:
|
274
|
-
print(filter_by_poly_types(file.read(), args.type))
|
275
|
-
else:
|
276
|
-
parser.print_help()
|
277
|
-
|
278
|
-
|
279
|
-
if __name__ == "__main__":
|
280
|
-
main()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|