RNApolis 0.4.11__py3-none-any.whl → 0.4.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {RNApolis-0.4.11.dist-info → RNApolis-0.4.12.dist-info}/METADATA +1 -1
- {RNApolis-0.4.11.dist-info → RNApolis-0.4.12.dist-info}/RECORD +7 -7
- rnapolis/molecule_filter.py +84 -29
- {RNApolis-0.4.11.dist-info → RNApolis-0.4.12.dist-info}/LICENSE +0 -0
- {RNApolis-0.4.11.dist-info → RNApolis-0.4.12.dist-info}/WHEEL +0 -0
- {RNApolis-0.4.11.dist-info → RNApolis-0.4.12.dist-info}/entry_points.txt +0 -0
- {RNApolis-0.4.11.dist-info → RNApolis-0.4.12.dist-info}/top_level.txt +0 -0
@@ -2,16 +2,16 @@ rnapolis/annotator.py,sha256=_hsSX2VHFvIQ47l_EA7lwGFXLiVLbhFPEsOQzBKbjRk,22100
|
|
2
2
|
rnapolis/clashfinder.py,sha256=i95kp0o6OWNqmJDBr-PbsZd7RY2iJtBDr7QqolJSuAQ,8513
|
3
3
|
rnapolis/common.py,sha256=LY6Uz96Br8ki_gA8LpfatgtvVbt9jOTkwgagayqTgf8,31251
|
4
4
|
rnapolis/metareader.py,sha256=I1-cXc2YNBPwa3zihAnMTjEsAo79tEKzSmWu5yvN1Pk,2071
|
5
|
-
rnapolis/molecule_filter.py,sha256=
|
5
|
+
rnapolis/molecule_filter.py,sha256=F_xkAe7q2NZAaDpRaeikv-twUvbNflWdlLte7oFn2Ms,9130
|
6
6
|
rnapolis/motif_extractor.py,sha256=Lfn1iEkhkP9eZD3GPEWNAfy00QO7QPCc8wM_XS1ory8,1147
|
7
7
|
rnapolis/parser.py,sha256=lHI6LyFbEEPdHOzbged1-Ov0tl6MpSungIPacip0Py0,15838
|
8
8
|
rnapolis/rfam_folder.py,sha256=SjiiyML_T1__saruFwSMJEoQ7Y55GIU8ktS8ZUn5-fw,11111
|
9
9
|
rnapolis/tertiary.py,sha256=6t9ZB4w33-5n_M3sns1RoFXCOTgVAgGH4WDNG5OG9Kg,23426
|
10
10
|
rnapolis/transformer.py,sha256=aC0nBmHHJf5TyLvBIV57Jj3tlwpvHbPo347opfAOlQA,3844
|
11
11
|
rnapolis/util.py,sha256=IdquFO3PV1_KDqodjupzm0Rqvgy0CeSzxGHaGEHYXVU,543
|
12
|
-
RNApolis-0.4.
|
13
|
-
RNApolis-0.4.
|
14
|
-
RNApolis-0.4.
|
15
|
-
RNApolis-0.4.
|
16
|
-
RNApolis-0.4.
|
17
|
-
RNApolis-0.4.
|
12
|
+
RNApolis-0.4.12.dist-info/LICENSE,sha256=ZGRu12MzCgbYA-Lt8MyBlmjvPZh7xfiD5u5wBx0enq4,1066
|
13
|
+
RNApolis-0.4.12.dist-info/METADATA,sha256=fPrlpVrM83UYwPYwaeymBP7SeVLMv7XftCZOl7hUnes,54323
|
14
|
+
RNApolis-0.4.12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
15
|
+
RNApolis-0.4.12.dist-info/entry_points.txt,sha256=foN2Pn5e-OzEz0fFmNoX6PnFSZFQntOlY8LbognP5F0,308
|
16
|
+
RNApolis-0.4.12.dist-info/top_level.txt,sha256=LcO18koxZcWoJ21KDRRRo_tyIbmXL5z61dPitZpy8yc,9
|
17
|
+
RNApolis-0.4.12.dist-info/RECORD,,
|
rnapolis/molecule_filter.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
#! /usr/bin/env python
|
2
2
|
import argparse
|
3
3
|
import tempfile
|
4
|
-
from typing import List, Set, Tuple
|
4
|
+
from typing import Iterable, List, Set, Tuple
|
5
5
|
|
6
6
|
from mmcif.io.IoAdapterPy import IoAdapterPy
|
7
7
|
from mmcif.io.PdbxReader import DataCategory, DataContainer
|
8
|
+
|
8
9
|
from rnapolis.util import handle_input_file
|
9
10
|
|
10
11
|
# Source: https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Items/_entity_poly.type.html
|
@@ -153,38 +154,63 @@ def select_category_by_id(
|
|
153
154
|
return attributes, rows
|
154
155
|
|
155
156
|
|
156
|
-
def
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
157
|
+
def filter_by_poly_types(
    file_content: str, entity_poly_types: Iterable[str] = ("polyribonucleotide",)
) -> str:
    """
    Filter a PDBx/mmCIF file by entity polymer types. The function returns a new PDBx/mmCIF file.

    Entities whose `_entity_poly.type` is one of `entity_poly_types` are selected, then
    the matching `struct_asym` IDs and `atom_site` author chain IDs are derived from them.
    Only rows referring to those IDs are copied, for every (category, field name) pair
    listed in CATEGORIES_WITH_ENTITY_ID, CATEGORIES_WITH_ASYM_ID and
    CATEGORIES_WITH_AUTH_ASYM_ID.

    :param file_content: text of the input PDBx/mmCIF file
    :param entity_poly_types: polymer types to keep; a single string is treated as one type
    :return: text of the filtered PDBx/mmCIF file
    """
    # A bare string is itself an iterable of characters; wrap it so that
    # set() below does not explode it into single letters.
    if isinstance(entity_poly_types, str):
        entity_poly_types = (entity_poly_types,)
    wanted_types = set(entity_poly_types)

    adapter = IoAdapterPy()

    # NOTE: the adapter re-opens the temporary file by name while it is still
    # open here; per the tempfile documentation this works on Unix but not on Windows.
    with tempfile.NamedTemporaryFile("rt+") as f:
        f.write(file_content)
        f.seek(0)  # also flushes the buffered text to disk before the adapter reads it
        data = adapter.readFile(f.name)

    entity_ids = select_ids(data, "entity_poly", "type", "entity_id", wanted_types)
    asym_ids = select_ids(data, "struct_asym", "entity_id", "id", entity_ids)
    auth_asym_ids = select_ids(
        data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
    )

    output = DataContainer("rnapolis")

    for table, ids in (
        (CATEGORIES_WITH_ENTITY_ID, entity_ids),
        (CATEGORIES_WITH_ASYM_ID, asym_ids),
        (CATEGORIES_WITH_AUTH_ASYM_ID, auth_asym_ids),
    ):
        for category, field_name in table:
            attributes, rows = select_category_by_id(data, category, field_name, ids)

            # Skip categories that are absent or have no matching rows.
            if attributes and rows:
                obj = DataCategory(category, attributes, rows)
                output.append(obj)

    with tempfile.NamedTemporaryFile("rt+") as tmp:
        adapter.writeFile(tmp.name, [output])
        tmp.seek(0)
        return tmp.read()
|
193
|
+
|
194
|
+
|
195
|
+
def filter_by_chains(file_content: str, chains: Iterable[str]) -> str:
|
196
|
+
"""
|
197
|
+
Filter a PDBx/mmCIF file by chain IDs. The function returns a new PDBx/mmCIF file.
|
198
|
+
|
199
|
+
Warning! The new file might contain more chains than provided in the `chains` argument.
|
200
|
+
This is because the function filters by entity, so if you ask for chain "A",
|
201
|
+
which is part of entity 1 having chains "A", "B" and "C", then you will get all three chains.
|
202
|
+
"""
|
203
|
+
adapter = IoAdapterPy()
|
204
|
+
|
205
|
+
with tempfile.NamedTemporaryFile("rt+") as f:
|
206
|
+
f.write(file_content)
|
207
|
+
f.seek(0)
|
208
|
+
data = adapter.readFile(f.name)
|
209
|
+
|
210
|
+
output = DataContainer("rnapolis")
|
211
|
+
|
212
|
+
entity_ids = select_ids(data, "struct_asym", "id", "entity_id", set(chains))
|
213
|
+
asym_ids = set(chains)
|
188
214
|
auth_asym_ids = select_ids(
|
189
215
|
data, "atom_site", "label_asym_id", "auth_asym_id", asym_ids
|
190
216
|
)
|
@@ -201,9 +227,38 @@ def main():
|
|
201
227
|
obj = DataCategory(category, attributes, rows)
|
202
228
|
output.append(obj)
|
203
229
|
|
204
|
-
with tempfile.NamedTemporaryFile() as tmp:
|
230
|
+
with tempfile.NamedTemporaryFile("rt+") as tmp:
|
205
231
|
adapter.writeFile(tmp.name, [output])
|
206
|
-
|
232
|
+
tmp.seek(0)
|
233
|
+
return tmp.read()
|
234
|
+
|
235
|
+
|
236
|
+
def main():
    """
    Command-line entry point: filter a PDBx/mmCIF file and print the result.

    If at least one --chain is given, the file is filtered by chain IDs and
    --type is ignored. Otherwise it is filtered by polymer type, falling back
    to "polyribonucleotide" when no --type is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--type",
        help="a type of molecule to select, you can provide this argument multiple times (default: polyribonucleotide)",
        action="append",
        # With action="append", argparse appends user values to a non-empty
        # default instead of replacing it, so the fallback is applied below.
        default=None,
        choices=ENTITY_POLY_TYPES,
    )
    parser.add_argument(
        "--chain",
        help="a chain ID (label_asym_id) to select, you can provide this argument multiple times (if provided, it overrides the --type argument)",
        action="append",
        default=[],
    )
    parser.add_argument("path", help="path to a PDBx/mmCIF file")
    args = parser.parse_args()

    file = handle_input_file(args.path)
    content = file.read()

    if args.chain:
        print(filter_by_chains(content, args.chain))
    else:
        # Use the documented default only when the user gave no --type at all.
        print(filter_by_poly_types(content, args.type or ["polyribonucleotide"]))
|
207
262
|
|
208
263
|
|
209
264
|
if __name__ == "__main__":
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|