biotite 0.40.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +1 -1
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +119 -119
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +119 -119
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.c +119 -119
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpp +119 -119
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localgapped.c +119 -119
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.c +119 -119
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.c +119 -119
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/pairwise.c +119 -119
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.c +119 -119
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.c +119 -119
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/tracetable.c +119 -119
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +119 -119
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +119 -119
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.c +119 -119
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.c +119 -119
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/bonds.c +1124 -915
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/celllist.c +119 -119
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/charges.c +119 -119
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/info/atoms.py +6 -1
- biotite/structure/info/bonds.py +1 -1
- biotite/structure/info/ccd/amino_acids.txt +17 -0
- biotite/structure/info/ccd/carbohydrates.txt +2 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +1 -0
- biotite/structure/info/misc.py +69 -5
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +157 -165
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/convertarray.c +119 -119
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.c +119 -119
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.c +119 -119
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.c +119 -119
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/pdb/file.py +84 -31
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbx/__init__.py +0 -1
- biotite/structure/io/pdbx/bcif.py +2 -3
- biotite/structure/io/pdbx/cif.py +9 -5
- biotite/structure/io/pdbx/component.py +4 -1
- biotite/structure/io/pdbx/convert.py +203 -79
- biotite/structure/io/pdbx/encoding.c +119 -119
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +119 -119
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +472 -13
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
- biotite/structure/io/pdbx/error.py +0 -14
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
- {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
|
@@ -240,11 +240,11 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
240
240
|
If set to true, a :class:`BondList` will be created for the
|
|
241
241
|
resulting :class:`AtomArray` containing the bond information
|
|
242
242
|
from the file.
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
243
|
+
Inter-residue bonds, will be read from the ``struct_conn``
|
|
244
|
+
category.
|
|
245
|
+
Intra-residue bonds will be read from the ``chem_comp_bond``, if
|
|
246
|
+
available, otherwise they will be derived from the Chemical
|
|
247
|
+
Component Dictionary.
|
|
248
248
|
|
|
249
249
|
Returns
|
|
250
250
|
-------
|
|
@@ -279,11 +279,7 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
279
279
|
model_atom_site = _filter_model(atom_site, model_starts, 1)
|
|
280
280
|
# Any field of the category would work here to get the length
|
|
281
281
|
model_length = model_atom_site.row_count
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
_fill_annotations(
|
|
285
|
-
stack, model_atom_site, extra_fields, use_author_fields
|
|
286
|
-
)
|
|
282
|
+
atoms = AtomArrayStack(model_count, model_length)
|
|
287
283
|
|
|
288
284
|
# Check if each model has the same amount of atoms
|
|
289
285
|
# If not, raise exception
|
|
@@ -294,29 +290,17 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
294
290
|
"instead"
|
|
295
291
|
)
|
|
296
292
|
|
|
297
|
-
|
|
293
|
+
atoms.coord[:, :, 0] = atom_site["Cartn_x"].as_array(np.float32) \
|
|
298
294
|
.reshape((model_count, model_length))
|
|
299
|
-
|
|
295
|
+
atoms.coord[:, :, 1] = atom_site["Cartn_y"].as_array(np.float32) \
|
|
300
296
|
.reshape((model_count, model_length))
|
|
301
|
-
|
|
297
|
+
atoms.coord[:, :, 2] = atom_site["Cartn_z"].as_array(np.float32) \
|
|
302
298
|
.reshape((model_count, model_length))
|
|
303
299
|
|
|
304
|
-
if include_bonds:
|
|
305
|
-
bonds = connect_via_residue_names(stack)
|
|
306
|
-
if "struct_conn" in block:
|
|
307
|
-
bonds = bonds.merge(_parse_inter_residue_bonds(
|
|
308
|
-
model_atom_site, block["struct_conn"]
|
|
309
|
-
))
|
|
310
|
-
stack.bonds = bonds
|
|
311
|
-
|
|
312
|
-
stack = _filter_altloc(stack, model_atom_site, altloc)
|
|
313
|
-
|
|
314
300
|
box = _get_box(block)
|
|
315
301
|
if box is not None:
|
|
316
302
|
# Duplicate same box for each model
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
return stack
|
|
303
|
+
atoms.box = np.repeat(box[np.newaxis, ...], model_count, axis=0)
|
|
320
304
|
|
|
321
305
|
else:
|
|
322
306
|
if model == 0:
|
|
@@ -332,29 +316,44 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
332
316
|
model_atom_site = _filter_model(atom_site, model_starts, model)
|
|
333
317
|
# Any field of the category would work here to get the length
|
|
334
318
|
model_length = model_atom_site.row_count
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
_fill_annotations(
|
|
338
|
-
array, model_atom_site, extra_fields, use_author_fields
|
|
339
|
-
)
|
|
319
|
+
atoms = AtomArray(model_length)
|
|
340
320
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
if include_bonds:
|
|
346
|
-
bonds = connect_via_residue_names(array)
|
|
347
|
-
if "struct_conn" in block:
|
|
348
|
-
bonds = bonds.merge(_parse_inter_residue_bonds(
|
|
349
|
-
model_atom_site, block["struct_conn"]
|
|
350
|
-
))
|
|
351
|
-
array.bonds = bonds
|
|
321
|
+
atoms.coord[:, 0] = model_atom_site["Cartn_x"].as_array(np.float32)
|
|
322
|
+
atoms.coord[:, 1] = model_atom_site["Cartn_y"].as_array(np.float32)
|
|
323
|
+
atoms.coord[:, 2] = model_atom_site["Cartn_z"].as_array(np.float32)
|
|
352
324
|
|
|
353
|
-
|
|
325
|
+
atoms.box = _get_box(block)
|
|
354
326
|
|
|
355
|
-
|
|
327
|
+
# The below part is the same for both, AtomArray and AtomArrayStack
|
|
328
|
+
_fill_annotations(
|
|
329
|
+
atoms, model_atom_site, extra_fields, use_author_fields
|
|
330
|
+
)
|
|
331
|
+
if include_bonds:
|
|
332
|
+
if "chem_comp_bond" in block:
|
|
333
|
+
try:
|
|
334
|
+
custom_bond_dict = _parse_intra_residue_bonds(
|
|
335
|
+
block["chem_comp_bond"]
|
|
336
|
+
)
|
|
337
|
+
except KeyError:
|
|
338
|
+
warnings.warn(
|
|
339
|
+
"The 'chem_comp_bond' category has missing columns, "
|
|
340
|
+
"falling back to using Chemical Component Dictionary",
|
|
341
|
+
UserWarning
|
|
342
|
+
)
|
|
343
|
+
custom_bond_dict = None
|
|
344
|
+
bonds = connect_via_residue_names(
|
|
345
|
+
atoms, custom_bond_dict=custom_bond_dict
|
|
346
|
+
)
|
|
347
|
+
else:
|
|
348
|
+
bonds = connect_via_residue_names(atoms)
|
|
349
|
+
if "struct_conn" in block:
|
|
350
|
+
bonds = bonds.merge(_parse_inter_residue_bonds(
|
|
351
|
+
model_atom_site, block["struct_conn"]
|
|
352
|
+
))
|
|
353
|
+
atoms.bonds = bonds
|
|
354
|
+
atoms = _filter_altloc(atoms, model_atom_site, altloc)
|
|
356
355
|
|
|
357
|
-
|
|
356
|
+
return atoms
|
|
358
357
|
|
|
359
358
|
|
|
360
359
|
def _get_block(pdbx_component, block_name):
|
|
@@ -372,14 +371,14 @@ def _get_block(pdbx_component, block_name):
|
|
|
372
371
|
return pdbx_component
|
|
373
372
|
|
|
374
373
|
|
|
375
|
-
def _get_or_fallback(category, key, fallback_key
|
|
374
|
+
def _get_or_fallback(category, key, fallback_key):
|
|
376
375
|
"""
|
|
377
376
|
Return column related to key in category if it exists,
|
|
378
377
|
otherwise try to get the column related to fallback key.
|
|
379
378
|
"""
|
|
380
379
|
if key not in category:
|
|
381
380
|
warnings.warn(
|
|
382
|
-
f"Attribute '{key}' not found within '
|
|
381
|
+
f"Attribute '{key}' not found within 'atom_site' category. "
|
|
383
382
|
f"The fallback attribute '{fallback_key}' will be used instead",
|
|
384
383
|
UserWarning
|
|
385
384
|
)
|
|
@@ -387,8 +386,8 @@ def _get_or_fallback(category, key, fallback_key, cat_name="input"):
|
|
|
387
386
|
return category[fallback_key]
|
|
388
387
|
except KeyError as key_exc:
|
|
389
388
|
raise InvalidFileError(
|
|
390
|
-
f"Fallback attribute '{fallback_key}' not found
|
|
391
|
-
"'
|
|
389
|
+
f"Fallback attribute '{fallback_key}' not found within "
|
|
390
|
+
"'atom_site' category"
|
|
392
391
|
) from key_exc
|
|
393
392
|
return category[key]
|
|
394
393
|
|
|
@@ -483,6 +482,28 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
483
482
|
)
|
|
484
483
|
|
|
485
484
|
|
|
485
|
+
def _parse_intra_residue_bonds(chem_comp_bond):
|
|
486
|
+
"""
|
|
487
|
+
Create a :func:`connect_via_residue_names()` compatible
|
|
488
|
+
`custom_bond_dict` from the ``chem_comp_bond`` category.
|
|
489
|
+
"""
|
|
490
|
+
custom_bond_dict = {}
|
|
491
|
+
for res_name, atom_1, atom_2, order, aromatic_flag in zip(
|
|
492
|
+
chem_comp_bond["comp_id"].as_array(str),
|
|
493
|
+
chem_comp_bond["atom_id_1"].as_array(str),
|
|
494
|
+
chem_comp_bond["atom_id_2"].as_array(str),
|
|
495
|
+
chem_comp_bond["value_order"].as_array(str),
|
|
496
|
+
chem_comp_bond["pdbx_aromatic_flag"].as_array(str)
|
|
497
|
+
):
|
|
498
|
+
if res_name not in custom_bond_dict:
|
|
499
|
+
custom_bond_dict[res_name] = {}
|
|
500
|
+
bond_type = COMP_BOND_ORDER_TO_TYPE.get(
|
|
501
|
+
(order.upper(), aromatic_flag), BondType.ANY
|
|
502
|
+
)
|
|
503
|
+
custom_bond_dict[res_name][atom_1.item(), atom_2.item()] = bond_type
|
|
504
|
+
return custom_bond_dict
|
|
505
|
+
|
|
506
|
+
|
|
486
507
|
def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
487
508
|
"""
|
|
488
509
|
Create inter-residue bonds by parsing the ``struct_conn`` category.
|
|
@@ -676,7 +697,7 @@ def _get_box(block):
|
|
|
676
697
|
return vectors_from_unitcell(len_a, len_b, len_c, alpha, beta, gamma)
|
|
677
698
|
|
|
678
699
|
|
|
679
|
-
def set_structure(pdbx_file, array, data_block=None):
|
|
700
|
+
def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
680
701
|
"""
|
|
681
702
|
Set the ``atom_site`` category with atom information from an
|
|
682
703
|
:class:`AtomArray` or :class:`AtomArrayStack`.
|
|
@@ -703,7 +724,13 @@ def set_structure(pdbx_file, array, data_block=None):
|
|
|
703
724
|
file.
|
|
704
725
|
If the data block object is passed directly to `pdbx_file`,
|
|
705
726
|
this parameter is ignored.
|
|
706
|
-
If the file is empty, a new data will be created.
|
|
727
|
+
If the file is empty, a new data block will be created.
|
|
728
|
+
include_bonds : bool, optional
|
|
729
|
+
If set to true and `array` has associated ``bonds`` , the
|
|
730
|
+
intra-residue bonds will be written into the ``chem_comp_bond``
|
|
731
|
+
category.
|
|
732
|
+
Inter-residue bonds will be written into the ``struct_conn``
|
|
733
|
+
independent of this parameter.
|
|
707
734
|
|
|
708
735
|
Notes
|
|
709
736
|
-----
|
|
@@ -721,6 +748,8 @@ def set_structure(pdbx_file, array, data_block=None):
|
|
|
721
748
|
>>> file.write(os.path.join(path_to_directory, "structure.cif"))
|
|
722
749
|
|
|
723
750
|
"""
|
|
751
|
+
_check_non_empty(array)
|
|
752
|
+
|
|
724
753
|
block = _get_or_create_block(pdbx_file, data_block)
|
|
725
754
|
Category = block.subcomponent_class()
|
|
726
755
|
Column = Category.subcomponent_class()
|
|
@@ -765,7 +794,13 @@ def set_structure(pdbx_file, array, data_block=None):
|
|
|
765
794
|
)
|
|
766
795
|
|
|
767
796
|
if array.bonds is not None:
|
|
768
|
-
|
|
797
|
+
struct_conn = _set_inter_residue_bonds(array, atom_site)
|
|
798
|
+
if struct_conn is not None:
|
|
799
|
+
block["struct_conn"] = struct_conn
|
|
800
|
+
if include_bonds:
|
|
801
|
+
chem_comp_bond = _set_intra_residue_bonds(array, atom_site)
|
|
802
|
+
if chem_comp_bond is not None:
|
|
803
|
+
block["chem_comp_bond"] = chem_comp_bond
|
|
769
804
|
|
|
770
805
|
# In case of a single model handle each coordinate
|
|
771
806
|
# simply like a flattened array
|
|
@@ -782,7 +817,7 @@ def set_structure(pdbx_file, array, data_block=None):
|
|
|
782
817
|
)
|
|
783
818
|
# In case of multiple models repeat annotations
|
|
784
819
|
# and use model specific coordinates
|
|
785
|
-
|
|
820
|
+
else:
|
|
786
821
|
atom_site = _repeat(atom_site, array.stack_depth())
|
|
787
822
|
coord = np.reshape(
|
|
788
823
|
array.coord, (array.stack_depth() * array.array_length(), 3)
|
|
@@ -794,8 +829,6 @@ def set_structure(pdbx_file, array, data_block=None):
|
|
|
794
829
|
np.arange(1, array.stack_depth() + 1, dtype=np.int32),
|
|
795
830
|
repeats=array.array_length(),
|
|
796
831
|
)
|
|
797
|
-
else:
|
|
798
|
-
raise ValueError("Structure must be AtomArray or AtomArrayStack")
|
|
799
832
|
if not "atom_id" in annot_categories:
|
|
800
833
|
# Count from 1
|
|
801
834
|
atom_site["id"] = np.arange(
|
|
@@ -822,6 +855,20 @@ def set_structure(pdbx_file, array, data_block=None):
|
|
|
822
855
|
block["cell"] = cell
|
|
823
856
|
|
|
824
857
|
|
|
858
|
+
def _check_non_empty(array):
|
|
859
|
+
if isinstance(array, AtomArray):
|
|
860
|
+
if array.array_length() == 0:
|
|
861
|
+
raise BadStructureError("Structure must not be empty")
|
|
862
|
+
elif isinstance(array, AtomArrayStack):
|
|
863
|
+
if array.array_length() == 0 or array.stack_depth() == 0:
|
|
864
|
+
raise BadStructureError("Structure must not be empty")
|
|
865
|
+
else:
|
|
866
|
+
raise ValueError(
|
|
867
|
+
"Structure must be AtomArray or AtomArrayStack, "
|
|
868
|
+
f"but got {type(array).__name__}"
|
|
869
|
+
)
|
|
870
|
+
|
|
871
|
+
|
|
825
872
|
def _get_or_create_block(pdbx_component, block_name):
|
|
826
873
|
if isinstance(pdbx_component, PDBxFile):
|
|
827
874
|
# The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
|
|
@@ -885,6 +932,67 @@ def _repeat(category, repetitions):
|
|
|
885
932
|
return Category(category_dict)
|
|
886
933
|
|
|
887
934
|
|
|
935
|
+
def _set_intra_residue_bonds(array, atom_site):
|
|
936
|
+
"""
|
|
937
|
+
Create the ``chem_comp_bond`` category containing the intra-residue
|
|
938
|
+
bonds.
|
|
939
|
+
``atom_site`` is only used to infer the right :class:`Category` type
|
|
940
|
+
(either :class:`CIFCategory` or :class:`BinaryCIFCategory`).
|
|
941
|
+
"""
|
|
942
|
+
if (array.res_name == "").any():
|
|
943
|
+
raise BadStructureError(
|
|
944
|
+
"Structure contains atoms with empty residue name, "
|
|
945
|
+
"but it is required to write intra-residue bonds"
|
|
946
|
+
)
|
|
947
|
+
if (array.atom_name == "").any():
|
|
948
|
+
raise BadStructureError(
|
|
949
|
+
"Structure contains atoms with empty atom name, "
|
|
950
|
+
"but it is required to write intra-residue bonds"
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
Category = type(atom_site)
|
|
954
|
+
Column = Category.subcomponent_class()
|
|
955
|
+
|
|
956
|
+
bond_array = _filter_bonds(array, "intra")
|
|
957
|
+
if len(bond_array) == 0:
|
|
958
|
+
return None
|
|
959
|
+
value_order = np.zeros(len(bond_array), dtype="U4")
|
|
960
|
+
aromatic_flag = np.zeros(len(bond_array), dtype="U1")
|
|
961
|
+
for i, bond_type in enumerate(bond_array[:, 2]):
|
|
962
|
+
if bond_type == BondType.ANY:
|
|
963
|
+
# ANY bonds will be masked anyway, no need to set the value
|
|
964
|
+
continue
|
|
965
|
+
order, aromatic = COMP_BOND_TYPE_TO_ORDER[bond_type]
|
|
966
|
+
value_order[i] = order
|
|
967
|
+
aromatic_flag[i] = aromatic
|
|
968
|
+
any_mask = bond_array[:, 2] == BondType.ANY
|
|
969
|
+
|
|
970
|
+
chem_comp_bond = Category()
|
|
971
|
+
# Take the residue name from the first atom index, as the residue
|
|
972
|
+
# name is the same for both atoms, since we have only intra bonds
|
|
973
|
+
chem_comp_bond["comp_id"] = array.res_name[bond_array[:, 0]]
|
|
974
|
+
chem_comp_bond["atom_id_1"] = array.atom_name[bond_array[:, 0]]
|
|
975
|
+
chem_comp_bond["atom_id_2"] = array.atom_name[bond_array[:, 1]]
|
|
976
|
+
chem_comp_bond["value_order"] = Column(
|
|
977
|
+
value_order,
|
|
978
|
+
np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
979
|
+
)
|
|
980
|
+
chem_comp_bond["pdbx_aromatic_flag"] = Column(
|
|
981
|
+
aromatic_flag,
|
|
982
|
+
np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
983
|
+
)
|
|
984
|
+
# BondList does not contain stereo information
|
|
985
|
+
# -> all values are missing
|
|
986
|
+
chem_comp_bond["pdbx_stereo_config"] = Column(
|
|
987
|
+
np.zeros(len(bond_array), dtype="U1"),
|
|
988
|
+
np.full(len(bond_array), MaskValue.MISSING)
|
|
989
|
+
)
|
|
990
|
+
chem_comp_bond["pdbx_ordinal"] = np.arange(
|
|
991
|
+
1, len(bond_array) + 1, dtype=np.int32
|
|
992
|
+
)
|
|
993
|
+
return chem_comp_bond
|
|
994
|
+
|
|
995
|
+
|
|
888
996
|
def _set_inter_residue_bonds(array, atom_site):
|
|
889
997
|
"""
|
|
890
998
|
Create the ``struct_conn`` category containing the inter-residue
|
|
@@ -900,15 +1008,9 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
900
1008
|
Category = type(atom_site)
|
|
901
1009
|
Column = Category.subcomponent_class()
|
|
902
1010
|
|
|
903
|
-
bond_array = array
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
residue_starts_1, residue_starts_2 = get_residue_starts_for(
|
|
907
|
-
array, bond_array[:, :2].flatten()
|
|
908
|
-
).reshape(-1, 2).T
|
|
909
|
-
# Filter out all intra-residue bonds
|
|
910
|
-
bond_array = bond_array[residue_starts_1 != residue_starts_2]
|
|
911
|
-
|
|
1011
|
+
bond_array = _filter_bonds(array, "inter")
|
|
1012
|
+
if len(bond_array) == 0:
|
|
1013
|
+
return None
|
|
912
1014
|
struct_conn = Category()
|
|
913
1015
|
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
|
|
914
1016
|
struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
|
|
@@ -932,6 +1034,25 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
932
1034
|
return struct_conn
|
|
933
1035
|
|
|
934
1036
|
|
|
1037
|
+
def _filter_bonds(array, connection):
|
|
1038
|
+
"""
|
|
1039
|
+
Get a bonds array, that contain either only intra-residue or
|
|
1040
|
+
only inter-residue bonds.
|
|
1041
|
+
"""
|
|
1042
|
+
bond_array = array.bonds.as_array()
|
|
1043
|
+
# To save computation time call 'get_residue_starts_for()' only once
|
|
1044
|
+
# with indices of the first and second atom of each bond
|
|
1045
|
+
residue_starts_1, residue_starts_2 = get_residue_starts_for(
|
|
1046
|
+
array, bond_array[:, :2].flatten()
|
|
1047
|
+
).reshape(-1, 2).T
|
|
1048
|
+
if connection == "intra":
|
|
1049
|
+
return bond_array[residue_starts_1 == residue_starts_2]
|
|
1050
|
+
elif connection == "inter":
|
|
1051
|
+
return bond_array[residue_starts_1 != residue_starts_2]
|
|
1052
|
+
else:
|
|
1053
|
+
raise ValueError("Invalid 'connection' option")
|
|
1054
|
+
|
|
1055
|
+
|
|
935
1056
|
def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
936
1057
|
res_name=None):
|
|
937
1058
|
"""
|
|
@@ -1011,7 +1132,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1011
1132
|
atom_category = _filter(
|
|
1012
1133
|
atom_category, atom_category["comp_id"].as_array() == res_name
|
|
1013
1134
|
)
|
|
1014
|
-
if
|
|
1135
|
+
if atom_category.row_count == 0:
|
|
1015
1136
|
raise KeyError(
|
|
1016
1137
|
f"No rows with residue name '{res_name}' found in "
|
|
1017
1138
|
f"'chem_comp_atom' category"
|
|
@@ -1098,6 +1219,8 @@ def set_component(pdbx_file, array, data_block=None):
|
|
|
1098
1219
|
If the data block object is passed directly to `pdbx_file`,
|
|
1099
1220
|
this parameter is ignored.
|
|
1100
1221
|
"""
|
|
1222
|
+
_check_non_empty(array)
|
|
1223
|
+
|
|
1101
1224
|
block = _get_or_create_block(pdbx_file, data_block)
|
|
1102
1225
|
Category = block.subcomponent_class()
|
|
1103
1226
|
|
|
@@ -1132,7 +1255,7 @@ def set_component(pdbx_file, array, data_block=None):
|
|
|
1132
1255
|
).astype(str)
|
|
1133
1256
|
block["chem_comp_atom"] = atom_cat
|
|
1134
1257
|
|
|
1135
|
-
if array.bonds is not None:
|
|
1258
|
+
if array.bonds is not None and array.bonds.get_bond_count() > 0:
|
|
1136
1259
|
bond_array = array.bonds.as_array()
|
|
1137
1260
|
order_flags = []
|
|
1138
1261
|
aromatic_flags = []
|
|
@@ -1428,25 +1551,26 @@ def _parse_operation_expression(expression):
|
|
|
1428
1551
|
# Split groups by parentheses:
|
|
1429
1552
|
# use the opening parenthesis as delimiter
|
|
1430
1553
|
# and just remove the closing parenthesis
|
|
1554
|
+
# example: '(X0)(1-10,21-25)' from 1a34
|
|
1431
1555
|
expressions_per_step = expression.replace(")", "").split("(")
|
|
1432
1556
|
expressions_per_step = [e for e in expressions_per_step if len(e) > 0]
|
|
1433
1557
|
# Important: Operations are applied from right to left
|
|
1434
1558
|
expressions_per_step.reverse()
|
|
1435
1559
|
|
|
1436
1560
|
operations = []
|
|
1437
|
-
for
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1561
|
+
for one_step_expr in expressions_per_step:
|
|
1562
|
+
one_step_op_ids = []
|
|
1563
|
+
for expr in one_step_expr.split(","):
|
|
1564
|
+
if "-" in expr:
|
|
1565
|
+
# Range of operation IDs, they must be integers
|
|
1566
|
+
first, last = expr.split("-")
|
|
1567
|
+
one_step_op_ids.extend(
|
|
1568
|
+
[str(id) for id in range(int(first), int(last) + 1)]
|
|
1569
|
+
)
|
|
1570
|
+
else:
|
|
1571
|
+
# Single operation ID
|
|
1572
|
+
one_step_op_ids.append(expr)
|
|
1573
|
+
operations.append(one_step_op_ids)
|
|
1450
1574
|
|
|
1451
1575
|
# Cartesian product of operations
|
|
1452
1576
|
return list(itertools.product(*operations))
|