biotite 0.39.0__cp310-cp310-win_amd64.whl → 0.40.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (104) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/rcsb/download.py +19 -14
  4. biotite/sequence/align/banded.c +258 -237
  5. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  6. biotite/sequence/align/kmeralphabet.c +243 -222
  7. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  8. biotite/sequence/align/kmersimilarity.c +215 -196
  9. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  10. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  11. biotite/sequence/align/kmertable.cpp +233 -205
  12. biotite/sequence/align/localgapped.c +258 -237
  13. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  14. biotite/sequence/align/localungapped.c +235 -214
  15. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  16. biotite/sequence/align/multiple.c +255 -234
  17. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  18. biotite/sequence/align/pairwise.c +274 -253
  19. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  20. biotite/sequence/align/permutation.c +215 -196
  21. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  22. biotite/sequence/align/selector.c +217 -197
  23. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  24. biotite/sequence/align/tracetable.c +215 -195
  25. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  26. biotite/sequence/codec.c +235 -214
  27. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  28. biotite/sequence/phylo/nj.c +215 -196
  29. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  30. biotite/sequence/phylo/tree.c +227 -202
  31. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  32. biotite/sequence/phylo/upgma.c +215 -196
  33. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  34. biotite/structure/basepairs.py +7 -12
  35. biotite/structure/bonds.c +1175 -1226
  36. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  37. biotite/structure/celllist.c +217 -197
  38. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  39. biotite/structure/charges.c +1052 -1101
  40. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  41. biotite/structure/filter.py +30 -37
  42. biotite/structure/info/__init__.py +5 -8
  43. biotite/structure/info/atoms.py +25 -67
  44. biotite/structure/info/bonds.py +46 -100
  45. biotite/structure/info/ccd/README.rst +8 -0
  46. biotite/structure/info/ccd/amino_acids.txt +1646 -0
  47. biotite/structure/info/ccd/carbohydrates.txt +1133 -0
  48. biotite/structure/info/ccd/components.bcif +0 -0
  49. biotite/structure/info/ccd/nucleotides.txt +797 -0
  50. biotite/structure/info/ccd.py +95 -0
  51. biotite/structure/info/groups.py +90 -0
  52. biotite/structure/info/masses.py +21 -20
  53. biotite/structure/info/misc.py +11 -22
  54. biotite/structure/info/standardize.py +17 -12
  55. biotite/structure/io/__init__.py +2 -4
  56. biotite/structure/io/ctab.py +1 -1
  57. biotite/structure/io/general.py +37 -43
  58. biotite/structure/io/mmtf/__init__.py +3 -0
  59. biotite/structure/io/mmtf/convertarray.c +219 -198
  60. biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
  61. biotite/structure/io/mmtf/convertfile.c +217 -197
  62. biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
  63. biotite/structure/io/mmtf/decode.c +225 -204
  64. biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
  65. biotite/structure/io/mmtf/encode.c +215 -196
  66. biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
  67. biotite/structure/io/mmtf/file.py +34 -26
  68. biotite/structure/io/npz/__init__.py +3 -0
  69. biotite/structure/io/npz/file.py +21 -18
  70. biotite/structure/io/pdb/__init__.py +3 -3
  71. biotite/structure/io/pdb/file.py +5 -3
  72. biotite/structure/io/pdb/hybrid36.c +63 -43
  73. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  74. biotite/structure/io/pdbqt/file.py +32 -32
  75. biotite/structure/io/pdbx/__init__.py +13 -6
  76. biotite/structure/io/pdbx/bcif.py +649 -0
  77. biotite/structure/io/pdbx/cif.py +1028 -0
  78. biotite/structure/io/pdbx/component.py +243 -0
  79. biotite/structure/io/pdbx/convert.py +707 -359
  80. biotite/structure/io/pdbx/encoding.c +112813 -0
  81. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  82. biotite/structure/io/pdbx/error.py +14 -0
  83. biotite/structure/io/pdbx/legacy.py +267 -0
  84. biotite/structure/molecules.py +151 -151
  85. biotite/structure/sasa.c +215 -196
  86. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  87. biotite/structure/superimpose.py +158 -115
  88. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/METADATA +2 -2
  89. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/RECORD +92 -90
  90. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/WHEEL +1 -1
  91. biotite/structure/info/amino_acids.json +0 -1556
  92. biotite/structure/info/amino_acids.py +0 -42
  93. biotite/structure/info/carbohydrates.json +0 -1122
  94. biotite/structure/info/carbohydrates.py +0 -39
  95. biotite/structure/info/intra_bonds.msgpack +0 -0
  96. biotite/structure/info/link_types.msgpack +0 -1
  97. biotite/structure/info/nucleotides.json +0 -772
  98. biotite/structure/info/nucleotides.py +0 -39
  99. biotite/structure/info/residue_masses.msgpack +0 -0
  100. biotite/structure/info/residue_names.msgpack +0 -3
  101. biotite/structure/info/residues.msgpack +0 -0
  102. biotite/structure/io/pdbx/file.py +0 -652
  103. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/LICENSE.rst +0 -0
  104. {biotite-0.39.0.dist-info → biotite-0.40.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,95 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Patrick Kunzmann"
7
+ __all__ = ["get_ccd", "get_from_ccd"]
8
+
9
+ from pathlib import Path
10
+ import numpy as np
11
+
12
+
13
+ CCD_DIR = Path(__file__).parent / "ccd"
14
+ INDEX_COLUMN_NAME = {
15
+ "chem_comp": "id",
16
+ "chem_comp_atom": "comp_id",
17
+ "chem_comp_bond": "comp_id",
18
+ }
19
+
20
+ _ccd_block = None
21
+ # For each category this index gives the start and stop for each residue
22
+ _residue_index = {}
23
+
24
+
25
def get_ccd():
    """
    Get the PDB *Chemical Component Dictionary* (CCD).

    The bundled ``components.bcif`` file is read on the first call only.
    The result is cached in the module-level ``_ccd_block`` and handed
    out directly on every later call.

    Returns
    -------
    ccd : BinaryCIFBlock
        The ``block`` of the CCD file, i.e. the object that is indexed
        with category names such as ``"chem_comp"``.
        Note that this is not the :class:`BinaryCIFFile` itself.
    """
    # Avoid circular import
    from ..io.pdbx.bcif import BinaryCIFFile

    global _ccd_block
    if _ccd_block is None:
        # Load CCD once and cache it for subsequent calls
        _ccd_block = BinaryCIFFile.read(CCD_DIR / "components.bcif").block
    return _ccd_block
42
+
43
+
44
def get_from_ccd(category_name, comp_id, column_name=None):
    """
    Look up the rows that belong to a single residue in one category
    of the PDB *Chemical Component Dictionary* (CCD).

    Parameters
    ----------
    category_name : str
        Name of the CCD category to search in.
    comp_id : str
        Identifier of the residue, i.e. its ``res_name``.
    column_name : str, optional
        Restrict the result to this column.
        If None, every column of the category is returned in a
        dictionary.
        By default None.

    Returns
    -------
    value : ndarray or dict or None
        Either the array of the requested column or a dictionary
        mapping each column name to its array.
        ``None`` if the category has no rows for `comp_id`.
    """
    category = get_ccd()[category_name]

    if category_name not in _residue_index:
        # Build the row index of this category lazily on first access
        id_column = category[INDEX_COLUMN_NAME[category_name]].as_array()
        _residue_index[category_name] = _index_residues(id_column)

    row_range = _residue_index[category_name].get(comp_id)
    if row_range is None:
        return None
    start, stop = row_range

    if column_name is not None:
        return category[column_name].as_array()[start:stop]
    return {
        key: category[key].as_array()[start:stop]
        for key in category.keys()
    }
85
+
86
+
87
def _index_residues(id_column):
    """
    Map each residue identifier to the row range it occupies.

    Parameters
    ----------
    id_column : ndarray
        The residue identifier of each row.
        Rows of the same residue are expected to be contiguous; if an
        identifier appears in several separate runs, the last run wins.

    Returns
    -------
    index : dict of (str -> tuple(int, int))
        For each residue identifier the start (inclusive) and stop
        (exclusive) row index.
        Empty if `id_column` is empty.
    """
    if len(id_column) == 0:
        # Without this guard the first row would be accessed below
        return {}

    # A new residue starts wherever two neighboring rows differ
    boundaries = np.where(id_column[:-1] != id_column[1:])[0] + 1
    # The final boundary is the exclusive stop of the last residue
    boundaries = np.concatenate(
        ([0], boundaries, [len(id_column)])
    ).tolist()
    return {
        id_column[start].item(): (start, stop)
        for start, stop in zip(boundaries[:-1], boundaries[1:])
    }
@@ -0,0 +1,90 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ __name__ = "biotite.structure.info"
6
+ __author__ = "Tom David Müller, Patrick Kunzmann"
7
+ __all__ = ["amino_acid_names", "nucleotide_names", "carbohydrate_names"]
8
+
9
+ from pathlib import Path
10
+ import copy
11
+
12
+
13
+ CCD_DIR = Path(__file__).parent / "ccd"
14
+
15
+
16
+ group_lists = {}
17
+
18
+
19
def amino_acid_names():
    """
    Get a tuple of amino acid three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    amino_acid_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        peptide monomers.

    References
    ----------

    .. footbibliography::
    """
    return _get_group_members("amino_acids")
39
+
40
+
41
def nucleotide_names():
    """
    Get a tuple of nucleotide three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    nucleotide_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        DNA/RNA monomers.

    References
    ----------

    .. footbibliography::
    """
    return _get_group_members("nucleotides")
61
+
62
+
63
def carbohydrate_names():
    """
    Get a tuple of carbohydrate three-letter codes according to the
    PDB *Chemical Component Dictionary* :footcite:`Westbrook2015`.

    Returns
    -------
    carbohydrate_names : tuple of str
        A tuple of three-letter-codes containing residues that are
        saccharide monomers.

    References
    ----------

    .. footbibliography::
    """
    return _get_group_members("carbohydrates")
83
+
84
+
85
def _get_group_members(group_name):
    """
    Get the residue names listed in the ``<group_name>.txt`` file of
    the CCD directory.

    The file is read only once per group; the resulting tuple is kept
    in the module-level ``group_lists`` cache.

    Parameters
    ----------
    group_name : str
        The base name of the group file, without the ``.txt`` suffix.

    Returns
    -------
    members : tuple of str
        The whitespace-separated entries of the group file.
    """
    members = group_lists.get(group_name)
    if members is not None:
        return members

    members_path = CCD_DIR / f"{group_name}.txt"
    with open(members_path, "r") as file:
        members = tuple(file.read().split())
    group_lists[group_name] = members
    return members
@@ -7,20 +7,14 @@ __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["mass"]
8
8
 
9
9
  import json
10
- from os.path import join, dirname, realpath
11
- import msgpack
10
+ from pathlib import Path
12
11
  from ..atoms import Atom, AtomArray, AtomArrayStack
12
+ from .ccd import get_from_ccd
13
13
 
14
14
 
15
- _info_dir = dirname(realpath(__file__))
16
15
  # Masses are taken from http://www.sbcs.qmul.ac.uk/iupac/AtWt/ (2018/03/01)
17
- with open(join(_info_dir, "atom_masses.json"), "r") as file:
18
- _atom_masses = json.load(file)
19
- # Masses are taken from
20
- # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
21
- # (2019/01/27)
22
- with open(join(_info_dir, "residue_masses.msgpack"), "rb") as file:
23
- _res_masses = msgpack.load(file, raw=False)
16
+ ATOM_MASSES_FILE = Path(__file__).parent / "atom_masses.json"
17
+ _atom_masses = None
24
18
 
25
19
 
26
20
  def mass(item, is_residue=None):
@@ -34,7 +28,7 @@ def mass(item, is_residue=None):
34
28
  from the molecule.
35
29
  For example non-terminal residues in a protein or nucleotide chain
36
30
  miss the mass of a water molecule.
37
-
31
+
38
32
  Parameters
39
33
  ----------
40
34
  item : str or Atom or AtomArray or AtomArrayStack
@@ -50,17 +44,17 @@ def mass(item, is_residue=None):
50
44
  If set to false, the string is strictly interpreted as element.
51
45
  By default the string will be interpreted as element at first
52
46
  and secondly as residue name, if the element is unknown.
53
-
47
+
54
48
  Returns
55
49
  -------
56
50
  mass : float or None
57
51
  The mass of the given object in *u*. None if the mass is unknown.
58
-
52
+
59
53
  References
60
54
  ----------
61
-
55
+
62
56
  .. footbibliography::
63
-
57
+
64
58
  Examples
65
59
  --------
66
60
 
@@ -94,29 +88,36 @@ def mass(item, is_residue=None):
94
88
  >>> print(mass("N"))
95
89
  14.007
96
90
  """
91
+ global _atom_masses
92
+ with open(ATOM_MASSES_FILE, "r") as file:
93
+ _atom_masses = json.load(file)
97
94
 
98
95
  if isinstance(item, str):
99
96
  if is_residue is None:
100
97
  result_mass = _atom_masses.get(item.upper())
101
98
  if result_mass is None:
102
- result_mass = _res_masses.get(item.upper())
99
+ result_mass = get_from_ccd(
100
+ "chem_comp", item.upper(), "formula_weight"
101
+ ).item()
103
102
  elif not is_residue:
104
103
  result_mass = _atom_masses.get(item.upper())
105
104
  else:
106
- result_mass = _res_masses.get(item.upper())
107
-
105
+ result_mass = get_from_ccd(
106
+ "chem_comp", item.upper(), "formula_weight"
107
+ ).item()
108
+
108
109
  elif isinstance(item, Atom):
109
110
  result_mass = mass(item.element, is_residue=False)
110
111
  elif isinstance(item, AtomArray) or isinstance(item, AtomArrayStack):
111
112
  result_mass = sum(
112
113
  (mass(element, is_residue=False) for element in item.element)
113
114
  )
114
-
115
+
115
116
  else:
116
117
  raise TypeError(
117
118
  f"Cannot calculate mass for {type(item).__name__} objects"
118
119
  )
119
-
120
+
120
121
  if result_mass is None:
121
122
  raise KeyError(f"{item} is not known")
122
123
  return result_mass
@@ -6,37 +6,26 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["all_residues", "full_name", "link_type"]
8
8
 
9
- from os.path import join, dirname, realpath
10
- import msgpack
11
-
12
-
13
- _info_dir = dirname(realpath(__file__))
14
- # Data is taken from
15
- # ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif
16
- # (2019/01/27)
17
- with open(join(_info_dir, "residue_names.msgpack"), "rb") as file:
18
- _res_names = msgpack.load(file, raw=False)
19
- with open(join(_info_dir, "link_types.msgpack"), "rb") as file:
20
- _link_types = msgpack.load(file, raw=False)
9
+ from .ccd import get_ccd, get_from_ccd
21
10
 
22
11
 
23
12
  def all_residues():
24
13
  """
25
14
  Get a list of all residues/compound names in the
26
15
  PDB chemical components dictionary.
27
-
16
+
28
17
  Returns
29
18
  -------
30
19
  residues : list of str
31
20
  A list of all available residue names, each up to 3 letters long.
32
-
21
+
33
22
  Examples
34
23
  --------
35
24
 
36
25
  >>> print(all_residues()[1000 : 1010])
37
- ['0Y4', '0Y5', '0Y7', '0Y8', '0Y9', '0YA', '0YB', '0YC', '0YD', '0YE']
26
+ ['0V9', '0VA', '0VB', '0VC', '0VD', '0VE', '0VF', '0VG', '0VH', '0VI']
38
27
  """
39
- return list(_res_names.keys())
28
+ return get_ccd()["chem_comp"]["id"].as_array().tolist()
40
29
 
41
30
 
42
31
  def full_name(res_name):
@@ -48,19 +37,19 @@ def full_name(res_name):
48
37
  ----------
49
38
  res_name : str
50
39
  The up to 3-letter residue name.
51
-
40
+
52
41
  Returns
53
42
  -------
54
43
  name : str
55
44
  The full name of the residue.
56
-
45
+
57
46
  Examples
58
47
  --------
59
48
 
60
49
  >>> print(full_name("MAN"))
61
50
  alpha-D-mannopyranose
62
51
  """
63
- return _res_names.get(res_name.upper())
52
+ return get_from_ccd("chem_comp", res_name.upper(), "name").item()
64
53
 
65
54
 
66
55
  def link_type(res_name):
@@ -72,12 +61,12 @@ def link_type(res_name):
72
61
  ----------
73
62
  res_name : str
74
63
  The up to 3-letter residue name.
75
-
64
+
76
65
  Returns
77
66
  -------
78
67
  link_type : str
79
68
  The link type.
80
-
69
+
81
70
  Examples
82
71
  --------
83
72
 
@@ -88,4 +77,4 @@ def link_type(res_name):
88
77
  >>> print(link_type("HOH"))
89
78
  NON-POLYMER
90
79
  """
91
- return _link_types.get(res_name.upper())
80
+ return get_from_ccd("chem_comp", res_name.upper(), "type").item()
@@ -6,15 +6,13 @@ __name__ = "biotite.structure.info"
6
6
  __author__ = "Patrick Kunzmann"
7
7
  __all__ = ["standardize_order"]
8
8
 
9
+ import warnings
9
10
  import numpy as np
10
- from .atoms import residue
11
+ from .ccd import get_from_ccd
11
12
  from ..residues import get_residue_starts
12
13
  from ..error import BadStructureError
13
14
 
14
15
 
15
- _atom_name_cache = {}
16
-
17
-
18
16
  def standardize_order(atoms):
19
17
  """
20
18
  Get an index array for an input :class:`AtomArray` or
@@ -34,20 +32,20 @@ def standardize_order(atoms):
34
32
  atoms : AtomArray, shape=(n,) or AtomArrayStack, shape=(m,n)
35
33
  Input structure with atoms that are potentially not in the
36
34
  *standard* order.
37
-
35
+
38
36
  Returns
39
37
  -------
40
38
  indices : ndarray, dtype=int, shape=(n,)
41
39
  When this index array is applied on the input `atoms`,
42
40
  the atoms for each residue are reordered to obtain the
43
41
  standard *RCSB PDB* atom order.
44
-
42
+
45
43
  Raises
46
44
  ------
47
45
  BadStructureError
48
46
  If the input `atoms` have duplicate atoms (same atom name)
49
47
  within a residue.
50
-
48
+
51
49
  Examples
52
50
  --------
53
51
 
@@ -123,11 +121,18 @@ def standardize_order(atoms):
123
121
  stop = starts[i+1]
124
122
 
125
123
  res_name = atoms.res_name[start]
126
- standard_atom_names = _atom_name_cache.get(res_name)
124
+ standard_atom_names = get_from_ccd(
125
+ "chem_comp_atom", res_name, "atom_id"
126
+ )
127
127
  if standard_atom_names is None:
128
- standard_atom_names = residue(res_name).atom_name
129
- _atom_name_cache[res_name] = standard_atom_names
130
-
128
+ # If the residue is not in the CCD, keep the current order
129
+ warnings.warn(
130
+ f"Residue '{res_name}' is not in the CCD, "
131
+ f"keeping current atom order"
132
+ )
133
+ reordered_indices[start : stop] = np.arange(start, stop)
134
+ continue
135
+
131
136
  reordered_indices[start : stop] = _reorder(
132
137
  atoms.atom_name[start : stop], standard_atom_names
133
138
  ) + start
@@ -152,7 +157,7 @@ def _reorder(origin, target):
152
157
  The atom names to reorder.
153
158
  target : ndarray, dtype=str
154
159
  The atom names in target order.
155
-
160
+
156
161
  Returns
157
162
  -------
158
163
  indices : ndarray, dtype=int
@@ -5,7 +5,7 @@
5
5
  """
6
6
  A subpackage for reading and writing structure related data.
7
7
 
8
- Macromolecular structure files (PDB, PDBx/mmCIF, MMTF, etc.) and
8
+ Macromolecular structure files (PDB, PDBx/mmCIF, BinaryCIF, etc.) and
9
9
  small molecule files (MOL, SDF, etc.) can be used
10
10
  to load an :class:`AtomArray` or :class:`AtomArrayStack`.
11
11
 
@@ -15,10 +15,8 @@ only one *altloc* can be chosen for each atom. Hence, the amount of
15
15
  atoms may be lower in the atom array (stack) than in respective
16
16
  structure file.
17
17
 
18
- The recommended format for reading structure files is MMTF.
18
+ The recommended format for reading structure files is *BinaryCIF*.
19
19
  It has by far the shortest parsing time and file size.
20
- Furthermore, chemical bond information can be read from MMTF files
21
- as :class:`BondList` instances.
22
20
 
23
21
  Besides the mentioned structure formats, Gromacs trajectory files can be
24
22
  loaded, if `mdtraj` is installed.
@@ -13,7 +13,7 @@ __all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
13
13
 
14
14
  import warnings
15
15
  import numpy as np
16
- from biotite.structure.error import BadStructureError
16
+ from ..error import BadStructureError
17
17
  from ..atoms import AtomArray, AtomArrayStack
18
18
  from ..bonds import BondList, BondType
19
19
 
@@ -21,12 +21,12 @@ def load_structure(file_path, template=None, **kwargs):
21
21
  Load an :class:`AtomArray` or :class:`AtomArrayStack` from a structure
22
22
  file without the need to manually instantiate a :class:`File`
23
23
  object.
24
-
24
+
25
25
  Internally this function uses a :class:`File` object, based on the
26
26
  file extension.
27
27
  Trajectory files furthermore require specification of the `template`
28
28
  parameter.
29
-
29
+
30
30
  Parameters
31
31
  ----------
32
32
  file_path : str
@@ -40,13 +40,13 @@ def load_structure(file_path, template=None, **kwargs):
40
40
  This does not affect files given via the `template` parameter.
41
41
  The only exception is the `atom_i`, which is applied to the template
42
42
  as well if number of atoms do not match.
43
-
43
+
44
44
  Returns
45
45
  -------
46
46
  array : AtomArray or AtomArrayStack
47
47
  If the file contains multiple models, an AtomArrayStack is
48
48
  returned, otherwise an AtomArray is returned.
49
-
49
+
50
50
  Raises
51
51
  ------
52
52
  ValueError
@@ -65,56 +65,37 @@ def load_structure(file_path, template=None, **kwargs):
65
65
  from .pdb import PDBFile
66
66
  file = PDBFile.read(file_path)
67
67
  array = file.get_structure(**kwargs)
68
- if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
69
- # Stack containing only one model -> return as atom array
70
- return array[0]
71
- else:
72
- return array
68
+ return _as_single_model_if_possible(array)
73
69
  elif suffix == ".pdbqt":
74
70
  from .pdbqt import PDBQTFile
75
71
  file = PDBQTFile.read(file_path)
76
72
  array = file.get_structure(**kwargs)
77
- if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
78
- # Stack containing only one model -> return as atom array
79
- return array[0]
80
- else:
81
- return array
73
+ return _as_single_model_if_possible(array)
82
74
  elif suffix == ".cif" or suffix == ".pdbx":
83
- from .pdbx import PDBxFile, get_structure
84
- file = PDBxFile.read(file_path)
75
+ from .pdbx import CIFFile, get_structure
76
+ file = CIFFile.read(file_path)
85
77
  array = get_structure(file, **kwargs)
86
- if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
87
- # Stack containing only one model -> return as atom array
88
- return array[0]
89
- else:
90
- return array
78
+ return _as_single_model_if_possible(array)
79
+ elif suffix == ".bcif":
80
+ from .pdbx import BinaryCIFFile, get_structure
81
+ file = BinaryCIFFile.read(file_path)
82
+ array = get_structure(file, **kwargs)
83
+ return _as_single_model_if_possible(array)
91
84
  elif suffix == ".gro":
92
85
  from .gro import GROFile
93
86
  file = GROFile.read(file_path)
94
87
  array = file.get_structure(**kwargs)
95
- if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
96
- # Stack containing only one model -> return as atom array
97
- return array[0]
98
- else:
99
- return array
88
+ return _as_single_model_if_possible(array)
100
89
  elif suffix == ".mmtf":
101
90
  from .mmtf import MMTFFile, get_structure
102
91
  file = MMTFFile.read(file_path)
103
92
  array = get_structure(file, **kwargs)
104
- if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
105
- # Stack containing only one model -> return as atom array
106
- return array[0]
107
- else:
108
- return array
93
+ return _as_single_model_if_possible(array)
109
94
  elif suffix == ".npz":
110
95
  from .npz import NpzFile
111
96
  file = NpzFile.read(file_path)
112
97
  array = file.get_structure(**kwargs)
113
- if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
114
- # Stack containing only one model -> return as atom array
115
- return array[0]
116
- else:
117
- return array
98
+ return _as_single_model_if_possible(array)
118
99
  elif suffix == ".mol" or suffix == ".sdf":
119
100
  from .mol import MOLFile
120
101
  file = MOLFile.read(file_path)
@@ -153,10 +134,10 @@ def save_structure(file_path, array, **kwargs):
153
134
  Save an :class:`AtomArray` or :class:`AtomArrayStack` to a structure
154
135
  file without the need to manually instantiate a :class:`File`
155
136
  object.
156
-
137
+
157
138
  Internally this function uses a :class:`File` object, based on the
158
139
  file extension.
159
-
140
+
160
141
  Parameters
161
142
  ----------
162
143
  file_path : str
@@ -185,9 +166,14 @@ def save_structure(file_path, array, **kwargs):
185
166
  file.set_structure(array, **kwargs)
186
167
  file.write(file_path)
187
168
  elif suffix == ".cif" or suffix == ".pdbx":
188
- from .pdbx import PDBxFile, set_structure
189
- file = PDBxFile()
190
- set_structure(file, array, data_block="STRUCTURE", **kwargs)
169
+ from .pdbx import CIFFile, set_structure
170
+ file = CIFFile()
171
+ set_structure(file, array, **kwargs)
172
+ file.write(file_path)
173
+ elif suffix == ".bcif":
174
+ from .pdbx import BinaryCIFFile, set_structure
175
+ file = BinaryCIFFile()
176
+ set_structure(file, array, **kwargs)
191
177
  file.write(file_path)
192
178
  elif suffix == ".gro":
193
179
  from .gro import GROFile
@@ -232,8 +218,16 @@ def save_structure(file_path, array, **kwargs):
232
218
  raise ValueError(f"Unknown file format '{suffix}'")
233
219
 
234
220
 
221
def _as_single_model_if_possible(atoms):
    """
    Unwrap a stack that holds exactly one model.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure to be checked.

    Returns
    -------
    atoms : AtomArray or AtomArrayStack
        The first (and only) model, if the input is an
        :class:`AtomArrayStack` with a stack depth of 1.
        Otherwise the input is returned unchanged.
    """
    is_single_model_stack = (
        isinstance(atoms, AtomArrayStack) and atoms.stack_depth() == 1
    )
    return atoms[0] if is_single_model_stack else atoms
227
+
228
+
235
229
  # Helper function to estimate elements from atom names
236
- _elements = [elem.upper() for elem in
230
+ _elements = [elem.upper() for elem in
237
231
  ["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
238
232
  "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe",
239
233
  "Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
@@ -268,4 +262,4 @@ def _guess_element(atom_name):
268
262
  pass
269
263
 
270
264
  return ""
271
-
265
+
@@ -7,6 +7,9 @@ This subpackage is used for reading and writing an :class:`AtomArray` or
7
7
  :class:`AtomArrayStack` using the binary MMTF format. This format
8
8
  features a smaller file size and a highly increased I/O operation
9
9
  performance, than the text based file formats.
10
+
11
+ DEPRECATED: Use :class:`biotite.structure.io.pdbx.BinaryCIFFile`
12
+ instead.
10
13
  """
11
14
 
12
15
  __name__ = "biotite.structure.io.mmtf"