biotite 0.39.0__cp311-cp311-win_amd64.whl → 0.41.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (121) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/pubchem/download.py +23 -23
  4. biotite/database/pubchem/query.py +7 -7
  5. biotite/database/rcsb/download.py +19 -14
  6. biotite/file.py +17 -9
  7. biotite/sequence/align/banded.c +258 -237
  8. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  9. biotite/sequence/align/cigar.py +60 -15
  10. biotite/sequence/align/kmeralphabet.c +243 -222
  11. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmersimilarity.c +215 -196
  13. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  14. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  15. biotite/sequence/align/kmertable.cpp +233 -205
  16. biotite/sequence/align/localgapped.c +258 -237
  17. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  18. biotite/sequence/align/localungapped.c +235 -214
  19. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  20. biotite/sequence/align/multiple.c +255 -234
  21. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  22. biotite/sequence/align/pairwise.c +274 -253
  23. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  24. biotite/sequence/align/permutation.c +215 -196
  25. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  26. biotite/sequence/align/selector.c +217 -197
  27. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  28. biotite/sequence/align/tracetable.c +215 -195
  29. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  30. biotite/sequence/annotation.py +2 -2
  31. biotite/sequence/codec.c +235 -214
  32. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  33. biotite/sequence/io/fasta/convert.py +27 -24
  34. biotite/sequence/phylo/nj.c +215 -196
  35. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/tree.c +227 -202
  37. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  38. biotite/sequence/phylo/upgma.c +215 -196
  39. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  40. biotite/structure/__init__.py +2 -0
  41. biotite/structure/basepairs.py +7 -12
  42. biotite/structure/bonds.c +1437 -1279
  43. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  44. biotite/structure/celllist.c +217 -197
  45. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  46. biotite/structure/charges.c +1052 -1101
  47. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  48. biotite/structure/dotbracket.py +2 -0
  49. biotite/structure/filter.py +30 -37
  50. biotite/structure/info/__init__.py +5 -8
  51. biotite/structure/info/atoms.py +31 -68
  52. biotite/structure/info/bonds.py +47 -101
  53. biotite/structure/info/ccd/README.rst +8 -0
  54. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  55. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  56. biotite/structure/info/ccd/components.bcif +0 -0
  57. biotite/structure/info/ccd/nucleotides.txt +798 -0
  58. biotite/structure/info/ccd.py +95 -0
  59. biotite/structure/info/groups.py +90 -0
  60. biotite/structure/info/masses.py +21 -20
  61. biotite/structure/info/misc.py +78 -25
  62. biotite/structure/info/standardize.py +17 -12
  63. biotite/structure/integrity.py +19 -70
  64. biotite/structure/io/__init__.py +2 -4
  65. biotite/structure/io/ctab.py +12 -106
  66. biotite/structure/io/general.py +167 -181
  67. biotite/structure/io/gro/file.py +16 -16
  68. biotite/structure/io/mmtf/__init__.py +3 -0
  69. biotite/structure/io/mmtf/convertarray.c +219 -198
  70. biotite/structure/io/mmtf/convertarray.cp311-win_amd64.pyd +0 -0
  71. biotite/structure/io/mmtf/convertfile.c +217 -197
  72. biotite/structure/io/mmtf/convertfile.cp311-win_amd64.pyd +0 -0
  73. biotite/structure/io/mmtf/decode.c +225 -204
  74. biotite/structure/io/mmtf/decode.cp311-win_amd64.pyd +0 -0
  75. biotite/structure/io/mmtf/encode.c +215 -196
  76. biotite/structure/io/mmtf/encode.cp311-win_amd64.pyd +0 -0
  77. biotite/structure/io/mmtf/file.py +34 -26
  78. biotite/structure/io/mol/__init__.py +4 -2
  79. biotite/structure/io/mol/convert.py +71 -7
  80. biotite/structure/io/mol/ctab.py +414 -0
  81. biotite/structure/io/mol/header.py +116 -0
  82. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  83. biotite/structure/io/mol/sdf.py +909 -0
  84. biotite/structure/io/npz/__init__.py +3 -0
  85. biotite/structure/io/npz/file.py +21 -18
  86. biotite/structure/io/pdb/__init__.py +3 -3
  87. biotite/structure/io/pdb/file.py +89 -34
  88. biotite/structure/io/pdb/hybrid36.c +63 -43
  89. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  90. biotite/structure/io/pdbqt/file.py +32 -32
  91. biotite/structure/io/pdbx/__init__.py +12 -6
  92. biotite/structure/io/pdbx/bcif.py +648 -0
  93. biotite/structure/io/pdbx/cif.py +1032 -0
  94. biotite/structure/io/pdbx/component.py +246 -0
  95. biotite/structure/io/pdbx/convert.py +858 -386
  96. biotite/structure/io/pdbx/encoding.c +112813 -0
  97. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  98. biotite/structure/io/pdbx/legacy.py +267 -0
  99. biotite/structure/molecules.py +151 -151
  100. biotite/structure/repair.py +253 -0
  101. biotite/structure/sasa.c +215 -196
  102. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  103. biotite/structure/sequence.py +112 -0
  104. biotite/structure/superimpose.py +618 -116
  105. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
  106. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
  107. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
  108. biotite/structure/info/amino_acids.json +0 -1556
  109. biotite/structure/info/amino_acids.py +0 -42
  110. biotite/structure/info/carbohydrates.json +0 -1122
  111. biotite/structure/info/carbohydrates.py +0 -39
  112. biotite/structure/info/intra_bonds.msgpack +0 -0
  113. biotite/structure/info/link_types.msgpack +0 -1
  114. biotite/structure/info/nucleotides.json +0 -772
  115. biotite/structure/info/nucleotides.py +0 -39
  116. biotite/structure/info/residue_masses.msgpack +0 -0
  117. biotite/structure/info/residue_names.msgpack +0 -3
  118. biotite/structure/info/residues.msgpack +0 -0
  119. biotite/structure/io/pdbx/file.py +0 -652
  120. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  121. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
Binary file
@@ -0,0 +1,112 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Function for converting a structure into a sequence.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["to_sequence"]
12
+
13
+ import numpy as np
14
+ from .info.misc import one_letter_code
15
+ from .info.groups import amino_acid_names, nucleotide_names
16
+ from .residues import get_residues
17
+ from .chains import get_chain_starts
18
+ from .error import BadStructureError
19
+ from ..sequence.seqtypes import ProteinSequence, NucleotideSequence
20
+
21
+
22
+ HETERO_PLACEHOLDER = "."
23
+
24
+
25
def to_sequence(atoms, allow_hetero=False):
    """
    Convert each chain in a structure into a sequence.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure.
        May contain multiple chains.
        Each chain must be either a peptide or a nucleic acid.
    allow_hetero : bool, optional
        If true, residues inside an amino acid or nucleotide chain,
        that have no one-letter code, are replaced by the respective
        '*any*' symbol (`"X"` or `"N"`, respectively).
        The same is true for amino acids in nucleotide chains and vice
        versa.
        By default, an exception is raised.

    Returns
    -------
    sequences : list of Sequence, length=n
        The sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    Raises
    ------
    BadStructureError
        If a chain contains neither amino acids nor nucleotides, or if
        `allow_hetero` is false and a chain contains a residue that
        does not match the chain type or has no one-letter code.

    Notes
    -----
    Residues are considered amino acids or nucleotides based on their
    appearance in :func:`info.amino_acid_names()` or
    :func:`info.nucleotide_names()`, respectively.
    A chain is treated as peptide, if it contains more amino acids than
    nucleotides, otherwise it is treated as nucleic acid.

    Examples
    --------

    >>> sequences, chain_starts = to_sequence(atom_array)
    >>> print(sequences)
    [ProteinSequence("NLYIQWLKDGGPSSGRPPPS")]

    """
    # The residue name collections do not depend on the chain:
    # Obtain them once instead of repeatedly inside the loop
    aa_names = amino_acid_names()
    nuc_names = nucleotide_names()

    sequences = []
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        # The leading ellipsis lets the slice address the atom dimension
        # for an AtomArray as well as for an AtomArrayStack
        # (a plain slice would select models of a stack instead)
        chain = atoms[..., start:stop]
        _, residues = get_residues(chain)
        one_letter_symbols = np.array(
            [one_letter_code(res) or HETERO_PLACEHOLDER for res in residues]
        )
        # Residues without one-letter code are hetero in any chain type
        hetero_mask = one_letter_symbols == HETERO_PLACEHOLDER

        aa_mask = np.isin(residues, aa_names)
        nuc_mask = np.isin(residues, nuc_names)
        aa_count = np.count_nonzero(aa_mask)
        nuc_count = np.count_nonzero(nuc_mask)
        if aa_count == 0 and nuc_count == 0:
            raise BadStructureError(
                f"Chain {chain.chain_id[0]} contains neither amino acids "
                "nor nucleotides"
            )
        elif aa_count > nuc_count:
            # Chain is a peptide
            hetero_mask |= ~aa_mask
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in peptide"
                )
            one_letter_symbols[hetero_mask] = "X"
            # Replace selenocysteine and pyrrolysine
            # This must happen after masking hetero residues, so that
            # e.g. an uracil nucleotide ('U') becomes 'X' and not 'C'
            one_letter_symbols[one_letter_symbols == "U"] = "C"
            one_letter_symbols[one_letter_symbols == "O"] = "K"
            sequences.append(ProteinSequence("".join(one_letter_symbols)))
        else:
            # Chain is a nucleic acid
            hetero_mask |= ~nuc_mask
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in nucleic acid"
                )
            one_letter_symbols[hetero_mask] = "N"
            # Replace uracil
            one_letter_symbols[one_letter_symbols == "U"] = "T"
            sequences.append(NucleotideSequence("".join(one_letter_symbols)))

    # Remove exclusive stop
    return sequences, chain_start_indices[:-1]