biotite 0.39.0__cp312-cp312-win_amd64.whl → 0.41.0__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (121) hide show
  1. biotite/__init__.py +3 -3
  2. biotite/application/dssp/app.py +18 -18
  3. biotite/database/pubchem/download.py +23 -23
  4. biotite/database/pubchem/query.py +7 -7
  5. biotite/database/rcsb/download.py +19 -14
  6. biotite/file.py +17 -9
  7. biotite/sequence/align/banded.c +256 -235
  8. biotite/sequence/align/banded.cp312-win_amd64.pyd +0 -0
  9. biotite/sequence/align/cigar.py +60 -15
  10. biotite/sequence/align/kmeralphabet.c +241 -220
  11. biotite/sequence/align/kmeralphabet.cp312-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmersimilarity.c +213 -194
  13. biotite/sequence/align/kmersimilarity.cp312-win_amd64.pyd +0 -0
  14. biotite/sequence/align/kmertable.cp312-win_amd64.pyd +0 -0
  15. biotite/sequence/align/kmertable.cpp +231 -203
  16. biotite/sequence/align/localgapped.c +256 -235
  17. biotite/sequence/align/localgapped.cp312-win_amd64.pyd +0 -0
  18. biotite/sequence/align/localungapped.c +233 -212
  19. biotite/sequence/align/localungapped.cp312-win_amd64.pyd +0 -0
  20. biotite/sequence/align/multiple.c +253 -232
  21. biotite/sequence/align/multiple.cp312-win_amd64.pyd +0 -0
  22. biotite/sequence/align/pairwise.c +272 -251
  23. biotite/sequence/align/pairwise.cp312-win_amd64.pyd +0 -0
  24. biotite/sequence/align/permutation.c +213 -194
  25. biotite/sequence/align/permutation.cp312-win_amd64.pyd +0 -0
  26. biotite/sequence/align/selector.c +215 -195
  27. biotite/sequence/align/selector.cp312-win_amd64.pyd +0 -0
  28. biotite/sequence/align/tracetable.c +213 -193
  29. biotite/sequence/align/tracetable.cp312-win_amd64.pyd +0 -0
  30. biotite/sequence/annotation.py +2 -2
  31. biotite/sequence/codec.c +233 -212
  32. biotite/sequence/codec.cp312-win_amd64.pyd +0 -0
  33. biotite/sequence/io/fasta/convert.py +27 -24
  34. biotite/sequence/phylo/nj.c +213 -194
  35. biotite/sequence/phylo/nj.cp312-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/tree.c +225 -200
  37. biotite/sequence/phylo/tree.cp312-win_amd64.pyd +0 -0
  38. biotite/sequence/phylo/upgma.c +213 -194
  39. biotite/sequence/phylo/upgma.cp312-win_amd64.pyd +0 -0
  40. biotite/structure/__init__.py +2 -0
  41. biotite/structure/basepairs.py +7 -12
  42. biotite/structure/bonds.c +1435 -1277
  43. biotite/structure/bonds.cp312-win_amd64.pyd +0 -0
  44. biotite/structure/celllist.c +215 -195
  45. biotite/structure/celllist.cp312-win_amd64.pyd +0 -0
  46. biotite/structure/charges.c +1050 -1099
  47. biotite/structure/charges.cp312-win_amd64.pyd +0 -0
  48. biotite/structure/dotbracket.py +2 -0
  49. biotite/structure/filter.py +30 -37
  50. biotite/structure/info/__init__.py +5 -8
  51. biotite/structure/info/atoms.py +31 -68
  52. biotite/structure/info/bonds.py +47 -101
  53. biotite/structure/info/ccd/README.rst +8 -0
  54. biotite/structure/info/ccd/amino_acids.txt +1663 -0
  55. biotite/structure/info/ccd/carbohydrates.txt +1135 -0
  56. biotite/structure/info/ccd/components.bcif +0 -0
  57. biotite/structure/info/ccd/nucleotides.txt +798 -0
  58. biotite/structure/info/ccd.py +95 -0
  59. biotite/structure/info/groups.py +90 -0
  60. biotite/structure/info/masses.py +21 -20
  61. biotite/structure/info/misc.py +78 -25
  62. biotite/structure/info/standardize.py +17 -12
  63. biotite/structure/integrity.py +19 -70
  64. biotite/structure/io/__init__.py +2 -4
  65. biotite/structure/io/ctab.py +12 -106
  66. biotite/structure/io/general.py +167 -181
  67. biotite/structure/io/gro/file.py +16 -16
  68. biotite/structure/io/mmtf/__init__.py +3 -0
  69. biotite/structure/io/mmtf/convertarray.c +217 -196
  70. biotite/structure/io/mmtf/convertarray.cp312-win_amd64.pyd +0 -0
  71. biotite/structure/io/mmtf/convertfile.c +215 -195
  72. biotite/structure/io/mmtf/convertfile.cp312-win_amd64.pyd +0 -0
  73. biotite/structure/io/mmtf/decode.c +223 -202
  74. biotite/structure/io/mmtf/decode.cp312-win_amd64.pyd +0 -0
  75. biotite/structure/io/mmtf/encode.c +213 -194
  76. biotite/structure/io/mmtf/encode.cp312-win_amd64.pyd +0 -0
  77. biotite/structure/io/mmtf/file.py +34 -26
  78. biotite/structure/io/mol/__init__.py +4 -2
  79. biotite/structure/io/mol/convert.py +71 -7
  80. biotite/structure/io/mol/ctab.py +414 -0
  81. biotite/structure/io/mol/header.py +116 -0
  82. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  83. biotite/structure/io/mol/sdf.py +909 -0
  84. biotite/structure/io/npz/__init__.py +3 -0
  85. biotite/structure/io/npz/file.py +21 -18
  86. biotite/structure/io/pdb/__init__.py +3 -3
  87. biotite/structure/io/pdb/file.py +89 -34
  88. biotite/structure/io/pdb/hybrid36.c +63 -43
  89. biotite/structure/io/pdb/hybrid36.cp312-win_amd64.pyd +0 -0
  90. biotite/structure/io/pdbqt/file.py +32 -32
  91. biotite/structure/io/pdbx/__init__.py +12 -6
  92. biotite/structure/io/pdbx/bcif.py +648 -0
  93. biotite/structure/io/pdbx/cif.py +1032 -0
  94. biotite/structure/io/pdbx/component.py +246 -0
  95. biotite/structure/io/pdbx/convert.py +858 -386
  96. biotite/structure/io/pdbx/encoding.c +112803 -0
  97. biotite/structure/io/pdbx/encoding.cp312-win_amd64.pyd +0 -0
  98. biotite/structure/io/pdbx/legacy.py +267 -0
  99. biotite/structure/molecules.py +151 -151
  100. biotite/structure/repair.py +253 -0
  101. biotite/structure/sasa.c +213 -194
  102. biotite/structure/sasa.cp312-win_amd64.pyd +0 -0
  103. biotite/structure/sequence.py +112 -0
  104. biotite/structure/superimpose.py +618 -116
  105. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
  106. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
  107. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
  108. biotite/structure/info/amino_acids.json +0 -1556
  109. biotite/structure/info/amino_acids.py +0 -42
  110. biotite/structure/info/carbohydrates.json +0 -1122
  111. biotite/structure/info/carbohydrates.py +0 -39
  112. biotite/structure/info/intra_bonds.msgpack +0 -0
  113. biotite/structure/info/link_types.msgpack +0 -1
  114. biotite/structure/info/nucleotides.json +0 -772
  115. biotite/structure/info/nucleotides.py +0 -39
  116. biotite/structure/info/residue_masses.msgpack +0 -0
  117. biotite/structure/info/residue_names.msgpack +0 -3
  118. biotite/structure/info/residues.msgpack +0 -0
  119. biotite/structure/io/pdbx/file.py +0 -652
  120. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  121. {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
Binary file
@@ -0,0 +1,112 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Function for converting a structure into a sequence.
7
+ """
8
+
9
+ __name__ = "biotite.structure"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["to_sequence"]
12
+
13
+ import numpy as np
14
+ from .info.misc import one_letter_code
15
+ from .info.groups import amino_acid_names, nucleotide_names
16
+ from .residues import get_residues
17
+ from .chains import get_chain_starts
18
+ from .error import BadStructureError
19
+ from ..sequence.seqtypes import ProteinSequence, NucleotideSequence
20
+
21
+
22
+ HETERO_PLACEHOLDER = "."  # Temporary symbol for residues for which one_letter_code() yields no code; replaced by "X"/"N" or reported as hetero residue in to_sequence()
23
+
24
+
25
def to_sequence(atoms, allow_hetero=False):
    """
    Convert each chain in a structure into a sequence.

    Parameters
    ----------
    atoms : AtomArray or AtomArrayStack
        The structure.
        May contain multiple chains.
        Each chain must be either a peptide or a nucleic acid.
    allow_hetero : bool, optional
        If true, residues inside an amino acid or nucleotide chain,
        that have no one-letter code, are replaced by the respective
        '*any*' symbol (`"X"` or `"N"`, respectively).
        The same is true for amino acids in nucleotide chains and vice
        versa.
        By default, an exception is raised.

    Returns
    -------
    sequences : list of Sequence, length=n
        The sequence for each chain in the structure.
    chain_start_indices : ndarray, shape=(n,), dtype=int
        The atom index where each chain starts.

    Raises
    ------
    BadStructureError
        If a chain contains neither amino acids nor nucleotides, or
        if `allow_hetero` is false and a chain contains a residue that
        has no one-letter code or does not match the chain type.

    Notes
    -----
    Residues are considered amino acids or nucleotides based on their
    appearance in :func:`info.amino_acid_names()` or
    :func:`info.nucleotide_names()`, respectively.
    A chain is treated as peptide, if it contains more amino acids than
    nucleotides, otherwise it is treated as nucleic acid.

    Examples
    --------

    >>> sequences, chain_starts = to_sequence(atom_array)
    >>> print(sequences)
    [ProteinSequence("NLYIQWLKDGGPSSGRPPPS")]

    """
    sequences = []
    chain_start_indices = get_chain_starts(atoms, add_exclusive_stop=True)
    for start, stop in zip(chain_start_indices[:-1], chain_start_indices[1:]):
        # Index the atom dimension explicitly:
        # A plain slice of an AtomArrayStack would select models
        # instead of atoms
        chain = atoms[..., start:stop]
        _, residues = get_residues(chain)
        one_letter_symbols = np.array(
            [one_letter_code(res) or HETERO_PLACEHOLDER for res in residues]
        )
        hetero_mask = one_letter_symbols == HETERO_PLACEHOLDER

        # Classify each residue once and reuse the masks below
        aa_mask = np.isin(residues, amino_acid_names())
        nuc_mask = np.isin(residues, nucleotide_names())
        aa_count = np.count_nonzero(aa_mask)
        nuc_count = np.count_nonzero(nuc_mask)

        if aa_count == 0 and nuc_count == 0:
            raise BadStructureError(
                f"Chain {chain.chain_id[0]} contains neither amino acids "
                "nor nucleotides"
            )
        elif aa_count > nuc_count:
            # Chain is a peptide
            # -> every residue that is not an amino acid is hetero as well
            hetero_mask |= ~aa_mask
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in peptide"
                )
            one_letter_symbols[hetero_mask] = "X"
            # Replace selenocysteine and pyrrolysine
            one_letter_symbols[one_letter_symbols == "U"] = "C"
            one_letter_symbols[one_letter_symbols == "O"] = "K"
            sequences.append(ProteinSequence("".join(one_letter_symbols)))
        else:
            # Chain is a nucleic acid
            # -> every residue that is not a nucleotide is hetero as well
            hetero_mask |= ~nuc_mask
            if not allow_hetero and np.any(hetero_mask):
                hetero_indices = np.where(hetero_mask)[0]
                raise BadStructureError(
                    f"Hetero residue(s) "
                    f"{', '.join(residues[hetero_indices])} in nucleic acid"
                )
            one_letter_symbols[hetero_mask] = "N"
            # Replace uracil
            one_letter_symbols[one_letter_symbols == "U"] = "T"
            sequences.append(NucleotideSequence("".join(one_letter_symbols)))

    # Remove exclusive stop
    return sequences, chain_start_indices[:-1]