biotite 0.40.0__cp311-cp311-macosx_11_0_arm64.whl → 0.41.0__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/__init__.py +1 -1
  2. biotite/database/pubchem/download.py +23 -23
  3. biotite/database/pubchem/query.py +7 -7
  4. biotite/file.py +17 -9
  5. biotite/sequence/align/banded.c +119 -119
  6. biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
  7. biotite/sequence/align/cigar.py +60 -15
  8. biotite/sequence/align/kmeralphabet.c +119 -119
  9. biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
  10. biotite/sequence/align/kmersimilarity.c +119 -119
  11. biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
  12. biotite/sequence/align/kmertable.cpp +119 -119
  13. biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
  14. biotite/sequence/align/localgapped.c +119 -119
  15. biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
  16. biotite/sequence/align/localungapped.c +119 -119
  17. biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
  18. biotite/sequence/align/multiple.c +119 -119
  19. biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
  20. biotite/sequence/align/pairwise.c +119 -119
  21. biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
  22. biotite/sequence/align/permutation.c +119 -119
  23. biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
  24. biotite/sequence/align/selector.c +119 -119
  25. biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
  26. biotite/sequence/align/tracetable.c +119 -119
  27. biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
  28. biotite/sequence/annotation.py +2 -2
  29. biotite/sequence/codec.c +119 -119
  30. biotite/sequence/codec.cpython-311-darwin.so +0 -0
  31. biotite/sequence/io/fasta/convert.py +27 -24
  32. biotite/sequence/phylo/nj.c +119 -119
  33. biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
  34. biotite/sequence/phylo/tree.c +119 -119
  35. biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
  36. biotite/sequence/phylo/upgma.c +119 -119
  37. biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
  38. biotite/structure/__init__.py +2 -0
  39. biotite/structure/bonds.c +1124 -915
  40. biotite/structure/bonds.cpython-311-darwin.so +0 -0
  41. biotite/structure/celllist.c +119 -119
  42. biotite/structure/celllist.cpython-311-darwin.so +0 -0
  43. biotite/structure/charges.c +119 -119
  44. biotite/structure/charges.cpython-311-darwin.so +0 -0
  45. biotite/structure/dotbracket.py +2 -0
  46. biotite/structure/info/atoms.py +6 -1
  47. biotite/structure/info/bonds.py +1 -1
  48. biotite/structure/info/ccd/amino_acids.txt +17 -0
  49. biotite/structure/info/ccd/carbohydrates.txt +2 -0
  50. biotite/structure/info/ccd/components.bcif +0 -0
  51. biotite/structure/info/ccd/nucleotides.txt +1 -0
  52. biotite/structure/info/misc.py +69 -5
  53. biotite/structure/integrity.py +19 -70
  54. biotite/structure/io/ctab.py +12 -106
  55. biotite/structure/io/general.py +157 -165
  56. biotite/structure/io/gro/file.py +16 -16
  57. biotite/structure/io/mmtf/convertarray.c +119 -119
  58. biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
  59. biotite/structure/io/mmtf/convertfile.c +119 -119
  60. biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
  61. biotite/structure/io/mmtf/decode.c +119 -119
  62. biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
  63. biotite/structure/io/mmtf/encode.c +119 -119
  64. biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
  65. biotite/structure/io/mol/__init__.py +4 -2
  66. biotite/structure/io/mol/convert.py +71 -7
  67. biotite/structure/io/mol/ctab.py +414 -0
  68. biotite/structure/io/mol/header.py +116 -0
  69. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  70. biotite/structure/io/mol/sdf.py +909 -0
  71. biotite/structure/io/pdb/file.py +84 -31
  72. biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
  73. biotite/structure/io/pdbx/__init__.py +0 -1
  74. biotite/structure/io/pdbx/bcif.py +2 -3
  75. biotite/structure/io/pdbx/cif.py +9 -5
  76. biotite/structure/io/pdbx/component.py +4 -1
  77. biotite/structure/io/pdbx/convert.py +203 -79
  78. biotite/structure/io/pdbx/encoding.c +119 -119
  79. biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
  80. biotite/structure/repair.py +253 -0
  81. biotite/structure/sasa.c +119 -119
  82. biotite/structure/sasa.cpython-311-darwin.so +0 -0
  83. biotite/structure/sequence.py +112 -0
  84. biotite/structure/superimpose.py +472 -13
  85. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
  86. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
  87. biotite/structure/io/pdbx/error.py +0 -14
  88. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  89. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
  90. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -106,6 +106,14 @@ def read_alignment_from_cigar(cigar, position,
106
106
  AAAAGGTTTCCGACCGTAGGTAG
107
107
  CCCCGGTTT--GACCGTATGTAG
108
108
 
109
+ Explicit terminal deletions are also possible.
110
+ Note that in this case the deleted positions count as aligned bases
111
+ with respect to the `position` parameter.
112
+
113
+ >>> print(read_alignment_from_cigar("3D9M2D12M4D", 0, ref, seg))
114
+ TATAAAAGGTTTCCGACCGTAGGTAGCTGA
115
+ ---CCCCGGTTT--GACCGTATGTAG----
116
+
109
117
  If bases in the segment sequence are soft-clipped, they do not
110
118
  appear in the alignment.
111
119
  Furthermore, the start of the reference sequence must be adapted.
@@ -122,7 +130,7 @@ def read_alignment_from_cigar(cigar, position,
122
130
  GGTTTCCGACCGTAGGTAG
123
131
  GGTTT--GACCGTATGTAG
124
132
 
125
- Reading from BAM codes is also possible:
133
+ Reading from BAM codes is also possible.
126
134
 
127
135
  >>> seg = NucleotideSequence("CCCCGGTTTGACCGTATGTAG")
128
136
  >>> op_tuples = [
@@ -190,7 +198,8 @@ def read_alignment_from_cigar(cigar, position,
190
198
 
191
199
  def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
192
200
  introns=(), distinguish_matches=False,
193
- hard_clip=False, as_string=True):
201
+ hard_clip=False, include_terminal_gaps=False,
202
+ as_string=True):
194
203
  """
195
204
  Convert an :class:`Alignment` into a CIGAR string.
196
205
 
@@ -220,6 +229,13 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
220
229
  hard_clip : bool, optional
221
230
  If true, clipped bases are hard-clipped.
222
231
  Otherwise, clipped bases are soft-clipped.
232
+ include_terminal_gaps : bool, optional
233
+ If true, terminal gaps in the segment sequence are included in
234
+ the CIGAR string.
235
+ These are represented by ``D`` operations at the start and/or
236
+ end of the string.
237
+ By default, those terminal gaps are omitted in the CIGAR, which
238
+ is the way SAM/BAM expects a CIGAR to be.
223
239
  as_string : bool, optional
224
240
  If true, the CIGAR string is returned.
225
241
  Otherwise, a list of tuples is returned, where the first element
@@ -238,6 +254,12 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
238
254
  --------
239
255
  read_alignment_from_cigar
240
256
 
257
+ Notes
258
+ -----
259
+ If `include_terminal_gaps` is set to true, you usually want to set
260
+ ``position=0`` in :func:`read_alignment_from_cigar` to get the
261
+ correct alignment.
262
+
241
263
  Examples
242
264
  --------
243
265
 
@@ -256,6 +278,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
256
278
  9M2N12M
257
279
  >>> print(write_alignment_to_cigar(semiglobal_alignment, distinguish_matches=True))
258
280
  4X5=2D7=1X4=
281
+ >>> print(write_alignment_to_cigar(semiglobal_alignment, include_terminal_gaps=True))
282
+ 3D9M2D12M4D
259
283
  >>> local_alignment = align_optimal(ref, seg, matrix, local=True)[0]
260
284
  >>> print(local_alignment)
261
285
  GGTTTCCGACCGTAGGTAG
@@ -274,9 +298,8 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
274
298
  CigarOp.DELETION 2
275
299
  CigarOp.MATCH 12
276
300
  """
277
- # Ignore terminal gaps in segment sequence
278
- no_gap_pos = np.where(alignment.trace[:, segment_index] != -1)[0]
279
- alignment = alignment[no_gap_pos[0] : no_gap_pos[-1] + 1]
301
+ if not include_terminal_gaps:
302
+ alignment = _remove_terminal_segment_gaps(alignment, segment_index)
280
303
 
281
304
  ref_trace = alignment.trace[:, reference_index]
282
305
  seg_trace = alignment.trace[:, segment_index]
@@ -321,19 +344,13 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
321
344
  op_tuples = _aggregate_consecutive(operations)
322
345
 
323
346
  clip_op = CigarOp.HARD_CLIP if hard_clip else CigarOp.SOFT_CLIP
324
- # Missing bases at the beginning and end of the segment are
325
- # interpreted as clipped
326
- # As first element in the segment trace is the first aligned base,
327
- # all previous bases are clipped...
328
- start_clip_length = seg_trace[0]
347
+ start_clip_length, end_clip_length = _find_clipped_bases(
348
+ alignment, segment_index
349
+ )
329
350
  if start_clip_length != 0:
330
- start_clip = [(clip_op, seg_trace[0])]
351
+ start_clip = [(clip_op, start_clip_length)]
331
352
  else:
332
353
  start_clip = np.zeros((0, 2), dtype=int)
333
- # ...and the same applies for the last base
334
- end_clip_length = (
335
- len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
336
- )
337
354
  if end_clip_length != 0:
338
355
  end_clip = [(clip_op, end_clip_length)]
339
356
  else:
@@ -347,6 +364,34 @@ def write_alignment_to_cigar(alignment, reference_index=0, segment_index=1,
347
364
  return op_tuples
348
365
 
349
366
 
367
+ def _remove_terminal_segment_gaps(alignment, segment_index):
368
+ """
369
+ Remove terminal gaps in the segment sequence.
370
+ """
371
+ no_gap_pos = np.where(alignment.trace[:, segment_index] != -1)[0]
372
+ return alignment[no_gap_pos[0] : no_gap_pos[-1] + 1]
373
+
374
+
375
+ def _find_clipped_bases(alignment, segment_index):
376
+ """
377
+ Find the number of clipped bases at the start and end of the segment.
378
+ """
379
+ # Finding the clipped part is easier when the terminal segment gaps
380
+ # are removed (if not already done)
381
+ alignment = _remove_terminal_segment_gaps(alignment, segment_index)
382
+ seg_trace = alignment.trace[:, segment_index]
383
+ # Missing bases at the beginning and end of the segment are
384
+ # interpreted as clipped
385
+ # As the first element in the segment trace is the first aligned base,
386
+ # all previous bases are clipped...
387
+ start_clip_length = seg_trace[0]
388
+ # ...and the same applies for the last base
389
+ end_clip_length = (
390
+ len(alignment.sequences[segment_index]) - seg_trace[-1] - 1
391
+ )
392
+ return start_clip_length, end_clip_length
393
+
394
+
350
395
  def _aggregate_consecutive(operations):
351
396
  """
352
397
  Aggregate consecutive operations of the same type.