biotite 1.1.0__cp311-cp311-win_amd64.whl → 1.2.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (155) hide show
  1. biotite/application/application.py +3 -3
  2. biotite/application/autodock/app.py +1 -1
  3. biotite/application/blast/webapp.py +1 -1
  4. biotite/application/clustalo/app.py +1 -1
  5. biotite/application/localapp.py +2 -2
  6. biotite/application/msaapp.py +10 -10
  7. biotite/application/muscle/app3.py +3 -3
  8. biotite/application/muscle/app5.py +3 -3
  9. biotite/application/sra/app.py +0 -5
  10. biotite/application/util.py +21 -1
  11. biotite/application/viennarna/rnaalifold.py +8 -8
  12. biotite/application/viennarna/rnaplot.py +3 -1
  13. biotite/application/viennarna/util.py +1 -1
  14. biotite/application/webapp.py +1 -1
  15. biotite/database/afdb/__init__.py +12 -0
  16. biotite/database/afdb/download.py +191 -0
  17. biotite/database/entrez/dbnames.py +10 -0
  18. biotite/database/entrez/download.py +9 -10
  19. biotite/database/entrez/key.py +1 -1
  20. biotite/database/entrez/query.py +5 -4
  21. biotite/database/pubchem/download.py +6 -6
  22. biotite/database/pubchem/error.py +10 -0
  23. biotite/database/pubchem/query.py +12 -23
  24. biotite/database/rcsb/download.py +3 -2
  25. biotite/database/rcsb/query.py +2 -3
  26. biotite/database/uniprot/check.py +2 -2
  27. biotite/database/uniprot/download.py +2 -5
  28. biotite/database/uniprot/query.py +3 -4
  29. biotite/file.py +14 -2
  30. biotite/interface/__init__.py +19 -0
  31. biotite/interface/openmm/__init__.py +16 -0
  32. biotite/interface/openmm/state.py +93 -0
  33. biotite/interface/openmm/system.py +227 -0
  34. biotite/interface/pymol/__init__.py +198 -0
  35. biotite/interface/pymol/cgo.py +346 -0
  36. biotite/interface/pymol/convert.py +185 -0
  37. biotite/interface/pymol/display.py +267 -0
  38. biotite/interface/pymol/object.py +1226 -0
  39. biotite/interface/pymol/shapes.py +178 -0
  40. biotite/interface/pymol/startup.py +169 -0
  41. biotite/interface/rdkit/__init__.py +15 -0
  42. biotite/interface/rdkit/mol.py +490 -0
  43. biotite/interface/version.py +71 -0
  44. biotite/interface/warning.py +19 -0
  45. biotite/sequence/align/__init__.py +0 -4
  46. biotite/sequence/align/alignment.py +33 -11
  47. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  48. biotite/sequence/align/banded.pyx +21 -21
  49. biotite/sequence/align/cigar.py +2 -2
  50. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  51. biotite/sequence/align/kmeralphabet.pyx +2 -2
  52. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  53. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  54. biotite/sequence/align/kmertable.pyx +6 -6
  55. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  56. biotite/sequence/align/localgapped.pyx +47 -47
  57. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  58. biotite/sequence/align/localungapped.pyx +10 -10
  59. biotite/sequence/align/matrix.py +12 -3
  60. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  61. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  62. biotite/sequence/align/pairwise.pyx +35 -35
  63. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  64. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  65. biotite/sequence/align/selector.pyx +2 -2
  66. biotite/sequence/align/statistics.py +1 -1
  67. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  68. biotite/sequence/alphabet.py +2 -2
  69. biotite/sequence/annotation.py +19 -13
  70. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  71. biotite/sequence/codon.py +1 -2
  72. biotite/sequence/graphics/alignment.py +25 -39
  73. biotite/sequence/graphics/dendrogram.py +4 -2
  74. biotite/sequence/graphics/features.py +2 -2
  75. biotite/sequence/graphics/logo.py +10 -12
  76. biotite/sequence/io/fasta/convert.py +1 -2
  77. biotite/sequence/io/fasta/file.py +1 -1
  78. biotite/sequence/io/fastq/file.py +3 -3
  79. biotite/sequence/io/genbank/file.py +3 -3
  80. biotite/sequence/io/genbank/sequence.py +2 -0
  81. biotite/sequence/io/gff/convert.py +1 -1
  82. biotite/sequence/io/gff/file.py +1 -2
  83. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  84. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  85. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  86. biotite/sequence/profile.py +19 -25
  87. biotite/sequence/search.py +0 -1
  88. biotite/sequence/seqtypes.py +12 -5
  89. biotite/sequence/sequence.py +1 -2
  90. biotite/structure/__init__.py +2 -0
  91. biotite/structure/alphabet/i3d.py +1 -2
  92. biotite/structure/alphabet/pb.py +1 -2
  93. biotite/structure/alphabet/unkerasify.py +8 -2
  94. biotite/structure/atoms.py +35 -27
  95. biotite/structure/basepairs.py +26 -26
  96. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  97. biotite/structure/bonds.pyx +8 -5
  98. biotite/structure/box.py +19 -21
  99. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  100. biotite/structure/celllist.pyx +83 -67
  101. biotite/structure/chains.py +5 -37
  102. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  103. biotite/structure/compare.py +420 -13
  104. biotite/structure/density.py +1 -1
  105. biotite/structure/dotbracket.py +27 -28
  106. biotite/structure/filter.py +8 -8
  107. biotite/structure/geometry.py +15 -15
  108. biotite/structure/hbond.py +17 -19
  109. biotite/structure/info/atoms.py +11 -2
  110. biotite/structure/info/ccd.py +0 -2
  111. biotite/structure/info/components.bcif +0 -0
  112. biotite/structure/info/groups.py +0 -3
  113. biotite/structure/info/misc.py +0 -1
  114. biotite/structure/info/radii.py +92 -22
  115. biotite/structure/info/standardize.py +1 -2
  116. biotite/structure/integrity.py +4 -6
  117. biotite/structure/io/general.py +2 -2
  118. biotite/structure/io/gro/file.py +8 -9
  119. biotite/structure/io/mol/convert.py +1 -1
  120. biotite/structure/io/mol/ctab.py +33 -28
  121. biotite/structure/io/mol/mol.py +1 -1
  122. biotite/structure/io/mol/sdf.py +39 -13
  123. biotite/structure/io/pdb/convert.py +2 -3
  124. biotite/structure/io/pdb/file.py +11 -22
  125. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  126. biotite/structure/io/pdbqt/file.py +4 -4
  127. biotite/structure/io/pdbx/bcif.py +22 -7
  128. biotite/structure/io/pdbx/cif.py +20 -7
  129. biotite/structure/io/pdbx/component.py +6 -0
  130. biotite/structure/io/pdbx/compress.py +2 -2
  131. biotite/structure/io/pdbx/convert.py +222 -33
  132. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  133. biotite/structure/io/trajfile.py +9 -6
  134. biotite/structure/io/util.py +38 -0
  135. biotite/structure/mechanics.py +0 -1
  136. biotite/structure/molecules.py +0 -15
  137. biotite/structure/pseudoknots.py +7 -13
  138. biotite/structure/repair.py +2 -4
  139. biotite/structure/residues.py +13 -24
  140. biotite/structure/rings.py +335 -0
  141. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  142. biotite/structure/sasa.pyx +2 -1
  143. biotite/structure/segments.py +68 -9
  144. biotite/structure/sequence.py +0 -1
  145. biotite/structure/sse.py +0 -2
  146. biotite/structure/superimpose.py +74 -62
  147. biotite/structure/tm.py +581 -0
  148. biotite/structure/transform.py +12 -25
  149. biotite/structure/util.py +3 -3
  150. biotite/version.py +9 -4
  151. biotite/visualize.py +111 -1
  152. {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/METADATA +5 -3
  153. {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/RECORD +155 -135
  154. {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/WHEEL +0 -0
  155. {biotite-1.1.0.dist-info → biotite-1.2.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -3,7 +3,7 @@
3
3
  # information.
4
4
 
5
5
  __name__ = "biotite.structure.io.pdbx"
6
- __author__ = "Fabrice Allain, Patrick Kunzmann"
6
+ __author__ = "Fabrice Allain, Patrick Kunzmann, Cheyenne Ziegler"
7
7
  __all__ = [
8
8
  "get_sequence",
9
9
  "get_model_count",
@@ -13,6 +13,7 @@ __all__ = [
13
13
  "set_component",
14
14
  "list_assemblies",
15
15
  "get_assembly",
16
+ "get_sse",
16
17
  ]
17
18
 
18
19
  import itertools
@@ -81,6 +82,7 @@ PDBX_BOND_TYPE_TO_ORDER = {
81
82
  BondType.AROMATIC_TRIPLE: "trip",
82
83
  # These are masked later, it is merely added here to avoid a KeyError
83
84
  BondType.ANY: "",
85
+ BondType.AROMATIC: "",
84
86
  BondType.COORDINATION: "",
85
87
  }
86
88
  # Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
@@ -92,12 +94,19 @@ COMP_BOND_ORDER_TO_TYPE = {
92
94
  ("SING", "Y"): BondType.AROMATIC_SINGLE,
93
95
  ("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
94
96
  ("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
97
+ ("AROM", "Y"): BondType.AROMATIC,
95
98
  }
96
99
  # ...and vice versa
97
100
  COMP_BOND_TYPE_TO_ORDER = {
98
101
  bond_type: order for order, bond_type in COMP_BOND_ORDER_TO_TYPE.items()
99
102
  }
100
103
  CANONICAL_RESIDUE_LIST = canonical_aa_list + canonical_nucleotide_list
104
+ # It was observed that when the number of rows in `atom_site` and `struct_conn`
105
+ # exceed a certain threshold,
106
+ # a dictionary approach is less computation and memory intensive than the dense
107
+ # vectorized approach.
108
+ # https://github.com/biotite-dev/biotite/pull/765#issuecomment-2708867357
109
+ FIND_MATCHES_SWITCH_THRESHOLD = 4000000
101
110
 
102
111
  _proteinseq_type_list = ["polypeptide(D)", "polypeptide(L)"]
103
112
  _nucleotideseq_type_list = [
@@ -160,8 +169,8 @@ def get_sequence(pdbx_file, data_block=None):
160
169
  -------
161
170
  sequence_dict : Dictionary of Sequences
162
171
  Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
163
- (often equivalent to chain_id and atom_site.auth_asym_id
164
- in most cases). Dictionary values are sequences.
172
+ (equivalent to ``atom_site.auth_asym_id``).
173
+ Dictionary values are sequences.
165
174
 
166
175
  Notes
167
176
  -----
@@ -217,9 +226,7 @@ def get_model_count(pdbx_file, data_block=None):
217
226
  The number of models.
218
227
  """
219
228
  block = _get_block(pdbx_file, data_block)
220
- return len(
221
- _get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
222
- )
229
+ return len(np.unique((block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))))
223
230
 
224
231
 
225
232
  def get_structure(
@@ -310,7 +317,6 @@ def get_structure(
310
317
  >>> arr = get_structure(file, model=1)
311
318
  >>> print(len(arr))
312
319
  304
313
-
314
320
  """
315
321
  block = _get_block(pdbx_file, data_block)
316
322
 
@@ -321,13 +327,12 @@ def get_structure(
321
327
  raise InvalidFileError("Missing 'atom_site' category in file")
322
328
 
323
329
  models = atom_site["pdbx_PDB_model_num"].as_array(np.int32)
324
- model_starts = _get_model_starts(models)
325
- model_count = len(model_starts)
330
+ model_count = len(np.unique(models))
326
331
  atom_count = len(models)
327
332
 
328
333
  if model is None:
329
334
  # For a stack, the annotations are derived from the first model
330
- model_atom_site = _filter_model(atom_site, model_starts, 1)
335
+ model_atom_site = _filter_model(atom_site, 1)
331
336
  # Any field of the category would work here to get the length
332
337
  model_length = model_atom_site.row_count
333
338
  atoms = AtomArrayStack(model_count, model_length)
@@ -373,7 +378,7 @@ def get_structure(
373
378
  f"the given model {model} does not exist"
374
379
  )
375
380
 
376
- model_atom_site = _filter_model(atom_site, model_starts, model)
381
+ model_atom_site = _filter_model(atom_site, model)
377
382
  # Any field of the category would work here to get the length
378
383
  model_length = model_atom_site.row_count
379
384
  atoms = AtomArray(model_length)
@@ -649,6 +654,17 @@ def _find_matches(query_arrays, reference_arrays):
649
654
  `reference_arrays` where all query values match the reference counterpart.
650
655
  If no match is found for a query, the corresponding index is -1.
651
656
  """
657
+ if (
658
+ query_arrays[0].shape[0] * reference_arrays[0].shape[0]
659
+ <= FIND_MATCHES_SWITCH_THRESHOLD
660
+ ):
661
+ match_indices = _find_matches_by_dense_array(query_arrays, reference_arrays)
662
+ else:
663
+ match_indices = _find_matches_by_dict(query_arrays, reference_arrays)
664
+ return match_indices
665
+
666
+
667
+ def _find_matches_by_dense_array(query_arrays, reference_arrays):
652
668
  match_masks_for_all_columns = np.stack(
653
669
  [
654
670
  query[:, np.newaxis] == reference[np.newaxis, :]
@@ -676,6 +692,38 @@ def _find_matches(query_arrays, reference_arrays):
676
692
  return match_indices
677
693
 
678
694
 
695
+ def _find_matches_by_dict(query_arrays, reference_arrays):
696
+ # Convert reference arrays to a dictionary for O(1) lookups
697
+ reference_dict = {}
698
+ ambiguous_keys = set()
699
+ for ref_idx, ref_row in enumerate(zip(*reference_arrays)):
700
+ ref_key = tuple(ref_row)
701
+ if ref_key in reference_dict:
702
+ ambiguous_keys.add(ref_key)
703
+ continue
704
+ reference_dict[ref_key] = ref_idx
705
+
706
+ match_indices = []
707
+ for query_idx, query_row in enumerate(zip(*query_arrays)):
708
+ query_key = tuple(query_row)
709
+ occurrence = reference_dict.get(query_key)
710
+
711
+ if occurrence is None:
712
+ # -1 indicates that no match was found in the reference
713
+ match_indices.append(-1)
714
+ elif query_key in ambiguous_keys:
715
+ # The query cannot be uniquely matched to an atom in the reference
716
+ raise InvalidFileError(
717
+ f"The covalent bond in the 'struct_conn' category at index "
718
+ f"{query_idx} cannot be unambiguously assigned to atoms in "
719
+ f"the 'atom_site' category"
720
+ )
721
+ else:
722
+ match_indices.append(occurrence)
723
+
724
+ return np.array(match_indices)
725
+
726
+
679
727
  def _get_struct_conn_col_name(col_name, partner):
680
728
  """
681
729
  For a column name in ``atom_site`` get the corresponding column name
@@ -714,21 +762,26 @@ def _filter_altloc(array, atom_site, altloc):
714
762
  raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
715
763
 
716
764
 
717
- def _get_model_starts(model_array):
718
- """
719
- Get the start index for each model in the arrays of the
720
- ``atom_site`` category.
721
- """
722
- _, indices = np.unique(model_array, return_index=True)
723
- indices.sort()
724
- return indices
725
-
726
-
727
- def _filter_model(atom_site, model_starts, model):
765
+ def _filter_model(atom_site, model):
728
766
  """
729
767
  Reduce the ``atom_site`` category to the values for the given
730
768
  model.
769
+
770
+ Parameters
771
+ ----------
772
+ atom_site : CIFCategory or BinaryCIFCategory
773
+ ``atom_site`` category containing all models.
774
+ model : int
775
+ The model to be selected.
776
+
777
+ Returns
778
+ -------
779
+ atom_site : CIFCategory or BinaryCIFCategory
780
+ The ``atom_site`` category containing only the selected model.
731
781
  """
782
+ models = atom_site["pdbx_PDB_model_num"].as_array(np.int32)
783
+ _, model_starts = np.unique(models, return_index=True)
784
+ model_starts.sort()
732
785
  # Append exclusive stop
733
786
  model_starts = np.append(model_starts, [atom_site.row_count])
734
787
  # Indexing starts at 0, but model number starts at 1
@@ -815,7 +868,6 @@ def set_structure(
815
868
  >>> file = CIFFile()
816
869
  >>> set_structure(file, atom_array)
817
870
  >>> file.write(os.path.join(path_to_directory, "structure.cif"))
818
-
819
871
  """
820
872
  _check_non_empty(array)
821
873
 
@@ -836,7 +888,11 @@ def set_structure(
836
888
  )
837
889
  atom_site["label_comp_id"] = np.copy(array.res_name)
838
890
  atom_site["label_asym_id"] = np.copy(array.chain_id)
839
- atom_site["label_entity_id"] = _determine_entity_id(array.chain_id)
891
+ atom_site["label_entity_id"] = (
892
+ np.copy(array.label_entity_id)
893
+ if "label_entity_id" in array.get_annotation_categories()
894
+ else _determine_entity_id(array.chain_id)
895
+ )
840
896
  atom_site["label_seq_id"] = np.copy(array.res_id)
841
897
  atom_site["pdbx_PDB_ins_code"] = Column(
842
898
  np.copy(array.ins_code),
@@ -1181,7 +1237,13 @@ def _filter_canonical_links(array, bond_array):
1181
1237
  ) # fmt: skip
1182
1238
 
1183
1239
 
1184
- def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
1240
+ def get_component(
1241
+ pdbx_file,
1242
+ data_block=None,
1243
+ use_ideal_coord=True,
1244
+ res_name=None,
1245
+ allow_missing_coord=False,
1246
+ ):
1185
1247
  """
1186
1248
  Create an :class:`AtomArray` for a chemical component from the
1187
1249
  ``chem_comp_atom`` and, if available, the ``chem_comp_bond``
@@ -1209,6 +1271,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
1209
1271
  In this case, the component with the given residue name is
1210
1272
  read.
1211
1273
  By default, all rows would be read in this case.
1274
+ allow_missing_coord : bool, optional
1275
+ Whether to allow missing coordinate values in components.
1276
+ If ``True``, these will be represented as ``nan`` values.
1277
+ If ``False``, a ``ValueError`` is raised when missing coordinates
1278
+ are encountered.
1212
1279
 
1213
1280
  Returns
1214
1281
  -------
@@ -1299,7 +1366,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
1299
1366
  else:
1300
1367
  raise
1301
1368
  array.coord = _parse_component_coordinates(
1302
- [atom_category[field] for field in alt_coord_fields]
1369
+ [atom_category[field] for field in alt_coord_fields],
1370
+ allow_missing=allow_missing_coord,
1303
1371
  )
1304
1372
 
1305
1373
  try:
@@ -1310,7 +1378,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
1310
1378
  )
1311
1379
  except KeyError:
1312
1380
  warnings.warn(
1313
- "Category 'chem_comp_bond' not found. " "No bonds will be parsed",
1381
+ "Category 'chem_comp_bond' not found. No bonds will be parsed",
1314
1382
  UserWarning,
1315
1383
  )
1316
1384
  else:
@@ -1330,14 +1398,20 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
1330
1398
  return array
1331
1399
 
1332
1400
 
1333
- def _parse_component_coordinates(coord_columns):
1401
+ def _parse_component_coordinates(coord_columns, allow_missing=False):
1334
1402
  coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
1335
1403
  for i, column in enumerate(coord_columns):
1336
1404
  if column.mask is not None and column.mask.array.any():
1337
- raise ValueError(
1338
- "Missing coordinates for some atoms",
1339
- )
1340
- coord[:, i] = column.as_array(np.float32)
1405
+ if allow_missing:
1406
+ warnings.warn(
1407
+ "Missing coordinates for some atoms. Those will be set to nan",
1408
+ UserWarning,
1409
+ )
1410
+ else:
1411
+ raise ValueError(
1412
+ "Missing coordinates for some atoms",
1413
+ )
1414
+ coord[:, i] = column.as_array(np.float32, masked_value=np.nan)
1341
1415
  return coord
1342
1416
 
1343
1417
 
@@ -1445,6 +1519,7 @@ def list_assemblies(pdbx_file, data_block=None):
1445
1519
 
1446
1520
  Examples
1447
1521
  --------
1522
+
1448
1523
  >>> import os.path
1449
1524
  >>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
1450
1525
  >>> assembly_ids = list_assemblies(file)
@@ -1742,4 +1817,118 @@ def _convert_string_to_sequence(string, stype):
1742
1817
  elif stype in _other_type_list:
1743
1818
  return None
1744
1819
  else:
1745
- raise InvalidFileError("mmCIF _entity_poly.type unsupported" " type: " + stype)
1820
+ raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)
1821
+
1822
+
1823
+ def get_sse(pdbx_file, data_block=None, match_model=None):
1824
+ """
1825
+ Get the secondary structure from a PDBx file.
1826
+
1827
+ Parameters
1828
+ ----------
1829
+ pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
1830
+ The file object.
1831
+ The following categories are required:
1832
+
1833
+ - ``entity_poly``
1834
+ - ``struct_conf`` (if alpha-helices are present)
1835
+ - ``struct_sheet_range`` (if beta-strands are present)
1836
+ - ``atom_site`` (if `match_model` is set)
1837
+
1838
+ data_block : str, optional
1839
+ The name of the data block.
1840
+ Default is the first (and most times only) data block of the
1841
+ file.
1842
+ If the data block object is passed directly to `pdbx_file`,
1843
+ this parameter is ignored.
1844
+ match_model : int, optional
1845
+ If a model number is given, only secondary structure elements for residues are
1846
+ kept, that are resolved in the given model.
1847
+ This means secondary structure elements for residues that would not appear
1848
+ in a corresponding :class:`AtomArray` from :func:`get_structure()` are removed.
1849
+ By default, all residues in the sequence are kept.
1850
+
1851
+ Returns
1852
+ -------
1853
+ sse_dict : dict of str -> ndarray, dtype=str
1854
+ The dictionary maps the chain ID (derived from ``auth_asym_id``) to the
1855
+ secondary structure of the respective chain.
1856
+
1857
+ - ``"a"``: alpha-helix
1858
+ - ``"b"``: beta-strand
1859
+ - ``"c"``: coil or not an amino acid
1860
+
1861
+ Each secondary structure element corresponds to the ``label_seq_id`` of the
1862
+ ``atom_site`` category.
1863
+ This means that the 0-th position of the array corresponds to the residue
1864
+ in ``atom_site`` with ``label_seq_id`` ``1``.
1865
+
1866
+ Examples
1867
+ --------
1868
+
1869
+ >>> import os.path
1870
+ >>> file = CIFFile.read(os.path.join(path_to_structures, "1aki.cif"))
1871
+ >>> sse = get_sse(file, match_model=1)
1872
+ >>> print(sse)
1873
+ {'A': array(['c', 'c', 'c', 'c', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
1874
+ 'a', 'c', 'c', 'c', 'c', 'c', 'a', 'a', 'a', 'c', 'c', 'a', 'a',
1875
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c',
1876
+ 'c', 'c', 'c', 'b', 'b', 'b', 'c', 'c', 'c', 'c', 'c', 'b', 'b',
1877
+ 'b', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c',
1878
+ 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c', 'c',
1879
+ 'c', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c', 'c', 'a', 'a', 'a',
1880
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'a',
1881
+ 'a', 'a', 'a', 'c', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'c', 'c',
1882
+ 'c', 'c', 'a', 'a', 'a', 'a', 'c', 'c', 'c', 'c', 'c', 'c'],
1883
+ dtype='<U1')}
1884
+
1885
+ If only secondary structure elements for resolved residues are requested, the length
1886
+ of the returned array matches the number of peptide residues in the structure.
1887
+
1888
+ >>> file = CIFFile.read(os.path.join(path_to_structures, "3o5r.cif"))
1889
+ >>> print(len(get_sse(file, match_model=1)["A"]))
1890
+ 128
1891
+ >>> atoms = get_structure(file, model=1)
1892
+ >>> atoms = atoms[filter_amino_acids(atoms) & (atoms.chain_id == "A")]
1893
+ >>> print(get_residue_count(atoms))
1894
+ 128
1895
+ """
1896
+ block = _get_block(pdbx_file, data_block)
1897
+
1898
+ # Init all chains with "c" for coil
1899
+ sse_dict = {
1900
+ chain_id: np.repeat("c", len(sequence))
1901
+ for chain_id, sequence in get_sequence(block).items()
1902
+ }
1903
+
1904
+ # Populate SSE arrays with helices and strands
1905
+ for sse_symbol, category_name in [
1906
+ ("a", "struct_conf"),
1907
+ ("b", "struct_sheet_range"),
1908
+ ]:
1909
+ if category_name in block:
1910
+ category = block[category_name]
1911
+ chains = category["beg_auth_asym_id"].as_array(str)
1912
+ start_positions = category["beg_label_seq_id"].as_array(int)
1913
+ end_positions = category["end_label_seq_id"].as_array(int)
1914
+
1915
+ # Mark the positions of the current element type (helix or strand)
1916
+ for chain, start, end in zip(chains, start_positions, end_positions):
1917
+ # Translate the 1-based positions from PDBx into 0-based array indices
1918
+ sse_dict[chain][start - 1 : end] = sse_symbol
1919
+
1920
+ if match_model is not None:
1921
+ model_atom_site = _filter_model(block["atom_site"], match_model)
1922
+ chain_ids = model_atom_site["auth_asym_id"].as_array(str)
1923
+ res_ids = model_atom_site["label_seq_id"].as_array(int, masked_value=-1)
1924
+ # Filter out masked residues, i.e. residues not part of a chain
1925
+ mask = res_ids != -1
1926
+ chain_ids = chain_ids[mask]
1927
+ res_ids = res_ids[mask]
1928
+ for chain_id, sse in sse_dict.items():
1929
+ res_ids_in_chain = res_ids[chain_ids == chain_id]
1930
+ # Transform from 1-based residue ID to 0-based index
1931
+ indices = np.unique(res_ids_in_chain) - 1
1932
+ sse_dict[chain_id] = sse[indices]
1933
+
1934
+ return sse_dict
@@ -187,9 +187,11 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
187
187
  time : float or ndarray, dtype=float32, shape=(n,) or None
188
188
  The simulation time of the current frame or stack in *ps*.
189
189
 
190
- See also
190
+ See Also
191
191
  --------
192
- read_iter_structure
192
+ read_iter_structure :
193
+ Get an :class:`AtomArray` for each frame or an :class:`AtomArrayStack`
194
+ for each chunk of frames instead.
193
195
 
194
196
  Notes
195
197
  -----
@@ -315,9 +317,10 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
315
317
  If `stack_size` is set, multiple frames are returned as
316
318
  :class:`AtomArrayStack`.
317
319
 
318
- See also
320
+ See Also
319
321
  --------
320
- read_iter
322
+ read_iter :
323
+ Get the raw data for each frame or for each chunk of frames instead.
321
324
 
322
325
  Notes
323
326
  -----
@@ -480,7 +483,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
480
483
 
481
484
  Parameters
482
485
  ----------
483
- time : ndarray, dtype=float, shape=(m,3,3)
486
+ box : ndarray, dtype=float, shape=(m,3,3)
484
487
  The box vectors to be set.
485
488
  """
486
489
  self._check_model_count(box)
@@ -546,7 +549,7 @@ class TrajectoryFile(File, metaclass=abc.ABCMeta):
546
549
  ------
547
550
  NotImplementedError
548
551
  """
549
- raise NotImplementedError("Copying is not implemented " "for trajectory files")
552
+ raise NotImplementedError("Copying is not implemented for trajectory files")
550
553
 
551
554
  @classmethod
552
555
  @abc.abstractmethod
@@ -0,0 +1,38 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
+ """
6
+ Common functions used by a number of subpackages.
7
+ """
8
+
9
+ __name__ = "biotite.structure.io"
10
+ __author__ = "Patrick Kunzmann"
11
+ __all__ = ["number_of_integer_digits"]
12
+
13
+ import numpy as np
14
+
15
+
16
def number_of_integer_digits(values):
    """
    Get the maximum number of characters needed to represent the
    pre-decimal positions of the given numeric values.

    Parameters
    ----------
    values : array-like, dtype=float
        The values to be checked.

    Returns
    -------
    n_digits : int
        The maximum number of characters needed to represent the
        pre-decimal positions of the given numeric values.
        This is ``0`` if `values` contains no elements.

    Notes
    -----
    The values are truncated to integers before their string
    representation is measured.
    Hence, a leading minus sign is counted for values ``<= -1``,
    but not for values between ``-1`` and ``0``.
    """
    # Accept any array-like, not only arrays that provide 'astype()'
    values = np.asarray(values)
    # Check the element count instead of the length of the first axis,
    # as 'np.min()' cannot reduce an array without elements
    if values.size == 0:
        return 0
    values = values.astype(int, copy=False)
    # Only the extreme values can have the longest representation:
    # the minimum due to its sign, the maximum due to its magnitude
    return max(len(str(np.min(values))), len(str(np.max(values))))
@@ -30,7 +30,6 @@ def gyration_radius(array, masses=None):
30
30
  Must have the same length as `array`. By default, the standard
31
31
  atomic mass for each element is taken.
32
32
 
33
-
34
33
  Returns
35
34
  -------
36
35
  masses : float or ndarray, dtype=float
@@ -39,11 +39,6 @@ def get_molecule_indices(array):
39
39
  Consequently, the length of this list is equal to the number of
40
40
  molecules in the input `array`.
41
41
 
42
- See also
43
- --------
44
- get_molecule_masks
45
- molecule_iter
46
-
47
42
  Examples
48
43
  --------
49
44
  Get an :class:`AtomArray` for ATP and show that it is a single
@@ -157,11 +152,6 @@ def get_molecule_masks(array):
157
152
  Consequently, the length of this list is equal to the number of
158
153
  molecules in the input `array`.
159
154
 
160
- See also
161
- --------
162
- get_molecule_indices
163
- molecule_iter
164
-
165
155
  Examples
166
156
  --------
167
157
  Get an :class:`AtomArray` for ATP and show that it is a single
@@ -270,11 +260,6 @@ def molecule_iter(array):
270
260
  molecule : AtomArray or AtomArrayStack
271
261
  A single molecule of the input `array`.
272
262
 
273
- See also
274
- --------
275
- get_molecule_indices
276
- get_molecule_masks
277
-
278
263
  Examples
279
264
  --------
280
265
  Get an :class:`AtomArray` for ATP and break it into two molecules
@@ -69,6 +69,11 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
69
69
  Therefore, there are no pseudoknots between base pairs with the same
70
70
  pseudoknot order.
71
71
 
72
+ References
73
+ ----------
74
+
75
+ .. footbibliography::
76
+
72
77
  Examples
73
78
  --------
74
79
  Remove the pseudoknotted base pair for the sequence *ABCbac*, where
@@ -102,17 +107,6 @@ def pseudoknots(base_pairs, scores=None, max_pseudoknot_order=None):
102
107
  [[0 0 1]]
103
108
  >>> print(dot_bracket(basepairs, 6)[0])
104
109
  (([))]
105
-
106
- See Also
107
- --------
108
- base_pairs
109
- dot_bracket
110
-
111
- References
112
- ----------
113
-
114
- .. footbibliography::
115
-
116
110
  """
117
111
  if len(base_pairs) == 0:
118
112
  # No base pairs -> empty pseudoknot order array
@@ -149,9 +143,9 @@ class _Region:
149
143
 
150
144
  Parameters
151
145
  ----------
152
- base_pairs: ndarray, shape=(n,2), dtype=int
146
+ base_pairs : ndarray, shape=(n,2), dtype=int
153
147
  All base pairs of the structure the region is a subset for.
154
- region_pairs: ndarray, dtype=int
148
+ region_pairs : ndarray, dtype=int
155
149
  The indices of the base pairs in ``base_pairs`` that are part of
156
150
  the region.
157
151
  scores : ndarray, dtype=int, shape=(n,) (default: None)
@@ -48,7 +48,6 @@ def create_continuous_res_ids(atoms, restart_each_chain=True):
48
48
  >>> res_ids, _ = get_residues(atom_array)
49
49
  >>> print(res_ids)
50
50
  [ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
51
-
52
51
  """
53
52
  res_ids_diff = np.zeros(atoms.array_length(), dtype=int)
54
53
  res_starts = get_residue_starts(atoms)
@@ -80,7 +79,7 @@ def infer_elements(atoms):
80
79
 
81
80
  See Also
82
81
  --------
83
- create_atoms_names : The opposite of this function
82
+ create_atom_names : The opposite of this function.
84
83
 
85
84
  Examples
86
85
  --------
@@ -89,7 +88,6 @@ def infer_elements(atoms):
89
88
  ['N' 'C' 'C' 'O' 'C' 'C' 'O' 'N' 'H' 'H']
90
89
  >>> print(infer_elements(["CA", "C", "C1", "OD1", "HD21", "1H", "FE"]))
91
90
  ['C' 'C' 'C' 'O' 'H' 'H' 'FE']
92
-
93
91
  """
94
92
  if isinstance(atoms, (AtomArray, AtomArrayStack)):
95
93
  atom_names = atoms.atom_name
@@ -117,7 +115,7 @@ def create_atom_names(atoms):
117
115
 
118
116
  See Also
119
117
  --------
120
- infer_elements : The opposite of this function
118
+ infer_elements : The opposite of this function.
121
119
 
122
120
  Notes
123
121
  -----