pytme 0.2.1__cp311-cp311-macosx_14_0_arm64.whl → 0.2.3__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/match_template.py +219 -216
  2. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/postprocess.py +86 -54
  3. pytme-0.2.3.data/scripts/preprocess.py +132 -0
  4. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/preprocessor_gui.py +181 -94
  5. pytme-0.2.3.dist-info/METADATA +92 -0
  6. pytme-0.2.3.dist-info/RECORD +75 -0
  7. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/WHEEL +1 -1
  8. pytme-0.2.1.data/scripts/preprocess.py → scripts/eval.py +1 -1
  9. scripts/extract_candidates.py +20 -13
  10. scripts/match_template.py +219 -216
  11. scripts/match_template_filters.py +154 -95
  12. scripts/postprocess.py +86 -54
  13. scripts/preprocess.py +95 -56
  14. scripts/preprocessor_gui.py +181 -94
  15. scripts/refine_matches.py +265 -61
  16. tme/__init__.py +0 -1
  17. tme/__version__.py +1 -1
  18. tme/analyzer.py +458 -813
  19. tme/backends/__init__.py +40 -11
  20. tme/backends/_jax_utils.py +187 -0
  21. tme/backends/cupy_backend.py +109 -226
  22. tme/backends/jax_backend.py +230 -152
  23. tme/backends/matching_backend.py +445 -384
  24. tme/backends/mlx_backend.py +32 -59
  25. tme/backends/npfftw_backend.py +240 -507
  26. tme/backends/pytorch_backend.py +30 -151
  27. tme/density.py +248 -371
  28. tme/extensions.cpython-311-darwin.so +0 -0
  29. tme/matching_data.py +328 -284
  30. tme/matching_exhaustive.py +195 -1499
  31. tme/matching_optimization.py +143 -106
  32. tme/matching_scores.py +887 -0
  33. tme/matching_utils.py +287 -388
  34. tme/memory.py +377 -0
  35. tme/orientations.py +78 -21
  36. tme/parser.py +3 -4
  37. tme/preprocessing/_utils.py +61 -32
  38. tme/preprocessing/composable_filter.py +7 -4
  39. tme/preprocessing/compose.py +7 -3
  40. tme/preprocessing/frequency_filters.py +49 -39
  41. tme/preprocessing/tilt_series.py +44 -72
  42. tme/preprocessor.py +560 -526
  43. tme/structure.py +491 -188
  44. tme/types.py +5 -3
  45. pytme-0.2.1.dist-info/METADATA +0 -73
  46. pytme-0.2.1.dist-info/RECORD +0 -73
  47. tme/helpers.py +0 -881
  48. tme/matching_constrained.py +0 -195
  49. {pytme-0.2.1.data → pytme-0.2.3.data}/scripts/estimate_ram_usage.py +0 -0
  50. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/LICENSE +0 -0
  51. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/entry_points.txt +0 -0
  52. {pytme-0.2.1.dist-info → pytme-0.2.3.dist-info}/top_level.txt +0 -0
tme/structure.py CHANGED
@@ -6,103 +6,159 @@
6
6
  """
7
7
  import warnings
8
8
  from copy import deepcopy
9
- from collections import namedtuple
10
- from typing import List, Dict, Tuple
11
9
  from itertools import groupby
12
10
  from dataclasses import dataclass
11
+ from collections import namedtuple
12
+ from typing import List, Dict, Tuple
13
13
  from os.path import splitext, basename
14
14
 
15
15
  import numpy as np
16
16
 
17
- from .parser import PDBParser, MMCIFParser
18
- from .matching_utils import (
19
- rigid_transform,
20
- _format_mmcif_colunns,
21
- minimum_enclosing_box,
22
- )
23
- from .helpers import atom_profile
24
17
  from .types import NDArray
18
+ from .preprocessor import atom_profile, Preprocessor
19
+ from .parser import PDBParser, MMCIFParser
20
+ from .matching_utils import rigid_transform, minimum_enclosing_box
25
21
 
26
22
 
27
23
  @dataclass(repr=False)
28
24
  class Structure:
29
25
  """
30
- Represents atomic structures in accordance with the Protein Data Bank (PDB)
31
- format specification.
26
+ Represents atomic structures per the Protein Data Bank (PDB) specification.
27
+
28
+ Examples
29
+ --------
30
+ The following creates a :py:class:`Structure` instance
31
+
32
+ >>> from tme import Structure
33
+ >>> structure = Structure(
34
+ >>> record_type=["ATOM", "ATOM", "ATOM"],
35
+ >>> atom_serial_number=[0, 1, 2] ,
36
+ >>> atom_name=["C", "N", "H"],
37
+ >>> atom_coordinate=[[30,15,10], [35, 20, 15], [35,25,20]],
38
+ >>> alternate_location_indicator=[".", ".", "."],
39
+ >>> residue_name=["GLY", "GLY", "HIS"],
40
+ >>> chain_identifier=["A", "A", "B"],
41
+ >>> residue_sequence_number=[0, 0, 1],
42
+ >>> code_for_residue_insertion=["?", "?", "?"],
43
+ >>> occupancy=[0, 0, 0],
44
+ >>> temperature_factor=[0, 0, 0],
45
+ >>> segment_identifier=["1", "1", "1"],
46
+ >>> element_symbol=["C", "N", "C"],
47
+ >>> charge=["?", "?", "?"],
48
+ >>> metadata={},
49
+ >>> )
50
+ >>> structure
51
+ Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
52
+
53
+ :py:class:`Structure` instances support a range of subsetting operations based on
54
+ atom indices
55
+
56
+ >>> structure[1]
57
+ Unique Chains: A, Atom Range: 1-1 [N = 1], Residue Range: 0-0 [N = 1]
58
+ >>> structure[(False, False, True)]
59
+ Unique Chains: B, Atom Range: 2-2 [N = 1], Residue Range: 1-1 [N = 1]
60
+ >>> structure[(1,2)]
61
+ Unique Chains: A-B, Atom Range: 1-2 [N = 2], Residue Range: 0-1 [N = 2]
62
+
63
+ They can be written to disk in a range of formats using :py:meth:`Structure.to_file`
64
+
65
+ >>> structure.to_file("test.pdb") # Writes a PDB file to disk
66
+ >>> structure.to_file("test.cif") # Writes a mmCIF file to disk
67
+
68
+ New instances can be created from a range of formats using
69
+ :py:meth:`Structure.from_file`
70
+
71
+ >>> Structure.from_file("test.pdb") # Reads PDB file from disk
72
+ Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
73
+ >>> Structure.from_file("test.cif") # Reads mmCIF file from disk
74
+ Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
75
+
76
+ Class instances can be discretized on grids and converted to
77
+ :py:class:`tme.density.Density` instances using :py:meth:`Structure.to_volume`
78
+ or :py:meth:`tme.density.Density.from_structure`.
79
+
80
+ >>> volume, origin, sampling_rate = structure.to_volume(shape=(50,40,30))
32
81
 
33
82
  References
34
83
  ----------
35
- .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
84
+ .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
85
+ .. [2] https://www.ccp4.ac.uk/html/mmcifformat.html
86
+
36
87
  """
37
88
 
38
- #: Return a numpy array with record types, e.g. ATOM, HETATM.
89
+ #: Array of record types, e.g. ATOM.
39
90
  record_type: NDArray
40
91
 
41
- #: Return a numpy array with serial number of each atom.
92
+ #: Array of serial numbers.
42
93
  atom_serial_number: NDArray
43
94
 
44
- #: Return a numpy array with name of each atom.
95
+ #: Array of atom names.
45
96
  atom_name: NDArray
46
97
 
47
- #: Return a numpy array with coordinates of each atom in x, y, z.
98
+ #: Array of x,y,z atom coordinates.
48
99
  atom_coordinate: NDArray
49
100
 
50
- #: Return a numpy array with alternate location indicates of each atom.
101
+ #: Array of alternate location indicators.
51
102
  alternate_location_indicator: NDArray
52
103
 
53
- #: Return a numpy array with originating residue names of each atom.
104
+ #: Array of residue names.
54
105
  residue_name: NDArray
55
106
 
56
- #: Return a numpy array with originating structure chain of each atom.
107
+ #: Array of chain identifiers.
57
108
  chain_identifier: NDArray
58
109
 
59
- #: Return a numpy array with originating residue id of each atom.
110
+ #: Array of residue ids.
60
111
  residue_sequence_number: NDArray
61
112
 
62
- #: Return a numpy array with insertion information d of each atom.
113
+ #: Array of insertion information.
63
114
  code_for_residue_insertion: NDArray
64
115
 
65
- #: Return a numpy array with occupancy factors of each atom.
116
+ #: Array of occupancy factors.
66
117
  occupancy: NDArray
67
118
 
68
- #: Return a numpy array with B-factors for each atom.
119
+ #: Array of B-factors.
69
120
  temperature_factor: NDArray
70
121
 
71
- #: Return a numpy array with segment identifier for each atom.
122
+ #: Array of segment identifiers.
72
123
  segment_identifier: NDArray
73
124
 
74
- #: Return a numpy array with element symbols of each atom.
125
+ #: Array of element symbols.
75
126
  element_symbol: NDArray
76
127
 
77
- #: Return a numpy array with charges of each atom.
128
+ #: Array of charges.
78
129
  charge: NDArray
79
130
 
80
- #: Returns a dictionary with class instance metadata.
81
- details: dict
131
+ #: Metadata dictionary.
132
+ metadata: dict
82
133
 
83
134
  def __post_init__(self, *args, **kwargs):
84
135
  """
85
- Initialize the structure and populate header details.
136
+ Initialize the structure and populate header metadata.
86
137
 
87
138
  Raises
88
139
  ------
89
140
  ValueError
90
- If other NDArray attributes to not match the number of atoms.
91
- If the shape of atom_coordinates and chain_identifier doesn't match.
141
+ If an NDArray attribute does not match the number of atoms.
92
142
  """
93
- self._elements = Elements()
94
- self.details = self._populate_details(self.details)
143
+ for attribute in self.__dict__:
144
+ value = getattr(self, attribute)
145
+ target_type = self.__annotations__.get(attribute, None)
146
+ if target_type == NDArray:
147
+ setattr(self, attribute, np.atleast_1d(np.array(value)))
95
148
 
96
149
  n_atoms = self.atom_coordinate.shape[0]
97
150
  for attribute in self.__dict__:
98
151
  value = getattr(self, attribute)
99
- if type(value) != np.ndarray:
152
+ if not isinstance(value, np.ndarray):
100
153
  continue
101
154
  if value.shape[0] != n_atoms:
102
155
  raise ValueError(
103
156
  f"Expected shape of {attribute}: {n_atoms}, got {value.shape[0]}."
104
157
  )
105
158
 
159
+ self._elements = Elements()
160
+ self.metadata = self._populate_metadata(self.metadata)
161
+
106
162
  def __getitem__(self, indices: List[int]) -> "Structure":
107
163
  """
108
164
  Get a Structure instance for specified indices.
@@ -138,22 +194,17 @@ class Structure:
138
194
  "charge",
139
195
  )
140
196
  kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
141
- ret = self.__class__(**kwargs, details={})
197
+ ret = self.__class__(**kwargs, metadata={})
142
198
  return ret
143
199
 
144
200
  def __repr__(self):
145
201
  """
146
202
  Return a string representation of the Structure.
147
-
148
- Returns
149
- -------
150
- str
151
- The string representation.
152
203
  """
153
204
  unique_chains = "-".join(
154
205
  [
155
206
  ",".join([str(x) for x in entity])
156
- for entity in self.details["unique_chains"]
207
+ for entity in self.metadata["unique_chains"]
157
208
  ]
158
209
  )
159
210
  min_atom = np.min(self.atom_serial_number)
@@ -162,7 +213,7 @@ class Structure:
162
213
 
163
214
  min_residue = np.min(self.residue_sequence_number)
164
215
  max_residue = np.max(self.residue_sequence_number)
165
- n_residue = self.residue_sequence_number.size
216
+ n_residue = np.unique(self.residue_sequence_number).size
166
217
 
167
218
  repr_str = (
168
219
  f"Structure object at {id(self)}\n"
@@ -172,43 +223,39 @@ class Structure:
172
223
  )
173
224
  return repr_str
174
225
 
175
- def get_chains(self) -> List[str]:
176
- """
177
- Returns a list of available chains.
178
-
179
- Returns
180
- -------
181
- list
182
- The list of available chains.
183
- """
184
- return list(self.details["chain_weight"].keys())
185
-
186
226
  def copy(self) -> "Structure":
187
227
  """
188
228
  Returns a copy of the Structure instance.
189
229
 
190
230
  Returns
191
231
  -------
192
- Structure
232
+ :py:class:`Structure`
193
233
  The copied Structure instance.
234
+
235
+ Examples
236
+ --------
237
+ >>> import numpy as np
238
+ >>> structure_copy = structure.copy()
239
+ >>> np.allclose(structure_copy.atom_coordinate, structure.atom_coordinate)
240
+ True
194
241
  """
195
242
  return deepcopy(self)
196
243
 
197
- def _populate_details(self, details: Dict = {}) -> Dict:
244
+ def _populate_metadata(self, metadata: Dict = {}) -> Dict:
198
245
  """
199
- Populate the details dictionary with the data from the Structure instance.
246
+ Populate the metadata dictionary with the data from the Structure instance.
200
247
 
201
248
  Parameters
202
249
  ----------
203
- details : dict, optional
204
- The initial details dictionary, by default {}.
250
+ metadata : dict, optional
251
+ The initial metadata dictionary, by default {}.
205
252
 
206
253
  Returns
207
254
  -------
208
255
  dict
209
- The populated details dictionary.
256
+ The populated metadata dictionary.
210
257
  """
211
- details["weight"] = np.sum(
258
+ metadata["weight"] = np.sum(
212
259
  [self._elements[atype].atomic_weight for atype in self.element_symbol]
213
260
  )
214
261
 
@@ -220,12 +267,12 @@ class Structure:
220
267
  [self._elements[atype].atomic_weight for atype in self.element_symbol],
221
268
  )
222
269
  labels = self.chain_identifier[idx]
223
- details["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
270
+ metadata["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
224
271
 
225
- # Group non-unique chains in separate lists in details["unique_chains"]
226
- details["unique_chains"], temp = [], {}
272
+ # Group non-unique chains in separate lists in metadata["unique_chains"]
273
+ metadata["unique_chains"], temp = [], {}
227
274
  for chain_label in label:
228
- index = len(details["unique_chains"])
275
+ index = len(metadata["unique_chains"])
229
276
  chain_sequence = "".join(
230
277
  [
231
278
  str(y)
@@ -236,10 +283,10 @@ class Structure:
236
283
  )
237
284
  if chain_sequence not in temp:
238
285
  temp[chain_sequence] = index
239
- details["unique_chains"].append([chain_label])
286
+ metadata["unique_chains"].append([chain_label])
240
287
  continue
241
288
  idx = temp.get(chain_sequence)
242
- details["unique_chains"][idx].append(chain_label)
289
+ metadata["unique_chains"][idx].append(chain_label)
243
290
 
244
291
  filtered_data = [
245
292
  (label, integer)
@@ -248,12 +295,12 @@ class Structure:
248
295
  )
249
296
  ]
250
297
  filtered_data = sorted(filtered_data, key=lambda x: x[0])
251
- details["chain_range"] = {}
298
+ metadata["chain_range"] = {}
252
299
  for label, values in groupby(filtered_data, key=lambda x: x[0]):
253
300
  values = [int(x[1]) for x in values]
254
- details["chain_range"][label] = (min(values), max(values))
301
+ metadata["chain_range"][label] = (min(values), max(values))
255
302
 
256
- return details
303
+ return metadata
257
304
 
258
305
  @classmethod
259
306
  def from_file(
@@ -264,12 +311,18 @@ class Structure:
264
311
  filter_by_residues: set = None,
265
312
  ) -> "Structure":
266
313
  """
267
- Reads in an mmcif or pdb file and converts it into class instance.
314
+ Reads an atomic structure file into a :py:class:`Structure` instance.
268
315
 
269
316
  Parameters
270
317
  ----------
271
318
  filename : str
272
- Path to the mmcif or pdb file.
319
+ Input file. Supported extensions are:
320
+
321
+ +------+-------------------------------------------------------------+
322
+ | .pdb | Reads a PDB file |
323
+ +------+-------------------------------------------------------------+
324
+ | .cif | Reads an mmCIF file |
325
+ +------+-------------------------------------------------------------+
273
326
  keep_non_atom_records : bool, optional
274
327
  Whether to keep residues that are not labelled ATOM.
275
328
  filter_by_elements: set, optional
@@ -280,12 +333,34 @@ class Structure:
280
333
  Raises
281
334
  ------
282
335
  ValueError
283
- If the extension is not '.pdb' or '.cif'.
336
+ If the extension is not supported.
284
337
 
285
338
  Returns
286
339
  -------
287
- Structure
288
- Read in structure file.
340
+ :py:class:`Structure`
341
+ Structure instance representing the read in file.
342
+
343
+ Examples
344
+ --------
345
+ >>> from importlib_resources import files
346
+ >>> from tme import Structure
347
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
348
+ >>> structure = Structure.from_file(filename=fname)
349
+ >>> structure
350
+ Unique Chains: A-B, Atom Range: 1-1564 [N = 1564], Residue Range: 142-239 [N = 1564]
351
+
352
+ We can include non ATOM entries and restrict the considered elements
353
+ and residues
354
+
355
+ >>> structure = Structure.from_file(
356
+ >>> filename=fname,
357
+ >>> keep_non_atom_records=True,
358
+ >>> filter_by_elements = {"C"},
359
+ >>> filter_by_residues = {"GLY"},
360
+ >>> )
361
+ >>> structure
362
+ Unique Chains: A,B, Atom Range: 96-1461 [N = 44], Residue Range: 154-228 [N = 44]
363
+
289
364
  """
290
365
  _, file_extension = splitext(basename(filename.upper()))
291
366
  if file_extension == ".PDB":
@@ -313,14 +388,14 @@ class Structure:
313
388
  keep = np.logical_and(keep, data["record_type"] == "ATOM")
314
389
 
315
390
  for key in data:
316
- if key == "details":
391
+ if key == "metadata":
317
392
  continue
318
- if type(data[key]) == np.ndarray:
393
+ if isinstance(data[key], np.ndarray):
319
394
  data[key] = data[key][keep]
320
395
  else:
321
396
  data[key] = [x for x, flag in zip(data[key], keep) if flag]
322
397
 
323
- data["details"]["filepath"] = filename
398
+ data["metadata"]["filepath"] = filename
324
399
 
325
400
  return cls(**data)
326
401
 
@@ -367,12 +442,12 @@ class Structure:
367
442
  out_data = [
368
443
  x.strip() for x in result["atom_site"].get(atom_site_key, ["."])
369
444
  ]
370
- if dtype == int:
445
+ if dtype is int:
371
446
  out_data = [0 if x == "." else int(x) for x in out_data]
372
447
  try:
373
448
  out[out_key] = np.asarray(out_data).astype(dtype)
374
449
  except ValueError:
375
- default = ["."] if dtype == str else 0
450
+ default = ["."] if dtype is str else 0
376
451
  print(f"Converting {out_key} to {dtype} failed, set to {default}.")
377
452
  out[out_key] = np.repeat(default, len(out_data)).astype(dtype)
378
453
 
@@ -382,7 +457,7 @@ class Structure:
382
457
  continue
383
458
  out[key] = np.repeat(value, number_entries // value.size)
384
459
 
385
- out["details"] = {}
460
+ out["metadata"] = {}
386
461
  out["atom_coordinate"] = np.transpose(
387
462
  np.array(
388
463
  [
@@ -405,7 +480,7 @@ class Structure:
405
480
  for out_key, (base_key, inner_key, default) in detail_mapping.items():
406
481
  if base_key not in result:
407
482
  continue
408
- out["details"][out_key] = result[base_key].get(inner_key, default)
483
+ out["metadata"][out_key] = result[base_key].get(inner_key, default)
409
484
 
410
485
  return out
411
486
 
@@ -446,15 +521,15 @@ class Structure:
446
521
  "charge": ("charge", str),
447
522
  }
448
523
 
449
- out = {"details": result["details"]}
524
+ out = {"metadata": result["details"]}
450
525
  for out_key, (inner_key, dtype) in atom_site_mapping.items():
451
526
  out_data = [x.strip() for x in result[inner_key]]
452
- if dtype == int:
527
+ if dtype is int:
453
528
  out_data = [0 if x == "." else int(x) for x in out_data]
454
529
  try:
455
530
  out[out_key] = np.asarray(out_data).astype(dtype)
456
531
  except ValueError:
457
- default = "." if dtype == str else 0
532
+ default = "." if dtype is str else 0
458
533
  print(
459
534
  f"Converting {out_key} to {dtype} failed. Setting {out_key} to {default}."
460
535
  )
@@ -466,19 +541,35 @@ class Structure:
466
541
 
467
542
  def to_file(self, filename: str) -> None:
468
543
  """
469
- Writes the Structure instance data to a Protein Data Bank (PDB) or
470
- macromolecular Crystallographic Information File (mmCIF) file depending
471
- one whether filename ends with '.pdb' or '.cif'.
544
+ Writes the :py:class:`Structure` instance to disk.
545
+
546
+ Parameters
547
+ ----------
548
+ filename : str
549
+ The name of the file to be created. Supported extensions are
550
+
551
+ +------+-------------------------------------------------------------+
552
+ | .pdb | Creates a PDB file |
553
+ +------+-------------------------------------------------------------+
554
+ | .cif | Creates an mmCIF file |
555
+ +------+-------------------------------------------------------------+
472
556
 
473
557
  Raises
474
558
  ------
475
559
  ValueError
476
- If the extension is not '.pdb' or '.cif'.
560
+ If the extension is not supported.
561
+
562
+ Examples
563
+ --------
564
+ >>> from importlib_resources import files
565
+ >>> from tempfile import NamedTemporaryFile
566
+ >>> from tme import Structure
567
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
568
+ >>> oname = NamedTemporaryFile().name
569
+ >>> structure = Structure.from_file(filename=fname)
570
+ >>> structure.to_file(f"{oname}.cif") # Writes an mmCIF file to disk
571
+ >>> structure.to_file(f"{oname}.pdb") # Writes a PDB file to disk
477
572
 
478
- Parameters
479
- ----------
480
- filename : str
481
- The filename of the file to write.
482
573
  """
483
574
  if np.any(np.vectorize(len)(self.chain_identifier) > 2):
484
575
  warnings.warn("Chain identifiers longer than one will be shortened.")
@@ -595,7 +686,7 @@ class Structure:
595
686
  data["pdbx_PDB_model_num"].append(str(model_num))
596
687
 
597
688
  output_data = {"atom_site": data}
598
- original_file = self.details.get("filepath", "")
689
+ original_file = self.metadata.get("filepath", "")
599
690
  try:
600
691
  new_data = {k: v for k, v in MMCIFParser(original_file).items()}
601
692
  index = self.atom_serial_number - 1
@@ -622,7 +713,18 @@ class Structure:
622
713
  else:
623
714
  ret += "loop_\n"
624
715
  ret += "".join([f"_{category}.{k}\n" for k in subdict])
625
- padded_subdict = _format_mmcif_colunns(subdict)
716
+
717
+ subdict = {
718
+ k: [_format_string(s) for s in v] for k, v in subdict.items()
719
+ }
720
+ key_length = {
721
+ key: len(max(value, key=lambda x: len(x), default=""))
722
+ for key, value in subdict.items()
723
+ }
724
+ padded_subdict = {
725
+ key: [s.ljust(key_length[key] + 1) for s in values]
726
+ for key, values in subdict.items()
727
+ }
626
728
 
627
729
  data = [
628
730
  "".join([str(x) for x in content])
@@ -646,8 +748,23 @@ class Structure:
646
748
 
647
749
  Returns
648
750
  -------
649
- Structure
650
- A subset of the original structure containing only the specified chain.
751
+ :py:class:`Structure`
752
+ A subset of the class instance containing only the specified chains.
753
+
754
+ Raises
755
+ ------
756
+ ValueError
757
+ If none of the specified chains exist.
758
+
759
+ Examples
760
+ --------
761
+ >>> from importlib_resources import files
762
+ >>> from tme import Structure
763
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
764
+ >>> structure = Structure.from_file(filename=fname)
765
+ >>> structure.subset_by_chain(chain="A") # Keep A
766
+ >>> structure.subset_by_chain(chain="A,B") # Keep A and B
767
+ >>> structure.subset_by_chain(chain="B,C") # Keep B, C does not exist
651
768
  """
652
769
  chain = np.unique(self.chain_identifier) if chain is None else chain.split(",")
653
770
  keep = np.in1d(self.chain_identifier, chain)
@@ -666,10 +783,8 @@ class Structure:
666
783
  ----------
667
784
  start : int
668
785
  The starting residue sequence number.
669
-
670
786
  stop : int
671
787
  The ending residue sequence number.
672
-
673
788
  chain : str, optional
674
789
  The chain identifier. If multiple chains should be selected they need
675
790
  to be a comma separated string, e.g. 'A,B,CE'. If chain None,
@@ -677,8 +792,21 @@ class Structure:
677
792
 
678
793
  Returns
679
794
  -------
680
- Structure
795
+ :py:class:`Structure`
681
796
  A subset of the original structure within the specified residue range.
797
+
798
+ Raises
799
+ ------
800
+ ValueError
801
+ If none of the specified residue chain combinations exist.
802
+
803
+ Examples
804
+ --------
805
+ >>> from importlib_resources import files
806
+ >>> from tme import Structure
807
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
808
+ >>> structure = Structure.from_file(filename=fname)
809
+ >>> structure.subset_by_range(chain="A",start=150,stop=180)
682
810
  """
683
811
  ret = self.subset_by_chain(chain=chain)
684
812
  keep = np.logical_and(
@@ -694,6 +822,15 @@ class Structure:
694
822
  -------
695
823
  NDArray
696
824
  The center of mass of the structure.
825
+
826
+ Examples
827
+ --------
828
+ >>> from importlib_resources import files
829
+ >>> from tme import Structure
830
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
831
+ >>> structure = Structure.from_file(filename=fname)
832
+ >>> structure.center_of_mass()
833
+ array([-0.89391639, 29.94908928, -2.64736741])
697
834
  """
698
835
  weights = [self._elements[atype].atomic_weight for atype in self.element_symbol]
699
836
  return np.dot(self.atom_coordinate.T, weights) / np.sum(weights)
@@ -719,7 +856,19 @@ class Structure:
719
856
  Returns
720
857
  -------
721
858
  Structure
722
- The transformed instance of :py:class:`tme.structure.Structure`.
859
+ The transformed instance of :py:class:`Structure`.
860
+
861
+ Examples
862
+ --------
863
+ >>> from importlib_resources import files
864
+ >>> from tme import Structure
865
+ >>> from tme.matching_utils import get_rotation_matrices
866
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
867
+ >>> structure = Structure.from_file(filename=fname)
868
+ >>> structure.rigid_transform(
869
+ >>> rotation_matrix = get_rotation_matrices(60)[2],
870
+ >>> translation = (0, 1, -5)
871
+ >>> )
723
872
  """
724
873
  out = np.empty_like(self.atom_coordinate.T)
725
874
  rigid_transform(
@@ -747,7 +896,17 @@ class Structure:
747
896
 
748
897
  See Also
749
898
  --------
750
- :py:meth:`tme.Density.centered`
899
+ :py:meth:`tme.density.Density.centered`
900
+
901
+ Examples
902
+ --------
903
+ >>> from importlib_resources import files
904
+ >>> from tme import Structure
905
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
906
+ >>> structure = Structure.from_file(filename=fname)
907
+ >>> centered_structure, translation = structure.centered()
908
+ >>> translation
909
+ array([34.89391639, 4.05091072, 36.64736741])
751
910
  """
752
911
  center_of_mass = self.center_of_mass()
753
912
  enclosing_box = minimum_enclosing_box(coordinates=self.atom_coordinate.T)
@@ -772,10 +931,8 @@ class Structure:
772
931
  ----------
773
932
  shape : Tuple[int,]
774
933
  The desired shape of the output array.
775
-
776
934
  sampling_rate : float
777
935
  The sampling rate of the output array in unit of self.atom_coordinate.
778
-
779
936
  origin : Tuple[float,]
780
937
  The origin of the coordinate system.
781
938
 
@@ -812,11 +969,11 @@ class Structure:
812
969
  positions = positions[valid_positions == positions.shape[1], :]
813
970
  atom_types = atom_types[valid_positions == positions.shape[1]]
814
971
 
815
- self.details["nAtoms_outOfBound"] = 0
972
+ self.metadata["nAtoms_outOfBound"] = 0
816
973
  if positions.shape[0] != coordinates.shape[0]:
817
974
  out_of_bounds = coordinates.shape[0] - positions.shape[0]
818
975
  print(f"{out_of_bounds}/{coordinates.shape[0]} atoms were out of bounds.")
819
- self.details["nAtoms_outOfBound"] = out_of_bounds
976
+ self.metadata["nAtoms_outOfBound"] = out_of_bounds
820
977
 
821
978
  return positions, atom_types, shape, sampling_rate, origin
822
979
 
@@ -834,14 +991,11 @@ class Structure:
834
991
  ----------
835
992
  positions : Tuple[float, float, float]
836
993
  The positions of the atoms.
837
-
838
994
  atoms : Tuple[str]
839
995
  The types of the atoms.
840
-
841
996
  sampling_rate : float
842
997
  The desired sampling rate in unit of self.atom_coordinate of the
843
998
  output array.
844
-
845
999
  volume : NDArray
846
1000
  The volume to update.
847
1001
  """
@@ -903,7 +1057,7 @@ class Structure:
903
1057
  volume : NDArray
904
1058
  The volume to update.
905
1059
  lowpass_filter : NDArray
906
- Whether the scattering factors hsould be lowpass filtered.
1060
+ Whether the scattering factors should be lowpass filtered.
907
1061
  downsampling_factor : NDArray
908
1062
  Downsampling factor for scattering factor computation.
909
1063
  source : str
@@ -947,6 +1101,75 @@ class Structure:
947
1101
  scattering_profiles[atoms[atom_index]](distances),
948
1102
  )
949
1103
 
1104
+ @staticmethod
1105
+ def _position_to_molmap(
1106
+ positions: NDArray,
1107
+ weights: Tuple[float],
1108
+ resolution: float = 4,
1109
+ sigma_factor: float = 1 / (np.pi * np.sqrt(2)),
1110
+ cutoff_value: float = 4.0,
1111
+ sampling_rate: float = None,
1112
+ ) -> NDArray:
1113
+ """
1114
+ Simulates electron densities analogous to Chimera's molmap function [1]_.
1115
+
1116
+ Parameters
1117
+ ----------
1118
+ positions : NDArray
1119
+ Array containing atomic positions in z,y,x format (n,d).
1120
+ weights : [float]
1121
+ The weights to use for the entries in positions.
1122
+ resolution : float
1123
+ The product of resolution and sigma_factor gives the sigma used to
1124
+ compute the discretized Gaussian.
1125
+ sigma_factor : float
1126
+ The factor used with resolution to compute sigma. Default is 1 / (π√2).
1127
+ cutoff_value : float
1128
+ The cutoff value for the Gaussian kernel. Default is 4.0.
1129
+ sampling_rate : float
1130
+ Sampling rate along each dimension. One third of resolution by default.
1131
+
1132
+ References
1133
+ ----------
1134
+ .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/midas/molmap.html
1135
+
1136
+ Returns
1137
+ -------
1138
+ NDArray
1139
+ A numpy array containing the simulated electron densities.
1140
+ """
1141
+ if sampling_rate is None:
1142
+ sampling_rate = resolution / 3
1143
+
1144
+ pad = int(3 * resolution)
1145
+ sigma = sigma_factor * resolution
1146
+ sigma_grid = sigma / sampling_rate
1147
+
1148
+ # Limit padding to numerically stable values
1149
+ smax = np.max(sigma_grid)
1150
+ arr = np.arange(0, pad)
1151
+ gaussian = (
1152
+ np.exp(-0.5 * (arr / smax) ** 2)
1153
+ * np.power(2 * np.pi, -1.5)
1154
+ * np.power(sigma, -3.0)
1155
+ )
1156
+ pad_cutoff = np.max(arr[gaussian > 1e-8])
1157
+ if arr.size != 0:
1158
+ pad = int(pad_cutoff) + 1
1159
+
1160
+ positions = positions[:, ::-1]
1161
+ origin = positions.min(axis=0) - pad * sampling_rate
1162
+ positions = np.rint(np.divide((positions - origin), sampling_rate)).astype(int)
1163
+ shape = positions.max(axis=0).astype(int) + pad + 1
1164
+
1165
+ out = np.zeros(shape, dtype=np.float32)
1166
+ np.add.at(out, tuple(positions.T), weights)
1167
+
1168
+ out = Preprocessor().gaussian_filter(
1169
+ template=out, sigma=sigma_grid, cutoff_value=cutoff_value
1170
+ )
1171
+ return out, origin
1172
+
950
1173
  def _get_atom_weights(
951
1174
  self, atoms: Tuple[str] = None, weight_type: str = "atomic_weight"
952
1175
  ) -> Tuple[float]:
@@ -983,41 +1206,77 @@ class Structure:
983
1206
  def to_volume(
984
1207
  self,
985
1208
  shape: Tuple[int] = None,
986
- sampling_rate: NDArray = None,
1209
+ sampling_rate: Tuple[float] = None,
987
1210
  origin: Tuple[float] = None,
988
1211
  chain: str = None,
989
1212
  weight_type: str = "atomic_weight",
990
- scattering_args: Dict = dict(),
991
- ) -> Tuple[NDArray, Tuple[int], NDArray]:
1213
+ weight_type_args: Dict = dict(),
1214
+ ) -> Tuple[NDArray, NDArray, NDArray]:
992
1215
  """
993
- Converts atom coordinates of shape [n x 3] x, y, z to a volume with
994
- index z, y, x.
1216
+ Maps class instance to a volume.
995
1217
 
996
1218
  Parameters
997
1219
  ----------
998
- shape : Tuple[int, ...], optional
999
- Desired shape of the output array. If shape is given its expected to be
1000
- in z, y, x form.
1001
- sampling_rate : float, optional
1002
- Sampling rate of the output array in the unit of self.atom_coordinate
1003
- origin : Tuple[float, ...], optional
1004
- Origin of the coordinate system. If origin is given its expected to be
1005
- in z, y, x form.
1220
+ shape : tuple of ints, optional
1221
+ Output array shape in (z,y,x) form.
1222
+ sampling_rate : tuple of float, optional
1223
+ Sampling rate of the output array in units of
1224
+ :py:attr:`Structure.atom_coordinate`
1225
+ origin : tuple of floats, optional
1226
+ Origin of the coordinate system in (z,y,x) form.
1006
1227
  chain : str, optional
1007
- The chain identifier. If multiple chains should be selected they need
1008
- to be a comma separated string, e.g. 'A,B,CE'. If chain None,
1009
- all chains are returned. Default is None.
1228
+ Chain identifier. Either a single chain or a comma separated string of chains.
1229
+ Defaults to None which returns all chains.
1010
1230
  weight_type : str, optional
1011
- Which weight should be given to individual atoms.
1012
- scattering_args : dict, optional
1013
- Additional arguments for scattering factor computation.
1231
+ Weight given to individual atoms. Supported weights are:
1232
+
1233
+ +----------------------------+---------------------------------------+
1234
+ | atomic_weight | Using element unit point mass |
1235
+ +----------------------------+---------------------------------------+
1236
+ | atomic_number | Using atomic number point mass |
1237
+ +----------------------------+---------------------------------------+
1238
+ | gaussian | Represent atoms as isotropic Gaussian |
1239
+ +----------------------------+---------------------------------------+
1240
+ | van_der_waals_radius | Using binary van der Waals spheres |
1241
+ +----------------------------+---------------------------------------+
1242
+ | scattering_factors | Using experimental scattering factors |
1243
+ +----------------------------+---------------------------------------+
1244
+ | lowpass_scattering_factors | Lowpass filtered scattering_factors |
1245
+ +----------------------------+---------------------------------------+
1246
+ weight_type_args : dict, optional
1247
+ Additional arguments used for individual weight_types. `gaussian`
1248
+ accepts ``resolution``, `scattering` accepts ``method``.
1014
1249
 
1015
1250
  Returns
1016
1251
  -------
1017
- Tuple[NDArray, Tuple[int], NDArray]
1018
- The volume, its origin and the voxel size in Ångstrom.
1252
+ Tuple[NDArray, NDArray, NDArray]
1253
+ Volume, origin and sampling_rate.
1254
+
1255
+ Examples
1256
+ --------
1257
+ >>> from importlib_resources import files
1258
+ >>> from tme import Structure
1259
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
1260
+ >>> structure = Structure.from_file(filename=fname)
1261
+ >>> vol, origin, sampling = structure.to_volume()
1262
+ >>> vol.shape, origin, sampling
1263
+ ((59, 35, 53), array([-30.71, 12.42, -27.15]), array([1., 1., 1.]))
1264
+ >>> vol, origin, sampling = structure.to_volume(sampling_rate=(2.2,1,3))
1265
+ ((27, 35, 18), array([-30.71, 12.42, -27.15]), array([2.2, 1. , 3. ]))
1266
+
1267
+ ``sampling_rate`` and ``origin`` can be set to ensure correct alignment
1268
+ with corresponding density maps such as the ones at EMDB. Analogous to
1269
+ :py:meth:`Structure.subset_by_chain` only parts of the structure can be
1270
+ mapped onto grids using a variety of weighting schemes
1271
+
1272
+ >>> structure.to_volume(weight_type="van_der_waals_radius")
1273
+ >>> structure.to_volume(
1274
+ >>> weight_type="lowpass_scattering_factors",
1275
+ >>> weight_type_args={"source" : "dt1969", "downsampling_factor" : 1.35},
1276
+ >>> )
1019
1277
  """
1020
1278
  _weight_types = {
1279
+ "gaussian",
1021
1280
  "atomic_weight",
1022
1281
  "atomic_number",
1023
1282
  "van_der_waals_radius",
@@ -1038,11 +1297,8 @@ class Structure:
1038
1297
  "sampling_rate should either be single value of array with"
1039
1298
  f"size {self.atom_coordinate.shape[1]}."
1040
1299
  )
1041
- if "source" not in scattering_args:
1042
- scattering_args["source"] = "peng1995"
1043
1300
 
1044
1301
  temp = self.subset_by_chain(chain=chain)
1045
-
1046
1302
  positions, atoms, shape, sampling_rate, origin = temp._coordinate_to_position(
1047
1303
  shape=shape, sampling_rate=sampling_rate, origin=origin
1048
1304
  )
@@ -1059,7 +1315,7 @@ class Structure:
1059
1315
  sampling_rate,
1060
1316
  volume,
1061
1317
  lowpass_filter=False,
1062
- **scattering_args,
1318
+ **weight_type_args,
1063
1319
  )
1064
1320
  elif weight_type == "lowpass_scattering_factors":
1065
1321
  self._position_to_scattering_factors(
@@ -1068,10 +1324,19 @@ class Structure:
1068
1324
  sampling_rate,
1069
1325
  volume,
1070
1326
  lowpass_filter=True,
1071
- **scattering_args,
1327
+ **weight_type_args,
1328
+ )
1329
+ elif weight_type == "gaussian":
1330
+ volume, origin = self._position_to_molmap(
1331
+ positions=temp.atom_coordinate,
1332
+ weights=temp._get_atom_weights(
1333
+ atoms=atoms, weight_type="atomic_number"
1334
+ ),
1335
+ sampling_rate=sampling_rate,
1336
+ **weight_type_args,
1072
1337
  )
1073
1338
 
1074
- self.details.update(temp.details)
1339
+ self.metadata.update(temp.metadata)
1075
1340
  return volume, origin, sampling_rate
1076
1341
 
1077
1342
  @classmethod
@@ -1084,32 +1349,41 @@ class Structure:
1084
1349
  weighted: bool = False,
1085
1350
  ) -> float:
1086
1351
  """
1087
- Compute root mean square deviation (RMSD) between two structures.
1088
-
1089
- Both structures need to have the same number of atoms. In practice, this means
1090
- that *structure2* is a transformed version of *structure1*
1352
+ Compute root mean square deviation (RMSD) between two structures with the
1353
+ same number of atoms.
1091
1354
 
1092
1355
  Parameters
1093
1356
  ----------
1094
- structure1 : Structure
1095
- Structure 1.
1096
-
1097
- structure2 : Structure
1098
- Structure 2.
1099
-
1100
- origin : NDArray, optional
1101
- Origin of the structure coordinate system.
1102
-
1103
- sampling_rate : float, optional
1104
- Sampling rate if discretized on a grid in the unit of self.atom_coordinate.
1105
-
1357
+ structure1, structure2 : :py:class:`Structure`
1358
+ Structure instances to compare.
1359
+ origin : tuple of floats, optional
1360
+ Coordinate system origin. For computing RMSD on discretized grids.
1361
+ sampling_rate : tuple of floats, optional
1362
+ Sampling rate in units of :py:attr:`atom_coordinate`.
1363
+ For computing RMSD on discretized grids.
1106
1364
  weighted : bool, optional
1107
- Whether atoms should be weighted by their atomic weight.
1365
+ Whether atoms should be weighted according to their atomic weight.
1108
1366
 
1109
1367
  Returns
1110
1368
  -------
1111
1369
  float
1112
- Root Mean Square Deviation (RMSD)
1370
+ Root Mean Square Deviation between input structures.
1371
+
1372
+ Examples
1373
+ --------
1374
+ >>> from importlib_resources import files
1375
+ >>> from tme.matching_utils import get_rotation_matrices
1376
+ >>> from tme import Structure
1377
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
1378
+ >>> structure = Structure.from_file(filename=fname)
1379
+ >>> transformed = structure.rigid_transform(
1380
+ >>> rotation_matrix = get_rotation_matrices(60)[2],
1381
+ >>> translation = (0, 1, -5)
1382
+ >>> )
1383
+ >>> Structure.compare_structures(structure, transformed)
1384
+ 31.35238
1385
+ >>> Structure.compare_structures(structure, structure)
1386
+ 0.0
1113
1387
  """
1114
1388
  if origin is None:
1115
1389
  origin = np.zeros(structure1.atom_coordinate.shape[1])
@@ -1118,14 +1392,18 @@ class Structure:
1118
1392
  coordinates2 = structure2.atom_coordinate
1119
1393
  atoms1, atoms2 = structure1.element_symbol, structure2.element_symbol
1120
1394
  if sampling_rate is not None:
1121
- coordinates1 = np.rint((coordinates1 - origin) / sampling_rate).astype(int)
1122
- coordinates2 = np.rint((coordinates2 - origin) / sampling_rate).astype(int)
1395
+ coordinates1 = np.rint(
1396
+ np.divide(np.subtract(coordinates1, origin), sampling_rate)
1397
+ ).astype(int)
1398
+ coordinates2 = np.rint(
1399
+ np.divide(np.subtract(coordinates2, origin), sampling_rate)
1400
+ ).astype(int)
1123
1401
 
1124
- weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
1125
- weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
1126
- if not weighted:
1127
- weights1 = np.ones_like(weights1)
1128
- weights2 = np.ones_like(weights2)
1402
+ weights1 = np.ones_like(structure1.atom_coordinate.shape[0])
1403
+ weights2 = np.ones_like(structure2.atom_coordinate.shape[0])
1404
+ if weighted:
1405
+ weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
1406
+ weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
1129
1407
 
1130
1408
  if not np.allclose(coordinates1.shape, coordinates2.shape):
1131
1409
  raise ValueError(
@@ -1150,35 +1428,41 @@ class Structure:
1150
1428
  weighted: bool = False,
1151
1429
  ) -> Tuple["Structure", float]:
1152
1430
  """
1153
- Align the atom coordinates of structure2 to structure1 using
1154
- the Kabsch algorithm.
1155
-
1156
- Both structures need to have the same number of atoms. In practice, this means
1157
- that *structure2* is a subset of *structure1*
1431
+ Align ``structure2`` to ``structure1`` using the Kabsch Algorithm. Both
1432
+ structures need to have the same number of atoms.
1158
1433
 
1159
1434
  Parameters
1160
1435
  ----------
1161
- structure1 : Structure
1162
- Structure 1.
1163
-
1164
- structure2 : Structure
1165
- Structure 2.
1166
-
1167
- origin : NDArray, optional
1168
- Origin of the structure coordinate system.
1169
-
1170
- sampling_rate : float, optional
1171
- Voxel size if discretized on a grid.
1172
-
1436
+ structure1, structure2 : :py:class:`Structure`
1437
+ Structure instances to align.
1438
+ origin : tuple of floats, optional
1439
+ Coordinate system origin. For computing RMSD on discretized grids.
1440
+ sampling_rate : tuple of floats, optional
1441
+ Sampling rate in units of :py:attr:`atom_coordinate`.
1442
+ For computing RMSD on discretized grids.
1173
1443
  weighted : bool, optional
1174
1444
  Whether atoms should be weighted by their atomic weight.
1175
1445
 
1176
1446
  Returns
1177
1447
  -------
1178
- Structure
1179
- *structure2* aligned to *structure1*.
1448
+ :py:class:`Structure`
1449
+ ``structure2`` aligned to ``structure1``.
1180
1450
  float
1181
- Root Mean Square Error (RMSE)
1451
+ Alignment RMSD
1452
+
1453
+ Examples
1454
+ --------
1455
+ >>> from importlib_resources import files
1456
+ >>> from tme import Structure
1457
+ >>> from tme.matching_utils import get_rotation_matrices
1458
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
1459
+ >>> structure = Structure.from_file(filename=fname)
1460
+ >>> transformed = structure.rigid_transform(
1461
+ >>> rotation_matrix = get_rotation_matrices(60)[2],
1462
+ >>> translation = (0, 1, -5)
1463
+ >>> )
1464
+ >>> aligned, rmsd = Structure.align_structures(structure, transformed)
1465
+ Initial RMSD: 31.07189 - Final RMSD: 0.00000
1182
1466
  """
1183
1467
  if origin is None:
1184
1468
  origin = np.minimum(
@@ -1240,9 +1524,7 @@ class Structure:
1240
1524
 
1241
1525
  @dataclass(frozen=True, repr=True)
1242
1526
  class Elements:
1243
- """
1244
- Lookup table containing information on chemical elements.
1245
- """
1527
+ """Lookup table for chemical elements."""
1246
1528
 
1247
1529
  Atom = namedtuple(
1248
1530
  "Atom",
@@ -1385,12 +1667,33 @@ class Elements:
1385
1667
  Parameters
1386
1668
  ----------
1387
1669
  key : str
1388
- The key to use for retrieving the corresponding value from
1389
- the internal data.
1670
+ Key to retrieve the corresponding value for.
1390
1671
 
1391
1672
  Returns
1392
1673
  -------
1393
- value
1394
- The value associated with the provided key in the internal data.
1674
+ namedtuple
1675
+ The Atom tuple associated with the provided key.
1395
1676
  """
1396
1677
  return self._elements.get(key, self._default)
1678
+
1679
+
1680
+ def _format_string(string: str) -> str:
1681
+ """
1682
+ Formats a string by adding quotation marks if it contains white spaces.
1683
+
1684
+ Parameters
1685
+ ----------
1686
+ string : str
1687
+ Input string to be formatted.
1688
+
1689
+ Returns
1690
+ -------
1691
+ str
1692
+ Formatted string with added quotation marks if needed.
1693
+ """
1694
+ if " " in string:
1695
+ return f"'{string}'"
1696
+ # Occurs e.g. for C1' atoms. The trailing whitespace is necessary.
1697
+ if string.count("'") == 1:
1698
+ return f'"{string}"'
1699
+ return string