pytme 0.2.0b0__cp311-cp311-macosx_14_0_arm64.whl → 0.2.2__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. pytme-0.2.2.data/scripts/match_template.py +1187 -0
  2. {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/postprocess.py +170 -71
  3. {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/preprocessor_gui.py +179 -86
  4. pytme-0.2.2.dist-info/METADATA +91 -0
  5. pytme-0.2.2.dist-info/RECORD +74 -0
  6. {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/WHEEL +1 -1
  7. scripts/extract_candidates.py +126 -87
  8. scripts/match_template.py +596 -209
  9. scripts/match_template_filters.py +571 -223
  10. scripts/postprocess.py +170 -71
  11. scripts/preprocessor_gui.py +179 -86
  12. scripts/refine_matches.py +567 -159
  13. tme/__init__.py +0 -1
  14. tme/__version__.py +1 -1
  15. tme/analyzer.py +627 -855
  16. tme/backends/__init__.py +41 -11
  17. tme/backends/_jax_utils.py +185 -0
  18. tme/backends/cupy_backend.py +120 -225
  19. tme/backends/jax_backend.py +282 -0
  20. tme/backends/matching_backend.py +464 -388
  21. tme/backends/mlx_backend.py +45 -68
  22. tme/backends/npfftw_backend.py +256 -514
  23. tme/backends/pytorch_backend.py +41 -154
  24. tme/density.py +312 -421
  25. tme/extensions.cpython-311-darwin.so +0 -0
  26. tme/matching_data.py +366 -303
  27. tme/matching_exhaustive.py +279 -1521
  28. tme/matching_optimization.py +234 -129
  29. tme/matching_scores.py +884 -0
  30. tme/matching_utils.py +281 -387
  31. tme/memory.py +377 -0
  32. tme/orientations.py +226 -66
  33. tme/parser.py +3 -4
  34. tme/preprocessing/__init__.py +2 -0
  35. tme/preprocessing/_utils.py +217 -0
  36. tme/preprocessing/composable_filter.py +31 -0
  37. tme/preprocessing/compose.py +55 -0
  38. tme/preprocessing/frequency_filters.py +388 -0
  39. tme/preprocessing/tilt_series.py +1011 -0
  40. tme/preprocessor.py +574 -530
  41. tme/structure.py +495 -189
  42. tme/types.py +5 -3
  43. pytme-0.2.0b0.data/scripts/match_template.py +0 -800
  44. pytme-0.2.0b0.dist-info/METADATA +0 -73
  45. pytme-0.2.0b0.dist-info/RECORD +0 -66
  46. tme/helpers.py +0 -881
  47. tme/matching_constrained.py +0 -195
  48. {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/estimate_ram_usage.py +0 -0
  49. {pytme-0.2.0b0.data → pytme-0.2.2.data}/scripts/preprocess.py +0 -0
  50. {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/LICENSE +0 -0
  51. {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/entry_points.txt +0 -0
  52. {pytme-0.2.0b0.dist-info → pytme-0.2.2.dist-info}/top_level.txt +0 -0
tme/structure.py CHANGED
@@ -6,103 +6,159 @@
6
6
  """
7
7
  import warnings
8
8
  from copy import deepcopy
9
- from collections import namedtuple
10
- from typing import List, Dict, Tuple
11
9
  from itertools import groupby
12
10
  from dataclasses import dataclass
11
+ from collections import namedtuple
12
+ from typing import List, Dict, Tuple
13
13
  from os.path import splitext, basename
14
14
 
15
15
  import numpy as np
16
16
 
17
- from .parser import PDBParser, MMCIFParser
18
- from .matching_utils import (
19
- rigid_transform,
20
- _format_mmcif_colunns,
21
- minimum_enclosing_box,
22
- )
23
- from .helpers import atom_profile
24
17
  from .types import NDArray
18
+ from .preprocessor import atom_profile, Preprocessor
19
+ from .parser import PDBParser, MMCIFParser
20
+ from .matching_utils import rigid_transform, minimum_enclosing_box
25
21
 
26
22
 
27
23
  @dataclass(repr=False)
28
24
  class Structure:
29
25
  """
30
- Represents atomic structures in accordance with the Protein Data Bank (PDB)
31
- format specification.
26
+ Represents atomic structures per the Protein Data Bank (PDB) specification.
27
+
28
+ Examples
29
+ --------
30
+ The following defines a :py:class:`Structure` instance
31
+
32
+ >>> from tme import Structure
33
+ >>> structure = Structure(
34
+ >>> record_type=["ATOM", "ATOM", "ATOM"],
35
+ >>> atom_serial_number=[0, 1, 2] ,
36
+ >>> atom_name=["C", "N", "H"],
37
+ >>> atom_coordinate=[[30,15,10], [35, 20, 15], [35,25,20]],
38
+ >>> alternate_location_indicator=[".", ".", "."],
39
+ >>> residue_name=["GLY", "GLY", "HIS"],
40
+ >>> chain_identifier=["A", "A", "B"],
41
+ >>> residue_sequence_number=[0, 0, 1],
42
+ >>> code_for_residue_insertion=["?", "?", "?"],
43
+ >>> occupancy=[0, 0, 0],
44
+ >>> temperature_factor=[0, 0, 0],
45
+ >>> segment_identifier=["1", "1", "1"],
46
+ >>> element_symbol=["C", "N", "C"],
47
+ >>> charge=["?", "?", "?"],
48
+ >>> metadata={},
49
+ >>> )
50
+ >>> structure
51
+ Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
52
+
53
+ :py:class:`Structure` instances support a range of subsetting operations based on
54
+ atom indices
55
+
56
+ >>> structure[1]
57
+ Unique Chains: A, Atom Range: 1-1 [N = 1], Residue Range: 0-0 [N = 1]
58
+ >>> structure[(False, False, True)]
59
+ Unique Chains: B, Atom Range: 2-2 [N = 1], Residue Range: 1-1 [N = 1]
60
+ >>> structure[(1,2)]
61
+ Unique Chains: A-B, Atom Range: 1-2 [N = 2], Residue Range: 0-1 [N = 2]
62
+
63
+ They can be written to disk in a range of formats using :py:meth:`Structure.to_file`
64
+
65
+ >>> structure.to_file("test.pdb") # Writes a PDB file to disk
66
+ >>> structure.to_file("test.cif") # Writes a mmCIF file to disk
67
+
68
+ New instances can be created from a range of formats using
69
+ :py:meth:`Structure.from_file`
70
+
71
+ >>> Structure.from_file("test.pdb") # Reads PDB file from disk
72
+ Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
73
+ >>> Structure.from_file("test.cif") # Reads mmCIF file from disk
74
+ Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
75
+
76
+ Class instances can be discretized on grids and converted to
77
+ :py:class:`tme.density.Density` instances using :py:meth:`Structure.to_volume`
78
+ or :py:meth:`tme.density.Density.from_structure`.
79
+
80
+ >>> volume, origin, sampling_rate = structure.to_volume(shape=(50,40,30))
32
81
 
33
82
  References
34
83
  ----------
35
- .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
84
+ .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
85
+ .. [2] https://www.ccp4.ac.uk/html/mmcifformat.html
86
+
36
87
  """
37
88
 
38
- #: Return a numpy array with record types, e.g. ATOM, HETATM.
89
+ #: Array of record types, e.g. ATOM.
39
90
  record_type: NDArray
40
91
 
41
- #: Return a numpy array with serial number of each atom.
92
+ #: Array of serial numbers.
42
93
  atom_serial_number: NDArray
43
94
 
44
- #: Return a numpy array with name of each atom.
95
+ #: Array of atom names.
45
96
  atom_name: NDArray
46
97
 
47
- #: Return a numpy array with coordinates of each atom in x, y, z.
98
+ #: Array of x,y,z atom coordinates.
48
99
  atom_coordinate: NDArray
49
100
 
50
- #: Return a numpy array with alternate location indicates of each atom.
101
+ #: Array of alternate location indicators.
51
102
  alternate_location_indicator: NDArray
52
103
 
53
- #: Return a numpy array with originating residue names of each atom.
104
+ #: Array of residue names.
54
105
  residue_name: NDArray
55
106
 
56
- #: Return a numpy array with originating structure chain of each atom.
107
+ #: Array of chain identifiers.
57
108
  chain_identifier: NDArray
58
109
 
59
- #: Return a numpy array with originating residue id of each atom.
110
+ #: Array of residue ids.
60
111
  residue_sequence_number: NDArray
61
112
 
62
- #: Return a numpy array with insertion information d of each atom.
113
+ #: Array of insertion information.
63
114
  code_for_residue_insertion: NDArray
64
115
 
65
- #: Return a numpy array with occupancy factors of each atom.
116
+ #: Array of occupancy factors.
66
117
  occupancy: NDArray
67
118
 
68
- #: Return a numpy array with B-factors for each atom.
119
+ #: Array of B-factors.
69
120
  temperature_factor: NDArray
70
121
 
71
- #: Return a numpy array with segment identifier for each atom.
122
+ #: Array of segment identifiers.
72
123
  segment_identifier: NDArray
73
124
 
74
- #: Return a numpy array with element symbols of each atom.
125
+ #: Array of element symbols.
75
126
  element_symbol: NDArray
76
127
 
77
- #: Return a numpy array with charges of each atom.
128
+ #: Array of charges.
78
129
  charge: NDArray
79
130
 
80
- #: Returns a dictionary with class instance metadata.
81
- details: dict
131
+ #: Metadata dictionary.
132
+ metadata: dict
82
133
 
83
134
  def __post_init__(self, *args, **kwargs):
84
135
  """
85
- Initialize the structure and populate header details.
136
+ Initialize the structure and populate header metadata.
86
137
 
87
138
  Raises
88
139
  ------
89
140
  ValueError
90
- If other NDArray attributes to not match the number of atoms.
91
- If the shape of atom_coordinates and chain_identifier doesn't match.
141
+ If the NDArray attributes do not match the number of atoms.
92
142
  """
93
- self._elements = Elements()
94
- self.details = self._populate_details(self.details)
143
+ for attribute in self.__dict__:
144
+ value = getattr(self, attribute)
145
+ target_type = self.__annotations__.get(attribute, None)
146
+ if target_type == NDArray:
147
+ setattr(self, attribute, np.atleast_1d(np.array(value)))
95
148
 
96
149
  n_atoms = self.atom_coordinate.shape[0]
97
150
  for attribute in self.__dict__:
98
151
  value = getattr(self, attribute)
99
- if type(value) != np.ndarray:
152
+ if not isinstance(value, np.ndarray):
100
153
  continue
101
154
  if value.shape[0] != n_atoms:
102
155
  raise ValueError(
103
156
  f"Expected shape of {attribute}: {n_atoms}, got {value.shape[0]}."
104
157
  )
105
158
 
159
+ self._elements = Elements()
160
+ self.metadata = self._populate_metadata(self.metadata)
161
+
106
162
  def __getitem__(self, indices: List[int]) -> "Structure":
107
163
  """
108
164
  Get a Structure instance for specified indices.
@@ -138,22 +194,17 @@ class Structure:
138
194
  "charge",
139
195
  )
140
196
  kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
141
- ret = self.__class__(**kwargs, details={})
197
+ ret = self.__class__(**kwargs, metadata={})
142
198
  return ret
143
199
 
144
200
  def __repr__(self):
145
201
  """
146
202
  Return a string representation of the Structure.
147
-
148
- Returns
149
- -------
150
- str
151
- The string representation.
152
203
  """
153
204
  unique_chains = "-".join(
154
205
  [
155
206
  ",".join([str(x) for x in entity])
156
- for entity in self.details["unique_chains"]
207
+ for entity in self.metadata["unique_chains"]
157
208
  ]
158
209
  )
159
210
  min_atom = np.min(self.atom_serial_number)
@@ -162,7 +213,7 @@ class Structure:
162
213
 
163
214
  min_residue = np.min(self.residue_sequence_number)
164
215
  max_residue = np.max(self.residue_sequence_number)
165
- n_residue = self.residue_sequence_number.size
216
+ n_residue = np.unique(self.residue_sequence_number).size
166
217
 
167
218
  repr_str = (
168
219
  f"Structure object at {id(self)}\n"
@@ -172,43 +223,39 @@ class Structure:
172
223
  )
173
224
  return repr_str
174
225
 
175
- def get_chains(self) -> List[str]:
176
- """
177
- Returns a list of available chains.
178
-
179
- Returns
180
- -------
181
- list
182
- The list of available chains.
183
- """
184
- return list(self.details["chain_weight"].keys())
185
-
186
226
  def copy(self) -> "Structure":
187
227
  """
188
228
  Returns a copy of the Structure instance.
189
229
 
190
230
  Returns
191
231
  -------
192
- Structure
232
+ :py:class:`Structure`
193
233
  The copied Structure instance.
234
+
235
+ Examples
236
+ --------
237
+ >>> import numpy as np
238
+ >>> structure_copy = structure.copy()
239
+ >>> np.allclose(structure_copy.atom_coordinate, structure.atom_coordinate)
240
+ True
194
241
  """
195
242
  return deepcopy(self)
196
243
 
197
- def _populate_details(self, details: Dict = {}) -> Dict:
244
+ def _populate_metadata(self, metadata: Dict = {}) -> Dict:
198
245
  """
199
- Populate the details dictionary with the data from the Structure instance.
246
+ Populate the metadata dictionary with the data from the Structure instance.
200
247
 
201
248
  Parameters
202
249
  ----------
203
- details : dict, optional
204
- The initial details dictionary, by default {}.
250
+ metadata : dict, optional
251
+ The initial metadata dictionary, by default {}.
205
252
 
206
253
  Returns
207
254
  -------
208
255
  dict
209
- The populated details dictionary.
256
+ The populated metadata dictionary.
210
257
  """
211
- details["weight"] = np.sum(
258
+ metadata["weight"] = np.sum(
212
259
  [self._elements[atype].atomic_weight for atype in self.element_symbol]
213
260
  )
214
261
 
@@ -220,12 +267,12 @@ class Structure:
220
267
  [self._elements[atype].atomic_weight for atype in self.element_symbol],
221
268
  )
222
269
  labels = self.chain_identifier[idx]
223
- details["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
270
+ metadata["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
224
271
 
225
- # Group non-unique chains in separate lists in details["unique_chains"]
226
- details["unique_chains"], temp = [], {}
272
+ # Group non-unique chains in separate lists in metadata["unique_chains"]
273
+ metadata["unique_chains"], temp = [], {}
227
274
  for chain_label in label:
228
- index = len(details["unique_chains"])
275
+ index = len(metadata["unique_chains"])
229
276
  chain_sequence = "".join(
230
277
  [
231
278
  str(y)
@@ -236,10 +283,10 @@ class Structure:
236
283
  )
237
284
  if chain_sequence not in temp:
238
285
  temp[chain_sequence] = index
239
- details["unique_chains"].append([chain_label])
286
+ metadata["unique_chains"].append([chain_label])
240
287
  continue
241
288
  idx = temp.get(chain_sequence)
242
- details["unique_chains"][idx].append(chain_label)
289
+ metadata["unique_chains"][idx].append(chain_label)
243
290
 
244
291
  filtered_data = [
245
292
  (label, integer)
@@ -248,12 +295,12 @@ class Structure:
248
295
  )
249
296
  ]
250
297
  filtered_data = sorted(filtered_data, key=lambda x: x[0])
251
- details["chain_range"] = {}
298
+ metadata["chain_range"] = {}
252
299
  for label, values in groupby(filtered_data, key=lambda x: x[0]):
253
300
  values = [int(x[1]) for x in values]
254
- details["chain_range"][label] = (min(values), max(values))
301
+ metadata["chain_range"][label] = (min(values), max(values))
255
302
 
256
- return details
303
+ return metadata
257
304
 
258
305
  @classmethod
259
306
  def from_file(
@@ -264,12 +311,18 @@ class Structure:
264
311
  filter_by_residues: set = None,
265
312
  ) -> "Structure":
266
313
  """
267
- Reads in an mmcif or pdb file and converts it into class instance.
314
+ Reads an atomic structure file into a :py:class:`Structure` instance.
268
315
 
269
316
  Parameters
270
317
  ----------
271
318
  filename : str
272
- Path to the mmcif or pdb file.
319
+ Input file. Supported extensions are:
320
+
321
+ +------+-------------------------------------------------------------+
322
+ | .pdb | Reads a PDB file |
323
+ +------+-------------------------------------------------------------+
324
+ | .cif | Reads an mmCIF file |
325
+ +------+-------------------------------------------------------------+
273
326
  keep_non_atom_records : bool, optional
274
327
  Whether to keep residues that are not labelled ATOM.
275
328
  filter_by_elements: set, optional
@@ -280,12 +333,34 @@ class Structure:
280
333
  Raises
281
334
  ------
282
335
  ValueError
283
- If the extension is not '.pdb' or '.cif'.
336
+ If the extension is not supported.
284
337
 
285
338
  Returns
286
339
  -------
287
- Structure
288
- Read in structure file.
340
+ :py:class:`Structure`
341
+ Structure instance representing the read in file.
342
+
343
+ Examples
344
+ --------
345
+ >>> from importlib_resources import files
346
+ >>> from tme import Structure
347
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
348
+ >>> structure = Structure.from_file(filename=fname)
349
+ >>> structure
350
+ Unique Chains: A-B, Atom Range: 1-1564 [N = 1564], Residue Range: 142-239 [N = 1564]
351
+
352
+ We can include non ATOM entries and restrict the considered elements
353
+ and residues
354
+
355
+ >>> structure = Structure.from_file(
356
+ >>> filename=fname,
357
+ >>> keep_non_atom_records=True,
358
+ >>> filter_by_elements = {"C"},
359
+ >>> filter_by_residues = {"GLY"},
360
+ >>> )
361
+ >>> structure
362
+ Unique Chains: A,B, Atom Range: 96-1461 [N = 44], Residue Range: 154-228 [N = 44]
363
+
289
364
  """
290
365
  _, file_extension = splitext(basename(filename.upper()))
291
366
  if file_extension == ".PDB":
@@ -313,14 +388,14 @@ class Structure:
313
388
  keep = np.logical_and(keep, data["record_type"] == "ATOM")
314
389
 
315
390
  for key in data:
316
- if key == "details":
391
+ if key == "metadata":
317
392
  continue
318
- if type(data[key]) == np.ndarray:
393
+ if isinstance(data[key], np.ndarray):
319
394
  data[key] = data[key][keep]
320
395
  else:
321
396
  data[key] = [x for x, flag in zip(data[key], keep) if flag]
322
397
 
323
- data["details"]["filepath"] = filename
398
+ data["metadata"]["filepath"] = filename
324
399
 
325
400
  return cls(**data)
326
401
 
@@ -367,12 +442,12 @@ class Structure:
367
442
  out_data = [
368
443
  x.strip() for x in result["atom_site"].get(atom_site_key, ["."])
369
444
  ]
370
- if dtype == int:
445
+ if dtype is int:
371
446
  out_data = [0 if x == "." else int(x) for x in out_data]
372
447
  try:
373
448
  out[out_key] = np.asarray(out_data).astype(dtype)
374
449
  except ValueError:
375
- default = ["."] if dtype == str else 0
450
+ default = ["."] if dtype is str else 0
376
451
  print(f"Converting {out_key} to {dtype} failed, set to {default}.")
377
452
  out[out_key] = np.repeat(default, len(out_data)).astype(dtype)
378
453
 
@@ -382,7 +457,7 @@ class Structure:
382
457
  continue
383
458
  out[key] = np.repeat(value, number_entries // value.size)
384
459
 
385
- out["details"] = {}
460
+ out["metadata"] = {}
386
461
  out["atom_coordinate"] = np.transpose(
387
462
  np.array(
388
463
  [
@@ -405,7 +480,7 @@ class Structure:
405
480
  for out_key, (base_key, inner_key, default) in detail_mapping.items():
406
481
  if base_key not in result:
407
482
  continue
408
- out["details"][out_key] = result[base_key].get(inner_key, default)
483
+ out["metadata"][out_key] = result[base_key].get(inner_key, default)
409
484
 
410
485
  return out
411
486
 
@@ -446,15 +521,15 @@ class Structure:
446
521
  "charge": ("charge", str),
447
522
  }
448
523
 
449
- out = {"details": result["details"]}
524
+ out = {"metadata": result["details"]}
450
525
  for out_key, (inner_key, dtype) in atom_site_mapping.items():
451
526
  out_data = [x.strip() for x in result[inner_key]]
452
- if dtype == int:
527
+ if dtype is int:
453
528
  out_data = [0 if x == "." else int(x) for x in out_data]
454
529
  try:
455
530
  out[out_key] = np.asarray(out_data).astype(dtype)
456
531
  except ValueError:
457
- default = "." if dtype == str else 0
532
+ default = "." if dtype is str else 0
458
533
  print(
459
534
  f"Converting {out_key} to {dtype} failed. Setting {out_key} to {default}."
460
535
  )
@@ -466,21 +541,36 @@ class Structure:
466
541
 
467
542
  def to_file(self, filename: str) -> None:
468
543
  """
469
- Writes the Structure instance data to a Protein Data Bank (PDB) or
470
- macromolecular Crystallographic Information File (mmCIF) file depending
471
- one whether filename ends with '.pdb' or '.cif'.
544
+ Writes the :py:class:`Structure` instance to disk.
545
+
546
+ Parameters
547
+ ----------
548
+ filename : str
549
+ The name of the file to be created. Supported extensions are
550
+
551
+ +------+-------------------------------------------------------------+
552
+ | .pdb | Creates a PDB file |
553
+ +------+-------------------------------------------------------------+
554
+ | .cif | Creates an mmCIF file |
555
+ +------+-------------------------------------------------------------+
472
556
 
473
557
  Raises
474
558
  ------
475
559
  ValueError
476
- If the extension is not '.pdb' or '.cif'.
560
+ If the extension is not supported.
561
+
562
+ Examples
563
+ --------
564
+ >>> from importlib_resources import files
565
+ >>> from tempfile import NamedTemporaryFile
566
+ >>> from tme import Structure
567
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
568
+ >>> oname = NamedTemporaryFile().name
569
+ >>> structure = Structure.from_file(filename=fname)
570
+ >>> structure.to_file(f"{oname}.cif") # Writes an mmCIF file to disk
571
+ >>> structure.to_file(f"{oname}.pdb") # Writes a PDB file to disk
477
572
 
478
- Parameters
479
- ----------
480
- filename : str
481
- The filename of the file to write.
482
573
  """
483
- data_out = []
484
574
  if np.any(np.vectorize(len)(self.chain_identifier) > 2):
485
575
  warnings.warn("Chain identifiers longer than one will be shortened.")
486
576
 
@@ -596,7 +686,7 @@ class Structure:
596
686
  data["pdbx_PDB_model_num"].append(str(model_num))
597
687
 
598
688
  output_data = {"atom_site": data}
599
- original_file = self.details.get("filepath", "")
689
+ original_file = self.metadata.get("filepath", "")
600
690
  try:
601
691
  new_data = {k: v for k, v in MMCIFParser(original_file).items()}
602
692
  index = self.atom_serial_number - 1
@@ -612,6 +702,9 @@ class Structure:
612
702
 
613
703
  ret = ""
614
704
  for category, subdict in output_data.items():
705
+ if not len(subdict):
706
+ continue
707
+
615
708
  ret += "#\n"
616
709
  is_loop = isinstance(subdict[list(subdict.keys())[0]], list)
617
710
  if not is_loop:
@@ -620,7 +713,18 @@ class Structure:
620
713
  else:
621
714
  ret += "loop_\n"
622
715
  ret += "".join([f"_{category}.{k}\n" for k in subdict])
623
- padded_subdict = _format_mmcif_colunns(subdict)
716
+
717
+ subdict = {
718
+ k: [_format_string(s) for s in v] for k, v in subdict.items()
719
+ }
720
+ key_length = {
721
+ key: len(max(value, key=lambda x: len(x), default=""))
722
+ for key, value in subdict.items()
723
+ }
724
+ padded_subdict = {
725
+ key: [s.ljust(key_length[key] + 1) for s in values]
726
+ for key, values in subdict.items()
727
+ }
624
728
 
625
729
  data = [
626
730
  "".join([str(x) for x in content])
@@ -644,8 +748,23 @@ class Structure:
644
748
 
645
749
  Returns
646
750
  -------
647
- Structure
648
- A subset of the original structure containing only the specified chain.
751
+ :py:class:`Structure`
752
+ A subset of the class instance containing only the specified chains.
753
+
754
+ Raises
755
+ ------
756
+ ValueError
757
+ If none of the specified chains exist.
758
+
759
+ Examples
760
+ --------
761
+ >>> from importlib_resources import files
762
+ >>> from tme import Structure
763
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
764
+ >>> structure = Structure.from_file(filename=fname)
765
+ >>> structure.subset_by_chain(chain="A") # Keep A
766
+ >>> structure.subset_by_chain(chain="A,B") # Keep A and B
767
+ >>> structure.subset_by_chain(chain="B,C") # Keep B, C does not exist
649
768
  """
650
769
  chain = np.unique(self.chain_identifier) if chain is None else chain.split(",")
651
770
  keep = np.in1d(self.chain_identifier, chain)
@@ -664,10 +783,8 @@ class Structure:
664
783
  ----------
665
784
  start : int
666
785
  The starting residue sequence number.
667
-
668
786
  stop : int
669
787
  The ending residue sequence number.
670
-
671
788
  chain : str, optional
672
789
  The chain identifier. If multiple chains should be selected they need
673
790
  to be a comma separated string, e.g. 'A,B,CE'. If chain None,
@@ -675,8 +792,21 @@ class Structure:
675
792
 
676
793
  Returns
677
794
  -------
678
- Structure
795
+ :py:class:`Structure`
679
796
  A subset of the original structure within the specified residue range.
797
+
798
+ Raises
799
+ ------
800
+ ValueError
801
+ If none of the specified residue chain combinations exist.
802
+
803
+ Examples
804
+ --------
805
+ >>> from importlib_resources import files
806
+ >>> from tme import Structure
807
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
808
+ >>> structure = Structure.from_file(filename=fname)
809
+ >>> structure.subset_by_range(chain="A",start=150,stop=180)
680
810
  """
681
811
  ret = self.subset_by_chain(chain=chain)
682
812
  keep = np.logical_and(
@@ -692,6 +822,15 @@ class Structure:
692
822
  -------
693
823
  NDArray
694
824
  The center of mass of the structure.
825
+
826
+ Examples
827
+ --------
828
+ >>> from importlib_resources import files
829
+ >>> from tme import Structure
830
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
831
+ >>> structure = Structure.from_file(filename=fname)
832
+ >>> structure.center_of_mass()
833
+ array([-0.89391639, 29.94908928, -2.64736741])
695
834
  """
696
835
  weights = [self._elements[atype].atomic_weight for atype in self.element_symbol]
697
836
  return np.dot(self.atom_coordinate.T, weights) / np.sum(weights)
@@ -717,7 +856,19 @@ class Structure:
717
856
  Returns
718
857
  -------
719
858
  Structure
720
- The transformed instance of :py:class:`tme.structure.Structure`.
859
+ The transformed instance of :py:class:`Structure`.
860
+
861
+ Examples
862
+ --------
863
+ >>> from importlib_resources import files
864
+ >>> from tme import Structure
865
+ >>> from tme.matching_utils import get_rotation_matrices
866
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
867
+ >>> structure = Structure.from_file(filename=fname)
868
+ >>> structure.rigid_transform(
869
+ >>> rotation_matrix = get_rotation_matrices(60)[2],
870
+ >>> translation = (0, 1, -5)
871
+ >>> )
721
872
  """
722
873
  out = np.empty_like(self.atom_coordinate.T)
723
874
  rigid_transform(
@@ -745,7 +896,17 @@ class Structure:
745
896
 
746
897
  See Also
747
898
  --------
748
- :py:meth:`tme.Density.centered`
899
+ :py:meth:`tme.density.Density.centered`
900
+
901
+ Examples
902
+ --------
903
+ >>> from importlib_resources import files
904
+ >>> from tme import Structure
905
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
906
+ >>> structure = Structure.from_file(filename=fname)
907
+ >>> centered_structure, translation = structure.centered()
908
+ >>> translation
909
+ array([34.89391639, 4.05091072, 36.64736741])
749
910
  """
750
911
  center_of_mass = self.center_of_mass()
751
912
  enclosing_box = minimum_enclosing_box(coordinates=self.atom_coordinate.T)
@@ -770,12 +931,11 @@ class Structure:
770
931
  ----------
771
932
  shape : Tuple[int,]
772
933
  The desired shape of the output array.
773
-
774
934
  sampling_rate : float
775
935
  The sampling rate of the output array in unit of self.atom_coordinate.
776
-
777
936
  origin : Tuple[float,]
778
937
  The origin of the coordinate system.
938
+
779
939
  Returns
780
940
  -------
781
941
  Tuple[NDArray, List[str], Tuple[int, ], float, Tuple[float,]]
@@ -809,11 +969,11 @@ class Structure:
809
969
  positions = positions[valid_positions == positions.shape[1], :]
810
970
  atom_types = atom_types[valid_positions == positions.shape[1]]
811
971
 
812
- self.details["nAtoms_outOfBound"] = 0
972
+ self.metadata["nAtoms_outOfBound"] = 0
813
973
  if positions.shape[0] != coordinates.shape[0]:
814
974
  out_of_bounds = coordinates.shape[0] - positions.shape[0]
815
975
  print(f"{out_of_bounds}/{coordinates.shape[0]} atoms were out of bounds.")
816
- self.details["nAtoms_outOfBound"] = out_of_bounds
976
+ self.metadata["nAtoms_outOfBound"] = out_of_bounds
817
977
 
818
978
  return positions, atom_types, shape, sampling_rate, origin
819
979
 
@@ -831,14 +991,11 @@ class Structure:
831
991
  ----------
832
992
  positions : Tuple[float, float, float]
833
993
  The positions of the atoms.
834
-
835
994
  atoms : Tuple[str]
836
995
  The types of the atoms.
837
-
838
996
  sampling_rate : float
839
997
  The desired sampling rate in unit of self.atom_coordinate of the
840
998
  output array.
841
-
842
999
  volume : NDArray
843
1000
  The volume to update.
844
1001
  """
@@ -900,7 +1057,7 @@ class Structure:
900
1057
  volume : NDArray
901
1058
  The volume to update.
902
1059
  lowpass_filter : NDArray
903
- Whether the scattering factors hsould be lowpass filtered.
1060
+ Whether the scattering factors should be lowpass filtered.
904
1061
  downsampling_factor : NDArray
905
1062
  Downsampling factor for scattering factor computation.
906
1063
  source : str
@@ -944,6 +1101,75 @@ class Structure:
944
1101
  scattering_profiles[atoms[atom_index]](distances),
945
1102
  )
946
1103
 
1104
+ @staticmethod
1105
+ def _position_to_molmap(
1106
+ positions: NDArray,
1107
+ weights: Tuple[float],
1108
+ resolution: float = 4,
1109
+ sigma_factor: float = 1 / (np.pi * np.sqrt(2)),
1110
+ cutoff_value: float = 4.0,
1111
+ sampling_rate: float = None,
1112
+ ) -> NDArray:
1113
+ """
1114
+ Simulates electron densities analogous to Chimera's molmap function [1]_.
1115
+
1116
+ Parameters
1117
+ ----------
1118
+ positions : NDArray
1119
+ Array containing atomic positions in z,y,x format (n,d).
1120
+ weights : [float]
1121
+ The weights to use for the entries in positions.
1122
+ resolution : float
1123
+ The product of resolution and sigma_factor gives the sigma used to
1124
+ compute the discretized Gaussian.
1125
+ sigma_factor : float
1126
+ The factor used with resolution to compute sigma. Default is 1 / (π√2).
1127
+ cutoff_value : float
1128
+ The cutoff value for the Gaussian kernel. Default is 4.0.
1129
+ sampling_rate : float
1130
+ Sampling rate along each dimension. One third of resolution by default.
1131
+
1132
+ References
1133
+ ----------
1134
+ .. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/midas/molmap.html
1135
+
1136
+ Returns
1137
+ -------
1138
+ NDArray
1139
+ A numpy array containing the simulated electron densities, returned as a tuple together with the origin of the output grid.
1140
+ """
1141
+ if sampling_rate is None:
1142
+ sampling_rate = resolution / 3
1143
+
1144
+ pad = int(3 * resolution)
1145
+ sigma = sigma_factor * resolution
1146
+ sigma_grid = sigma / sampling_rate
1147
+
1148
+ # Limit padding to numerically stable values
1149
+ smax = np.max(sigma_grid)
1150
+ arr = np.arange(0, pad)
1151
+ gaussian = (
1152
+ np.exp(-0.5 * (arr / smax) ** 2)
1153
+ * np.power(2 * np.pi, -1.5)
1154
+ * np.power(sigma, -3.0)
1155
+ )
1156
+ pad_cutoff = np.max(arr[gaussian > 1e-8])
1157
+ if arr.size != 0:
1158
+ pad = int(pad_cutoff) + 1
1159
+
1160
+ positions = positions[:, ::-1]
1161
+ origin = positions.min(axis=0) - pad * sampling_rate
1162
+ positions = np.rint(np.divide((positions - origin), sampling_rate)).astype(int)
1163
+ shape = positions.max(axis=0).astype(int) + pad + 1
1164
+
1165
+ out = np.zeros(shape, dtype=np.float32)
1166
+ np.add.at(out, tuple(positions.T), weights)
1167
+
1168
+ out = Preprocessor().gaussian_filter(
1169
+ template=out, sigma=sigma_grid, cutoff_value=cutoff_value
1170
+ )
1171
+ return out, origin
1172
+
947
1173
  def _get_atom_weights(
948
1174
  self, atoms: Tuple[str] = None, weight_type: str = "atomic_weight"
949
1175
  ) -> Tuple[float]:
@@ -980,41 +1206,77 @@ class Structure:
980
1206
  def to_volume(
981
1207
  self,
982
1208
  shape: Tuple[int] = None,
983
- sampling_rate: NDArray = None,
1209
+ sampling_rate: Tuple[float] = None,
984
1210
  origin: Tuple[float] = None,
985
1211
  chain: str = None,
986
1212
  weight_type: str = "atomic_weight",
987
- scattering_args: Dict = dict(),
988
- ) -> Tuple[NDArray, Tuple[int], NDArray]:
1213
+ weight_type_args: Dict = dict(),
1214
+ ) -> Tuple[NDArray, NDArray, NDArray]:
989
1215
  """
990
- Converts atom coordinates of shape [n x 3] x, y, z to a volume with
991
- index z, y, x.
1216
+ Maps class instance to a volume.
992
1217
 
993
1218
  Parameters
994
1219
  ----------
995
- shape : Tuple[int, ...], optional
996
- Desired shape of the output array. If shape is given its expected to be
997
- in z, y, x form.
998
- sampling_rate : float, optional
999
- Sampling rate of the output array in the unit of self.atom_coordinate
1000
- origin : Tuple[float, ...], optional
1001
- Origin of the coordinate system. If origin is given its expected to be
1002
- in z, y, x form.
1220
+ shape : tuple of ints, optional
1221
+ Output array shape in (z,y,x) form.
1222
+ sampling_rate : tuple of float, optional
1223
+ Sampling rate of the output array in units of
1224
+ :py:attr:`Structure.atom_coordinate`
1225
+ origin : tuple of floats, optional
1226
+ Origin of the coordinate system in (z,y,x) form.
1003
1227
  chain : str, optional
1004
- The chain identifier. If multiple chains should be selected they need
1005
- to be a comma separated string, e.g. 'A,B,CE'. If chain None,
1006
- all chains are returned. Default is None.
1228
+ Chain identifier. Either a single chain or a comma separated string of chains.
1229
+ Defaults to None which returns all chains.
1007
1230
  weight_type : str, optional
1008
- Which weight should be given to individual atoms.
1009
- scattering_args : dict, optional
1010
- Additional arguments for scattering factor computation.
1231
+ Weight given to individual atoms. Supported weights are:
1232
+
1233
+ +----------------------------+---------------------------------------+
1234
+ | atomic_weight | Using element unit point mass |
1235
+ +----------------------------+---------------------------------------+
1236
+ | atomic_number | Using atomic number point mass |
1237
+ +----------------------------+---------------------------------------+
1238
+ | gaussian | Represent atoms as isotropic Gaussian |
1239
+ +----------------------------+---------------------------------------+
1240
+ | van_der_waals_radius | Using binary van der Waals spheres |
1241
+ +----------------------------+---------------------------------------+
1242
+ | scattering_factors | Using experimental scattering factors |
1243
+ +----------------------------+---------------------------------------+
1244
+ | lowpass_scattering_factors | Lowpass filtered scattering_factors |
1245
+ +----------------------------+---------------------------------------+
1246
+ weight_type_args : dict, optional
1247
+ Additional arguments used for individual weight_types. `gaussian`
1248
+ accepts ``resolution``, `scattering` accepts ``method``.
1011
1249
 
1012
1250
  Returns
1013
1251
  -------
1014
- Tuple[NDArray, Tuple[int], NDArray]
1015
- The volume, its origin and the voxel size in Ångstrom.
1252
+ Tuple[NDArray, NDArray, NDArray]
1253
+ Volume, origin and sampling_rate.
1254
+
1255
+ Examples
1256
+ --------
1257
+ >>> from importlib_resources import files
1258
+ >>> from tme import Structure
1259
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
1260
+ >>> structure = Structure.from_file(filename=fname)
1261
+ >>> vol, origin, sampling = structure.to_volume()
1262
+ >>> vol.shape, origin, sampling
1263
+ ((59, 35, 53), array([-30.71, 12.42, -27.15]), array([1., 1., 1.]))
1264
+ >>> vol, origin, sampling = structure.to_volume(sampling_rate=(2.2,1,3))
1265
+ ((27, 35, 18), array([-30.71, 12.42, -27.15]), array([2.2, 1. , 3. ]))
1266
+
1267
+ ``sampling_rate`` and ``origin`` can be set to ensure correct alignment
1268
+ with corresponding density maps such as the ones at EMDB. Analogous to
1269
+ :py:meth:`Structure.subset_by_chain` only parts of the structure can be
1270
+ mapped onto grids using a variety of weighting schemes
1271
+
1272
+ >>> structure.to_volume(weight_type="van_der_waals_radius")
1273
+ >>> structure.to_volume(
1274
+ >>> weight_type="lowpass_scattering_factors",
1275
+ >>> weight_type_args={"source" : "dt1969", "downsampling_factor" : 1.35},
1276
+ >>> )
1016
1277
  """
1017
1278
  _weight_types = {
1279
+ "gaussian",
1018
1280
  "atomic_weight",
1019
1281
  "atomic_number",
1020
1282
  "van_der_waals_radius",
@@ -1035,11 +1297,8 @@ class Structure:
1035
1297
  "sampling_rate should either be single value of array with"
1036
1298
  f"size {self.atom_coordinate.shape[1]}."
1037
1299
  )
1038
- if "source" not in scattering_args:
1039
- scattering_args["source"] = "peng1995"
1040
1300
 
1041
1301
  temp = self.subset_by_chain(chain=chain)
1042
-
1043
1302
  positions, atoms, shape, sampling_rate, origin = temp._coordinate_to_position(
1044
1303
  shape=shape, sampling_rate=sampling_rate, origin=origin
1045
1304
  )
@@ -1056,7 +1315,7 @@ class Structure:
1056
1315
  sampling_rate,
1057
1316
  volume,
1058
1317
  lowpass_filter=False,
1059
- **scattering_args,
1318
+ **weight_type_args,
1060
1319
  )
1061
1320
  elif weight_type == "lowpass_scattering_factors":
1062
1321
  self._position_to_scattering_factors(
@@ -1065,10 +1324,19 @@ class Structure:
1065
1324
  sampling_rate,
1066
1325
  volume,
1067
1326
  lowpass_filter=True,
1068
- **scattering_args,
1327
+ **weight_type_args,
1328
+ )
1329
+ elif weight_type == "gaussian":
1330
+ volume, origin = self._position_to_molmap(
1331
+ positions=temp.atom_coordinate,
1332
+ weights=temp._get_atom_weights(
1333
+ atoms=atoms, weight_type="atomic_number"
1334
+ ),
1335
+ sampling_rate=sampling_rate,
1336
+ **weight_type_args,
1069
1337
  )
1070
1338
 
1071
- self.details.update(temp.details)
1339
+ self.metadata.update(temp.metadata)
1072
1340
  return volume, origin, sampling_rate
1073
1341
 
1074
1342
  @classmethod
@@ -1081,32 +1349,41 @@ class Structure:
1081
1349
  weighted: bool = False,
1082
1350
  ) -> float:
1083
1351
  """
1084
- Compute root mean square deviation (RMSD) between two structures.
1085
-
1086
- Both structures need to have the same number of atoms. In practice, this means
1087
- that *structure2* is a transformed version of *structure1*
1352
+ Compute root mean square deviation (RMSD) between two structures with the
1353
+ same number of atoms.
1088
1354
 
1089
1355
  Parameters
1090
1356
  ----------
1091
- structure1 : Structure
1092
- Structure 1.
1093
-
1094
- structure2 : Structure
1095
- Structure 2.
1096
-
1097
- origin : NDArray, optional
1098
- Origin of the structure coordinate system.
1099
-
1100
- sampling_rate : float, optional
1101
- Sampling rate if discretized on a grid in the unit of self.atom_coordinate.
1102
-
1357
+ structure1, structure2 : :py:class:`Structure`
1358
+ Structure instances to compare.
1359
+ origin : tuple of floats, optional
1360
+ Coordinate system origin. For computing RMSD on discretized grids.
1361
+ sampling_rate : tuple of floats, optional
1362
+ Sampling rate in units of :py:attr:`atom_coordinate`.
1363
+ For computing RMSD on discretized grids.
1103
1364
  weighted : bool, optional
1104
- Whether atoms should be weighted by their atomic weight.
1365
+ Whether atoms should be weighted according to their atomic weight.
1105
1366
 
1106
1367
  Returns
1107
1368
  -------
1108
1369
  float
1109
- Root Mean Square Deviation (RMSD)
1370
+ Root Mean Square Deviation between input structures.
1371
+
1372
+ Examples
1373
+ --------
1374
+ >>> from importlib_resources import files
1375
+ >>> from tme.matching_utils import get_rotation_matrices
1376
+ >>> from tme import Structure
1377
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
1378
+ >>> structure = Structure.from_file(filename=fname)
1379
+ >>> transformed = structure.rigid_transform(
1380
+ >>> rotation_matrix = get_rotation_matrices(60)[2],
1381
+ >>> translation = (0, 1, -5)
1382
+ >>> )
1383
+ >>> Structure.compare_structures(structure, transformed)
1384
+ 31.35238
1385
+ >>> Structure.compare_structures(structure, structure)
1386
+ 0.0
1110
1387
  """
1111
1388
  if origin is None:
1112
1389
  origin = np.zeros(structure1.atom_coordinate.shape[1])
@@ -1115,14 +1392,18 @@ class Structure:
1115
1392
  coordinates2 = structure2.atom_coordinate
1116
1393
  atoms1, atoms2 = structure1.element_symbol, structure2.element_symbol
1117
1394
  if sampling_rate is not None:
1118
- coordinates1 = np.rint((coordinates1 - origin) / sampling_rate).astype(int)
1119
- coordinates2 = np.rint((coordinates2 - origin) / sampling_rate).astype(int)
1395
+ coordinates1 = np.rint(
1396
+ np.divide(np.subtract(coordinates1, origin), sampling_rate)
1397
+ ).astype(int)
1398
+ coordinates2 = np.rint(
1399
+ np.divide(np.subtract(coordinates2, origin), sampling_rate)
1400
+ ).astype(int)
1120
1401
 
1121
- weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
1122
- weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
1123
- if not weighted:
1124
- weights1 = np.ones_like(weights1)
1125
- weights2 = np.ones_like(weights2)
1402
+ weights1 = np.ones_like(structure1.atom_coordinate.shape[0])
1403
+ weights2 = np.ones_like(structure2.atom_coordinate.shape[0])
1404
+ if weighted:
1405
+ weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
1406
+ weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
1126
1407
 
1127
1408
  if not np.allclose(coordinates1.shape, coordinates2.shape):
1128
1409
  raise ValueError(
@@ -1147,35 +1428,41 @@ class Structure:
1147
1428
  weighted: bool = False,
1148
1429
  ) -> Tuple["Structure", float]:
1149
1430
  """
1150
- Align the atom coordinates of structure2 to structure1 using
1151
- the Kabsch algorithm.
1152
-
1153
- Both structures need to have the same number of atoms. In practice, this means
1154
- that *structure2* is a subset of *structure1*
1431
+ Align ``structure2`` to ``structure1`` using the Kabsch Algorithm. Both
1432
+ structures need to have the same number of atoms.
1155
1433
 
1156
1434
  Parameters
1157
1435
  ----------
1158
- structure1 : Structure
1159
- Structure 1.
1160
-
1161
- structure2 : Structure
1162
- Structure 2.
1163
-
1164
- origin : NDArray, optional
1165
- Origin of the structure coordinate system.
1166
-
1167
- sampling_rate : float, optional
1168
- Voxel size if discretized on a grid.
1169
-
1436
+ structure1, structure2 : :py:class:`Structure`
1437
+ Structure instances to align.
1438
+ origin : tuple of floats, optional
1439
+ Coordinate system origin. For computing RMSD on discretized grids.
1440
+ sampling_rate : tuple of floats, optional
1441
+ Sampling rate in units of :py:attr:`atom_coordinate`.
1442
+ For computing RMSD on discretized grids.
1170
1443
  weighted : bool, optional
1171
1444
  Whether atoms should be weighted by their atomic weight.
1172
1445
 
1173
1446
  Returns
1174
1447
  -------
1175
- Structure
1176
- *structure2* aligned to *structure1*.
1448
+ :py:class:`Structure`
1449
+ ``structure2`` aligned to ``structure1``.
1177
1450
  float
1178
- Root Mean Square Error (RMSE)
1451
+ Alignment RMSD
1452
+
1453
+ Examples
1454
+ --------
1455
+ >>> from importlib_resources import files
1456
+ >>> from tme import Structure
1457
+ >>> from tme.matching_utils import get_rotation_matrices
1458
+ >>> fname = str(files("tme.tests.data").joinpath("Structures/5khe.cif"))
1459
+ >>> structure = Structure.from_file(filename=fname)
1460
+ >>> transformed = structure.rigid_transform(
1461
+ >>> rotation_matrix = get_rotation_matrices(60)[2],
1462
+ >>> translation = (0, 1, -5)
1463
+ >>> )
1464
+ >>> aligned, rmsd = Structure.align_structures(structure, transformed)
1465
+ Initial RMSD: 31.07189 - Final RMSD: 0.00000
1179
1466
  """
1180
1467
  if origin is None:
1181
1468
  origin = np.minimum(
@@ -1237,9 +1524,7 @@ class Structure:
1237
1524
 
1238
1525
  @dataclass(frozen=True, repr=True)
1239
1526
  class Elements:
1240
- """
1241
- Lookup table containing information on chemical elements.
1242
- """
1527
+ """Lookup table for chemical elements."""
1243
1528
 
1244
1529
  Atom = namedtuple(
1245
1530
  "Atom",
@@ -1382,12 +1667,33 @@ class Elements:
1382
1667
  Parameters
1383
1668
  ----------
1384
1669
  key : str
1385
- The key to use for retrieving the corresponding value from
1386
- the internal data.
1670
+ Key to retrieve the corresponding value for.
1387
1671
 
1388
1672
  Returns
1389
1673
  -------
1390
- value
1391
- The value associated with the provided key in the internal data.
1674
+ namedtuple
1675
+ The Atom tuple associated with the provided key.
1392
1676
  """
1393
1677
  return self._elements.get(key, self._default)
1678
+
1679
+
1680
+ def _format_string(string: str) -> str:
1681
+ """
1682
+ Formats a string by adding quotation marks if it contains white spaces.
1683
+
1684
+ Parameters
1685
+ ----------
1686
+ string : str
1687
+ Input string to be formatted.
1688
+
1689
+ Returns
1690
+ -------
1691
+ str
1692
+ Formatted string with added quotation marks if needed.
1693
+ """
1694
+ if " " in string:
1695
+ return f"'{string}'"
1696
+ # Occurs e.g. for C1' atoms. The trailing whitespace is necessary.
1697
+ if string.count("'") == 1:
1698
+ return f'"{string}"'
1699
+ return string