biotite 0.40.0__cp310-cp310-win_amd64.whl → 0.41.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (90) hide show
  1. biotite/__init__.py +1 -1
  2. biotite/database/pubchem/download.py +23 -23
  3. biotite/database/pubchem/query.py +7 -7
  4. biotite/file.py +17 -9
  5. biotite/sequence/align/banded.c +119 -119
  6. biotite/sequence/align/banded.cp310-win_amd64.pyd +0 -0
  7. biotite/sequence/align/cigar.py +60 -15
  8. biotite/sequence/align/kmeralphabet.c +119 -119
  9. biotite/sequence/align/kmeralphabet.cp310-win_amd64.pyd +0 -0
  10. biotite/sequence/align/kmersimilarity.c +119 -119
  11. biotite/sequence/align/kmersimilarity.cp310-win_amd64.pyd +0 -0
  12. biotite/sequence/align/kmertable.cp310-win_amd64.pyd +0 -0
  13. biotite/sequence/align/kmertable.cpp +119 -119
  14. biotite/sequence/align/localgapped.c +119 -119
  15. biotite/sequence/align/localgapped.cp310-win_amd64.pyd +0 -0
  16. biotite/sequence/align/localungapped.c +119 -119
  17. biotite/sequence/align/localungapped.cp310-win_amd64.pyd +0 -0
  18. biotite/sequence/align/multiple.c +119 -119
  19. biotite/sequence/align/multiple.cp310-win_amd64.pyd +0 -0
  20. biotite/sequence/align/pairwise.c +119 -119
  21. biotite/sequence/align/pairwise.cp310-win_amd64.pyd +0 -0
  22. biotite/sequence/align/permutation.c +119 -119
  23. biotite/sequence/align/permutation.cp310-win_amd64.pyd +0 -0
  24. biotite/sequence/align/selector.c +119 -119
  25. biotite/sequence/align/selector.cp310-win_amd64.pyd +0 -0
  26. biotite/sequence/align/tracetable.c +119 -119
  27. biotite/sequence/align/tracetable.cp310-win_amd64.pyd +0 -0
  28. biotite/sequence/annotation.py +2 -2
  29. biotite/sequence/codec.c +119 -119
  30. biotite/sequence/codec.cp310-win_amd64.pyd +0 -0
  31. biotite/sequence/io/fasta/convert.py +27 -24
  32. biotite/sequence/phylo/nj.c +119 -119
  33. biotite/sequence/phylo/nj.cp310-win_amd64.pyd +0 -0
  34. biotite/sequence/phylo/tree.c +119 -119
  35. biotite/sequence/phylo/tree.cp310-win_amd64.pyd +0 -0
  36. biotite/sequence/phylo/upgma.c +119 -119
  37. biotite/sequence/phylo/upgma.cp310-win_amd64.pyd +0 -0
  38. biotite/structure/__init__.py +2 -0
  39. biotite/structure/bonds.c +1124 -915
  40. biotite/structure/bonds.cp310-win_amd64.pyd +0 -0
  41. biotite/structure/celllist.c +119 -119
  42. biotite/structure/celllist.cp310-win_amd64.pyd +0 -0
  43. biotite/structure/charges.c +119 -119
  44. biotite/structure/charges.cp310-win_amd64.pyd +0 -0
  45. biotite/structure/dotbracket.py +2 -0
  46. biotite/structure/info/atoms.py +6 -1
  47. biotite/structure/info/bonds.py +1 -1
  48. biotite/structure/info/ccd/amino_acids.txt +17 -0
  49. biotite/structure/info/ccd/carbohydrates.txt +2 -0
  50. biotite/structure/info/ccd/components.bcif +0 -0
  51. biotite/structure/info/ccd/nucleotides.txt +1 -0
  52. biotite/structure/info/misc.py +69 -5
  53. biotite/structure/integrity.py +19 -70
  54. biotite/structure/io/ctab.py +12 -106
  55. biotite/structure/io/general.py +157 -165
  56. biotite/structure/io/gro/file.py +16 -16
  57. biotite/structure/io/mmtf/convertarray.c +119 -119
  58. biotite/structure/io/mmtf/convertarray.cp310-win_amd64.pyd +0 -0
  59. biotite/structure/io/mmtf/convertfile.c +119 -119
  60. biotite/structure/io/mmtf/convertfile.cp310-win_amd64.pyd +0 -0
  61. biotite/structure/io/mmtf/decode.c +119 -119
  62. biotite/structure/io/mmtf/decode.cp310-win_amd64.pyd +0 -0
  63. biotite/structure/io/mmtf/encode.c +119 -119
  64. biotite/structure/io/mmtf/encode.cp310-win_amd64.pyd +0 -0
  65. biotite/structure/io/mol/__init__.py +4 -2
  66. biotite/structure/io/mol/convert.py +71 -7
  67. biotite/structure/io/mol/ctab.py +414 -0
  68. biotite/structure/io/mol/header.py +116 -0
  69. biotite/structure/io/mol/{file.py → mol.py} +69 -82
  70. biotite/structure/io/mol/sdf.py +909 -0
  71. biotite/structure/io/pdb/file.py +84 -31
  72. biotite/structure/io/pdb/hybrid36.cp310-win_amd64.pyd +0 -0
  73. biotite/structure/io/pdbx/__init__.py +0 -1
  74. biotite/structure/io/pdbx/bcif.py +2 -3
  75. biotite/structure/io/pdbx/cif.py +9 -5
  76. biotite/structure/io/pdbx/component.py +4 -1
  77. biotite/structure/io/pdbx/convert.py +203 -79
  78. biotite/structure/io/pdbx/encoding.c +119 -119
  79. biotite/structure/io/pdbx/encoding.cp310-win_amd64.pyd +0 -0
  80. biotite/structure/repair.py +253 -0
  81. biotite/structure/sasa.c +119 -119
  82. biotite/structure/sasa.cp310-win_amd64.pyd +0 -0
  83. biotite/structure/sequence.py +112 -0
  84. biotite/structure/superimpose.py +472 -13
  85. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/METADATA +2 -2
  86. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/RECORD +89 -85
  87. biotite/structure/io/pdbx/error.py +0 -14
  88. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
  89. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +0 -0
  90. {biotite-0.40.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,909 @@
1
+ # This source code is part of the Biotite package and is distributed
2
+ # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
3
+ # information.
4
+
5
# Override the module name so that objects defined here are reported
# under the subpackage name instead of this file's module name
__name__ = "biotite.structure.io.mol"
__author__ = "Patrick Kunzmann, Benjamin Mayer"
# Public names of this module
__all__ = ["SDFile", "SDRecord", "Metadata"]
8
+
9
+ import re
10
+ from dataclasses import dataclass
11
+ from collections.abc import MutableMapping, Mapping
12
+ import numpy as np
13
+ from ....file import File, InvalidFileError, is_open_compatible, is_text, \
14
+ DeserializationError, SerializationError
15
+ from .ctab import read_structure_from_ctab, write_structure_to_ctab
16
+ from .header import Header
17
+ from ...atoms import AtomArray
18
+ from ...bonds import BondList, BondType
19
+
20
+
21
# Number of header lines at the start of each record
_N_HEADER = 3
# Prefix of the line that terminates each record in an SD file
_RECORD_DELIMITER = "$$$$"
24
+
25
+
26
+ class Metadata(MutableMapping):
27
+ r"""
28
+ Additional nonstructural data in an SD record.
29
+
30
+ The metadata is stored as key-value pairs.
31
+ As SDF allows multiple different identifiers for keys,
32
+ the keys are represented by :class:`Metadata.Key`.
33
+
34
+ Parameters
35
+ ----------
36
+ metadata : dict, optional
37
+ The metadata as key-value pairs.
38
+ Keys are instances of :class:`Metadata.Key`.
39
+ Alternatively, keys can be given as strings, in which case the
40
+ string is used as the :attr:`Metadata.Key.name`.
41
+ Values are strings.
42
+ Line breaks in values are allowed.
43
+
44
+ Notes
45
+ -----
46
+ Key names may only contain alphanumeric characters, underscores and
47
+ periods.
48
+
49
+ Examples
50
+ --------
51
+
52
+ >>> metadata = Metadata({
53
+ ... "foo": "Lorem ipsum",
54
+ ... Metadata.Key(number=42, name="bar"): "dolor sit amet,\nconsectetur"
55
+ ... })
56
+ >>> print(metadata)
57
+ > <foo>
58
+ Lorem ipsum
59
+ <BLANKLINE>
60
+ > DT42 <bar>
61
+ dolor sit amet,
62
+ consectetur
63
+ <BLANKLINE>
64
+ >>> print(metadata["foo"])
65
+ Lorem ipsum
66
+ >>> # Strings can only be used for access if the key contains only a name
67
+ >>> print("bar" in metadata)
68
+ False
69
+ >>> print(metadata[Metadata.Key(number=42, name="bar")])
70
+ dolor sit amet,
71
+ consectetur
72
+
73
+ """
74
+
75
+ @dataclass(frozen=True, kw_only=True)
76
+ class Key:
77
+ """
78
+ A metadata key.
79
+
80
+ Parameters
81
+ ----------
82
+ number : int, optional
83
+ number of the field in the database.
84
+ name : str, optional
85
+ Name of the field.
86
+ May only contain alphanumeric characters, underscores and
87
+ periods.
88
+ registry_internal : int, optional
89
+ Internal registry number.
90
+ registry_external : str, optional
91
+ External registry number.
92
+
93
+ Attributes
94
+ ----------
95
+ number, name, registry_internal, registry_external
96
+ The same as the parameters.
97
+ """
98
+ # The characters that can be given as input to `name`
99
+ # First character must be alphanumeric,
100
+ # following characters may include underscores and periods
101
+ # Although the V3000 specification forbids the use of periods,
102
+ # they are still used in practice and therefore allowed here
103
+ _NAME_INPUT_REGEX = re.compile(r"^[a-zA-Z0-9][\w.]*$")
104
+ # These regexes are used to parse the key from a line
105
+ _COMPONENT_REGEX = {
106
+ "number": re.compile(r"^DT(\d+)$"),
107
+ "name": re.compile(r"^<([a-zA-Z0-9][\w.]*)>$"),
108
+ "registry_internal": re.compile(r"^(\d+)$"),
109
+ "registry_external": re.compile(r"^\(([\w.-]*)\)$"),
110
+ }
111
+
112
+ number: ... = None
113
+ name: ... = None
114
+ registry_internal: ... = None
115
+ registry_external: ... = None
116
+
117
+ def __post_init__(self):
118
+ if self.name is None and self.number is None:
119
+ raise ValueError("At least the field number or name must be set")
120
+ if self.name is not None:
121
+ if not Metadata.Key._NAME_INPUT_REGEX.match(self.name):
122
+ raise ValueError(
123
+ f"Invalid name '{self.name}', must only contains "
124
+ "alphanumeric characters, underscores and periods"
125
+ )
126
+ if self.number is not None:
127
+ # Cannot set field directly as 'frozen=True'
128
+ object.__setattr__(self, "number", int(self.number))
129
+ if self.registry_internal is not None:
130
+ object.__setattr__(
131
+ self, "registry_internal", int(self.registry_internal)
132
+ )
133
+
134
+ @staticmethod
135
+ def deserialize(text):
136
+ """
137
+ Create an object by deserializing the given text content.
138
+
139
+ Parameters
140
+ ----------
141
+ content : str
142
+ The content to be deserialized.
143
+ """
144
+ # Omit the leading '>'
145
+ key_components = text[1:].split()
146
+ parsed_component_dict = {}
147
+ for component in key_components:
148
+ # For each component in each the key,
149
+ # try to match it with each of the regexes
150
+ for attr_name, regex in Metadata.Key._COMPONENT_REGEX.items():
151
+ pattern_match = regex.match(component)
152
+ if pattern_match is None:
153
+ # Try next pattern
154
+ continue
155
+ if attr_name in parsed_component_dict:
156
+ raise DeserializationError(
157
+ f"Duplicate key component for '{attr_name}'"
158
+ )
159
+ value = pattern_match.group(1)
160
+ parsed_component_dict[attr_name] = value
161
+ break
162
+ else:
163
+ # There is no matching pattern
164
+ raise DeserializationError(
165
+ f"Invalid key component '{component}'"
166
+ )
167
+ return Metadata.Key(**parsed_component_dict)
168
+
169
+ def serialize(self):
170
+ """
171
+ Convert this object into text content.
172
+
173
+ Returns
174
+ -------
175
+ content : str
176
+ The serialized content.
177
+ """
178
+ key_string = "> "
179
+ if self.number is not None:
180
+ key_string += f"DT{self.number} "
181
+ if self.name is not None:
182
+ key_string += f"<{self.name}> "
183
+ if self.registry_internal is not None:
184
+ key_string += f"{self.registry_internal} "
185
+ if self.registry_external is not None:
186
+ key_string += f"({self.registry_external}) "
187
+ return key_string
188
+
189
+ def __str__(self):
190
+ return self.serialize()
191
+
192
+
193
+ def __init__(self, metadata=None):
194
+ if metadata is None:
195
+ metadata = {}
196
+ self._metadata = {}
197
+ for key, value in metadata.items():
198
+ self._metadata[_to_metadata_key(key)] = value
199
+
200
@staticmethod
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    Lines starting with ``'>'`` are parsed as keys, all other
    non-empty lines are appended to the value of the preceding key.
    Each line is stripped of surrounding whitespace and empty lines
    are ignored.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    metadata : Metadata
        The parsed metadata.

    Raises
    ------
    DeserializationError
        If a value line appears before the first key or if a key has
        no value.
    """
    pairs = {}
    key = None
    value_lines = []
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # Skip empty lines
            continue
        if not stripped.startswith(">"):
            if key is None:
                raise DeserializationError(
                    "Value found before metadata key"
                )
            value_lines.append(stripped)
            continue
        # A new key begins -> store the pair collected so far
        _add_key_value_pair(
            pairs, key, "\n".join(value_lines) if value_lines else None
        )
        key = Metadata.Key.deserialize(stripped)
        value_lines = []
    # Add final pair
    _add_key_value_pair(
        pairs, key, "\n".join(value_lines) if value_lines else None
    )
    return Metadata(pairs)
234
+
235
def serialize(self):
    """
    Convert this object into text content.

    Returns
    -------
    content : str
        The serialized content: for each entry the key line, followed
        by the value and an empty line.
    """
    blocks = []
    for key, value in self._metadata.items():
        # The additional line break yields the empty line after the value
        blocks.extend((key.serialize(), value + "\n"))
    return _join_with_terminal_newline(blocks)
250
+
251
def __getitem__(self, key):
    # Accepts a `Metadata.Key` or a string, which is converted into a
    # key that has only a name
    return self._metadata[_to_metadata_key(key)]
253
+
254
def __setitem__(self, key, value):
    # Reject empty values before the key is converted and stored
    if len(value) == 0:
        raise ValueError("Metadata value must not be empty")
    self._metadata[_to_metadata_key(key)] = value
258
+
259
def __delitem__(self, key):
    # String keys are converted in the same way as for item access
    del self._metadata[_to_metadata_key(key)]
261
+
262
def __iter__(self):
    # Iterates over the keys of the underlying dictionary
    return iter(self._metadata)
264
+
265
def __len__(self):
    # Number of key-value pairs
    return len(self._metadata)
267
+
268
def __eq__(self, other):
    # Only objects of the same type with the same set of keys,
    # each mapping to an equal value, are considered equal
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    return not any(self[key] != other[key] for key in self.keys())
277
+
278
def __str__(self):
    # The string representation is the serialized text
    return self.serialize()
280
+
281
+
282
+ class SDRecord:
283
+ """
284
+ A record in a SD file.
285
+
286
+ Parameters
287
+ ----------
288
+ header : Header, optional
289
+ The header of the record.
290
+ By default, an empty header is created.
291
+ ctab : str, optional
292
+ The connection table (atoms and bonds) in the record.
293
+ By default, an empty structure is created.
294
+ metadata : Metadata, Mapping or str, optional
295
+ The metadata of the record.
296
+ Can be given as dictionary mapping :attr:`Metadata.Key.name`
297
+ to the respective metadata value.
298
+ By default, no metadata is appended to the record.
299
+
300
+ Attributes
301
+ ----------
302
+ header, ctab, metadata
303
+ The same as the parameters.
304
+
305
+ Examples
306
+ --------
307
+
308
+ >>> atoms = residue("ALA")
309
+ >>> record = SDRecord(header=Header(mol_name="ALA", dimensions="3D"))
310
+ >>> record.set_structure(atoms)
311
+ >>> print(record.get_structure())
312
+ 0 N -0.970 0.490 1.500
313
+ 0 C 0.260 0.420 0.690
314
+ 0 C -0.090 0.020 -0.720
315
+ 0 O -1.060 -0.680 -0.920
316
+ 0 C 1.200 -0.620 1.300
317
+ 0 O 0.660 0.440 -1.740
318
+ 0 H -1.380 -0.420 1.480
319
+ 0 H -0.680 0.660 2.450
320
+ 0 H 0.750 1.390 0.680
321
+ 0 H 1.460 -0.330 2.320
322
+ 0 H 0.720 -1.590 1.310
323
+ 0 H 2.110 -0.680 0.700
324
+ 0 H 0.440 0.180 -2.650
325
+ >>> # Add the record to an SD file
326
+ >>> file = SDFile()
327
+ >>> file["ALA"] = record
328
+ >>> print(file)
329
+ ALA
330
+ 3D
331
+ <BLANKLINE>
332
+ 13 12 0 0 0 0 0 0 0 1 V2000
333
+ -0.9700 0.4900 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
334
+ 0.2600 0.4200 0.6900 C 0 0 0 0 0 0 0 0 0 0 0 0
335
+ -0.0900 0.0200 -0.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
336
+ -1.0600 -0.6800 -0.9200 O 0 0 0 0 0 0 0 0 0 0 0 0
337
+ 1.2000 -0.6200 1.3000 C 0 0 0 0 0 0 0 0 0 0 0 0
338
+ 0.6600 0.4400 -1.7400 O 0 0 0 0 0 0 0 0 0 0 0 0
339
+ -1.3800 -0.4200 1.4800 H 0 0 0 0 0 0 0 0 0 0 0 0
340
+ -0.6800 0.6600 2.4500 H 0 0 0 0 0 0 0 0 0 0 0 0
341
+ 0.7500 1.3900 0.6800 H 0 0 0 0 0 0 0 0 0 0 0 0
342
+ 1.4600 -0.3300 2.3200 H 0 0 0 0 0 0 0 0 0 0 0 0
343
+ 0.7200 -1.5900 1.3100 H 0 0 0 0 0 0 0 0 0 0 0 0
344
+ 2.1100 -0.6800 0.7000 H 0 0 0 0 0 0 0 0 0 0 0 0
345
+ 0.4400 0.1800 -2.6500 H 0 0 0 0 0 0 0 0 0 0 0 0
346
+ 1 2 1 0 0 0 0
347
+ 1 7 1 0 0 0 0
348
+ 1 8 1 0 0 0 0
349
+ 2 3 1 0 0 0 0
350
+ 2 5 1 0 0 0 0
351
+ 2 9 1 0 0 0 0
352
+ 3 4 2 0 0 0 0
353
+ 3 6 1 0 0 0 0
354
+ 5 10 1 0 0 0 0
355
+ 5 11 1 0 0 0 0
356
+ 5 12 1 0 0 0 0
357
+ 6 13 1 0 0 0 0
358
+ M END
359
+ $$$$
360
+ """
361
+
362
def __init__(self, header=None, ctab=None, metadata=None):
    self._header = Header() if header is None else header
    self._ctab = ctab

    if metadata is None:
        metadata = Metadata()
    elif not isinstance(metadata, (Metadata, str)):
        # A string is kept in serialized form and is deserialized
        # lazily on first access of the `metadata` property;
        # any other mapping is converted right away
        if not isinstance(metadata, Mapping):
            raise TypeError(
                "Expected 'Metadata', Mapping or str, "
                f"but got '{type(metadata).__name__}'"
            )
        metadata = Metadata(metadata)
    self._metadata = metadata
384
+
385
@property
def header(self):
    """
    The header of the record.

    If the header is still stored in serialized form, it is
    deserialized on first access and the result is kept.

    Raises
    ------
    DeserializationError
        If the serialized header cannot be parsed.
        The original exception is attached as cause.
    """
    if isinstance(self._header, str):
        try:
            self._header = Header.deserialize(self._header)
        except Exception as e:
            # 'Exception' instead of a bare 'except', so that
            # 'KeyboardInterrupt' and 'SystemExit' are not converted
            raise DeserializationError(
                "Failed to deserialize header"
            ) from e
    return self._header
393
+
394
@header.setter
def header(self, header):
    # The given object is stored as is, without any type check
    self._header = header
397
+
398
@property
def ctab(self):
    # The stored connection table, may be 'None' if it was never set
    # CTAB string cannot be changed directly -> no setter
    return self._ctab
402
+
403
@property
def metadata(self):
    """
    The metadata of the record.

    If the metadata is still stored in serialized form, it is
    deserialized on first access and the result is kept.

    Raises
    ------
    DeserializationError
        If the serialized metadata cannot be parsed.
        The original exception is attached as cause.
    """
    if isinstance(self._metadata, str):
        try:
            self._metadata = Metadata.deserialize(self._metadata)
        except Exception as e:
            # 'Exception' instead of a bare 'except', so that
            # 'KeyboardInterrupt' and 'SystemExit' are not converted
            raise DeserializationError(
                "Failed to deserialize metadata"
            ) from e
    return self._metadata
411
+
412
@metadata.setter
def metadata(self, metadata):
    # `Metadata` is itself a mapping, hence a single check is
    # sufficient to reject unsupported types
    if not isinstance(metadata, Mapping):
        raise TypeError(
            "Expected 'Metadata' or Mapping, "
            f"but got '{type(metadata).__name__}'"
        )
    if isinstance(metadata, Metadata):
        self._metadata = metadata
    else:
        self._metadata = Metadata(metadata)
423
+
424
@staticmethod
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    The text is only split into header, connection table and
    metadata; these parts are stored as strings in the record.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    record : SDRecord
        The record holding the three text sections.
    """
    lines = text.splitlines()
    ctab_stop = _get_ctab_stop(lines)
    sections = (
        lines[:_N_HEADER],
        lines[_N_HEADER:ctab_stop],
        lines[ctab_stop:],
    )
    header, ctab, metadata = [
        _join_with_terminal_newline(section) for section in sections
    ]
    return SDRecord(header, ctab, metadata)
441
+
442
def serialize(self):
    """
    Convert this object into text content.

    Returns
    -------
    content : str
        The concatenated header, connection table and metadata text.
    """
    # Parts that are still stored as text are written as they are
    header = self._header
    header_string = header if isinstance(header, str) else header.serialize()

    # A missing connection table is written as an empty one
    ctab_string = _empty_ctab() if self._ctab is None else self._ctab

    metadata = self._metadata
    metadata_string = (
        metadata if isinstance(metadata, str) else metadata.serialize()
    )

    return header_string + ctab_string + metadata_string
467
+
468
def get_structure(self):
    """
    Parse the structural data in the SD record.

    Returns
    -------
    array : AtomArray
        This :class:`AtomArray` contains the optional ``charge``
        annotation and has an associated :class:`BondList`.
        All other annotation categories, except ``element`` are
        empty.

    Raises
    ------
    InvalidFileError
        If the record has no connection table, i.e. it was never set
        or it is empty.
    """
    # A record created without a CTAB stores 'None'
    # -> raise the same error as for an empty CTAB instead of an
    # 'AttributeError' from calling 'splitlines()' on 'None'
    if self._ctab is None:
        raise InvalidFileError("File does not contain structure data")
    ctab_lines = self._ctab.splitlines()
    if len(ctab_lines) == 0:
        raise InvalidFileError("File does not contain structure data")
    return read_structure_from_ctab(ctab_lines)
484
+
485
def set_structure(self, atoms, default_bond_type=BondType.ANY,
                  version=None):
    """
    Set the structural data in the SD record.

    Parameters
    ----------
    atoms : AtomArray
        The array to be saved into this file.
        Must have an associated :class:`BondList`.
    default_bond_type : BondType, optional
        Bond type fallback for the *Bond block*, if a
        :class:`BondType` has no CTAB counterpart.
        By default, each such bond is treated as
        :attr:`BondType.ANY`.
    version : {"V2000", "V3000"}, optional
        The version of the CTAB format.
        ``"V2000"`` uses the *Atom* and *Bond* block, while
        ``"V3000"`` uses the *Properties* block.
        By default, ``"V2000"`` is used, unless the number of atoms
        or bonds exceeds 999, in which case ``"V3000"`` is used.
    """
    # The CTAB is stored as a single string with a terminal line break
    self._ctab = _join_with_terminal_newline(write_structure_to_ctab(
        atoms, default_bond_type, version
    ))
510
+
511
def __eq__(self, other):
    # Records are equal, if header, CTAB and metadata are equal;
    # the properties are used, so lazily stored text is parsed first
    if not isinstance(other, type(self)):
        return False
    for attr_name in ("header", "ctab", "metadata"):
        if not getattr(self, attr_name) == getattr(other, attr_name):
            return False
    return True
521
+
522
def __str__(self):
    # The string representation is the serialized text
    return self.serialize()
524
+
525
+
526
+ class SDFile(File, MutableMapping):
527
+ """
528
+ This class represents an SD file for storing small molecule
529
+ structures.
530
+
531
+ The records for each molecule in the file can be accessed and
532
+ modified like a dictionary.
533
+ The structures can be parsed and written from/to each
534
+ :class:`SDRecord` object via :func:`get_structure()` or
535
+ :func:`set_structure()`, respectively.
536
+
537
+ Attributes
538
+ ----------
539
+ record : SDRecord
540
+ The sole record of the file.
541
+ If the file contains no record or multiple records, an exception is raised.
542
+
543
+ Examples
544
+ --------
545
+ Read a SD file and parse the molecular structure:
546
+
547
+ >>> import os.path
548
+ >>> file = SDFile.read(os.path.join(path_to_structures, "molecules", "TYR.sdf"))
549
+ >>> molecule = file.record.get_structure()
550
+ >>> print(molecule)
551
+ 0 N 1.320 0.952 1.428
552
+ 0 C -0.018 0.429 1.734
553
+ 0 C -0.103 0.094 3.201
554
+ 0 O 0.886 -0.254 3.799
555
+ 0 C -0.274 -0.831 0.907
556
+ 0 C -0.189 -0.496 -0.559
557
+ 0 C 1.022 -0.589 -1.219
558
+ 0 C -1.324 -0.102 -1.244
559
+ 0 C 1.103 -0.282 -2.563
560
+ 0 C -1.247 0.210 -2.587
561
+ 0 C -0.032 0.118 -3.252
562
+ 0 O 0.044 0.420 -4.574
563
+ 0 O -1.279 0.184 3.842
564
+ 0 H 1.977 0.225 1.669
565
+ 0 H 1.365 1.063 0.426
566
+ 0 H -0.767 1.183 1.489
567
+ 0 H 0.473 -1.585 1.152
568
+ 0 H -1.268 -1.219 1.134
569
+ 0 H 1.905 -0.902 -0.683
570
+ 0 H -2.269 -0.031 -0.727
571
+ 0 H 2.049 -0.354 -3.078
572
+ 0 H -2.132 0.523 -3.121
573
+ 0 H -0.123 -0.399 -5.059
574
+ 0 H -1.333 -0.030 4.784
575
+
576
+ Note that important atom annotations may be missing.
577
+ These can be set afterwards:
578
+
579
+ >>> molecule.res_name[:] = "TYR"
580
+ >>> molecule.atom_name[:] = create_atom_names(molecule)
581
+ >>> print(molecule)
582
+ 0 TYR N1 N 1.320 0.952 1.428
583
+ 0 TYR C1 C -0.018 0.429 1.734
584
+ 0 TYR C2 C -0.103 0.094 3.201
585
+ 0 TYR O1 O 0.886 -0.254 3.799
586
+ 0 TYR C3 C -0.274 -0.831 0.907
587
+ 0 TYR C4 C -0.189 -0.496 -0.559
588
+ 0 TYR C5 C 1.022 -0.589 -1.219
589
+ 0 TYR C6 C -1.324 -0.102 -1.244
590
+ 0 TYR C7 C 1.103 -0.282 -2.563
591
+ 0 TYR C8 C -1.247 0.210 -2.587
592
+ 0 TYR C9 C -0.032 0.118 -3.252
593
+ 0 TYR O2 O 0.044 0.420 -4.574
594
+ 0 TYR O3 O -1.279 0.184 3.842
595
+ 0 TYR H1 H 1.977 0.225 1.669
596
+ 0 TYR H2 H 1.365 1.063 0.426
597
+ 0 TYR H3 H -0.767 1.183 1.489
598
+ 0 TYR H4 H 0.473 -1.585 1.152
599
+ 0 TYR H5 H -1.268 -1.219 1.134
600
+ 0 TYR H6 H 1.905 -0.902 -0.683
601
+ 0 TYR H7 H -2.269 -0.031 -0.727
602
+ 0 TYR H8 H 2.049 -0.354 -3.078
603
+ 0 TYR H9 H -2.132 0.523 -3.121
604
+ 0 TYR H10 H -0.123 -0.399 -5.059
605
+ 0 TYR H11 H -1.333 -0.030 4.784
606
+
607
+ Create a SD file and write it to disk:
608
+
609
+ >>> another_molecule = residue("ALA")
610
+ >>> file = SDFile()
611
+ >>> record = SDRecord()
612
+ >>> record.set_structure(molecule)
613
+ >>> file["TYR"] = record
614
+ >>> record = SDRecord()
615
+ >>> record.set_structure(another_molecule)
616
+ >>> file["ALA"] = record
617
+ >>> file.write(os.path.join(path_to_directory, "some_file.sdf"))
618
+ >>> print(file)
619
+ TYR
620
+ <BLANKLINE>
621
+ <BLANKLINE>
622
+ 24 24 0 0 0 0 0 0 0 1 V2000
623
+ 1.3200 0.9520 1.4280 N 0 0 0 0 0 0 0 0 0 0 0 0
624
+ -0.0180 0.4290 1.7340 C 0 0 0 0 0 0 0 0 0 0 0 0
625
+ -0.1030 0.0940 3.2010 C 0 0 0 0 0 0 0 0 0 0 0 0
626
+ 0.8860 -0.2540 3.7990 O 0 0 0 0 0 0 0 0 0 0 0 0
627
+ -0.2740 -0.8310 0.9070 C 0 0 0 0 0 0 0 0 0 0 0 0
628
+ -0.1890 -0.4960 -0.5590 C 0 0 0 0 0 0 0 0 0 0 0 0
629
+ 1.0220 -0.5890 -1.2190 C 0 0 0 0 0 0 0 0 0 0 0 0
630
+ -1.3240 -0.1020 -1.2440 C 0 0 0 0 0 0 0 0 0 0 0 0
631
+ 1.1030 -0.2820 -2.5630 C 0 0 0 0 0 0 0 0 0 0 0 0
632
+ -1.2470 0.2100 -2.5870 C 0 0 0 0 0 0 0 0 0 0 0 0
633
+ -0.0320 0.1180 -3.2520 C 0 0 0 0 0 0 0 0 0 0 0 0
634
+ 0.0440 0.4200 -4.5740 O 0 0 0 0 0 0 0 0 0 0 0 0
635
+ -1.2790 0.1840 3.8420 O 0 0 0 0 0 0 0 0 0 0 0 0
636
+ 1.9770 0.2250 1.6690 H 0 0 0 0 0 0 0 0 0 0 0 0
637
+ 1.3650 1.0630 0.4260 H 0 0 0 0 0 0 0 0 0 0 0 0
638
+ -0.7670 1.1830 1.4890 H 0 0 0 0 0 0 0 0 0 0 0 0
639
+ 0.4730 -1.5850 1.1520 H 0 0 0 0 0 0 0 0 0 0 0 0
640
+ -1.2680 -1.2190 1.1340 H 0 0 0 0 0 0 0 0 0 0 0 0
641
+ 1.9050 -0.9020 -0.6830 H 0 0 0 0 0 0 0 0 0 0 0 0
642
+ -2.2690 -0.0310 -0.7270 H 0 0 0 0 0 0 0 0 0 0 0 0
643
+ 2.0490 -0.3540 -3.0780 H 0 0 0 0 0 0 0 0 0 0 0 0
644
+ -2.1320 0.5230 -3.1210 H 0 0 0 0 0 0 0 0 0 0 0 0
645
+ -0.1230 -0.3990 -5.0590 H 0 0 0 0 0 0 0 0 0 0 0 0
646
+ -1.3330 -0.0300 4.7840 H 0 0 0 0 0 0 0 0 0 0 0 0
647
+ 1 2 1 0 0 0 0
648
+ 1 14 1 0 0 0 0
649
+ 1 15 1 0 0 0 0
650
+ 2 3 1 0 0 0 0
651
+ 2 5 1 0 0 0 0
652
+ 2 16 1 0 0 0 0
653
+ 3 4 2 0 0 0 0
654
+ 3 13 1 0 0 0 0
655
+ 5 6 1 0 0 0 0
656
+ 5 17 1 0 0 0 0
657
+ 5 18 1 0 0 0 0
658
+ 6 7 2 0 0 0 0
659
+ 6 8 1 0 0 0 0
660
+ 7 9 1 0 0 0 0
661
+ 7 19 1 0 0 0 0
662
+ 8 10 2 0 0 0 0
663
+ 8 20 1 0 0 0 0
664
+ 9 11 2 0 0 0 0
665
+ 9 21 1 0 0 0 0
666
+ 10 11 1 0 0 0 0
667
+ 10 22 1 0 0 0 0
668
+ 11 12 1 0 0 0 0
669
+ 12 23 1 0 0 0 0
670
+ 13 24 1 0 0 0 0
671
+ M END
672
+ $$$$
673
+ ALA
674
+ <BLANKLINE>
675
+ <BLANKLINE>
676
+ 13 12 0 0 0 0 0 0 0 1 V2000
677
+ -0.9700 0.4900 1.5000 N 0 0 0 0 0 0 0 0 0 0 0 0
678
+ 0.2600 0.4200 0.6900 C 0 0 0 0 0 0 0 0 0 0 0 0
679
+ -0.0900 0.0200 -0.7200 C 0 0 0 0 0 0 0 0 0 0 0 0
680
+ -1.0600 -0.6800 -0.9200 O 0 0 0 0 0 0 0 0 0 0 0 0
681
+ 1.2000 -0.6200 1.3000 C 0 0 0 0 0 0 0 0 0 0 0 0
682
+ 0.6600 0.4400 -1.7400 O 0 0 0 0 0 0 0 0 0 0 0 0
683
+ -1.3800 -0.4200 1.4800 H 0 0 0 0 0 0 0 0 0 0 0 0
684
+ -0.6800 0.6600 2.4500 H 0 0 0 0 0 0 0 0 0 0 0 0
685
+ 0.7500 1.3900 0.6800 H 0 0 0 0 0 0 0 0 0 0 0 0
686
+ 1.4600 -0.3300 2.3200 H 0 0 0 0 0 0 0 0 0 0 0 0
687
+ 0.7200 -1.5900 1.3100 H 0 0 0 0 0 0 0 0 0 0 0 0
688
+ 2.1100 -0.6800 0.7000 H 0 0 0 0 0 0 0 0 0 0 0 0
689
+ 0.4400 0.1800 -2.6500 H 0 0 0 0 0 0 0 0 0 0 0 0
690
+ 1 2 1 0 0 0 0
691
+ 1 7 1 0 0 0 0
692
+ 1 8 1 0 0 0 0
693
+ 2 3 1 0 0 0 0
694
+ 2 5 1 0 0 0 0
695
+ 2 9 1 0 0 0 0
696
+ 3 4 2 0 0 0 0
697
+ 3 6 1 0 0 0 0
698
+ 5 10 1 0 0 0 0
699
+ 5 11 1 0 0 0 0
700
+ 5 12 1 0 0 0 0
701
+ 6 13 1 0 0 0 0
702
+ M END
703
+ $$$$
704
+ <BLANKLINE>
705
+ """
706
+
707
def __init__(self, records=None):
    self._records = {}
    if records is None:
        return
    for mol_name, record in records.items():
        # Records given as text are stored unchanged, while the
        # header of a parsed record is renamed to its key
        if isinstance(record, SDRecord):
            record.header.mol_name = mol_name
        self._records[mol_name] = record
714
+
715
@property
def lines(self):
    # The lines of the serialized file, computed anew on each access
    return self.serialize().splitlines()
718
+
719
@property
def record(self):
    # Convenience access for files that contain exactly one record
    n_records = len(self)
    if n_records == 0:
        raise ValueError("There are no records in the file")
    if n_records > 1:
        raise ValueError("There are multiple records in the file")
    sole_name = next(iter(self))
    return self[sole_name]
726
+
727
@staticmethod
def deserialize(text):
    """
    Create an object by deserializing the given text content.

    The text is split into records at each line starting with the
    record delimiter.
    The records themselves are stored as text and are parsed when
    they are accessed.
    Content after the last delimiter is ignored.

    Parameters
    ----------
    text : str
        The content to be deserialized.

    Returns
    -------
    file : SDFile
        The file containing one record per delimited section.
        The first line of each section, stripped of surrounding
        whitespace, is used as record name.
        If the text contains only whitespace, the file is empty.

    Warns
    -----
    UserWarning
        If the text contains no record delimiter at all.
        In this case the entire text is read as a single record.
    """
    import warnings

    lines = text.splitlines()
    record_ends = [
        i for i, line in enumerate(lines)
        if line.startswith(_RECORD_DELIMITER)
    ]
    if len(record_ends) == 0:
        # Without this branch the name lookup below would fail for
        # input without any delimiter
        if not any(line.strip() for line in lines):
            return SDFile()
        warnings.warn(
            "Record delimiter is missing, "
            "the content is read as a single record"
        )
        record_ends = [len(lines)]
    # The first record starts at the first line and the last
    # delimiter is at the end of the file
    # Records in the middle start directly after the delimiter
    record_starts = [0] + [end + 1 for end in record_ends[:-1]]
    return SDFile({
        # Do not include the delimiter
        # -> stop at end (instead of end + 1)
        lines[start].strip(): _join_with_terminal_newline(lines[start:end])
        for start, end in zip(record_starts, record_ends)
    })
754
+
755
def serialize(self):
    """
    Convert this object into text content.

    Returns
    -------
    content : str
        The serialized content.
        Each record is followed by a record delimiter line.

    Raises
    ------
    SerializationError
        If a record cannot be serialized.
        The original exception is attached as cause.
    """
    text_blocks = []
    for record_name, record in self._records.items():
        if isinstance(record, str):
            # Record is already stored as text
            text_blocks.append(record)
        else:
            try:
                text_blocks.append(record.serialize())
            except Exception as e:
                # 'Exception' instead of a bare 'except', so that
                # 'KeyboardInterrupt' and 'SystemExit' are not converted
                raise SerializationError(
                    f"Failed to serialize record '{record_name}'"
                ) from e
        text_blocks.append(_RECORD_DELIMITER + "\n")
    return "".join(text_blocks)
778
+
779
@classmethod
def read(cls, file):
    """
    Read a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be read.
        Alternatively a file path can be supplied.

    Returns
    -------
    file_object : SDFile
        The parsed file.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if is_open_compatible(file):
        # File name
        with open(file, "r") as f:
            content = f.read()
    elif is_text(file):
        # File object
        content = file.read()
    else:
        raise TypeError("A file opened in 'text' mode is required")
    return SDFile.deserialize(content)
805
+
806
def write(self, file):
    """
    Write the contents of this object into a SD file.

    Parameters
    ----------
    file : file-like object or str
        The file to be written to.
        Alternatively a file path can be supplied.

    Raises
    ------
    TypeError
        If a file object is given that is not opened in text mode.
    """
    if is_open_compatible(file):
        # File name
        with open(file, "w") as f:
            f.write(self.serialize())
        return
    # File object
    if not is_text(file):
        raise TypeError("A file opened in 'text' mode is required")
    file.write(self.serialize())
823
+
824
def __getitem__(self, key):
    """
    Get the record with the given name.

    A record that is still stored as text is deserialized on first
    access and the parsed object replaces the text.

    Raises
    ------
    KeyError
        If there is no record with the given name.
    DeserializationError
        If the stored text cannot be parsed.
        The original exception is attached as cause.
    """
    record = self._records[key]
    if isinstance(record, str):
        # Element is stored in serialized form
        # -> must be deserialized first
        try:
            record = SDRecord.deserialize(record)
        except Exception as e:
            # 'Exception' instead of a bare 'except', so that
            # 'KeyboardInterrupt' and 'SystemExit' are not converted
            raise DeserializationError(
                f"Failed to deserialize record '{key}'"
            ) from e
        # Update with deserialized object
        self._records[key] = record
    return record
838
+
839
def __setitem__(self, key, record):
    # Only parsed records are accepted here, in contrast to the
    # constructor, which also stores records given as text
    if not isinstance(record, SDRecord):
        raise TypeError(
            f"Expected 'SDRecord', but got '{type(record).__name__}'"
        )
    # The molecule name in the header is unique across the file
    # -> the header of the given record is renamed to the key
    record.header.mol_name = key
    self._records[key] = record
847
+
848
def __delitem__(self, key):
    # Remove the record with the given name
    del self._records[key]
850
+
851
def __iter__(self):
    # Iterates over the record names
    return iter(self._records)
853
+
854
def __len__(self):
    # Number of records
    return len(self._records)
856
+
857
def __eq__(self, other):
    # Files are equal, if they have the same record names and the
    # records for each name are equal;
    # item access is used, so records stored as text are parsed first
    if not isinstance(other, type(self)):
        return False
    if set(self.keys()) != set(other.keys()):
        return False
    return not any(
        self[record_name] != other[record_name]
        for record_name in self.keys()
    )
866
+
867
def __str__(self):
    # The string representation is the serialized text
    return self.serialize()
869
+
870
+
871
def _join_with_terminal_newline(text_blocks):
    """
    Concatenate the given strings, terminating each one with a line
    break.
    An empty sequence gives an empty string.
    """
    if len(text_blocks) == 0:
        return ""
    return "".join(block + "\n" for block in text_blocks)
876
+
877
+
878
def _empty_ctab():
    """
    Create the connection table text for a structure without atoms
    and bonds.
    """
    atoms = AtomArray(0)
    atoms.bonds = BondList(0)
    ctab_lines = write_structure_to_ctab(atoms)
    return _join_with_terminal_newline(ctab_lines)
882
+
883
+
884
def _to_metadata_key(key):
    """
    Convert the input into a :class:`Metadata.Key`.
    A string is used as the name of a new key, an existing key is
    returned unchanged and any other type raises a :class:`TypeError`.
    """
    if isinstance(key, Metadata.Key):
        return key
    if not isinstance(key, str):
        raise TypeError(
            "Expected 'Metadata.Key' or str, "
            f"but got '{type(key).__name__}'"
        )
    return Metadata.Key(name=key)
894
+
895
+
896
def _add_key_value_pair(metadata, key, value):
    """
    Add the pair to the `metadata` dictionary.
    Nothing is added if `key` is ``None``, while a key without a value
    raises a :class:`DeserializationError`.
    """
    if key is None:
        # No key was encountered yet
        return
    if value is None:
        raise DeserializationError(
            f"No value found for metadata key {key}"
        )
    metadata[key] = value
903
+
904
+
905
def _get_ctab_stop(lines):
    """
    Get the exclusive stop index of the connection table in the lines
    of a record.

    Parameters
    ----------
    lines : list of str
        The lines of the record, starting with the header lines.

    Returns
    -------
    stop : int
        The index of the line after the first CTAB terminator line
        found behind the header.
        If there is no terminator line, the number of lines is
        returned.
    """
    for i in range(_N_HEADER, len(lines)):
        # The terminator is matched with any amount of whitespace
        # between 'M' and 'END', so that the check does not depend on
        # the exact number of spaces in the line
        if re.match(r"M\s+END", lines[i]):
            return i + 1
    return len(lines)