nomad-parser-plugins-atomistic 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. atomisticparsers/__init__.py +400 -0
  2. atomisticparsers/amber/__init__.py +19 -0
  3. atomisticparsers/amber/__main__.py +31 -0
  4. atomisticparsers/amber/metainfo/__init__.py +19 -0
  5. atomisticparsers/amber/metainfo/amber.py +495 -0
  6. atomisticparsers/amber/parser.py +42 -0
  7. atomisticparsers/asap/__init__.py +19 -0
  8. atomisticparsers/asap/__main__.py +31 -0
  9. atomisticparsers/asap/metainfo/__init__.py +19 -0
  10. atomisticparsers/asap/metainfo/asap.py +75 -0
  11. atomisticparsers/asap/parser.py +197 -0
  12. atomisticparsers/bopfox/__init__.py +19 -0
  13. atomisticparsers/bopfox/__main__.py +31 -0
  14. atomisticparsers/bopfox/metainfo/__init__.py +19 -0
  15. atomisticparsers/bopfox/metainfo/bopfox.py +225 -0
  16. atomisticparsers/bopfox/parser.py +808 -0
  17. atomisticparsers/dftbplus/__init__.py +19 -0
  18. atomisticparsers/dftbplus/__main__.py +31 -0
  19. atomisticparsers/dftbplus/metainfo/__init__.py +19 -0
  20. atomisticparsers/dftbplus/metainfo/dftbplus.py +217 -0
  21. atomisticparsers/dftbplus/parser.py +500 -0
  22. atomisticparsers/dlpoly/__init__.py +19 -0
  23. atomisticparsers/dlpoly/__main__.py +31 -0
  24. atomisticparsers/dlpoly/metainfo/__init__.py +19 -0
  25. atomisticparsers/dlpoly/metainfo/dl_poly.py +312 -0
  26. atomisticparsers/dlpoly/parser.py +798 -0
  27. atomisticparsers/gromacs/__init__.py +19 -0
  28. atomisticparsers/gromacs/__main__.py +31 -0
  29. atomisticparsers/gromacs/metainfo/__init__.py +19 -0
  30. atomisticparsers/gromacs/metainfo/gromacs.py +2388 -0
  31. atomisticparsers/gromacs/parser.py +1581 -0
  32. atomisticparsers/gromos/__init__.py +19 -0
  33. atomisticparsers/gromos/__main__.py +31 -0
  34. atomisticparsers/gromos/metainfo/__init__.py +19 -0
  35. atomisticparsers/gromos/metainfo/gromos.py +1995 -0
  36. atomisticparsers/gromos/parser.py +58 -0
  37. atomisticparsers/gulp/__init__.py +19 -0
  38. atomisticparsers/gulp/__main__.py +31 -0
  39. atomisticparsers/gulp/metainfo/__init__.py +19 -0
  40. atomisticparsers/gulp/metainfo/gulp.py +1117 -0
  41. atomisticparsers/gulp/parser.py +1316 -0
  42. atomisticparsers/h5md/__init__.py +19 -0
  43. atomisticparsers/h5md/__main__.py +31 -0
  44. atomisticparsers/h5md/metainfo/__init__.py +19 -0
  45. atomisticparsers/h5md/metainfo/h5md.py +239 -0
  46. atomisticparsers/h5md/parser.py +901 -0
  47. atomisticparsers/lammps/__init__.py +19 -0
  48. atomisticparsers/lammps/__main__.py +31 -0
  49. atomisticparsers/lammps/metainfo/__init__.py +19 -0
  50. atomisticparsers/lammps/metainfo/lammps.py +1417 -0
  51. atomisticparsers/lammps/parser.py +1753 -0
  52. atomisticparsers/libatoms/__init__.py +19 -0
  53. atomisticparsers/libatoms/__main__.py +31 -0
  54. atomisticparsers/libatoms/metainfo/__init__.py +19 -0
  55. atomisticparsers/libatoms/metainfo/lib_atoms.py +251 -0
  56. atomisticparsers/libatoms/parser.py +38 -0
  57. atomisticparsers/namd/__init__.py +19 -0
  58. atomisticparsers/namd/__main__.py +31 -0
  59. atomisticparsers/namd/metainfo/__init__.py +19 -0
  60. atomisticparsers/namd/metainfo/namd.py +1605 -0
  61. atomisticparsers/namd/parser.py +312 -0
  62. atomisticparsers/tinker/__init__.py +19 -0
  63. atomisticparsers/tinker/__main__.py +31 -0
  64. atomisticparsers/tinker/metainfo/__init__.py +18 -0
  65. atomisticparsers/tinker/metainfo/tinker.py +1363 -0
  66. atomisticparsers/tinker/parser.py +685 -0
  67. atomisticparsers/utils/__init__.py +22 -0
  68. atomisticparsers/utils/mdanalysis.py +662 -0
  69. atomisticparsers/utils/parsers.py +226 -0
  70. atomisticparsers/xtb/__init__.py +19 -0
  71. atomisticparsers/xtb/__main__.py +32 -0
  72. atomisticparsers/xtb/metainfo/__init__.py +19 -0
  73. atomisticparsers/xtb/metainfo/xtb.py +256 -0
  74. atomisticparsers/xtb/parser.py +979 -0
  75. nomad_parser_plugins_atomistic-1.0.dist-info/LICENSE +202 -0
  76. nomad_parser_plugins_atomistic-1.0.dist-info/METADATA +327 -0
  77. nomad_parser_plugins_atomistic-1.0.dist-info/RECORD +80 -0
  78. nomad_parser_plugins_atomistic-1.0.dist-info/WHEEL +5 -0
  79. nomad_parser_plugins_atomistic-1.0.dist-info/entry_points.txt +15 -0
  80. nomad_parser_plugins_atomistic-1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,901 @@
1
+ #
2
+ # Copyright The NOMAD Authors.
3
+ #
4
+ # This file is part of NOMAD.
5
+ # See https://nomad-lab.eu for further info.
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+ import os
20
+ import numpy as np
21
+ import logging
22
+ import h5py
23
+
24
+ from typing import List, Dict, Union, Any
25
+ from h5py import Group
26
+
27
+ from nomad.metainfo.util import MEnum
28
+ from nomad.parsing.file_parser import FileParser
29
+ from runschema.run import Run, Program, MSection
30
+ from runschema.method import (
31
+ Method,
32
+ ForceField,
33
+ Model,
34
+ AtomParameters,
35
+ ForceCalculations,
36
+ NeighborSearching,
37
+ )
38
+ from runschema.system import System, AtomsGroup
39
+ from runschema.calculation import (
40
+ Calculation,
41
+ Energy,
42
+ EnergyEntry,
43
+ BaseCalculation,
44
+ )
45
+ from simulationworkflowschema.molecular_dynamics import (
46
+ EnsembleProperty,
47
+ EnsemblePropertyValues,
48
+ CorrelationFunction,
49
+ CorrelationFunctionValues,
50
+ )
51
+ from atomisticparsers.utils import MDParser, MOL
52
+ from .metainfo.h5md import ParamEntry, CalcEntry, Author
53
+ from nomad.units import ureg
54
+ from ase.symbols import symbols2numbers
55
+
56
+
57
class HDF5Parser(FileParser):
    """File parser exposing HDF5 groups, datasets and attributes by dotted path.

    Paths use '.' as the separator between nested HDF5 members, e.g.
    'particles.all.position.value'. `parse` stores its result in
    `self._results` under the requested path.
    """

    def __init__(self):
        super().__init__(None)

    @property
    def filehdf5(self):
        """Read-only `h5py.File` handle for `self.mainfile`, opened on first access.

        If opening fails an error is logged and the (still unset) handler is
        returned.
        """
        if self._file_handler is None:
            try:
                self._file_handler = h5py.File(self.mainfile, 'r')
            except Exception:
                self.logger.error('Error reading hdf5 file.')

        return self._file_handler

    def apply_unit(self, quantity, unit: str, unit_factor: float):
        """Multiply `quantity` by `unit_factor * ureg(unit)`.

        Returns None when `quantity` is None and returns `quantity` unchanged
        when `unit` is empty or None.
        """
        if quantity is None:
            return
        if unit:
            unit_val = ureg(unit)
            unit_val *= unit_factor
            quantity *= unit_val

        return quantity

    def decode_bytes(self, dataset):
        """Convert raw HDF5 values into plain Python values where possible.

        - None and empty arrays give None.
        - Arrays whose first element is `bytes` become a list of utf-8 strings.
        - Arrays whose first element is a boolean become a list of Python bools.
        - Scalar `bytes` / NumPy booleans become `str` / `bool`.
        - Everything else is returned unchanged.
        """
        if dataset is None:
            return None

        if isinstance(dataset, np.ndarray):
            if dataset.size == 0:
                return None
            if dataset.ndim == 0:
                # A 0-d array cannot be indexed with [0]; unwrap it and treat
                # it like any other scalar below.
                dataset = dataset[()]
            else:
                first = dataset[0]
                if isinstance(first, bytes):
                    return [val.decode('utf-8') for val in dataset]
                # NumPy booleans are not instances of the builtin bool, so both
                # types have to be checked explicitly.
                if isinstance(first, (bool, np.bool_)):
                    return [bool(val) for val in dataset]
                return dataset

        if isinstance(dataset, np.bool_):
            return bool(dataset)
        if isinstance(dataset, bytes):
            return dataset.decode('utf-8')
        return dataset

    def get_attribute(
        self, group, attribute: str = None, path: str = None, default=None
    ):
        """
        Extracts attribute from group object based on path, and returns default if not defined.

        `path` (optional) is a dotted path that is followed from `group` before
        the attribute is read. `default` is returned when the path cannot be
        followed, when it resolves to nothing, or when the attribute is missing.
        """
        if path:
            for section in path.split('.'):
                try:
                    group = group.get(section)
                except Exception:
                    return default

        # The path may resolve to None (missing member) or to an object
        # without HDF5 attributes; neither can carry the requested attribute.
        if group is None or not hasattr(group, 'attrs'):
            return default

        value = group.attrs.get(attribute)
        value = self.decode_bytes(value) if value is not None else default

        return value if value is not None else default

    def get_value(self, group, path: str, default=None):
        """
        Extracts group or dataset from group object based on path, and returns default if not defined.

        Datasets are read fully and scaled by the 'unit' / 'unit_factor'
        attributes of the final path segment (when a unit is given); groups are
        returned as they are.
        """
        value = None
        unit = None
        unit_factor = 1.0
        for section in path.split('.'):
            try:
                value = group.get(section)
                unit = self.get_attribute(group, 'unit', path=section)
                unit_factor = self.get_attribute(
                    group, 'unit_factor', path=section, default=1.0
                )
                group = value
            except Exception:
                return default

        if value is None:
            value = default
        elif isinstance(value, h5py.Dataset):
            value = value[()]
            value = self.apply_unit(value, unit, unit_factor)
        value = self.decode_bytes(value)

        return value if value is not None else default

    def parse(self, path: str = None, **kwargs):
        """Resolve `path` and store the result in `self._results[path]`.

        Keyword arguments:
            source: object to start from (defaults to `self.filehdf5`).
            isattr: when True the last path segment names an attribute of the
                object addressed by the preceding segments.
        """
        source = kwargs.get('source', self.filehdf5)
        isattr = kwargs.get('isattr', False)
        if isattr:
            # Without a '.' the whole path is the attribute name on `source`.
            attr_path, _, attribute = path.rpartition('.')
            value = self.get_attribute(source, attribute, path=attr_path or None)
        else:
            value = self.get_value(source, path)
        self._results[path] = value
159
+
160
+
161
+ class H5MDParser(MDParser):
162
def __init__(self):
    """Create the HDF5 data parser and initialise caches, paths and name maps."""
    super().__init__()
    self._data_parser = HDF5Parser()

    # Values filled in by the individual parse_* methods.
    self._n_frames = self._n_atoms = None
    self._atom_parameters = self._system_info = None
    self._observable_info = self._parameter_info = None

    self._time_unit = ureg.picosecond

    # Dotted paths into the H5MD file for the "all particles" group.
    particles_all = 'particles.all'
    positions_all = f'{particles_all}.position'
    self._path_group_particles_all = particles_all
    self._path_group_positions_all = positions_all
    self._path_value_positions_all = f'{positions_all}.value'

    # NOMAD quantity name -> member name inside the particles group.
    self._nomad_to_particles_group_map = dict(
        positions='position',
        velocities='velocity',
        forces='force',
        labels='species_label',
        label='force_field_label',
        mass='mass',
        charge='charge',
    )

    # NOMAD quantity name -> member/attribute name inside the box group.
    self._nomad_to_box_group_map = dict(
        lattice_vectors='edges',
        periodic='boundary',
        dimension='dimension',
    )
191
+
192
def parse_atom_parameters(self):
    """Read the static per-atom parameters 'label', 'mass' and 'charge'.

    Results are stored in `self._atom_parameters` keyed by parameter name.
    A parameter is only stored when it is a plain (time-independent) array
    whose length equals the number of atoms of the first frame; otherwise a
    warning is logged and the parameter is left out.

    Returns an empty dict when the number of atoms is unknown, otherwise None.
    """
    # Always leave a dictionary behind so that later readers can call
    # .keys()/.get() on it even when nothing could be parsed.
    self._atom_parameters = {}
    if self._n_atoms is None:
        return {}
    n_atoms = self._n_atoms[0]  # TODO Extend to non-static n_atoms

    for key in ('label', 'mass', 'charge'):
        value = self._data_parser.get(
            f'{self._path_group_particles_all}.{self._nomad_to_particles_group_map[key]}'
        )
        if value is None:
            continue
        if isinstance(value, h5py.Group):
            self.logger.warning(
                'Time-dependent atom parameters currently not supported.'
                ' Atom parameter values will not be stored.'
            )
            continue
        if len(value) != n_atoms:
            self.logger.warning(
                'Inconsistent length of some atom parameters.'
                ' Atom parameter values will not be stored.'
            )
            continue
        # Store only after validation, so the warnings above hold true.
        self._atom_parameters[key] = value
219
+
220
def parse_system_info(self):
    """Collect per-step particle and box information from the particles group.

    Fills `self._system_info` with two dictionaries, 'system' and
    'calculation', each keyed by the steps of the position trajectory. The
    position steps are also stored in `self.trajectory_steps`.

    Positions and their steps are required; if either is missing a warning
    is logged and the (empty) info dictionary is returned.

    Returns:
        The `self._system_info` dictionary.
    """
    self._system_info = {'system': {}, 'calculation': {}}
    particles_group = self._data_parser.get(self._path_group_particles_all)
    positions = self._data_parser.get(self._path_value_positions_all)
    # For now we require that positions are present in the H5MD file to store other particle attributes
    if particles_group is None or positions is None:
        self.logger.warning(
            'No positions available in H5MD file.'
            ' Other particle attributes will not be stored'
        )
        return self._system_info

    # get the steps based on the positions
    steps = self._data_parser.get(f'{self._path_group_positions_all}.step')
    if steps is None:
        self.logger.warning(
            'No step information available in H5MD file.'
            ' System information cannot be parsed.'
        )
        return self._system_info
    self.trajectory_steps = steps

    # Static values are replicated once per frame; every per-frame list is
    # indexed by position step below, so fall back to the number of steps
    # when the frame count is not known.
    n_frames = self._n_frames if self._n_frames is not None else len(steps)

    def get_value(value, steps, path=''):
        """Return one entry per step for a static or time-dependent member."""
        if value is None:
            return value
        if not isinstance(value, h5py.Group):
            # static value: same for every frame
            return [value] * n_frames

        value = self._data_parser.get(f'{path}.value' if path else 'value')
        path_step = f'{path}.step' if path else 'step'
        attr_steps = self._data_parser.get(path_step)
        if value is None or attr_steps is None:
            self.logger.warning(
                'Missing values or steps in particle attributes.'
                ' These attributes will not be stored.'
            )
            return None
        if sorted(attr_steps) != sorted(steps):
            self.logger.warning(
                'Distinct trajectory lengths of particle attributes not supported.'
                ' These attributes will not be stored.'
            )
            return None
        return value

    # get the rest of the particle quantities
    values_dict = {'system': {}, 'calculation': {}}
    times = self._data_parser.get(f'{self._path_group_positions_all}.time')
    values_dict['system']['time'] = times
    values_dict['calculation']['time'] = times
    values_dict['system']['positions'] = positions
    values_dict['system']['n_atoms'] = self._n_atoms
    system_keys = {
        'labels': 'system',
        'velocities': 'system',
        'forces': 'calculation',
    }
    for key, sec_key in system_keys.items():
        path = f'{self._path_group_particles_all}.{self._nomad_to_particles_group_map[key]}'
        value = self._data_parser.get(path)
        values_dict[sec_key][key] = get_value(value, steps, path=path)

    # get the box quantities
    box = self._data_parser.get(f'{self._path_group_particles_all}.box')
    if box is not None:
        # 'dimension' and 'periodic' are read as attributes of the box group
        for box_key in ('dimension', 'periodic'):
            value = self._data_parser.get(
                f'{self._path_group_particles_all}.box.{self._nomad_to_box_group_map[box_key]}',
                isattr=True,
            )
            values_dict['system'][box_key] = (
                [value] * n_frames if value is not None else None
            )

        box_key = 'lattice_vectors'
        path = f'{self._path_group_particles_all}.box.{self._nomad_to_box_group_map[box_key]}'
        value = self._data_parser.get(path)
        values_dict['system'][box_key] = get_value(value, steps, path=path)

    # populate the dictionary, skipping quantities that could not be read
    for i_step, step in enumerate(steps):
        for sec_key in ('system', 'calculation'):
            self._system_info[sec_key][step] = {
                key: val[i_step]
                for key, val in values_dict[sec_key].items()
                if val is not None
            }

    return self._system_info
314
+
315
def parse_observable_info(self):
    """Collect the contents of the 'observables' group, sorted by observable type.

    Fills `self._observable_info` with three dictionaries:
      - 'configurational': keyed by step; each entry holds the observable
        values of that step (key '<name>' or '<name>-<label>') and, when
        available, the 'time' of the step.
      - 'ensemble_average' / 'correlation_function': keyed by observable
        name, then label, then member name of the observable group.

    The union of all configurational steps is stored in
    `self.thermodynamics_steps`.

    Returns:
        The `self._observable_info` dictionary.
    """
    self._observable_info = {
        'configurational': {},
        'ensemble_average': {},
        'correlation_function': {},
    }
    thermodynamics_steps = []
    # Set up front so the attribute exists even when there are no observables.
    self.thermodynamics_steps = thermodynamics_steps
    observables_group = self._data_parser.get('observables')
    if observables_group is None:
        return self._observable_info

    def get_observable_type(observable):
        """Return the decoded 'type' attribute, or None if there is none."""
        attrs = getattr(observable, 'attrs', None)
        if attrs is None:
            return None
        return self._data_parser.decode_bytes(attrs.get('type'))

    def get_observable_paths(observable_group: Dict, current_path: str) -> List:
        """Return the dotted paths of all members carrying a 'type' attribute."""
        paths = []
        for obs_key in observable_group.keys():
            observable = self._data_parser.get_value(observable_group, obs_key)
            if get_observable_type(observable):
                paths.append(f'{current_path}{obs_key}')
            elif isinstance(observable, h5py.Group):
                # untyped group: descend and look for typed observables below
                paths.extend(
                    get_observable_paths(observable, f'{current_path}{obs_key}.')
                )
            # untyped non-group members cannot be observables and are skipped

        return paths

    for path in get_observable_paths(observables_group, current_path=''):
        full_path = f'observables.{path}'
        observable = self._data_parser.get_value(observables_group, path)
        observable_type = get_observable_type(observable)
        if observable_type not in self._observable_info:
            self.logger.warning(
                'Unknown observable type found. These will not be stored.'
            )
            continue
        observable_name, observable_label = (
            path.split('.', 1) if '.' in path else [path, '']
        )

        if observable_type == 'configurational':
            steps = self._data_parser.get(f'{full_path}.step')
            if steps is None:
                self.logger.warning(
                    'Missing step information in some observables.'
                    ' These will not be stored.'
                )
                continue
            thermodynamics_steps = set(list(steps) + list(thermodynamics_steps))
            times = self._data_parser.get(f'{full_path}.time')
            values = self._data_parser.get(f'{full_path}.value')
            if isinstance(values, h5py.Group):
                self.logger.warning(
                    'Group structures within individual observables not supported.'
                    ' These will not be stored.'
                )
                continue
            if values is None:
                self.logger.warning(
                    'Missing values in some observables.'
                    ' These will not be stored.'
                )
                continue

            observable_key = (
                f'{observable_name}-{observable_label}'
                if observable_label
                else f'{observable_name}'
            )
            step_infos = self._observable_info[observable_type]
            for i_step, step in enumerate(steps):
                if not step_infos.get(step):
                    # first observable seen for this step
                    step_infos[step] = {}
                    if times is not None:
                        step_infos[step]['time'] = times[i_step]
                step_infos[step][observable_key] = values[i_step]
        else:
            type_infos = self._observable_info[observable_type]
            if observable_name not in type_infos:
                type_infos[observable_name] = {}
            type_infos[observable_name][observable_label] = {}
            for key in observable.keys():
                observable_attribute = self._data_parser.get(f'{full_path}.{key}')
                if isinstance(observable_attribute, h5py.Group):
                    self.logger.warning(
                        'Group structures within individual observables not supported.'
                        ' These will not be stored.'
                    )
                    continue
                type_infos[observable_name][observable_label][key] = (
                    observable_attribute
                )

    self.thermodynamics_steps = thermodynamics_steps
    return self._observable_info
403
+
404
def parse_atomsgroup(
    self,
    nomad_sec: Union[System, AtomsGroup],
    h5md_sec_particlesgroup: Group,
    path_particlesgroup: str,
):
    """Recursively turn an H5MD particles-group hierarchy into AtomsGroup sections.

    One AtomsGroup is appended to `nomad_sec.atoms_group` per member of
    `h5md_sec_particlesgroup`. Members that have no dedicated quantity are
    stored as `x_h5md_parameters` entries; a nested 'particles_group' member
    is parsed recursively into the newly created section.
    """
    for index, name in enumerate(h5md_sec_particlesgroup.keys()):
        member_path = f'{path_particlesgroup}.{name}'
        members = {}
        for member_key in h5md_sec_particlesgroup[name].keys():
            members[member_key] = self._data_parser.get(f'{member_path}.{member_key}')

        sec_atomsgroup = AtomsGroup()
        nomad_sec.atoms_group.append(sec_atomsgroup)

        sec_atomsgroup.type = members.pop('type', None)
        sec_atomsgroup.index = index
        sec_atomsgroup.atom_indices = members.pop('indices', None)
        if sec_atomsgroup.atom_indices is not None:
            sec_atomsgroup.n_atoms = len(sec_atomsgroup.atom_indices)
        else:
            sec_atomsgroup.n_atoms = None
        sec_atomsgroup.is_molecule = members.pop('is_molecule', None)
        sec_atomsgroup.label = members.pop('label', None)
        sec_atomsgroup.composition_formula = members.pop('formula', None)
        nested_group = members.pop('particles_group', None)

        # everything that is left becomes a custom parameter entry
        for kind, val in members.items():
            units = getattr(val, 'units', None)
            if units is not None:
                val = val.magnitude
            sec_atomsgroup.x_h5md_parameters.append(
                ParamEntry(kind=kind, value=val, unit=units)
            )

        # descend into the next level of the hierarchy
        if nested_group:
            self.parse_atomsgroup(
                sec_atomsgroup,
                nested_group,
                f'{member_path}.particles_group',
            )
447
+
448
def is_valid_key_val(self, metainfo_class: MSection, key: str, val) -> bool:
    """Tell whether `val` may be assigned to quantity `key` of `metainfo_class`.

    False when the class has no attribute `key`, or when the quantity type is
    an MEnum that does not list `val`; True otherwise.
    """
    if not hasattr(metainfo_class, key):
        return False

    quant_type = getattr(metainfo_class, key).get('type')
    if quant_type and isinstance(quant_type, MEnum):
        # enumerations only accept their listed values
        return val in quant_type._list
    return True
455
+
456
def parse_parameter_info(self):
    """Read the 'parameters.force_calculations' and 'parameters.workflow' groups.

    Fills `self._parameter_info` with one (possibly nested) dictionary per
    group. String values are lower-cased, except 'thermodynamic_ensemble'
    which is upper-cased; NumPy integer scalars are converted to Python ints.
    """
    self._parameter_info = {'force_calculations': {}, 'workflow': {}}

    def get_parameters(parameter_group: 'h5py.Group', path: str) -> dict:
        """Recursively convert an HDF5 group into a nested dictionary."""
        param_dict = {}
        for key, val in parameter_group.items():
            path_key = f'{path}.{key}'
            if isinstance(val, h5py.Group):
                param_dict[key] = get_parameters(val, path_key)
                continue

            param = self._data_parser.get(path_key)
            if isinstance(param, str):
                param = (
                    param.upper()
                    if key == 'thermodynamic_ensemble'
                    else param.lower()
                )
            elif isinstance(param, np.integer):
                # Only NumPy scalars provide .item(); Python ints and bools
                # are already native and are kept as they are.
                param = param.item()
            param_dict[key] = param

        return param_dict

    for section in ('force_calculations', 'workflow'):
        path = f'parameters.{section}'
        group = self._data_parser.get(path)
        if group is not None:
            self._parameter_info[section] = get_parameters(group, path)
490
+
491
def parse_calculation(self):
    """Create the calculation data of every step in `self.steps`.

    For each step the configurational observables and the per-step
    'calculation' data from `self._system_info` are merged into one
    dictionary that is handed to `self.parse_thermodynamics_step`.
    Quantities that BaseCalculation / Energy do not define are attached to
    the calculation as `x_h5md_custom_calculations` /
    `x_h5md_energy_contributions` entries.

    Nothing is done when there are no configurational observables.
    """
    sec_run = self.archive.run[-1]
    calculation_info = self._observable_info.get('configurational')
    if (
        not calculation_info
    ):  # TODO should still create entries for system time link in this case
        return

    # note: it is currently ensured in parse_system() that these have the same length as the system_map
    system_info = self._system_info.get('calculation') or {}

    def to_calc_entry(kind, val):
        """Wrap a value without dedicated quantity into a CalcEntry."""
        unit = None
        if hasattr(val, 'units'):
            unit = val.units
            val = val.magnitude
        return CalcEntry(kind=kind, value=val, unit=unit)

    for step in self.steps:
        # A step may lack observables or system data; treat both as empty.
        step_observables = calculation_info.get(step) or {}
        step_system = system_info.get(step) or {}

        data = {
            'method_ref': sec_run.method[-1] if sec_run.method else None,
            'step': step,
            'energy': {},
        }
        data_h5md = {
            'x_h5md_custom_calculations': [],
            'x_h5md_energy_contributions': [],
        }
        # prefer the time of the observables, fall back to the trajectory time
        data['time'] = step_observables.get('time')
        if data['time'] is None:
            data['time'] = step_system.get('time')

        for key, val in step_system.items():
            if key == 'forces':
                data[key] = dict(total=dict(value=val))
            elif hasattr(BaseCalculation, key):
                data[key] = val
            else:
                data_h5md['x_h5md_custom_calculations'].append(
                    to_calc_entry(key, val)
                )

        for key, val in step_observables.items():
            # keys have the form '<name>' or '<name>-<label>'
            key_split = key.split('-')
            observable_name = key_split[0]
            observable_label = key_split[1] if len(key_split) > 1 else key_split[0]
            if (
                'energ' in observable_name
            ):  # TODO check for energies or energy when matching name
                if not hasattr(val, 'check'):
                    # value carries no unit, so it cannot be verified as an energy
                    self.logger.warning(
                        'Energy value not in energy units. Skipping entry.'
                    )
                    continue
                # check for usage of energy/mole and convert to energy
                if val.check('[energy]/[substance]') and 'mole' in str(val.units):
                    val = val * MOL * ureg.mole

                if val.check('[energy]'):
                    if hasattr(Energy, observable_label):
                        data['energy'][observable_label] = dict(value=val)
                    else:
                        data_h5md['x_h5md_energy_contributions'].append(
                            EnergyEntry(kind=key, value=val)
                        )
                else:
                    self.logger.warning(
                        'Energy value not in energy units. Skipping entry.'
                    )
            elif hasattr(BaseCalculation, observable_label):
                data[observable_label] = val
            else:
                data_h5md['x_h5md_custom_calculations'].append(
                    to_calc_entry(key, val)
                )

        self.parse_thermodynamics_step(data)
        sec_calc = sec_run.calculation[-1]

        if sec_calc.step != step:  # TODO check this comparison
            sec_calc = Calculation()
            sec_run.calculation.append(sec_calc)
            sec_calc.step = int(step)
            sec_calc.time = data['time']
        for calc_entry in data_h5md['x_h5md_custom_calculations']:
            sec_calc.x_h5md_custom_calculations.append(calc_entry)
        sec_energy = sec_calc.energy
        if not sec_energy:
            sec_energy = Energy()
            # attach as sub-section, same assignment style as in parse_method
            sec_calc.energy = sec_energy
        for energy_entry in data_h5md['x_h5md_energy_contributions']:
            sec_energy.x_h5md_energy_contributions.append(energy_entry)
581
+
582
def parse_system(self):
    """Create one system per trajectory step from `self._system_info['system']`.

    Each step's dictionary is passed to `self.parse_trajectory_step` under
    the key 'atoms'. Labels that are not valid chemical symbols are replaced
    by 'X'. The bond list and the particles-group topology are only read for
    the first step.
    """
    sec_run = self.archive.run[-1]

    system_info = self._system_info.get('system')
    if not system_info:
        self.logger.error('No particle information found in H5MD file.')
        return

    path_topology = 'connectivity.particles_group'
    self._system_time_map = {}
    for i_step, step in enumerate(self.trajectory_steps):
        atoms_dict = system_info[step]
        # 'time' is not an atoms quantity and is only present when the file
        # provides a time dataset for the positions.
        atoms_dict.pop('time', None)

        atom_labels = atoms_dict.get('labels')
        if atom_labels is not None:
            try:
                symbols2numbers(atom_labels)
            except KeyError:  # TODO this check should be moved to the system normalizer in the new schema
                atoms_dict['labels'] = ['X'] * len(atom_labels)

        topology = None
        if i_step == 0:  # TODO extend to time-dependent bond lists and topologies
            atoms_dict['bond_list'] = self._data_parser.get('connectivity.bonds')
            topology = self._data_parser.get(path_topology)

        self.parse_trajectory_step({'atoms': atoms_dict})

        if i_step == 0 and topology:  # TODO extend to time-dependent topologies
            self.parse_atomsgroup(sec_run.system[i_step], topology, path_topology)
612
+
613
def parse_method(self):
    """Create the method section: atom parameters, interactions and force settings.

    - One AtomParameters section per atom of the first frame, filled from
      `self._atom_parameters`.
    - Static 'bonds', 'angles', 'dihedrals' and 'impropers' lists of the
      'connectivity' group, passed to `self.parse_interactions_by_type`.
    - The 'force_calculations' parameters; keys without a matching quantity
      (or with a value not allowed by an enumeration) are stored as
      `x_h5md_parameters` entries.
    """
    sec_method = Method()
    self.archive.run[-1].method.append(sec_method)
    sec_force_field = ForceField()
    sec_method.force_field = sec_force_field
    sec_model = Model()
    sec_force_field.model.append(sec_model)

    def to_param_entry(kind, val):
        """Wrap a value without dedicated quantity into a ParamEntry."""
        units = val.units if hasattr(val, 'units') else None
        val = val.magnitude if units is not None else val
        return ParamEntry(kind=kind, value=val, unit=units)

    # get the atom parameters
    atom_parameters = self._atom_parameters or {}  # may not have been parsed
    n_atoms = (
        self._n_atoms[0] if self._n_atoms is not None else 0
    )  # TODO Extend to non-static n_atoms
    for n in range(n_atoms):
        sec_atom = AtomParameters()
        sec_method.atom_parameters.append(sec_atom)

        for key, values in atom_parameters.items():
            sec_atom.m_set(sec_atom.m_get_quantity_definition(key), values[n])

    # Get the interactions
    connectivity_group = self._data_parser.get('connectivity')
    if connectivity_group:
        atom_labels = atom_parameters.get('label')
        interactions_by_type = []
        for interaction_key in ('bonds', 'angles', 'dihedrals', 'impropers'):
            interaction_list = self._data_parser.get(
                f'connectivity.{interaction_key}'
            )
            if interaction_list is None:
                continue
            if isinstance(interaction_list, h5py.Group):
                self.logger.warning(
                    'Time-dependent interactions currently not supported.'
                    ' These values will not be stored'
                )
                continue
            if len(interaction_list) == 0:
                # nothing to store, and the first entry is needed below
                continue

            interactions_by_type.append(
                {
                    'type': interaction_key,
                    'n_interactions': len(interaction_list),
                    'n_atoms': len(interaction_list[0]),
                    'atom_indices': interaction_list,
                    'atom_labels': np.array(atom_labels)[interaction_list]
                    if atom_labels is not None
                    else None,
                }
            )
        self.parse_interactions_by_type(interactions_by_type, sec_model)

    # Get the force calculation parameters
    force_calculation_parameters = self._parameter_info.get('force_calculations')
    if force_calculation_parameters:
        sec_force_calculations = ForceCalculations()
        sec_force_field.force_calculations = sec_force_calculations
        sec_neighbor_searching = NeighborSearching()
        sec_force_calculations.neighbor_searching = sec_neighbor_searching

        for key, val in force_calculation_parameters.items():
            if not isinstance(val, dict):
                if self.is_valid_key_val(ForceCalculations, key, val):
                    sec_force_calculations.m_set(
                        sec_force_calculations.m_get_quantity_definition(key), val
                    )
                else:
                    sec_force_calculations.x_h5md_parameters.append(
                        to_param_entry(key, val)
                    )
            elif key == 'neighbor_searching':
                for neigh_key, neigh_val in val.items():
                    if self.is_valid_key_val(
                        NeighborSearching, neigh_key, neigh_val
                    ):
                        sec_neighbor_searching.m_set(
                            sec_neighbor_searching.m_get_quantity_definition(
                                neigh_key
                            ),
                            neigh_val,
                        )
                    else:
                        # store the individual neighbor-searching parameter,
                        # not the enclosing dictionary
                        sec_neighbor_searching.x_h5md_parameters.append(
                            to_param_entry(neigh_key, neigh_val)
                        )
            else:
                self.logger.warning(
                    'Unknown parameters in force calculations section.'
                    ' These will not be stored.'
                )
708
+
709
def get_workflow_properties_dict(
    self,
    observables: Dict,
    property_type_key=None,
    property_type_value_key=None,
    properties_known=None,
    property_def: MSection = None,
    property_value_def: MSection = None,
):
    """Arrange observables into the dictionary layout of workflow result properties.

    Args:
        observables: mapping observable type -> label -> member name -> value.
        property_type_key: result key under which properties of unknown
            type are collected.
        property_type_value_key: key of the value list inside such a property.
        properties_known: mapping of observable types that have their own
            result key to the key of their value list.
        property_def: definition providing `all_quantities` of the property.
        property_value_def: definition providing `all_quantities` of the
            property values.

    Returns:
        Mapping result key -> list of property dictionaries. Every property
        dictionary holds its 'label' and a list of value dictionaries. For
        known properties 'value' and 'bins' are stored directly, otherwise
        they are split into '<name>_magnitude' and '<name>_unit'.
    """
    properties_known = properties_known or {}
    observables = observables or {}
    property_quantities = property_def.all_quantities
    property_value_quantities = property_value_def.all_quantities

    def reshape(quantity_name, val):
        """Wrap scalars in a list when the target quantity has a shape."""
        quantity_def = property_quantities.get(
            quantity_name, property_value_quantities.get(quantity_name)
        )
        units = val.units if hasattr(val, 'units') else None
        magnitude = val.magnitude if units else val
        if (
            quantity_def
            and quantity_def.shape
            and not isinstance(magnitude, (np.ndarray, list))
        ):
            magnitude = [magnitude]
        return magnitude * units if units else magnitude

    def populate_property_dict(
        property_dict, val_name, val, flag_known_property=False
    ):
        """Store `val` directly (known property) or as magnitude and unit."""
        if val is None:
            return
        if flag_known_property:
            property_dict[val_name] = reshape(val_name, val)
        else:
            val = reshape(f'{val_name}_magnitude', val)
            value_unit = val.units if hasattr(val, 'units') else None
            property_dict[f'{val_name}_unit'] = (
                str(value_unit) if value_unit else None
            )
            property_dict[f'{val_name}_magnitude'] = (
                val.magnitude if value_unit else val
            )

    workflow_properties_dict: Dict[Any, Any] = {}
    for observable_type, observable_dict in observables.items():
        # Resolve the keys per observable type so that a known property does
        # not change the keys used for the observables that follow it.
        flag_known_property = observable_type in properties_known
        if flag_known_property:
            type_key = observable_type
            type_value_key = properties_known[observable_type]
        else:
            type_key = property_type_key
            type_value_key = property_type_value_key

        property_dict: Dict[Any, Any] = {type_value_key: []}
        property_dict['label'] = observable_type
        for key, observable in observable_dict.items():
            property_values_dict = {'label': key}
            for quant_name, val in observable.items():
                # 'value' and 'bins' are handled separately below
                if quant_name in ('value', 'bins'):
                    continue
                val = reshape(quant_name, val)
                if quant_name in property_quantities:
                    property_dict[quant_name] = val
                if quant_name in property_value_quantities:
                    property_values_dict[quant_name] = val
                # TODO Still need to add custom values here.

            for val_name in ('value', 'bins'):
                populate_property_dict(
                    property_values_dict,
                    val_name,
                    observable.get(val_name),
                    flag_known_property=flag_known_property,
                )
            property_dict[type_value_key].append(property_values_dict)

        workflow_properties_dict.setdefault(type_key, []).append(property_dict)

    return workflow_properties_dict
797
+
798
def parse_workflow(self):
    """Collect the molecular-dynamics workflow data and hand it to ``parse_md_workflow``.

    The method parameters are read from the ``molecular_dynamics`` entry under
    ``workflow`` in ``self._parameter_info``. If that entry is absent the
    method returns without doing anything. Otherwise the ``ensemble_average``
    and ``correlation_function`` observables from ``self._observable_info``
    are converted with ``get_workflow_properties_dict`` and merged into a
    single results dictionary.
    """
    # A missing or ``None`` 'workflow' entry is handled like a missing
    # 'molecular_dynamics' entry instead of raising AttributeError on the
    # chained ``.get``.
    workflow_section = self._parameter_info.get('workflow') or {}
    workflow_parameters = workflow_section.get('molecular_dynamics')
    if workflow_parameters is None:
        return

    # One entry per observable family: the key in ``self._observable_info``
    # and the keyword arguments for ``get_workflow_properties_dict``.
    property_specs = (
        (
            'ensemble_average',
            {
                'property_type_key': 'ensemble_properties',
                'property_type_value_key': 'ensemble_property_values',
                'properties_known': {
                    'radial_distribution_functions': 'radial_distribution_function_values'
                },
                'property_def': EnsembleProperty.m_def,
                'property_value_def': EnsemblePropertyValues.m_def,
            },
        ),
        (
            'correlation_function',
            {
                'property_type_key': 'correlation_functions',
                'property_type_value_key': 'correlation_function_values',
                'properties_known': {
                    'mean_squared_displacements': 'mean_squared_displacement_values'
                },
                'property_def': CorrelationFunction.m_def,
                'property_value_def': CorrelationFunctionValues.m_def,
            },
        ),
    )

    workflow_results = {}
    for observable_key, spec in property_specs:
        observables = self._observable_info.get(observable_key)
        workflow_results.update(
            self.get_workflow_properties_dict(observables, **spec)
        )

    self.parse_md_workflow(
        dict(method=workflow_parameters, results=workflow_results)
    )
841
+
842
def write_to_archive(self) -> None:
    """Parse the H5MD mainfile and populate ``self.archive``.

    Records the mainfile's directory, directory listing and base name, points
    the data parser at the mainfile, and returns early with a warning when no
    hdf5 file is available. Otherwise it reads the frame/atom counts from the
    positions dataset, creates the ``Run`` section with program, author and
    creator metadata from the ``h5md`` group, and runs the individual parsing
    steps. The data parser is closed afterwards, also when a step raises.
    """
    self._maindir = os.path.dirname(self.mainfile)
    # os.path.dirname() returns '' for a bare filename and os.listdir('')
    # raises FileNotFoundError, so list the current directory in that case.
    self._h5md_files = os.listdir(self._maindir or '.')
    self._basename = os.path.basename(self.mainfile).rsplit('.', 1)[0]
    self._data_parser.mainfile = self.mainfile
    if self._data_parser.filehdf5 is None:
        self.logger.warning('hdf5 file missing in H5MD Parser.')
        return

    try:
        positions = self._data_parser.get(self._path_value_positions_all)
        # Always assign both counters so that values from a previous call do
        # not persist on the instance when the positions are missing.
        if positions is None:
            self._n_frames = None
            self._n_atoms = None
        else:
            self._n_frames = len(positions)
            self._n_atoms = [len(pos) for pos in positions]

        # Parse the hdf5 groups
        sec_run = Run()
        self.archive.run.append(sec_run)

        group_h5md = self._data_parser.get('h5md')
        if group_h5md:
            program_name = self._data_parser.get('h5md.program.name', isattr=True)
            program_version = self._data_parser.get(
                'h5md.program.version', isattr=True
            )
            sec_run.program = Program(name=program_name, version=program_version)
            h5md_version = self._data_parser.get('h5md.version', isattr=True)
            sec_run.x_h5md_version = h5md_version
            h5md_author_name = self._data_parser.get('h5md.author.name', isattr=True)
            h5md_author_email = self._data_parser.get(
                'h5md.author.email', isattr=True
            )
            sec_run.x_h5md_author = Author(
                name=h5md_author_name, email=h5md_author_email
            )
            h5md_creator_name = self._data_parser.get(
                'h5md.creator.name', isattr=True
            )
            h5md_creator_version = self._data_parser.get(
                'h5md.creator.version', isattr=True
            )
            sec_run.x_h5md_creator = Program(
                name=h5md_creator_name, version=h5md_creator_version
            )
        else:
            self.logger.warning(
                '"h5md" group missing in (H5MD)hdf5 file.'
                ' Program and author metadata will be missing!'
            )

        self.parse_atom_parameters()
        self.parse_system_info()
        self.parse_observable_info()
        self.parse_parameter_info()

        # Populate the archive
        self.parse_method()

        self.parse_system()

        self.parse_calculation()

        self.parse_workflow()
    finally:
        # Close the data parser even if one of the parsing steps raised.
        self._data_parser.close()