nomad-parser-plugins-atomistic 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. atomisticparsers/__init__.py +400 -0
  2. atomisticparsers/amber/__init__.py +19 -0
  3. atomisticparsers/amber/__main__.py +31 -0
  4. atomisticparsers/amber/metainfo/__init__.py +19 -0
  5. atomisticparsers/amber/metainfo/amber.py +495 -0
  6. atomisticparsers/amber/parser.py +42 -0
  7. atomisticparsers/asap/__init__.py +19 -0
  8. atomisticparsers/asap/__main__.py +31 -0
  9. atomisticparsers/asap/metainfo/__init__.py +19 -0
  10. atomisticparsers/asap/metainfo/asap.py +75 -0
  11. atomisticparsers/asap/parser.py +197 -0
  12. atomisticparsers/bopfox/__init__.py +19 -0
  13. atomisticparsers/bopfox/__main__.py +31 -0
  14. atomisticparsers/bopfox/metainfo/__init__.py +19 -0
  15. atomisticparsers/bopfox/metainfo/bopfox.py +225 -0
  16. atomisticparsers/bopfox/parser.py +808 -0
  17. atomisticparsers/dftbplus/__init__.py +19 -0
  18. atomisticparsers/dftbplus/__main__.py +31 -0
  19. atomisticparsers/dftbplus/metainfo/__init__.py +19 -0
  20. atomisticparsers/dftbplus/metainfo/dftbplus.py +217 -0
  21. atomisticparsers/dftbplus/parser.py +500 -0
  22. atomisticparsers/dlpoly/__init__.py +19 -0
  23. atomisticparsers/dlpoly/__main__.py +31 -0
  24. atomisticparsers/dlpoly/metainfo/__init__.py +19 -0
  25. atomisticparsers/dlpoly/metainfo/dl_poly.py +312 -0
  26. atomisticparsers/dlpoly/parser.py +798 -0
  27. atomisticparsers/gromacs/__init__.py +19 -0
  28. atomisticparsers/gromacs/__main__.py +31 -0
  29. atomisticparsers/gromacs/metainfo/__init__.py +19 -0
  30. atomisticparsers/gromacs/metainfo/gromacs.py +2388 -0
  31. atomisticparsers/gromacs/parser.py +1581 -0
  32. atomisticparsers/gromos/__init__.py +19 -0
  33. atomisticparsers/gromos/__main__.py +31 -0
  34. atomisticparsers/gromos/metainfo/__init__.py +19 -0
  35. atomisticparsers/gromos/metainfo/gromos.py +1995 -0
  36. atomisticparsers/gromos/parser.py +58 -0
  37. atomisticparsers/gulp/__init__.py +19 -0
  38. atomisticparsers/gulp/__main__.py +31 -0
  39. atomisticparsers/gulp/metainfo/__init__.py +19 -0
  40. atomisticparsers/gulp/metainfo/gulp.py +1117 -0
  41. atomisticparsers/gulp/parser.py +1316 -0
  42. atomisticparsers/h5md/__init__.py +19 -0
  43. atomisticparsers/h5md/__main__.py +31 -0
  44. atomisticparsers/h5md/metainfo/__init__.py +19 -0
  45. atomisticparsers/h5md/metainfo/h5md.py +239 -0
  46. atomisticparsers/h5md/parser.py +901 -0
  47. atomisticparsers/lammps/__init__.py +19 -0
  48. atomisticparsers/lammps/__main__.py +31 -0
  49. atomisticparsers/lammps/metainfo/__init__.py +19 -0
  50. atomisticparsers/lammps/metainfo/lammps.py +1417 -0
  51. atomisticparsers/lammps/parser.py +1753 -0
  52. atomisticparsers/libatoms/__init__.py +19 -0
  53. atomisticparsers/libatoms/__main__.py +31 -0
  54. atomisticparsers/libatoms/metainfo/__init__.py +19 -0
  55. atomisticparsers/libatoms/metainfo/lib_atoms.py +251 -0
  56. atomisticparsers/libatoms/parser.py +38 -0
  57. atomisticparsers/namd/__init__.py +19 -0
  58. atomisticparsers/namd/__main__.py +31 -0
  59. atomisticparsers/namd/metainfo/__init__.py +19 -0
  60. atomisticparsers/namd/metainfo/namd.py +1605 -0
  61. atomisticparsers/namd/parser.py +312 -0
  62. atomisticparsers/tinker/__init__.py +19 -0
  63. atomisticparsers/tinker/__main__.py +31 -0
  64. atomisticparsers/tinker/metainfo/__init__.py +18 -0
  65. atomisticparsers/tinker/metainfo/tinker.py +1363 -0
  66. atomisticparsers/tinker/parser.py +685 -0
  67. atomisticparsers/utils/__init__.py +22 -0
  68. atomisticparsers/utils/mdanalysis.py +662 -0
  69. atomisticparsers/utils/parsers.py +226 -0
  70. atomisticparsers/xtb/__init__.py +19 -0
  71. atomisticparsers/xtb/__main__.py +32 -0
  72. atomisticparsers/xtb/metainfo/__init__.py +19 -0
  73. atomisticparsers/xtb/metainfo/xtb.py +256 -0
  74. atomisticparsers/xtb/parser.py +979 -0
  75. nomad_parser_plugins_atomistic-1.0.dist-info/LICENSE +202 -0
  76. nomad_parser_plugins_atomistic-1.0.dist-info/METADATA +327 -0
  77. nomad_parser_plugins_atomistic-1.0.dist-info/RECORD +80 -0
  78. nomad_parser_plugins_atomistic-1.0.dist-info/WHEEL +5 -0
  79. nomad_parser_plugins_atomistic-1.0.dist-info/entry_points.txt +15 -0
  80. nomad_parser_plugins_atomistic-1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,979 @@
1
+ #
2
+ # Copyright The NOMAD Authors.
3
+ #
4
+ # This file is part of NOMAD.
5
+ # See https://nomad-lab.eu for further info.
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+ import os
20
+ import numpy as np
21
+ import logging
22
+ from ase.io import read as aseread
23
+ from ase import Atoms as aseAtoms
24
+ from datetime import datetime
25
+
26
+ from nomad.units import ureg
27
+ from nomad.parsing.file_parser import Quantity, TextParser
28
+ from runschema.run import Run, Program, TimeRun
29
+ from runschema.method import Method, TB, xTB, Interaction
30
+ from runschema.system import System, Atoms
31
+ from runschema.calculation import (
32
+ Calculation,
33
+ ScfIteration,
34
+ Energy,
35
+ EnergyEntry,
36
+ BandEnergies,
37
+ Multipoles,
38
+ MultipolesEntry,
39
+ )
40
+ from simulationworkflowschema import (
41
+ SinglePoint,
42
+ GeometryOptimization,
43
+ GeometryOptimizationMethod,
44
+ )
45
+ from atomisticparsers.utils import MDParser
46
+ from atomisticparsers.xtb.metainfo import xtb # pylint: disable=unused-import
47
+
48
+
49
# Regex fragment for a signed decimal number: optional sign, digits, a
# mandatory dot, optional fraction digits and an optional signed exponent.
# NOTE: every ``init_quantities`` in this file defines its own local ``re_f``,
# which shadows this module-level pattern inside those methods.
re_f = r'[-+]?\d+\.\d*(?:[Ee][-+]\d+)?'
# Regex fragment matching a single line-feed or carriage-return character.
re_n = r'[\n\r]'
51
+
52
+
53
class OutParser(TextParser):
    """Text parser for the main xTB output file.

    ``init_quantities`` declares the regular expressions for the run header,
    the GFN-FF / GFN1 / GFN2 single-point sections, the ANCOPT geometry
    optimization, the molecular dynamics section, the property printout and
    the timing footer. ``get_time`` reads wall times from the parsed footer.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def init_quantities(self):
        """Build the list of quantities (patterns) matched against the file."""
        # Permissive number pattern used throughout this parser; it
        # intentionally shadows the stricter module-level ``re_f``.
        re_f = r'[\d\.E\+\-]+'

        def str_to_eigenvalues(val_in):
            """Split an orbital table into (occupations, energies in hartree).

            Rows whose first column is not an integer index (separators,
            ``...`` continuation rows) and blank/short rows are skipped.
            """
            occupations, energies = [], []
            for val in val_in.strip().split('\n'):
                # drop trailing annotations such as "(HOMO)" before splitting
                val = val.split('(')[0].split()
                if len(val) < 2 or not val[0].isdecimal():
                    continue
                # rows with more than three columns carry an occupation in
                # column 1; rows without it are treated as unoccupied
                occupations.append(float(val.pop(1)) if len(val) > 3 else 0.0)
                energies.append(float(val[1]))
            return occupations, energies * ureg.hartree

        def str_to_parameters(val_in):
            """Return ``[name, [values...]]`` from a 'name  v1 v2 ...' line."""
            val = [v.strip() for v in val_in.split(' ', 1)]
            val[1] = val[1].split()
            return val

        def str_to_wall_time(val_in):
            """Return ``(section name, wall time in seconds)``."""
            name, d, h, m, s = val_in.rsplit(' ', 4)
            return name.strip(), 24 * 60 * 60 * float(d) + 60 * 60 * float(
                h
            ) + 60 * float(m) + float(s)

        def summary_quantity(name, label, unit):
            """Quantity for one ':: <label> <number>' row of the SUMMARY box."""
            return Quantity(
                name, rf':: {label}\s*({re_f})', unit=unit, dtype=np.float64
            )

        common_quantities = [
            Quantity(
                'setup',
                r'SETUP\s*:\s*([\s\S]+?\.+\n *\n)',
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'parameter',
                            r'\n +\: +(.+?\s{2,}[\w\.\-\+]+)',
                            str_operation=lambda x: [
                                v.strip() for v in x.split('  ', 1)
                            ],
                            repeats=True,
                        )
                    ]
                ),
            ),
            Quantity(
                'summary',
                r'(SUMMARY[\s\S]+?\:\n *\n)',
                sub_parser=TextParser(
                    quantities=[
                        summary_quantity(
                            'energy_total', r'total energy', ureg.hartree
                        ),
                        summary_quantity(
                            'x_xtb_gradient_norm',
                            r'gradient norm',
                            ureg.hartree / ureg.angstrom,
                        ),
                        summary_quantity(
                            'x_xtb_hl_gap', r'HOMO-LUMO gap', ureg.eV
                        ),
                        summary_quantity(
                            'energy_x_xtb_scc', r'SCC energy', ureg.hartree
                        ),
                        summary_quantity(
                            'energy_x_xtb_isotropic_es',
                            r'\-\> isotropic ES',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'energy_x_xtb_anisotropic_es',
                            r'\-\> anisotropic ES',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'energy_x_xtb_anisotropic_xc',
                            r'\-\> anisotropic XC',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'energy_x_xtb_dispersion',
                            r'\-\> dispersion',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'energy_electrostatic',
                            r'\-\> electrostatic',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'energy_x_xtb_repulsion',
                            r'repulsion energy',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'energy_x_xtb_halogen_bond_corr',
                            r'halogen bond corr\.',
                            ureg.hartree,
                        ),
                        # Previously this reused the 'repulsion energy' label
                        # and therefore stored the repulsion energy a second
                        # time. NOTE(review): label assumed to be printed as
                        # ':: add. restraining' - confirm against an output.
                        summary_quantity(
                            'energy_x_xtb_add_restraining',
                            r'add\. restraining',
                            ureg.hartree,
                        ),
                        summary_quantity(
                            'charge_total',
                            r'total charge',
                            ureg.elementary_charge,
                        ),
                    ]
                ),
            ),
        ]

        orbital_quantities = [
            Quantity(
                'eigenvalues',
                r'# +Occupation +Energy.+\s*\-+([\s\S]+?)\-+\n',
                str_operation=str_to_eigenvalues,
            ),
            Quantity(
                'hl_gap', rf'HL\-Gap\s*({re_f})', dtype=np.float64, unit=ureg.hartree
            ),
            Quantity(
                'energy_fermi',
                rf'Fermi\-level\s*({re_f})',
                dtype=np.float64,
                unit=ureg.hartree,
            ),
        ]

        property_quantities = orbital_quantities + [
            Quantity(
                'dipole',
                r'(dipole\:[\s\S]+?)molecular',
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'q',
                            rf'q only: +({re_f} +{re_f} +{re_f})',
                            dtype=np.dtype(np.float64),
                            unit=ureg.elementary_charge * ureg.bohr,
                        ),
                        Quantity(
                            'full',
                            rf'full: +({re_f} +{re_f} +{re_f})',
                            dtype=np.dtype(np.float64),
                            unit=ureg.elementary_charge * ureg.bohr,
                        ),
                    ]
                ),
            ),
            Quantity(
                'quadrupole',
                r'(quadrupole \(traceless\):[\s\S]+?)\n *\n',
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'q',
                            r'q only:(.+)',
                            dtype=np.dtype(np.float64),
                            unit=ureg.elementary_charge * ureg.bohr**2,
                        ),
                        Quantity(
                            'full',
                            r'full:(.+)',
                            dtype=np.dtype(np.float64),
                            unit=ureg.elementary_charge * ureg.bohr**2,
                        ),
                        Quantity(
                            'q_dip',
                            r'q\+dip:(.+)',
                            dtype=np.dtype(np.float64),
                            unit=ureg.elementary_charge * ureg.bohr**2,
                        ),
                    ]
                ),
            ),
        ]

        geometry_quantities = [
            Quantity('file', r'optimized geometry written to:\s*(\S+)')
        ]

        scf_quantities = (
            common_quantities
            + orbital_quantities
            + [
                Quantity(
                    'model',
                    r'((?:G F N \d+ \- x T B.+\s+\-+\s+|Reference)\s*[\s\S]+?\n *\n)',
                    sub_parser=TextParser(
                        quantities=[
                            Quantity('reference', r'Reference\s*(\S+)'),
                            Quantity(
                                'contribution',
                                r'(\w+:\s*[\s\S]+?)(?:\*|\n *\n)',
                                repeats=True,
                                sub_parser=TextParser(
                                    quantities=[
                                        Quantity('name', r'(\w+):'),
                                        Quantity(
                                            'parameters',
                                            r'\n +(\w.+?  .+)',
                                            str_operation=str_to_parameters,
                                            repeats=True,
                                        ),
                                    ]
                                ),
                            ),
                        ]
                    ),
                ),
                Quantity(
                    'scf_iteration',
                    r'iter\s*E\s*dE.+([\s\S]+?convergence.+)',
                    sub_parser=TextParser(
                        quantities=[
                            Quantity('step', r'(\d+ .+)', repeats=True),
                            Quantity(
                                'converged',
                                r'(\*\*\* convergence criteria.+)',
                                str_operation=lambda x: 'satisfied' in x,
                            ),
                        ]
                    ),
                ),
            ]
        )

        optimization_quantities = [
            Quantity(
                'cycle',
                # \d+ consumes the whole cycle number so that no stray digits
                # of a multi-digit number leak into the captured block
                r'CYCLE +\d+([\s\S]+?\n *\n)',
                repeats=True,
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'energy_total',
                            rf'total energy +: +({re_f}) Eh',
                            dtype=np.float64,
                            unit=ureg.hartree,
                        ),
                        Quantity(
                            'energy_change',
                            rf'change +({re_f}) Eh',
                            dtype=np.float64,
                            unit=ureg.hartree,
                        ),
                        Quantity(
                            'scf_iteration',
                            rf'\.+(\s+\d+\s+{re_f}[\s\S]+?)\*',
                            sub_parser=TextParser(
                                quantities=[
                                    Quantity(
                                        'step',
                                        rf'{re_n} +(\d+ +{re_f}.+)',
                                        repeats=True,
                                    ),
                                    Quantity(
                                        'time',
                                        rf'SCC iter\. +\.+ +(\d+) min, +({re_f}) sec',
                                    ),
                                ]
                            ),
                        ),
                    ]
                ),
            ),
            Quantity(
                'converged',
                r'(\*\*\* GEOMETRY OPTIMIZATION.+)',
                str_operation=lambda x: 'CONVERGED' in x,
            ),
            Quantity(
                'final_structure',
                r'final structure:([\s\S]+?\-+\s+\|)',
                sub_parser=TextParser(
                    quantities=[
                        Quantity('atom_labels', r'([A-Z][a-z]?) ', repeats=True),
                        Quantity(
                            'atom_positions',
                            rf'({re_f} +{re_f} +{re_f})',
                            unit=ureg.angstrom,
                            dtype=np.dtype(np.float64),
                        ),
                    ]
                ),
            ),
            Quantity(
                'final_single_point',
                r'(Final Singlepoint +\|[\s\S]+?::::::::::::)',
                sub_parser=TextParser(quantities=scf_quantities),
            ),
        ] + common_quantities

        # NOTE(review): several MD header patterns below were widened to
        # supersets of the previous ones (optional/variable spacing). They
        # presumably match lines such as 'max steps :', 'block length (av. ):'
        # and 'dumpstep(coords)/fs:' - confirm against a real MD output.
        md_quantities = [
            Quantity('traj_file', r'trajectories on (.+?\.trj)'),
            Quantity(
                'x_xtb_md_time',
                rf'MD time /ps +: +({re_f})',
                dtype=np.float64,
                unit=ureg.ps,
            ),
            Quantity(
                'timestep', rf'dt /fs +: +({re_f})', dtype=np.float64, unit=ureg.fs
            ),
            Quantity(
                'x_xtb_scc_accuracy', rf'SCC accuracy +: +({re_f})', dtype=np.float64
            ),
            Quantity(
                'x_xtb_temperature',
                rf'temperature /K +: +({re_f})',
                dtype=np.float64,
                unit=ureg.K,
            ),
            Quantity('x_xtb_max_steps', r'max[_ ]steps +: +(\d+)', dtype=np.int32),
            Quantity(
                'x_xtb_block_length',
                r'block length \(av\. \) *: +(\d+)',
                dtype=np.int32,
            ),
            Quantity(
                'x_xtb_dumpstep_trj',
                rf'dumpstep\(trj\) /fs +: +({re_f})',
                dtype=np.float64,
            ),
            Quantity(
                'x_xtb_dumpstep_coords',
                rf'dumpstep\(coords\) ?/fs *: +({re_f})',
                dtype=np.float64,
            ),
            Quantity(
                'x_xtb_h_atoms_mass',
                r'H atoms mass \(amu\) +: +(\d+)',
                dtype=np.float64,
                unit=ureg.amu,
            ),
            # The previous pattern had no label (' +: +(\d+)') and therefore
            # captured the first ': <integer>' found anywhere in the section.
            Quantity(
                'x_xtb_n_degrees_freedom',
                r'# deg\. of freedom *: +(\d+)',
                dtype=np.float64,
            ),
            Quantity(
                'x_xtb_shake_bonds', r'SHAKE on\. # bonds +: +(\d+)', dtype=np.float64
            ),
            Quantity(
                'x_xtb_berendsen',
                r'Berendsen THERMOSTAT (\S+)',
                str_operation=lambda x: x == 'on',
            ),
            Quantity(
                'cycle',
                rf'{re_n} +(\d+ +{re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f})',
                dtype=np.dtype(np.float64),
                repeats=True,
            ),
        ]

        self._quantities = [
            Quantity('program_version', r'\* xtb version ([\d\.]+)'),
            Quantity(
                'date_start',
                r'started run on (\d+/\d+/\d+) at (\d+:\d+:\d+\.\d+)',
                dtype=str,
                flatten=False,
            ),
            Quantity(
                'date_end',
                r'finished run on (\d+/\d+/\d+) at (\d+:\d+:\d+\.\d+)',
                dtype=str,
                flatten=False,
            ),
            Quantity(
                'calculation_setup',
                r'Calculation Setup +\|\s*\-+\s*([\s\S]+?)\-+\s+\|',
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'parameter',
                            r'([\w ]+:.+)',
                            str_operation=lambda x: [v.strip() for v in x.split(':')],
                            repeats=True,
                        )
                    ]
                ),
            ),
            Quantity(
                'gfnff',
                r'(G F N - F F[\s\S]+?::::::::::::\n *\n)',
                sub_parser=TextParser(quantities=scf_quantities),
            ),
            Quantity(
                'gfn1',
                r'(G F N 1 - x T B[\s\S]+?::::::::::::\n *\n)',
                sub_parser=TextParser(quantities=scf_quantities),
            ),
            Quantity(
                'gfn2',
                r'(G F N 2 - x T B[\s\S]+?::::::::::::\n *\n)',
                sub_parser=TextParser(quantities=scf_quantities),
            ),
            Quantity(
                'ancopt',
                r'(A N C O P T +\|[\s\S]+?::::::::::::\n *\n)',
                sub_parser=TextParser(quantities=optimization_quantities),
            ),
            Quantity(
                'md',
                r'(Molecular Dynamics +\|[\s\S]+?exit of md)',
                sub_parser=TextParser(quantities=md_quantities),
            ),
            Quantity(
                'property',
                r'(Property Printout +\|[\s\S]+?\-+\s+\|)',
                sub_parser=TextParser(quantities=property_quantities),
            ),
            Quantity(
                'geometry',
                r'(Geometry Summary +\|[\s\S]+?\-+\s+\|)',
                sub_parser=TextParser(quantities=geometry_quantities),
            ),
            Quantity(
                'energy_total',
                rf'\| TOTAL ENERGY\s*({re_f})',
                dtype=np.float64,
                unit=ureg.hartree,
            ),
            Quantity(
                'gradient_norm',
                rf'\| GRADIENT NORM\s*({re_f})',
                dtype=np.float64,
                unit=ureg.hartree / ureg.angstrom,
            ),
            Quantity(
                'hl_gap',
                rf'\| HOMO-LUMO GAP\s*({re_f})',
                dtype=np.float64,
                unit=ureg.eV,
            ),
            Quantity('topo_file', r'Writing topology from bond orders to (.+\.mol)'),
            Quantity(
                'footer',
                r'(\* finished run on [\s\S]+?\Z)',
                sub_parser=TextParser(
                    quantities=[
                        Quantity(
                            'end_time', r'finished run on (\S+) at (\S+)', flatten=False
                        ),
                        Quantity(
                            'wall_time',
                            r'(.+):\s+\* +wall-time: +(\d+) d, +(\d+) h, +(\d+) min, +([\d\.]+) sec',
                            repeats=True,
                            str_operation=str_to_wall_time,
                        ),
                        Quantity(
                            'cpu_time',
                            r'\* +cpu-time: +(\d+) d, +(\d+) h, +(\d+) min, +([\d\.]+) sec',
                            repeats=True,
                        ),
                    ]
                ),
            ),
        ]

    def get_time(self, section=None, index=0):
        """Look up a wall time in the footer.

        Args:
            section: Name of the footer entry to look for; ``None`` matches
                every entry.
            index: Which of the matching entries (0-based) to return.

        Returns:
            ``(start_time, wall_time)`` where ``start_time`` is the sum of
            the wall times of all preceding entries other than ``'total'``.
            ``wall_time`` is ``None`` if no matching entry exists.
        """
        start_time = 0
        section_index = 0
        for time in self.get('footer', {}).get('wall_time', []):
            name, seconds = time[0], time[1]
            if section is None or name == section:
                if index == section_index:
                    return start_time, seconds
                section_index += 1
            # 'total' is the overall run time, not a step in the sequence
            if name != 'total':
                start_time += seconds
        return start_time, None
544
+
545
+
546
class CoordParser(TextParser):
    """Parser for coordinate files using ``$coord`` / ``$periodic`` /
    ``$lattice`` / ``$cell`` data groups."""

    def __init__(self):
        super().__init__()

    def init_quantities(self):
        """Declare the patterns for coordinates, periodicity and cell."""
        re_f = r'[\d\.\-]+'

        self._quantities = [
            Quantity('coord_unit', r'\$coord(.+)'),
            Quantity(
                'positions_labels',
                rf'({re_f} +{re_f} +{re_f} +[A-Za-z]+\s+)',
                repeats=True,
            ),
            Quantity('periodic', r'\$periodic(.+)'),
            Quantity('lattice_unit', r'\$lattice(.+)'),
            Quantity(
                'lattice',
                rf'({re_f} +{re_f} +{re_f}) *\n',
                repeats=True,
                dtype=np.dtype(np.float64),
            ),
            Quantity('cell_unit', r'\$cell(.+)'),
            Quantity(
                'cell',
                rf'({re_f} +{re_f} +{re_f} +{re_f} +{re_f} +{re_f}) *\n',
                dtype=np.dtype(np.float64),
            ),
        ]

    def get_atoms(self):
        """Build an ASE ``Atoms`` object from the parsed file.

        Lengths are converted to angstrom; a unit modifier starting with
        ``angs`` selects angstrom, anything else is read as bohr. Returns
        ``None`` when no coordinate lines were found.
        """
        positions = self.get('positions_labels')
        if positions is None:
            return

        def length_unit(modifier):
            # same rule for $coord, $lattice and $cell
            return ureg.angstrom if modifier.startswith('angs') else ureg.bohr

        lattice = self.get('lattice')
        if lattice is not None:
            lattice_unit = length_unit(self.get('lattice_unit', '').strip())
            lattice = (lattice * lattice_unit).to('angstrom').magnitude

        cell = self.get('cell')
        if cell is not None:
            # Previously any modifier after $cell (even 'bohr') selected
            # angstrom; use the same 'angs' check as for $lattice.
            cell_unit = length_unit(self.get('cell_unit', '').strip())
            cell_abc = (cell[:3] * cell_unit).to('angstrom').magnitude
            # lengths followed by the three angles, as given in the file
            lattice = list(cell_abc) + list(cell[3:])

        labels = [p[-1].title() for p in positions]
        positions = [p[:3] for p in positions]
        coord_unit = self.get('coord_unit', '').strip()
        if coord_unit.startswith('frac') and lattice is not None:
            # Let ASE expand the cell so that both a 3x3 lattice and the
            # 6-value lengths/angles form give a matrix; the result is
            # already in angstrom, so no unit conversion must follow.
            cell_matrix = np.array(aseAtoms(cell=lattice).get_cell())
            positions = np.dot(positions, cell_matrix)
        else:
            positions = (positions * length_unit(coord_unit)).to('angstrom').magnitude

        # $periodic gives the number of periodic directions
        pbc = ([True] * int(self.get('periodic', 0))) + [False] * 3

        return aseAtoms(symbols=labels, positions=positions, cell=lattice, pbc=pbc[:3])
611
+
612
+
613
class TrajParser(TextParser):
    """Parser for multi-frame trajectory/log files in which every frame
    starts with an ``energy:`` comment line."""

    def __init__(self):
        super().__init__()

    def init_quantities(self):
        """Declare the per-frame patterns (positions and element labels)."""
        re_f = r'[\d\.\-]+'

        per_frame = [
            Quantity(
                'positions',
                rf'({re_f} +{re_f} +{re_f})',
                repeats=True,
                dtype=np.dtype(np.float64),
            ),
            Quantity('labels', r'\n *([A-Za-z]{1,2}) +', repeats=True),
        ]
        frame = Quantity(
            'frame',
            r'energy\:([\s\S]+?(?:\Z|\n *\d+ *\n))',
            repeats=True,
            sub_parser=TextParser(quantities=per_frame),
        )
        self._quantities = [frame]

    def get_atoms(self, n_frame):
        """Return the ASE ``Atoms`` of frame ``n_frame``, or ``None`` if the
        file holds fewer frames than that."""
        n_available = len(self.get('frame', []))
        if n_frame < n_available:
            selected = self.get('frame')[n_frame]
            symbols = [name.title() for name in selected.get('labels', [])]
            # TODO verify if trajectory positions are always printed out in angstroms
            return aseAtoms(symbols=symbols, positions=selected.positions)
        return None
647
+
648
+
649
class XTBParser(MDParser):
    """NOMAD parser for xTB calculations.

    Reads the main output through ``OutParser`` and auxiliary structure and
    trajectory files through ``CoordParser`` / ``TrajParser`` (or ASE), and
    fills the run, method, system, calculation and workflow sections of the
    archive.
    """

    def __init__(self):
        self.out_parser = OutParser()
        self.coord_parser = CoordParser()
        self.traj_parser = TrajParser()
        self.calculation_type = None
        # optimizer setup labels as printed in the output -> metainfo suffix
        self._metainfo_map = {
            'optimization level': 'optimization_level',
            'max. optcycles': 'max_opt_cycles',
            'ANC micro-cycles': 'anc_micro_cycles',
            'degrees of freedom': 'n_degrees_freedom',
            'RF solver': 'rf_solver',
            'linear?': 'linear',
            'Hlow (freq-cutoff)': 'hlow',
            'Hmax (freq-cutoff)': 'hmax',
            'S6 in model hess.': 's6',
        }
        super().__init__()

    def parse_system(self, source):
        """Create a ``System`` section from a structure source.

        Args:
            source: Either a frame index into the current trajectory file
                (``int``) or the name of a structure file in the directory
                of the main file.

        Returns:
            The new ``System`` section, or ``None`` if no structure could be
            read.
        """
        if isinstance(source, int):
            atoms = self.traj_parser.get_atoms(source)
        elif source.endswith(('.xyz', '.poscar')):
            try:
                atoms = aseread(os.path.join(self.maindir, source))
            except OSError as e:
                # a missing/unreadable auxiliary file must not abort the
                # whole parse; continue without a system section
                self.logger.warning('Error reading structure file.', exc_info=e)
                atoms = None
        else:
            self.coord_parser.mainfile = os.path.join(self.maindir, source)
            atoms = self.coord_parser.get_atoms()

        if atoms is None:
            return

        sec_system = System()
        self.archive.run[0].system.append(sec_system)
        sec_atoms = Atoms()
        sec_system.atoms = sec_atoms
        sec_atoms.labels = atoms.get_chemical_symbols()
        sec_atoms.positions = atoms.get_positions() * ureg.angstrom
        lattice_vectors = np.array(atoms.get_cell())
        # an all-zero cell means that no lattice was given
        if np.count_nonzero(lattice_vectors) > 0:
            sec_atoms.lattice_vectors = lattice_vectors * ureg.angstrom
        sec_atoms.periodic = atoms.get_pbc()

        return sec_system

    def parse_calculation(self, source):
        """Create a ``Calculation`` section from a parsed output section.

        Args:
            source: Parsed section providing ``energy_total`` and,
                optionally, ``energy_change``, ``scf_iteration``,
                ``summary`` and ``eigenvalues``.

        Returns:
            The new ``Calculation`` section.
        """
        sec_calc = Calculation()
        self.archive.run[0].calculation.append(sec_calc)
        # total energy
        sec_energy = Energy()
        sec_calc.energy = sec_energy
        sec_energy.total = EnergyEntry(value=source.energy_total)
        sec_energy.change = source.energy_change

        # scf
        for step in source.get('scf_iteration', {}).get('step', []):
            sec_scf = ScfIteration()
            sec_calc.scf_iteration.append(sec_scf)
            sec_scf.energy = Energy(
                total=EnergyEntry(value=step[1] * ureg.hartree),
                change=step[2] * ureg.hartree,
            )

        # summary of calculated properties
        prefix = 'energy_'
        summary = source.get('summary', {})
        for key, val in summary.items():
            if key.startswith(prefix) and val is not None:
                # strip only the leading prefix to get the Energy attribute
                setattr(sec_energy, key[len(prefix):], EnergyEntry(value=val))

        # eigenvalues
        if source.eigenvalues is not None:
            occupations, energies = source.eigenvalues[0], source.eigenvalues[1]
            sec_eigs = BandEnergies()
            sec_calc.eigenvalues.append(sec_eigs)
            # stored with leading axes of length one and a single zero k-point
            sec_eigs.occupations = np.reshape(occupations, (1, 1, len(occupations)))
            sec_eigs.energies = np.reshape(energies, (1, 1, len(energies)))
            sec_eigs.kpoints = np.zeros((1, 3))

        return sec_calc

    def parse_method(self, section):
        """Create the ``Method`` section for the output section ``section``.

        Nothing is written when the section has no model description.
        """
        model = self.out_parser.get(section, {}).get('model')
        if model is None:
            return

        sec_method = Method()
        self.archive.run[-1].method.append(sec_method)
        parameters = {
            p[0]: p[1]
            for p in self.out_parser.get(section, {})
            .get('setup', {})
            .get('parameter', [])
        }
        sec_tb = TB()
        sec_method.tb = sec_tb
        sec_tb.name = 'xTB'
        sec_tb.x_xtb_setup = parameters
        sec_xtb = xTB()
        sec_tb.xtb = sec_xtb
        sec_xtb.name = section

        if model.get('reference') is not None:
            sec_xtb.reference = model.reference

        # contributions with a dedicated sub-section; all others are
        # collected under 'contributions'
        dedicated = ('hamiltonian', 'coulomb', 'repulsion')
        for contribution in model.get('contribution', []):
            name = contribution.name.lower()
            sec_interaction = Interaction()
            target = name if name in dedicated else 'contributions'
            getattr(sec_xtb, target).append(sec_interaction)
            sec_interaction.type = name
            sec_interaction.parameters = {
                p[0]: p[1].tolist() if isinstance(p[1], np.ndarray) else p[1]
                # a contribution may come without any parameter line
                for p in contribution.get('parameters', [])
            }

    def parse_single_point(self, source, section):
        """Write system and calculation for a single-point section.

        Args:
            source: Parsed single-point section, or ``None``.
            section: Name of the section; ``'final_single_point'`` denotes
                the single point that closes a geometry optimization.

        Returns:
            The new ``Calculation`` section, or ``None`` if ``source`` is
            ``None``.
        """
        if source is None:
            return

        total_time = None
        # determine file extension of input structure file
        coord_file = self.archive.run[-1].x_xtb_calculation_setup.get(
            'coordinate file', 'coord'
        )
        if section == 'final_single_point':
            # the optimized structure is read from xtbopt.<ext>; timing is
            # assigned by the caller (parse_opt)
            extension = 'coord' if coord_file == 'coord' else coord_file.split('.')[-1]
            coord_file = f'xtbopt.{extension}'
        else:
            self._run_index += 1
            start_time, total_time = self.out_parser.get_time(index=self._run_index)

        sec_system = self.parse_system(coord_file)
        sec_calc = self.parse_calculation(source)
        sec_calc.system_ref = sec_system
        if total_time is not None:
            sec_calc.time_physical = start_time + total_time
            sec_calc.time_calculation = total_time

        return sec_calc

    def parse_gfn(self, section):
        """Parse method and single point of one GFN section and set a
        ``SinglePoint`` workflow."""
        self.parse_method(section)
        self.parse_single_point(self.out_parser.get(section), section)
        self.archive.workflow2 = SinglePoint()

    def parse_opt(self, section):
        """Parse the geometry optimization section, if present."""
        module = self.out_parser.get(section)
        if module is None:
            return

        self._run_index += 1

        # default to an empty list so that an optimization without parsed
        # cycles does not raise
        cycles = module.get('cycle', [])
        start_time, total_time = self.out_parser.get_time(section='ANC optimizer')
        # the optimizer wall time is spread evenly over all cycles plus the
        # final single point
        time_per_step = (
            total_time / (len(cycles) + 1) if total_time is not None else None
        )
        self.traj_parser.mainfile = os.path.join(self.maindir, 'xtbopt.log')

        for n, cycle in enumerate(cycles):
            self.parse_system(n)
            sec_scc = self.parse_calculation(cycle)
            if sec_scc is not None and time_per_step is not None:
                sec_scc.time_physical = start_time + time_per_step * (n + 1)
                sec_scc.time_calculation = time_per_step

        # final single point
        sec_scc = self.parse_single_point(
            module.get('final_single_point'), 'final_single_point'
        )
        if sec_scc is not None and time_per_step is not None:
            sec_scc.time_physical = start_time + time_per_step * (len(cycles) + 1)
            sec_scc.time_calculation = time_per_step

        # workflow parameters
        workflow = GeometryOptimization(method=GeometryOptimizationMethod())
        for key, val in module.get('setup', {}).get('parameter', []):
            name = self._metainfo_map.get(key)
            if key == 'energy convergence':
                workflow.method.convergence_tolerance_energy_difference = (
                    val * ureg.hartree
                )
            elif key == 'grad. convergence':
                workflow.method.convergence_tolerance_force_maximum = (
                    val * ureg.hartree / ureg.bohr
                )
            # NOTE(review): 'maximium' (sic) presumably mirrors the spelling
            # in the program output - do not "correct" without checking.
            elif key == 'maximium RF displ.':
                workflow.method.convergence_tolerance_displacement_maximum = (
                    val * ureg.bohr
                )
            elif name is not None:
                setattr(workflow, f'x_xtb_{name}', val)
        self.archive.workflow2 = workflow

    def parse_md(self, section):
        """Parse the molecular dynamics section, if present."""
        module = self.out_parser.get(section)
        if module is None:
            return

        self.traj_parser.mainfile = os.path.join(self.maindir, 'xtb.trj')

        # get trj dump frequency to determine which frame to parse in trajectory file
        trj_freq = module.get('x_xtb_dumpstep_trj', 1)

        traj_steps = [
            n * int(trj_freq) for n in range(len(self.traj_parser.get('frame', [])))
        ]
        self.n_atoms = self.archive.run[-1].x_xtb_calculation_setup.get(
            'number of atoms', 0
        )
        self.trajectory_steps = [-1] + traj_steps
        cycles = module.get('cycle', [])
        self.thermodynamics_steps = [int(cycle[0]) for cycle in cycles]

        # step -> index of the first frame with that step; replaces a
        # list.index() scan per frame
        frame_of_step = {}
        for n_frame, step in enumerate(traj_steps):
            frame_of_step.setdefault(step, n_frame)

        for step in self.trajectory_steps:
            if step < 0:
                continue
            atoms = self.traj_parser.get_atoms(frame_of_step[step])
            if atoms is None:
                continue
            data = dict(
                labels=atoms.get_chemical_symbols(),
                positions=atoms.get_positions() * ureg.angstrom,
            )
            lattice_vectors = np.array(atoms.get_cell())
            if np.count_nonzero(lattice_vectors) > 0:
                data['lattice_vectors'] = lattice_vectors * ureg.angstrom
            self.parse_trajectory_step(dict(atoms=data))

        time_start, time_calc = self.out_parser.get_time(section='MD')
        thermo_steps = self.thermodynamics_steps
        # max() of an empty sequence raises, so require at least one step
        time_step = (
            time_calc / (max(thermo_steps) + 1)
            if time_calc is not None and len(thermo_steps) > 0
            else None
        )

        for n_frame, step in enumerate(thermo_steps):
            cycle = cycles[n_frame]
            data = dict(
                step=step,
                time_physical=cycle[1] * ureg.ps,
                energy=dict(
                    total=dict(
                        potential=cycle[2] * ureg.hartree,
                        kinetic=cycle[3] * ureg.hartree,
                        value=cycle[6] * ureg.hartree,
                    )
                ),
                temperature=cycle[5] * ureg.kelvin,
            )
            if time_step is not None:
                # overrides the value set above with the wall-time estimate
                data['time_physical'] = time_start + time_step * (step + 1)
                data['time_calculation'] = time_step
            self.parse_thermodynamics_step(data)

        # workflow parameters
        self.parse_md_workflow(
            {key: val for key, val in module.items() if key.startswith('x_xtb')}
        )

    def write_to_archive(self) -> None:
        """Parse the main file and populate ``self.archive``."""
        self.out_parser.mainfile = self.mainfile
        self.out_parser.logger = self.logger
        self.coord_parser.logger = self.logger
        self.traj_parser.logger = self.logger
        self.calculation_type = None
        self.maindir = os.path.dirname(self.mainfile)
        self._run_index = 0

        def to_timestamp(value):
            # dates are matched as '<Y/m/d> <H:M:S.f>'
            return datetime.strptime(value, '%Y/%m/%d %H:%M:%S.%f').timestamp()

        def si_magnitudes(values, unit):
            # convert the quantities that were found, drop the missing ones
            return {
                key: val.to(unit).magnitude
                for key, val in values.items()
                if val is not None
            }

        # run parameters
        sec_run = Run()
        self.archive.run.append(sec_run)
        sec_run.program = Program(
            name='xTB', version=self.out_parser.get('program_version')
        )
        sec_run.x_xtb_calculation_setup = {
            p[0]: p[1]
            for p in self.out_parser.get('calculation_setup', {}).get('parameter', [])
        }
        date_start = self.out_parser.date_start
        date_end = self.out_parser.date_end
        if date_start is not None:
            sec_run.time_run = TimeRun(date_start=to_timestamp(date_start))
        if date_end is not None:
            # an end date without a start date must not fail on a missing
            # time_run section
            if sec_run.time_run is None:
                sec_run.time_run = TimeRun()
            sec_run.time_run.date_end = to_timestamp(date_end)

        # modules
        self.parse_gfn('gfnff')
        self.parse_gfn('gfn1')
        self.parse_gfn('gfn2')
        self.parse_opt('ancopt')
        self.parse_md('md')

        # output properties
        properties = self.out_parser.get('property')
        # the property printout may be absent altogether
        dipole = properties.dipole if properties is not None else None
        if dipole is not None:
            if sec_run.calculation:
                sec_calc = sec_run.calculation[-1]
            else:
                sec_calc = Calculation()
                sec_run.calculation.append(sec_calc)
            sec_multipoles = Multipoles()
            sec_calc.multipoles.append(sec_multipoles)
            sec_multipoles.dipole = MultipolesEntry(
                **si_magnitudes(
                    dict(total=dipole.full, x_xtb_q_only=dipole.q), 'C * m'
                )
            )
            quadrupole = properties.quadrupole
            if quadrupole is not None:
                sec_multipoles.quadrupole = MultipolesEntry(
                    **si_magnitudes(
                        dict(
                            total=quadrupole.full,
                            x_xtb_q_only=quadrupole.q,
                            x_xtb_q_plus_dip=quadrupole.q_dip,
                        ),
                        'C * m**2',
                    )
                )
        # TODO implement vibrational properties