castep-outputs 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2023, Jacob Wilkins
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,12 @@
1
+ Metadata-Version: 2.1
2
+ Name: castep_outputs
3
+ Version: 0.1
4
+ Summary: A package for extracting information from castep outputs
5
+ Home-page: https://github.com/oerc0122/castep_outputs
6
+ Download-URL: https://github.com/oerc0122/castep_outputs
7
+ Author: Jacob Wilkins
8
+ Author-email: jacob.wilkins@stfc.ac.uk
9
+ License: BSD3
10
+ Requires-Python: >=3.8
11
+ Provides-Extra: ruamel
12
+ Provides-Extra: yaml
@@ -0,0 +1,2 @@
1
+ # castep_outputs
2
+ Parser for CASTEP output files
@@ -0,0 +1,6 @@
1
"""
Module to parse miscellaneous castep files
"""

__AUTHOR__ = "Jacob Wilkins"
# Kept identical to the released version string ("v0.1") used by the
# command-line ``--version`` flag and by the runner module of this package.
__VERSION__ = "v0.1"
@@ -0,0 +1,10 @@
1
"""
Run main castep parser
"""

from .castep_outputs_main import main

__AUTHOR__ = "Jacob Wilkins"
__VERSION__ = "v0.1"

# No ``__name__`` guard here: the command-line program runs as soon as this
# module is executed or imported.
main()
@@ -0,0 +1,95 @@
1
+ """
2
+ Argument parser
3
+ """
4
+ from typing import Sequence, Dict, List
5
+ import os.path
6
+ import argparse
7
+ # pylint: disable=line-too-long
8
+
9
+ from .utility import SUPPORTED_FORMATS
10
+ from .constants import CASTEP_OUTPUT_NAMES, CASTEP_FILE_FORMATS
11
+
12
# Command-line interface of the package; consumed by ``parse_args`` below.
AP = argparse.ArgumentParser(
    prog="CASTEP outputs",
    description=f"""Attempts to find all files for seedname, filtered by `inc` args (default: all).
Explicit files can be passed using longname arguments.
Parse most human-readable castep outputs including: {', '.join(CASTEP_FILE_FORMATS)}"""
)

# --- General options
AP.add_argument("seedname", nargs=argparse.REMAINDER, help="Seed name for data")
AP.add_argument("-V", "--version", action="version", version="%(prog)s v0.1")
AP.add_argument("-L", "--log", help="Verbose output",
                choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'), default="WARNING")
AP.add_argument("-o", "--output", help="File to write output, default: screen", default=None)
AP.add_argument("-f", "--out-format", help="Output format", choices=SUPPORTED_FORMATS, default="json")

AP.add_argument("-t", "--testing", action="store_true", help="Set testing mode to produce flat outputs")

# --- Selection switches (``--inc-<type>``), registered in the order they appear in the help
AP.add_argument("-A", "--inc-all", action="store_true", help="Extract all available information")

_INC_SHORT_FLAGS = (
    ("-c", "castep"),
    ("-g", "geom"),
    ("-m", "md"),
    ("-b", "bands"),
    ("-p", "phonon_dos"),
    ("-e", "efield"),
    ("-x", "xrd_sf"),
    ("-H", "hug"),
    ("-E", "elf_fmt"),
    ("-C", "chdiff_fmt"),
    ("-P", "pot_fmt"),
    ("-D", "den_fmt"),
    ("-X", "elastic"),
    ("-T", "ts"),
)
for _flag, _typ in _INC_SHORT_FLAGS:
    AP.add_argument(_flag, f"--inc-{_typ}", action="store_true", help=f"Extract .{_typ} information")

# These two types only have a long-form switch
for _typ in ("param", "cell"):
    AP.add_argument(f"--inc-{_typ}", action="store_true", help=f"Extract .{_typ} information")

# --- Explicit file lists (``--<type> FILE ...``); each option gets its own fresh default list
_EXPLICIT_TYPES = (
    "castep", "geom", "cell", "param", "md", "bands", "hug", "phonon_dos",
    "efield", "xrd_sf", "elf_fmt", "chdiff_fmt", "pot_fmt", "den_fmt", "elastic", "ts",
)
for _typ in _EXPLICIT_TYPES:
    AP.add_argument(f"--{_typ}", nargs="*",
                    help=f"Extract from {_typ.upper()} as .{_typ} type", default=[])

# Do not leave the loop helpers behind in the module namespace
del _flag, _typ, _INC_SHORT_FLAGS, _EXPLICIT_TYPES
63
+
64
+
65
def parse_args(to_parse: Sequence[str] = ()) -> argparse.Namespace:
    """
    Parse the command line and gather the files to read for every file type

    :param to_parse:file types (entries of ``CASTEP_OUTPUT_NAMES``) whose ``inc_<type>`` flag
                    is forced on; when given, the "include everything" default is not applied
    :returns:namespace whose ``<type>`` attributes list the explicitly passed files
             followed by the files located from the seed names
    """
    args = AP.parse_args()

    # argparse returns the very list object that was given as ``default=[]``.
    # Work on copies so the appends below never leak into the parser defaults
    # (which would accumulate files across repeated calls).
    for typ in CASTEP_OUTPUT_NAMES:
        setattr(args, typ, list(getattr(args, typ)))

    parse_all = args.inc_all or not any(getattr(args, f"inc_{typ}") for typ in CASTEP_OUTPUT_NAMES)

    # No explicit selection at all: include every type
    if parse_all and not to_parse:
        for typ in CASTEP_OUTPUT_NAMES:
            setattr(args, f"inc_{typ}", True)

    # Types requested by the caller are always included
    for typ in to_parse:
        setattr(args, f"inc_{typ}", True)

    for seed in args.seedname:
        # A seed that is itself a recognised file goes straight into its own list
        if os.path.isfile(seed) and (ext := os.path.splitext(seed)[1][1:]) in CASTEP_OUTPUT_NAMES:
            getattr(args, ext).append(seed)
            continue

        for typ in CASTEP_OUTPUT_NAMES:
            if not getattr(args, f"inc_{typ}"):
                continue
            # ``seed + typ`` is the historical lookup (it matches seeds typed as "name.");
            # ``seed.typ`` additionally makes a bare seed name ("name") work.
            for candidate in (seed + typ, f"{seed}.{typ}"):
                if os.path.isfile(candidate):
                    getattr(args, typ).append(candidate)
                    break

    return args
90
+
91
+
92
def args_to_dict(args: argparse.Namespace) -> Dict[str, List[str]]:
    """ Collect the per-type file lists stored on `args` into a plain dictionary """
    files_by_type = {}
    for typ in CASTEP_OUTPUT_NAMES:
        files_by_type[typ] = getattr(args, typ)
    return files_by_type
@@ -0,0 +1,105 @@
1
+ """
2
+ Run main castep parser
3
+ """
4
+ import logging
5
+ import fileinput
6
+ import io
7
+ import sys
8
+ import os.path
9
+
10
+ from .args import (parse_args, args_to_dict)
11
+ from .utility import (json_safe, flatten_dict, get_dumpers)
12
+ from .parse_castep_file import parse_castep_file
13
+ from .parse_cell_param_file import parse_cell_param_file
14
+ from .parse_md_geom_file import parse_md_geom_file
15
+ from .parse_extra_files import (parse_bands_file, parse_hug_file, parse_phonon_dos_file,
16
+ parse_efield_file, parse_xrd_sf_file, parse_elf_fmt_file,
17
+ parse_chdiff_fmt_file, parse_pot_fmt_file, parse_den_fmt_file,
18
+ parse_elastic_file, parse_ts_file)
19
+
20
+
21
# Maps a file extension (including the leading dot) to the function that parses it.
# ``parse_single`` uses it to pick a parser from a file name and ``parse_all``
# looks parsers up as ``PARSERS[f".{typ}"]``.
PARSERS = {
    ".castep": parse_castep_file,
    ".cell": parse_cell_param_file,
    ".param": parse_cell_param_file,
    ".geom": parse_md_geom_file,
    ".md": parse_md_geom_file,
    ".bands": parse_bands_file,
    ".hug": parse_hug_file,
    ".phonon_dos": parse_phonon_dos_file,
    ".efield": parse_efield_file,
    ".xrd_sf": parse_xrd_sf_file,
    ".elf_fmt": parse_elf_fmt_file,
    ".chdiff_fmt": parse_chdiff_fmt_file,
    ".pot_fmt": parse_pot_fmt_file,
    ".den_fmt": parse_den_fmt_file,
    ".elastic": parse_elastic_file,
    ".ts": parse_ts_file
}
39
+
40
+
41
def parse_single(in_file, parser: callable = None, out_format="json",
                 *, loglevel=logging.WARNING, testing=False):
    """
    Parse a file using the given parser and post-process according to options

    :param in_file:path of the file to read, or an already opened text stream
    :param parser:callable taking the opened file and returning the parsed data;
                  when omitted it is looked up in ``PARSERS`` from the file extension
    :param out_format:name of the output format; "json" makes the data JSON-safe
    :param loglevel:level handed to ``logging.basicConfig``
    :param testing:if set, data is made JSON-safe and every dictionary is flattened
    :returns:the (post-processed) data produced by `parser`
    :raises KeyError:if no parser is given and none is registered for the extension
    """

    logging.basicConfig(format="%(levelname)s: %(message)s", level=loglevel)

    is_stream = isinstance(in_file, io.TextIOBase)

    if parser is None:
        # A stream is not a path: use its ``name`` (when it has one) to guess the type,
        # so that unnamed streams end in the KeyError below rather than a TypeError.
        source_name = str(getattr(in_file, "name", "")) if is_stream else in_file
        _, ext = os.path.splitext(source_name)
        parser = PARSERS.get(ext, None)
        if not parser:
            raise KeyError(f"Parser for file {in_file} (assumed type: {ext}) not found")

    if is_stream:
        data = parser(in_file)
    else:
        # ``openhook=hook_encoded`` instead of the ``encoding=`` keyword: the keyword
        # only exists from Python 3.10 whereas the package supports Python >= 3.8.
        with fileinput.FileInput(in_file, mode='r',
                                 openhook=fileinput.hook_encoded('utf-8')) as file:
            data = parser(file)

    if out_format == "json" or testing:
        data = json_safe(data)

    if testing:
        # Each run is flattened on its own when the parser returned a list of runs
        if isinstance(data, list):
            data = [flatten_dict(run) for run in data]
        else:
            data = flatten_dict(data)

    return data
71
+
72
+
73
def parse_all(output=None, out_format="json", loglevel=logging.WARNING, *, testing=False, **files):
    """ Parse every file in `files` (lists of paths keyed by file type) and dump each result """
    dump = get_dumpers(out_format)
    to_stream = isinstance(output, io.TextIOBase)

    for file_type, targets in files.items():
        type_parser = PARSERS[f".{file_type}"]

        for target in targets:
            parsed = parse_single(target, type_parser, out_format,
                                  loglevel=loglevel, testing=testing)

            # No destination: write to the screen, one blank-terminated dump per file
            if output is None:
                dump(parsed, sys.stdout)
                print()
                continue

            # Already opened destination
            if to_stream:
                dump(parsed, output)
                continue

            # Destination given as a path: results are appended to it
            with open(output, 'a+', encoding='utf-8') as out_file:
                dump(parsed, out_file)
90
+
91
+
92
def main():
    """ Command-line entry point: read the arguments and parse all requested files """
    args = parse_args()
    files = args_to_dict(args)

    options = {
        "output": args.output,
        "loglevel": getattr(logging, args.log.upper()),
        "testing": args.testing,
        "out_format": args.out_format,
    }
    parse_all(**options, **files)


if __name__ == "__main__":
    main()
@@ -0,0 +1,307 @@
1
+ """ Module containing all regexes """
2
+
3
+ from typing import List, Sequence, Optional, TextIO
4
+ import re
5
+ import io
6
+ import itertools
7
+
8
+ from .constants import MINIMISERS, SHELLS, FST_D
9
+
10
+
11
def get_numbers(line: str) -> List[str]:
    """ Return every number found in `line`, in order, as strings """
    found = NUMBER_RE.findall(line)
    return found
14
+
15
+
16
def get_block(init_line: str, in_file: TextIO,
              start: re.Pattern, end: re.Pattern, *, cnt: int = 1,
              out_fmt: type = io.StringIO, eof_possible: bool = False):
    """ Check if line is the start of a block and return
    the block if it is, moving in_file forward as it does so

    :param init_line:line tested against `start`; it becomes the first line of the block
    :param in_file:iterable of lines, consumed up to and including the closing line
    :param start:pattern (searched, not matched) identifying the opening line
    :param end:pattern (searched) identifying a closing line
    :param cnt:number of lines matching `end` to read before the block is complete
    :param out_fmt:type of the result: ``str``, ``list`` (split into lines) or any callable
                   applied to the block text (default wraps it in ``io.StringIO``)
    :param eof_possible:accept running out of input before the block is closed
    :returns:an empty string if `init_line` does not open a block, else the block as `out_fmt`
    :raises IOError:if the input ends inside the block and `eof_possible` is false
    """
    if not re.search(start, init_line):
        return ""

    # Collect the lines and join once instead of growing a string line by line
    lines = [init_line]
    remaining = cnt
    for line in in_file:
        lines.append(line)
        if re.search(end, line):
            remaining -= 1
            if remaining == 0:
                break
    else:
        # Only reached when the input ran out before the last closing line was seen
        if not eof_possible:
            if hasattr(in_file, 'name'):
                raise IOError(f"Unexpected end of file in {in_file.name}.")
            raise IOError("Unexpected end of file.")

    block = "".join(lines)

    if not block:
        return ""
    if out_fmt is list:
        return block.splitlines()
    # ``str`` and ``io.StringIO`` need no special casing: both accept the text directly
    return out_fmt(block)
50
+
51
+
52
def labelled_floats(labels: Sequence[str], counts: Sequence[Optional[int]] = (None,),
                    sep: str = r"\s+?", suff: str = "") -> str:
    """ Build a regex which captures floats into named groups
    :param labels:iterable of group names, one per captured value
    :param counts:iterable of repeat counts paired with the labels (must not be longer than labels);
                  a falsy count captures a single float
    :param sep:separator expected in front of every float
    :param suff:suffix expected after every float (cannot be combined with counts)
    """
    if suff and any(counts):
        raise NotImplementedError("Suffix and counts not currently supported")

    pieces = []
    for label, cnt in itertools.zip_longest(labels, counts):
        if not cnt:
            # Single value: the group holds just the number
            pieces.append(f"(?:{sep}(?P<{label}>{EXPNUMBER_RE}){suff})")
        else:
            # Repeated value: the group holds all the numbers with their separators
            pieces.append(f"(?:(?P<{label}>(?:{sep}{EXPNUMBER_RE}{suff}){{{cnt}}}))")

    return "".join(pieces)
70
+
71
+
72
+ # --- RegExes
73
+ # Regexps to recognise numbers
74
# Optionally signed decimal number; a leading or trailing "." is accepted
FNUMBER_RE = r"(?:[+-]?(?:\d*\.?\d+|\d+\.?\d*))"
# FNUMBER_RE followed by an optional exponent of one to three digits
EXPNUMBER_RE = rf"(?:{FNUMBER_RE}(?:[Ee][+-]?\d{{1,3}})?)"
# Unsigned integer
INTNUMBER_RE = r"(?:\d+)"
# Compiled pattern matching any of the above; used by get_numbers
NUMBER_RE = re.compile(rf"(?:{EXPNUMBER_RE}|{FNUMBER_RE}|{INTNUMBER_RE})")

# Regexp to identify extended chemical species
SPECIES_RE = r"[A-Z][a-z]{0,2}"
# Species optionally followed by a ":label" suffix
ATOM_NAME_RE = rf"\b{SPECIES_RE}(?::\w+)?\b"

# Unless we have *VERY* exotic electron shells
SHELL_RE = rf"\d[{''.join(SHELLS)}]\d{{0,2}}"

# "<-- tag" marker; the word after the arrow is captured as "tag"
TAG_RE = re.compile(r"<--\s*(?P<tag>\w+)")

# Atom regexp
ATREG = rf"(?P<spec>{ATOM_NAME_RE})\s+(?P<index>\d+)"


# Atom reference with 3-vector
ATDAT3VEC = re.compile(ATREG + labelled_floats(FST_D))
# As ATDAT3VEC, followed by a TAG_RE marker
ATDATTAG = re.compile(rf"\s*{ATDAT3VEC.pattern}\s*{TAG_RE.pattern}")

# SCF Loop
# NOTE: the "?" after the first labelled_floats applies to its last group only
SCF_LOOP_RE = re.compile(r"\s*(?:Initial|\d+)\s*"
                         rf"{labelled_floats(('energy', 'fermi_energy', 'energy_gain'))}?\s*"
                         f"{labelled_floats(('time',))}")

# PS Energy
PS_SHELL_RE = re.compile(
    rf"\s*Pseudo atomic calculation performed for (?P<spec>{SPECIES_RE})(\s+{SHELL_RE})+")
104
+
105
+ # PS Projector
106
# Single projector: orbital digit, shell digit and an optional type marker.
# "UU" must be tried before "U": alternation takes the first alternative that
# matches, so with "U" first a "UU" projector was captured as type "U".
PSPOT_PROJ_RE = re.compile(r"(?P<orbital>\d)(?P<shell>\d)(?P<type>UU|U|N)?")
# Same projector without named groups, for embedding in larger patterns
UNLABELLED_PROJ_RE = r"\d\d(?:UU|U|N)?"
108
+
109
# Row of a "|"-delimited table: orbital (optionally "/<digits>"), occupation and energy
PSPOT_REFERENCE_STRUC_RE = re.compile(
    rf"""
    ^\s*\|\s*
    (?P<orb>{SHELL_RE}(?:/\d+)?)\s*
    {labelled_floats(('occupation', 'energy'))}
    \s*\|\s*$
    """, re.VERBOSE)
# Row of a "|"-delimited table: beta (number or "loc"), l, optional j, e, Rc, scheme and norm
PSPOT_DEF_RE = re.compile(
    rf"""
    ^\s*\|\s*
    (?P<beta>\d+|loc)\s*
    (?P<l>\d+)\s*
    (?P<j>\d+)?\s*
    {labelled_floats(('e', 'Rc'))}\s*
    (?P<scheme>\w+)\s*
    (?P<norm>\d+)
    \s*\|\s*$
    """, re.VERBOSE)

# PSPot String
# Seven "|"-separated numbers, then the ":"-separated projectors, an optional
# "{...}" shell list, the "(...)" options, an optional "#" and an optional "[...]" shell list
PSPOT_RE = re.compile(labelled_floats(("local_channel",
                                       "core_radius",
                                       "beta_radius",
                                       "r_inner",
                                       "coarse",
                                       "medium",
                                       "fine"), sep=r"\|?")
                      +
                      r"\|"
                      rf"(?P<proj>{UNLABELLED_PROJ_RE}(?::{UNLABELLED_PROJ_RE})*)"
                      rf"(?:\{{(?P<shell_swp>{SHELL_RE}(?:,{SHELL_RE})*)\}})?"
                      rf"\((?P<opt>[^)]+)\)"
                      rf"(?P<debug>#)?"
                      rf"(?:\[(?P<shell_swp2>{SHELL_RE}(?:,{SHELL_RE})*)\])?"
                      )
144
+
145
+ # Forces block
146
# Header line ending in "Forces ***..."; the words before "Forces" are captured
FORCES_BLOCK_RE = re.compile(r" ([a-zA-Z ]*)Forces \*+\s*$", re.IGNORECASE)
# Stresses block
STRESSES_BLOCK_RE = re.compile(r" ([a-zA-Z ]*)Stress Tensor \*+\s*$", re.IGNORECASE)

# Bonds
# "<atom> <index> -- <atom> <index>" followed by population and length
BOND_RE = re.compile(rf"""\s*
    (?P<spec1>{ATOM_NAME_RE})\s*(?P<ind1>\d+)\s*
    --\s*
    (?P<spec2>{ATOM_NAME_RE})\s*(?P<ind2>\d+)\s*
    {labelled_floats(("population", "length"))}
    """, re.VERBOSE)

# Pair pot
# One compiled pattern per kind of line, keyed by a descriptive name
PAIR_POT_RES = {
    'two_body_one_spec': re.compile(
        rf"^(?P<tag>\w+)?\s*\*\s*(?P<spec>{ATOM_NAME_RE})\s*\*\s*$"
    ),
    'two_body_spec': re.compile(
        rf"(?P<spec1>{ATOM_NAME_RE})\s*-\s*"
        rf"(?P<spec2>{ATOM_NAME_RE})"
    ),
    # "params" holds between one and four numbers (count given as the string '1,4')
    'two_body_val': re.compile(
        rf"""
        (?P<tag>\w+)?\s*\*\s*
        (?P<label>\w+)\s*
        {labelled_floats(('params',), counts=('1,4',))}\s*
        [\w^/*]+\s* \* \s*
        <--\s*(?P<type>\w+)
        """, re.ASCII | re.VERBOSE
    ),
    'three_body_spec': re.compile(
        rf"""
        ^(?P<tag>\w+)?\s*\*\s*
        (?P<spec>(?:{ATOM_NAME_RE}\s*){{3}})
        \s*\*\s*$""", re.VERBOSE
    ),
    'three_body_val': re.compile(
        rf"""
        ^(?P<tag>\w+)?\s*\*\s*
        (?P<label>\w+)\s*
        {labelled_floats(('params',))}\s*
        [\w^/*]+\s* \* \s*
        <--\s*(?P<type>\w+)
        """, re.VERBOSE
    )
}
192
+
193
+ # Orbital population
194
# Atom, orbital letter (S/P/D/F with optional x, y or z) and its charge
ORBITAL_POPN_RE = re.compile(rf"\s*{ATREG}\s*(?P<orb>[SPDF][xyz]?)"
                             rf"\s*{labelled_floats(('charge',))}")

# Regexp to identify phonon block in .castep file
PHONON_RE = re.compile(
    rf"""
    \s+\+\s+
    q-pt=\s*{INTNUMBER_RE}\s+
    \({labelled_floats(("qpt",), counts=(3,))}\)
    \s+
    ({FNUMBER_RE})\s+\+
    """, re.VERBOSE)

# Single "+ ... +" row of a phonon block; every column after the frequency is optional
PROCESS_PHONON_RE = re.compile(
    rf"""\s+\+\s+
    (?P<N>\d+)\s+
    (?P<frequency>{FNUMBER_RE})\s*
    (?P<irrep>[a-zA-V])?\s*
    (?P<intensity>{FNUMBER_RE})?\s*
    (?P<active>[YN])?\s*
    (?P<raman_intensity>{FNUMBER_RE})?\s*
    (?P<raman_active>[YN])?\s*\+""", re.VERBOSE)

# Row terminated by "+TDDFT": index, energy, error and a type word
TDDFT_RE = re.compile(
    rf"""\s*\+\s*
    {INTNUMBER_RE}
    {labelled_floats(("energy", "error"))}
    \s*(?P<type>\w+)
    \s*\+TDDFT""", re.VERBOSE)

# "Spin= .. kpt= .. ( kx ky kz ) kpt-group= .." header line
BS_RE = re.compile(
    rf"""
    Spin=\s*(?P<spin>{INTNUMBER_RE})\s*
    kpt=\s*{INTNUMBER_RE}\s*
    \({labelled_floats(("kx","ky","kz"))}\)\s*
    kpt-group=\s*(?P<kpgrp>{INTNUMBER_RE})
    """, re.VERBOSE)

# Five whitespace-separated numbers labelled T, E, F, S and Cv
THERMODYNAMICS_DATA_RE = re.compile(labelled_floats(("T", "E", "F", "S", "Cv")))

# Alternation of the upper-cased names in MINIMISERS
MINIMISERS_RE = f"(?:{'|'.join(map(lambda x: x.upper(), MINIMISERS))})"
# "|"-delimited row: step, lambda, Fdelta and enthalpy
GEOMOPT_MIN_TABLE_RE = re.compile(
    r"\s*\|\s* (?P<step>[^|]+)" +
    labelled_floats(("lambda", "Fdelta", "enthalpy"), sep=r"\s*\|\s*") +
    r"\s* \|", re.VERBOSE)

# "|"-delimited row: parameter, value, tolerance, units (not captured) and Yes/No
GEOMOPT_TABLE_RE = re.compile(
    r"\s*\|\s* (?P<parameter>\S+)" +
    labelled_floats(('value', 'tolerance'), sep=r"\s*\|\s*") +
    r"\s*\|\s* \S+ (?#Units) \s*\|\s* (?P<converged>No|Yes) \s*\|", re.VERBOSE)
244
+
245
+
246
# Regexp to identify Mulliken population analysis line
POPN_RE = re.compile(rf"\s*{ATREG}\s*(?P<spin_sep>up:)?" +
                     labelled_floats((*SHELLS, "total", "charge", "spin")) +
                     "?"   # Spin is optional
                     )

# Matching "dn:" line: index followed by one value per shell and the total
POPN_RE_DN = re.compile(r"\s+\d+\s*dn:" +
                        labelled_floats((*SHELLS, "total"))
                        )

# Regexp for born charges
BORN_RE = re.compile(rf"\s+{ATREG}(?P<charges>(?:\s*{FNUMBER_RE}){{3}})")

# MagRes REs
# Positions in this tuple line up with the entries of MAGRES_TASK below
MAGRES_RE = (
    # "Chemical Shielding Tensor" 0
    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso','aniso'))}\s*"
               rf"(?P<asym>{FNUMBER_RE}|N/A)\s*\|\s*"),
    # "Chemical Shielding and Electric Field Gradient Tensor" 1
    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso','aniso'))}\s*"
               rf"(?P<asym>{FNUMBER_RE}|N/A)"
               rf"{labelled_floats(('cq', 'eta'))}\s*\|\s*"),
    # "Electric Field Gradient Tensor" 2
    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('cq',))}\s*"
               rf"(?P<asym>{FNUMBER_RE}|N/A)\s*\|\s*"),
    # "(?:I|Ani)sotropic J-coupling" 3
    re.compile(rf"\s*\|\**\s*{ATREG}{labelled_floats(('fc','sd','para','dia','tot'))}\s*\|\s*"),
    # "Hyperfine Tensor" 4
    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso',))}\s*\|\s*")
)

# MagRes Tasks
MAGRES_TASK = (
    "Chemical Shielding",
    "Chemical Shielding and Electric Field Gradient",
    "Electric Field Gradient",
    "(An)Isotropic J-coupling",
    "Hyperfine"
)
285
+
286
+ # Regexp to identify block in .phonon or .phonon_dos file
287
# Index, three numbers (u, v, w), species and mass
FRACCOORDS_RE = re.compile(rf"\s*(?P<index>{INTNUMBER_RE}){labelled_floats(('u', 'v', 'w'))}"
                           rf"\s*(?P<spec>{SPECIES_RE}){labelled_floats(('mass',))}")

# "q-pt=" line: three numbers captured together as "qpt" and one more as "pth"
PHONON_PHONON_RE = re.compile(rf"""
    \s+q-pt=\s*{INTNUMBER_RE}\s*
    {labelled_floats(('qpt', 'pth'), counts=(3, 1))}
    """, re.VERBOSE)

# Three whitespace-separated numbers labelled n, f and Grad_qf
PROCESS_PHONON_PHONON_RE = re.compile(labelled_floats(('n', 'f', 'Grad_qf')))


# Regexp to identify Fermi energies in .bands file
BANDS_FERMI_RE = re.compile(r"Fermi energ(ies|y) \(in atomic units\)\s*" +
                            labelled_floats(('a', 'b')))

# Regexp to identify eigenvalue block in .bands file
# BANDS_EIGENS_RE =
#     rf"K-point\s+(\d+)\s*(\s*{FNUMBER_RE})\s*({FNUMBER_RE})\s*({FNUMBER_RE})\s*({FNUMBER_RE})"

# "key:value" or "key=value" entry
DEVEL_CODE_VAL_RE = r'[A-Za-z0-9_]+[:=]\S+'
# "name:" followed by any number of such entries and closed by ":end<name>"
DEVEL_CODE_BLOCK_RE = rf'([A-Za-z0-9_]+):(?:\s*{DEVEL_CODE_VAL_RE}\s*)*:end\S+'
@@ -0,0 +1,44 @@
1
+ """
2
+ Module for constants used in castep_outputs
3
+ """
4
+
5
# Electron shell letters; used to build the shell regex and the population column labels
SHELLS = ('s', 'p', 'd', 'f')
# Labels of the components of a 3-vector
FST_D = ('x', 'y', 'z')
# Labels of six second-order (pair) components
SND_D = ('xx', 'yy', 'zz', 'yz', 'zx', 'xy')
# Minimiser names in lower case (upper-cased when turned into a regex)
MINIMISERS = ('bfgs', 'lbfgs', 'fire', 'tpsd', 'dmd')
# Pair potential labels
PAIR_POTS = ('LJ', 'BUCK', 'COUL', 'DZ', 'POL', 'BB', 'SHO',
             'SW', 'MORS', 'POLM', 'LJ_S', 'PES', 'BU_S', 'TIP4', 'QUIP')

# Short tag -> descriptive name
TAG_ALIASES = {'E': 'energy',
               'T': 'temperature',
               'P': 'pressure',
               'h': 'lattice_vectors',
               'hv': 'lattice_velocity',
               'R': 'position',
               'V': 'velocity',
               'F': 'force'}

# Three-letter code -> descriptive name
TS_TYPES = {"REA": "reagent",
            "PRO": "product",
            "TST": "test"}


# File types handled by the package (extension without the dot); these names are
# also the ``inc_<name>`` / ``<name>`` attributes of the parsed command line
CASTEP_OUTPUT_NAMES = (
    "castep",
    "param",
    "cell",
    "geom",
    "md",
    "bands",
    "hug",
    "phonon_dos",
    "efield",
    "xrd_sf",
    "elf_fmt",
    "chdiff_fmt",
    "pot_fmt",
    "den_fmt",
    "elastic",
    "ts"
)
# The same names written as file extensions (".castep", ".param", ...)
CASTEP_FILE_FORMATS = tuple(f".{typ}" for typ in CASTEP_OUTPUT_NAMES)