PyPI - castep-outputs - Versions diffs - 0.1__tar.gz - Mend

castep-outputs 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

castep_outputs-0.1/LICENSE +28 -0
castep_outputs-0.1/PKG-INFO +12 -0
castep_outputs-0.1/README.md +2 -0
castep_outputs-0.1/castep_outputs/__init__.py +6 -0
castep_outputs-0.1/castep_outputs/__main__.py +10 -0
castep_outputs-0.1/castep_outputs/args.py +95 -0
castep_outputs-0.1/castep_outputs/castep_outputs_main.py +105 -0
castep_outputs-0.1/castep_outputs/castep_res.py +307 -0
castep_outputs-0.1/castep_outputs/constants.py +44 -0
castep_outputs-0.1/castep_outputs/parse_castep_file.py +1548 -0
castep_outputs-0.1/castep_outputs/parse_cell_param_file.py +72 -0
castep_outputs-0.1/castep_outputs/parse_extra_files.py +330 -0
castep_outputs-0.1/castep_outputs/parse_md_geom_file.py +46 -0
castep_outputs-0.1/castep_outputs/utility.py +202 -0
castep_outputs-0.1/castep_outputs.egg-info/PKG-INFO +12 -0
castep_outputs-0.1/castep_outputs.egg-info/SOURCES.txt +21 -0
castep_outputs-0.1/castep_outputs.egg-info/dependency_links.txt +1 -0
castep_outputs-0.1/castep_outputs.egg-info/entry_points.txt +2 -0
castep_outputs-0.1/castep_outputs.egg-info/requires.txt +6 -0
castep_outputs-0.1/castep_outputs.egg-info/top_level.txt +1 -0
castep_outputs-0.1/pyproject.toml +3 -0
castep_outputs-0.1/setup.cfg +4 -0
castep_outputs-0.1/setup.py +36 -0

castep_outputs-0.1/LICENSE ADDED Viewed

@@ -0,0 +1,28 @@
+BSD 3-Clause License
+Copyright (c) 2023, Jacob Wilkins
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

castep_outputs-0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,12 @@
+Metadata-Version: 2.1
+Name: castep_outputs
+Version: 0.1
+Summary: A package for extracting information from castep outputs
+Home-page: https://github.com/oerc0122/castep_outputs
+Download-URL: https://github.com/oerc0122/castep_outputs
+Author: Jacob Wilkins
+Author-email: jacob.wilkins@stfc.ac.uk
+License: BSD3
+Requires-Python: >=3.8
+Provides-Extra: ruamel
+Provides-Extra: yaml

castep_outputs-0.1/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # castep_outputs
2	+ Parser for CASTEP output files

castep_outputs-0.1/castep_outputs/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""
+Module to parse miscellaneous castep files
+"""
+__AUTHOR__ = "Jacob Wilkins"
+__VERSION__ = "v0.01"

castep_outputs-0.1/castep_outputs/__main__.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""
+Run main castep parser
+"""
+from .castep_outputs_main import main
+__AUTHOR__ = "Jacob Wilkins"
+__VERSION__ = "v0.1"
+# Called unconditionally: this file is the package's `__main__` module, so
+# executing (or importing) it runs the command-line entry point.
+main()

castep_outputs-0.1/castep_outputs/args.py ADDED Viewed

@@ -0,0 +1,95 @@
+"""
+Argument parser
+"""
+from typing import Sequence, Dict, List
+import os.path
+import argparse
+# pylint: disable=line-too-long
+from .utility import SUPPORTED_FORMATS
+from .constants import CASTEP_OUTPUT_NAMES, CASTEP_FILE_FORMATS
+# Module-level command-line parser; `parse_args` below calls `AP.parse_args()`.
+AP = argparse.ArgumentParser(
+    prog="CASTEP outputs",
+    description=f"""Attempts to find all files for seedname, filtered by `inc` args (default: all).
+    Explicit files can be passed using longname arguments.
+    Parse most human-readable castep outputs including: {', '.join(CASTEP_FILE_FORMATS)}"""
+)
+# Positional seed names / file paths; REMAINDER gathers the remaining
+# command-line arguments into a list.
+AP.add_argument("seedname", nargs=argparse.REMAINDER, help="Seed name for data")
+AP.add_argument("-V", "--version", action="version", version="%(prog)s v0.1")
+AP.add_argument("-L", "--log", help="Verbose output",
+                choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'), default="WARNING")
+AP.add_argument("-o", "--output", help="File to write output, default: screen", default=None)
+AP.add_argument("-f", "--out-format", help="Output format", choices=SUPPORTED_FORMATS, default="json")
+AP.add_argument("-t", "--testing", action="store_true", help="Set testing mode to produce flat outputs")
+# `--inc-<type>` switches: stored as `inc_<type>` and read by `parse_args`
+# to decide which file types are looked up for each seed name.
+AP.add_argument("-A", "--inc-all", action="store_true", help="Extract all available information")
+AP.add_argument("-c", "--inc-castep", action="store_true", help="Extract .castep information")
+AP.add_argument("-g", "--inc-geom", action="store_true", help="Extract .geom information")
+AP.add_argument("-m", "--inc-md", action="store_true", help="Extract .md information")
+AP.add_argument("-b", "--inc-bands", action="store_true", help="Extract .bands information")
+AP.add_argument("-p", "--inc-phonon_dos", action="store_true", help="Extract .phonon_dos information")
+AP.add_argument("-e", "--inc-efield", action="store_true", help="Extract .efield information")
+AP.add_argument("-x", "--inc-xrd_sf", action="store_true", help="Extract .xrd_sf information")
+AP.add_argument("-H", "--inc-hug", action="store_true", help="Extract .hug information")
+AP.add_argument("-E", "--inc-elf_fmt", action="store_true", help="Extract .elf_fmt information")
+AP.add_argument("-C", "--inc-chdiff_fmt", action="store_true", help="Extract .chdiff_fmt information")
+AP.add_argument("-P", "--inc-pot_fmt", action="store_true", help="Extract .pot_fmt information")
+AP.add_argument("-D", "--inc-den_fmt", action="store_true", help="Extract .den_fmt information")
+AP.add_argument("-X", "--inc-elastic", action="store_true", help="Extract .elastic information")
+AP.add_argument("-T", "--inc-ts", action="store_true", help="Extract .ts information")
+AP.add_argument('--inc-param', action="store_true", help="Extract .param information")
+AP.add_argument('--inc-cell', action="store_true", help="Extract .cell information")
+# `--<type> FILE...` options: explicit files to parse as the given type,
+# whatever their names. Each defaults to an empty list that `parse_args`
+# appends the files found from seed names to.
+AP.add_argument("--castep", nargs="*", help="Extract from CASTEP as .castep type", default=[])
+AP.add_argument("--geom", nargs="*", help="Extract from GEOM as .geom type", default=[])
+AP.add_argument("--cell", nargs="*", help="Extract from CELL as .cell type", default=[])
+AP.add_argument("--param", nargs="*", help="Extract from PARAM as .param type", default=[])
+AP.add_argument("--md", nargs="*", help="Extract from MD as .md type", default=[])
+AP.add_argument("--bands", nargs="*", help="Extract from BANDS as .bands type", default=[])
+AP.add_argument("--hug", nargs="*", help="Extract from HUG as .hug type", default=[])
+AP.add_argument("--phonon_dos", nargs="*", help="Extract from PHONON_DOS as .phonon_dos type", default=[])
+AP.add_argument("--efield", nargs="*", help="Extract from EFIELD as .efield type", default=[])
+AP.add_argument("--xrd_sf", nargs="*", help="Extract from XRD_SF as .xrd_sf type", default=[])
+AP.add_argument("--elf_fmt", nargs="*", help="Extract from ELF_FMT as .elf_fmt type", default=[])
+AP.add_argument("--chdiff_fmt", nargs="*", help="Extract from CHDIFF_FMT as .chdiff_fmt type", default=[])
+AP.add_argument("--pot_fmt", nargs="*", help="Extract from POT_FMT as .pot_fmt type", default=[])
+AP.add_argument("--den_fmt", nargs="*", help="Extract from DEN_FMT as .den_fmt type", default=[])
+AP.add_argument("--elastic", nargs="*", help="Extract from ELASTIC as .elastic type", default=[])
+AP.add_argument("--ts", nargs="*", help="Extract from TS as .ts type", default=[])
+def parse_args(to_parse: Sequence[str] = ()) -> argparse.Namespace:
+    """ Parse all arguments and add those caught by flags """
+    args = AP.parse_args()
+    parse_all = args.inc_all or not any(getattr(args, f"inc_{typ}") for typ in CASTEP_OUTPUT_NAMES)
+    # Set all flags
+    if parse_all and not to_parse:
+        for typ in CASTEP_OUTPUT_NAMES:
+            setattr(args, f"inc_{typ}", True)
+    # Only parse those which are requested
+    for typ in to_parse:
+        setattr(args, f"inc_{typ}", True)
+    # Add seeded files into parse list if to be included
+    for seed in args.seedname:
+        if os.path.isfile(seed) and (ext := os.path.splitext(seed)[1][1:]) in CASTEP_OUTPUT_NAMES:
+            getattr(args, ext).append(seed)
+        else:
+            for typ in CASTEP_OUTPUT_NAMES:
+                if getattr(args, f"inc_{typ}") and os.path.isfile(seed+typ):
+                    getattr(args, typ).append(seed+typ)
+    return args
+def args_to_dict(args: argparse.Namespace) -> Dict[str, List[str]]:
+    """ Convert args namespace to dictionary """
+    out_dict = {typ: getattr(args, typ) for typ in CASTEP_OUTPUT_NAMES}
+    return out_dict

castep_outputs-0.1/castep_outputs/castep_outputs_main.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""
+Run main castep parser
+"""
+import logging
+import fileinput
+import io
+import sys
+import os.path
+from .args import (parse_args, args_to_dict)
+from .utility import (json_safe, flatten_dict, get_dumpers)
+from .parse_castep_file import parse_castep_file
+from .parse_cell_param_file import parse_cell_param_file
+from .parse_md_geom_file import parse_md_geom_file
+from .parse_extra_files import (parse_bands_file, parse_hug_file, parse_phonon_dos_file,
+                                parse_efield_file, parse_xrd_sf_file, parse_elf_fmt_file,
+                                parse_chdiff_fmt_file, parse_pot_fmt_file, parse_den_fmt_file,
+                                parse_elastic_file, parse_ts_file)
+# Dispatch table: file extension (leading dot included) -> parsing function.
+# `.cell`/`.param` share one parser, as do `.geom`/`.md`.
+PARSERS = {
+    ".castep": parse_castep_file,
+    ".cell": parse_cell_param_file,
+    ".param": parse_cell_param_file,
+    ".geom": parse_md_geom_file,
+    ".md": parse_md_geom_file,
+    ".bands": parse_bands_file,
+    ".hug": parse_hug_file,
+    ".phonon_dos": parse_phonon_dos_file,
+    ".efield": parse_efield_file,
+    ".xrd_sf": parse_xrd_sf_file,
+    ".elf_fmt": parse_elf_fmt_file,
+    ".chdiff_fmt": parse_chdiff_fmt_file,
+    ".pot_fmt": parse_pot_fmt_file,
+    ".den_fmt": parse_den_fmt_file,
+    ".elastic": parse_elastic_file,
+    ".ts": parse_ts_file
+    }
+def parse_single(in_file, parser: callable = None, out_format="json",
+                 *, loglevel=logging.WARNING, testing=False):
+    """
+    Parse a file using the given parser and post-process according to options
+    """
+    logging.basicConfig(format="%(levelname)s: %(message)s", level=loglevel)
+    if parser is None:
+        _, ext = os.path.splitext(in_file)
+        parser = PARSERS.get(ext, None)
+        if not parser:
+            raise KeyError(f"Parser for file {in_file} (assumed type: {ext}) not found")
+    if isinstance(in_file, io.TextIOBase):
+        data = parser(in_file)
+    else:
+        with fileinput.FileInput(in_file, mode='r', encoding='utf-8') as file:
+            data = parser(file)
+    if out_format == "json" or testing:
+        data = json_safe(data)
+    if testing:
+        if isinstance(data, list):
+            data = [flatten_dict(run) for run in data]
+        else:
+            data = flatten_dict(data)
+    return data
+def parse_all(output=None, out_format="json", loglevel=logging.WARNING, *, testing=False, **files):
+    """ Parse all files in files dict """
+    file_dumper = get_dumpers(out_format)
+    for typ, paths in files.items():
+        parser = PARSERS[f".{typ}"]
+        for path in paths:
+            data = parse_single(path, parser, out_format, loglevel=loglevel, testing=testing)
+            if output is None:
+                file_dumper(data, sys.stdout)
+                print()
+            elif isinstance(output, io.TextIOBase):
+                file_dumper(data, output)
+            else:
+                with open(output, 'a+', encoding='utf-8') as out_file:
+                    file_dumper(data, out_file)
+def main():
+    """ Run the main program from command line """
+    args = parse_args()
+    dict_args = args_to_dict(args)
+    parse_all(output=args.output,
+              loglevel=getattr(logging, args.log.upper()),
+              testing=args.testing,
+              out_format=args.out_format,
+              **dict_args)
+if __name__ == "__main__":
+    main()

castep_outputs-0.1/castep_outputs/castep_res.py ADDED Viewed

@@ -0,0 +1,307 @@
+""" Module containing all regexes """
+from typing import List, Sequence, Optional, TextIO
+import re
+import io
+import itertools
+from .constants import MINIMISERS, SHELLS, FST_D
+def get_numbers(line: str) -> List[str]:
+    """ Get all numbers in a string as a list """
+    return NUMBER_RE.findall(line)
+def get_block(init_line: str, in_file: TextIO,
+              start: re.Pattern, end: re.Pattern, *, cnt: int = 1,
+              out_fmt: type = io.StringIO, eof_possible: bool = False):
+    """ Check if line is the start of a block and return
+    the block if it is, moving in_file forward as it does so """
+    block = ""
+    if not re.search(start, init_line):
+        return block
+    block = init_line
+    fnd = cnt
+    for line in in_file:
+        block += line
+        if re.search(end, line):
+            fnd -= 1
+            if fnd == 0:
+                break
+    else:
+        if not eof_possible:
+            if hasattr(in_file, 'name'):
+                raise IOError(f"Unexpected end of file in {in_file.name}.")
+            raise IOError("Unexpected end of file.")
+    if not block:
+        return ""
+    if out_fmt is str:
+        return block
+    if out_fmt is list:
+        return block.splitlines()
+    if out_fmt is io.StringIO:
+        return io.StringIO(block)
+    return out_fmt(block)
+def labelled_floats(labels: Sequence[str], counts: Sequence[Optional[int]] = (None,),
+                    sep: str = r"\s+?", suff: str = "") -> str:
+    """ Constructs a regex for extracting floats with assigned labels
+    :param labels:iterable of labels to label each group
+    :param counts:iterable of counts to group into each label (count must not exceed that of labels)
+    :param sep:separator between floats
+    """
+    if suff and any(cnt for cnt in counts):
+        raise NotImplementedError("Suffix and counts not currently supported")
+    outstr = ""
+    for label, cnt in itertools.zip_longest(labels, counts):
+        if cnt:
+            outstr += f"(?:(?P<{label}>(?:{sep}{EXPNUMBER_RE}{suff}){{{cnt}}}))"
+        else:
+            outstr += f"(?:{sep}(?P<{label}>{EXPNUMBER_RE}){suff})"
+    return outstr
+# --- RegExes
+# Regexps to recognise numbers
+# Optionally signed decimal number without exponent
+FNUMBER_RE = r"(?:[+-]?(?:\d*\.?\d+|\d+\.?\d*))"
+# FNUMBER_RE followed by an optional exponent of one to three digits
+EXPNUMBER_RE = rf"(?:{FNUMBER_RE}(?:[Ee][+-]?\d{{1,3}})?)"
+# Unsigned integer
+INTNUMBER_RE = r"(?:\d+)"
+# Compiled pattern matching any of the above; used by `get_numbers`
+NUMBER_RE = re.compile(rf"(?:{EXPNUMBER_RE}|{FNUMBER_RE}|{INTNUMBER_RE})")
+# Regexp to identify extended chemical species
+SPECIES_RE = r"[A-Z][a-z]{0,2}"
+# Species optionally followed by a ":<word>" suffix
+ATOM_NAME_RE = rf"\b{SPECIES_RE}(?::\w+)?\b"
+# Unless we have *VERY* exotic electron shells
+SHELL_RE = rf"\d[{''.join(SHELLS)}]\d{{0,2}}"
+# Trailing "<-- tag" marker; the word is captured as `tag`
+TAG_RE = re.compile(r"<--\s*(?P<tag>\w+)")
+# Atom regexp
+ATREG = rf"(?P<spec>{ATOM_NAME_RE})\s+(?P<index>\d+)"
+# Atom reference with 3-vector
+ATDAT3VEC = re.compile(ATREG + labelled_floats(FST_D))
+# Atom reference with 3-vector followed by a "<-- tag" marker
+ATDATTAG = re.compile(rf"\s*{ATDAT3VEC.pattern}\s*{TAG_RE.pattern}")
+# SCF Loop
+# The "?" after the first labelled_floats(...) binds to its last group only,
+# i.e. `energy_gain` is the optional column.
+SCF_LOOP_RE = re.compile(r"\s*(?:Initial|\d+)\s*"
+                         rf"{labelled_floats(('energy', 'fermi_energy', 'energy_gain'))}?\s*"
+                         f"{labelled_floats(('time',))}")
+# PS Energy
+PS_SHELL_RE = re.compile(
+    rf"\s*Pseudo atomic calculation performed for (?P<spec>{SPECIES_RE})(\s+{SHELL_RE})+")
+# PS Projector
+# NOTE(review): the alternation tries "U" before "UU", so unless the match is
+# anchored at its end "UU" is captured as "U" - confirm against the caller.
+PSPOT_PROJ_RE = re.compile(r"(?P<orbital>\d)(?P<shell>\d)(?P<type>U|UU|N)?")
+# Same projector shape without named groups, for embedding in PSPOT_RE
+UNLABELLED_PROJ_RE = r"\d\d(?:UU|U|N)?"
+# Table row "| <shell> <occupation> <energy> |"
+PSPOT_REFERENCE_STRUC_RE = re.compile(
+    rf"""
+    ^\s*\|\s*
+    (?P<orb>{SHELL_RE}(?:/\d+)?)\s*
+    {labelled_floats(('occupation', 'energy'))}
+    \s*\|\s*$
+    """, re.VERBOSE)
+# Table row "| <beta> <l> [<j>] <e> <Rc> <scheme> <norm> |"
+PSPOT_DEF_RE = re.compile(
+    rf"""
+    ^\s*\|\s*
+    (?P<beta>\d+|loc)\s*
+    (?P<l>\d+)\s*
+    (?P<j>\d+)?\s*
+    {labelled_floats(('e', 'Rc'))}\s*
+    (?P<scheme>\w+)\s*
+    (?P<norm>\d+)
+    \s*\|\s*$
+    """, re.VERBOSE)
+# PSPot String
+# Seven "|"-separated numbers, then the projectors (":"-separated), an optional
+# "{...}" shell list, the "(...)" options, an optional "#" and an optional
+# "[...]" shell list.
+PSPOT_RE = re.compile(labelled_floats(("local_channel",
+                                       "core_radius",
+                                       "beta_radius",
+                                       "r_inner",
+                                       "coarse",
+                                       "medium",
+                                       "fine"), sep=r"\|?")
+                      +
+                      r"\|"
+                      rf"(?P<proj>{UNLABELLED_PROJ_RE}(?::{UNLABELLED_PROJ_RE})*)"
+                      rf"(?:\{{(?P<shell_swp>{SHELL_RE}(?:,{SHELL_RE})*)\}})?"
+                      rf"\((?P<opt>[^)]+)\)"
+                      rf"(?P<debug>#)?"
+                      rf"(?:\[(?P<shell_swp2>{SHELL_RE}(?:,{SHELL_RE})*)\])?"
+                      )
+# Forces block
+# Header line "<words> Forces ****"; the leading words are captured
+FORCES_BLOCK_RE = re.compile(r" ([a-zA-Z ]*)Forces \*+\s*$", re.IGNORECASE)
+# Stresses block
+# Header line "<words> Stress Tensor ****"; the leading words are captured
+STRESSES_BLOCK_RE = re.compile(r" ([a-zA-Z ]*)Stress Tensor \*+\s*$", re.IGNORECASE)
+# Bonds
+# "<atom> <index> -- <atom> <index> <population> <length>"
+BOND_RE = re.compile(rf"""\s*
+                       (?P<spec1>{ATOM_NAME_RE})\s*(?P<ind1>\d+)\s*
+                       --\s*
+                       (?P<spec2>{ATOM_NAME_RE})\s*(?P<ind2>\d+)\s*
+                       {labelled_floats(("population", "length"))}
+                       """, re.VERBOSE)
+# Pair pot
+# Patterns for the different line shapes, keyed by a descriptive name
+PAIR_POT_RES = {
+    'two_body_one_spec': re.compile(
+        rf"^(?P<tag>\w+)?\s*\*\s*(?P<spec>{ATOM_NAME_RE})\s*\*\s*$"
+    ),
+    'two_body_spec':  re.compile(
+        rf"(?P<spec1>{ATOM_NAME_RE})\s*-\s*"
+        rf"(?P<spec2>{ATOM_NAME_RE})"
+    ),
+    'two_body_val': re.compile(
+        rf"""
+            (?P<tag>\w+)?\s*\*\s*
+            (?P<label>\w+)\s*
+            {labelled_floats(('params',), counts=('1,4',))}\s*
+            [\w^/*]+\s* \* \s*
+            <--\s*(?P<type>\w+)
+            """, re.ASCII | re.VERBOSE
+    ),
+    'three_body_spec': re.compile(
+        rf"""
+        ^(?P<tag>\w+)?\s*\*\s*
+        (?P<spec>(?:{ATOM_NAME_RE}\s*){{3}})
+        \s*\*\s*$""", re.VERBOSE
+    ),
+    'three_body_val': re.compile(
+        rf"""
+        ^(?P<tag>\w+)?\s*\*\s*
+        (?P<label>\w+)\s*
+        {labelled_floats(('params',))}\s*
+        [\w^/*]+\s* \* \s*
+        <--\s*(?P<type>\w+)
+        """, re.VERBOSE
+    )
+}
+# Orbital population
+ORBITAL_POPN_RE = re.compile(rf"\s*{ATREG}\s*(?P<orb>[SPDF][xyz]?)"
+                             rf"\s*{labelled_floats(('charge',))}")
+# Regexp to identify phonon block in .castep file
+PHONON_RE = re.compile(
+    rf"""
+    \s+\+\s+
+    q-pt=\s*{INTNUMBER_RE}\s+
+    \({labelled_floats(("qpt",), counts=(3,))}\)
+    \s+
+    ({FNUMBER_RE})\s+\+
+    """, re.VERBOSE)
+# One row of the phonon block; every column after `frequency` is optional
+PROCESS_PHONON_RE = re.compile(
+    rf"""\s+\+\s+
+    (?P<N>\d+)\s+
+    (?P<frequency>{FNUMBER_RE})\s*
+    (?P<irrep>[a-zA-V])?\s*
+    (?P<intensity>{FNUMBER_RE})?\s*
+    (?P<active>[YN])?\s*
+    (?P<raman_intensity>{FNUMBER_RE})?\s*
+    (?P<raman_active>[YN])?\s*\+""", re.VERBOSE)
+# Row ending in "+TDDFT": index, energy, error and a type word
+TDDFT_RE = re.compile(
+    rf"""\s*\+\s*
+    {INTNUMBER_RE}
+    {labelled_floats(("energy", "error"))}
+    \s*(?P<type>\w+)
+    \s*\+TDDFT""", re.VERBOSE)
+# "Spin= .. kpt= .. (kx ky kz) kpt-group= .." header line
+BS_RE = re.compile(
+    rf"""
+    Spin=\s*(?P<spin>{INTNUMBER_RE})\s*
+    kpt=\s*{INTNUMBER_RE}\s*
+    \({labelled_floats(("kx","ky","kz"))}\)\s*
+    kpt-group=\s*(?P<kpgrp>{INTNUMBER_RE})
+    """, re.VERBOSE)
+# Five numbers captured as T, E, F, S and Cv
+THERMODYNAMICS_DATA_RE = re.compile(labelled_floats(("T", "E", "F", "S", "Cv")))
+# Alternation of the upper-cased names in MINIMISERS
+MINIMISERS_RE = f"(?:{'|'.join(map(lambda x: x.upper(), MINIMISERS))})"
+# "|"-separated table row: step, lambda, Fdelta, enthalpy
+GEOMOPT_MIN_TABLE_RE = re.compile(
+    r"\s*\|\s* (?P<step>[^|]+)" +
+    labelled_floats(("lambda", "Fdelta", "enthalpy"), sep=r"\s*\|\s*") +
+    r"\s* \|", re.VERBOSE)
+# "|"-separated table row: parameter, value, tolerance, units, converged (No/Yes)
+GEOMOPT_TABLE_RE = re.compile(
+    r"\s*\|\s* (?P<parameter>\S+)" +
+    labelled_floats(('value', 'tolerance'), sep=r"\s*\|\s*") +
+    r"\s*\|\s* \S+ (?#Units) \s*\|\s* (?P<converged>No|Yes) \s*\|", re.VERBOSE)
+# Regexp to identify Mulliken population analysis line
+POPN_RE = re.compile(rf"\s*{ATREG}\s*(?P<spin_sep>up:)?" +
+                     labelled_floats((*SHELLS, "total", "charge", "spin")) +
+                     "?"   # Spin is optional
+                     )
+# Matching "dn:" line: index, then one value per shell and the total
+POPN_RE_DN = re.compile(r"\s+\d+\s*dn:" +
+                        labelled_floats((*SHELLS, "total"))
+                        )
+# Regexp for born charges
+BORN_RE = re.compile(rf"\s+{ATREG}(?P<charges>(?:\s*{FNUMBER_RE}){{3}})")
+# MagRes REs
+# Indexed in step with MAGRES_TASK below (see the trailing numbers)
+MAGRES_RE = (
+    # "Chemical Shielding Tensor" 0
+    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso','aniso'))}\s*"
+               rf"(?P<asym>{FNUMBER_RE}|N/A)\s*\|\s*"),
+    # "Chemical Shielding and Electric Field Gradient Tensor" 1
+    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso','aniso'))}\s*"
+               rf"(?P<asym>{FNUMBER_RE}|N/A)"
+               rf"{labelled_floats(('cq', 'eta'))}\s*\|\s*"),
+    # "Electric Field Gradient Tensor" 2
+    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('cq',))}\s*"
+               rf"(?P<asym>{FNUMBER_RE}|N/A)\s*\|\s*"),
+    # "(?:I|Ani)sotropic J-coupling" 3
+    re.compile(rf"\s*\|\**\s*{ATREG}{labelled_floats(('fc','sd','para','dia','tot'))}\s*\|\s*"),
+    # "Hyperfine Tensor" 4
+    re.compile(rf"\s*\|\s*{ATREG}{labelled_floats(('iso',))}\s*\|\s*")
+)
+# MagRes Tasks
+# Task names, in the same order as the patterns in MAGRES_RE
+MAGRES_TASK = (
+    "Chemical Shielding",
+    "Chemical Shielding and Electric Field Gradient",
+    "Electric Field Gradient",
+    "(An)Isotropic J-coupling",
+    "Hyperfine"
+)
+# Regexp to identify block in .phonon or .phonon_dos file
+FRACCOORDS_RE = re.compile(rf"\s*(?P<index>{INTNUMBER_RE}){labelled_floats(('u', 'v', 'w'))}"
+                           rf"\s*(?P<spec>{SPECIES_RE}){labelled_floats(('mass',))}")
+# "q-pt=" line: `qpt` holds three numbers, `pth` one
+PHONON_PHONON_RE = re.compile(rf"""
+    \s+q-pt=\s*{INTNUMBER_RE}\s*
+    {labelled_floats(('qpt', 'pth'), counts=(3, 1))}
+    """, re.VERBOSE)
+# Three numbers captured as n, f and Grad_qf
+PROCESS_PHONON_PHONON_RE = re.compile(labelled_floats(('n', 'f', 'Grad_qf')))
+# Regexp to identify Fermi energies in .bands file
+# NOTE(review): both `a` and `b` are required, so a line carrying a single
+# value does not match - confirm this is intended.
+BANDS_FERMI_RE = re.compile(r"Fermi energ(ies|y) \(in atomic units\)\s*" +
+                            labelled_floats(('a', 'b')))
+# Regexp to identify eigenvalue block in .bands file
+# BANDS_EIGENS_RE =
+# rf"K-point\s+(\d+)\s*(\s*{FNUMBER_RE})\s*({FNUMBER_RE})\s*({FNUMBER_RE})\s*({FNUMBER_RE})"
+# "key:value" or "key=value" entry
+DEVEL_CODE_VAL_RE = r'[A-Za-z0-9_]+[:=]\S+'
+# "<name>: entries... :end<text>" block; the block name is captured
+DEVEL_CODE_BLOCK_RE = rf'([A-Za-z0-9_]+):(?:\s*{DEVEL_CODE_VAL_RE}\s*)*:end\S+'

castep_outputs-0.1/castep_outputs/constants.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""
+Module for constants used in castep_outputs
+"""
+# Electron shell letters; joined into SHELL_RE and used as the per-shell
+# column labels of the population regexes in castep_res
+SHELLS = ('s', 'p', 'd', 'f')
+# Labels for the components of a 3-vector (used for ATDAT3VEC in castep_res)
+FST_D = ('x', 'y', 'z')
+# Six two-index labels - presumably the independent components of a
+# symmetric tensor; not used in the code visible here, confirm against callers
+SND_D = ('xx', 'yy', 'zz', 'yz', 'zx', 'xy')
+# Minimiser names; castep_res upper-cases and joins them into MINIMISERS_RE
+MINIMISERS = ('bfgs', 'lbfgs', 'fire', 'tpsd', 'dmd')
+# Pair-potential type tags (assumed from the name - TODO confirm usage)
+PAIR_POTS = ('LJ', 'BUCK', 'COUL', 'DZ', 'POL', 'BB', 'SHO',
+             'SW', 'MORS', 'POLM', 'LJ_S', 'PES', 'BU_S', 'TIP4', 'QUIP')
+# Short tag -> descriptive name (presumably for the "<-- tag" markers
+# matched by TAG_RE; verify against the parsers)
+TAG_ALIASES = {'E': 'energy',
+               'T': 'temperature',
+               'P': 'pressure',
+               'h': 'lattice_vectors',
+               'hv': 'lattice_velocity',
+               'R': 'position',
+               'V': 'velocity',
+               'F': 'force'}
+# Three-letter code -> descriptive name (presumably for .ts files - confirm)
+TS_TYPES = {"REA": "reagent",
+            "PRO": "product",
+            "TST": "test"}
+# File types handled by the package, without the leading dot. Each one has
+# `--inc-<name>` and `--<name>` command-line options in args.py and a
+# ".<name>" entry in PARSERS.
+CASTEP_OUTPUT_NAMES = (
+    "castep",
+    "param",
+    "cell",
+    "geom",
+    "md",
+    "bands",
+    "hug",
+    "phonon_dos",
+    "efield",
+    "xrd_sf",
+    "elf_fmt",
+    "chdiff_fmt",
+    "pot_fmt",
+    "den_fmt",
+    "elastic",
+    "ts"
+)
+# The same names as file extensions, i.e. with a leading dot
+CASTEP_FILE_FORMATS = tuple(f".{typ}" for typ in CASTEP_OUTPUT_NAMES)