psdi-data-conversion 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- psdi_data_conversion/__init__.py +11 -0
- psdi_data_conversion/app.py +242 -0
- psdi_data_conversion/bin/linux/atomsk +0 -0
- psdi_data_conversion/bin/linux/c2x +0 -0
- psdi_data_conversion/bin/mac/atomsk +0 -0
- psdi_data_conversion/bin/mac/c2x +0 -0
- psdi_data_conversion/constants.py +185 -0
- psdi_data_conversion/converter.py +459 -0
- psdi_data_conversion/converters/__init__.py +6 -0
- psdi_data_conversion/converters/atomsk.py +32 -0
- psdi_data_conversion/converters/base.py +702 -0
- psdi_data_conversion/converters/c2x.py +32 -0
- psdi_data_conversion/converters/openbabel.py +239 -0
- psdi_data_conversion/database.py +1064 -0
- psdi_data_conversion/dist.py +87 -0
- psdi_data_conversion/file_io.py +216 -0
- psdi_data_conversion/log_utility.py +241 -0
- psdi_data_conversion/main.py +776 -0
- psdi_data_conversion/scripts/atomsk.sh +32 -0
- psdi_data_conversion/scripts/c2x.sh +26 -0
- psdi_data_conversion/security.py +38 -0
- psdi_data_conversion/static/content/accessibility.htm +254 -0
- psdi_data_conversion/static/content/convert.htm +121 -0
- psdi_data_conversion/static/content/convertato.htm +65 -0
- psdi_data_conversion/static/content/convertc2x.htm +65 -0
- psdi_data_conversion/static/content/documentation.htm +94 -0
- psdi_data_conversion/static/content/feedback.htm +53 -0
- psdi_data_conversion/static/content/header-links.html +8 -0
- psdi_data_conversion/static/content/index-versions/header-links.html +8 -0
- psdi_data_conversion/static/content/index-versions/psdi-common-footer.html +99 -0
- psdi_data_conversion/static/content/index-versions/psdi-common-header.html +28 -0
- psdi_data_conversion/static/content/psdi-common-footer.html +99 -0
- psdi_data_conversion/static/content/psdi-common-header.html +28 -0
- psdi_data_conversion/static/content/report.htm +103 -0
- psdi_data_conversion/static/data/data.json +143940 -0
- psdi_data_conversion/static/img/colormode-toggle-dm.svg +3 -0
- psdi_data_conversion/static/img/colormode-toggle-lm.svg +3 -0
- psdi_data_conversion/static/img/psdi-icon-dark.svg +136 -0
- psdi_data_conversion/static/img/psdi-icon-light.svg +208 -0
- psdi_data_conversion/static/img/psdi-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/psdi-logo-lighttext.png +0 -0
- psdi_data_conversion/static/img/social-logo-bluesky-black.svg +4 -0
- psdi_data_conversion/static/img/social-logo-bluesky-white.svg +4 -0
- psdi_data_conversion/static/img/social-logo-instagram-black.svg +1 -0
- psdi_data_conversion/static/img/social-logo-instagram-white.svg +1 -0
- psdi_data_conversion/static/img/social-logo-linkedin-black.png +0 -0
- psdi_data_conversion/static/img/social-logo-linkedin-white.png +0 -0
- psdi_data_conversion/static/img/social-logo-mastodon-black.svg +4 -0
- psdi_data_conversion/static/img/social-logo-mastodon-white.svg +4 -0
- psdi_data_conversion/static/img/social-logo-x-black.svg +3 -0
- psdi_data_conversion/static/img/social-logo-x-white.svg +3 -0
- psdi_data_conversion/static/img/social-logo-youtube-black.png +0 -0
- psdi_data_conversion/static/img/social-logo-youtube-white.png +0 -0
- psdi_data_conversion/static/img/ukri-epsr-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/ukri-epsr-logo-lighttext.png +0 -0
- psdi_data_conversion/static/img/ukri-logo-darktext.png +0 -0
- psdi_data_conversion/static/img/ukri-logo-lighttext.png +0 -0
- psdi_data_conversion/static/javascript/accessibility.js +196 -0
- psdi_data_conversion/static/javascript/common.js +42 -0
- psdi_data_conversion/static/javascript/convert.js +296 -0
- psdi_data_conversion/static/javascript/convert_common.js +252 -0
- psdi_data_conversion/static/javascript/convertato.js +107 -0
- psdi_data_conversion/static/javascript/convertc2x.js +107 -0
- psdi_data_conversion/static/javascript/data.js +176 -0
- psdi_data_conversion/static/javascript/format.js +611 -0
- psdi_data_conversion/static/javascript/load_accessibility.js +89 -0
- psdi_data_conversion/static/javascript/psdi-common.js +177 -0
- psdi_data_conversion/static/javascript/report.js +381 -0
- psdi_data_conversion/static/styles/format.css +147 -0
- psdi_data_conversion/static/styles/psdi-common.css +705 -0
- psdi_data_conversion/templates/index.htm +114 -0
- psdi_data_conversion/testing/__init__.py +5 -0
- psdi_data_conversion/testing/constants.py +12 -0
- psdi_data_conversion/testing/conversion_callbacks.py +394 -0
- psdi_data_conversion/testing/conversion_test_specs.py +208 -0
- psdi_data_conversion/testing/utils.py +522 -0
- psdi_data_conversion-0.0.23.dist-info/METADATA +663 -0
- psdi_data_conversion-0.0.23.dist-info/RECORD +81 -0
- psdi_data_conversion-0.0.23.dist-info/WHEEL +4 -0
- psdi_data_conversion-0.0.23.dist-info/entry_points.txt +2 -0
- psdi_data_conversion-0.0.23.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,87 @@
|
|
1
|
+
"""@file psdi_data_conversion/dist.py
|
2
|
+
|
3
|
+
Created 2025-02-25 by Bryan Gillis.
|
4
|
+
|
5
|
+
Functions and utilities related to handling multiple user OSes and distributions
|
6
|
+
"""
|
7
|
+
|
8
|
+
import os
|
9
|
+
import shutil
|
10
|
+
import psdi_data_conversion
|
11
|
+
import sys
|
12
|
+
|
13
|
+
# Labels for each platform (which we use for the folder in this project), and the head of the name each platform will
# have in `sys.platform`

LINUX_LABEL = "linux"
LINUX_NAME_HEAD = "linux"

WINDOWS_LABEL = "windows"
WINDOWS_NAME_HEAD = "win"

MAC_LABEL = "mac"
MAC_NAME_HEAD = "darwin"

# Maps each platform label (used as the per-platform subfolder name under `BIN_DIR`) to the prefix that
# `sys.platform` starts with on that platform; iterated by `get_dist()` to identify the current platform
D_DIST_NAME_HEADS = {LINUX_LABEL: LINUX_NAME_HEAD,
                     WINDOWS_LABEL: WINDOWS_NAME_HEAD,
                     MAC_LABEL: MAC_NAME_HEAD, }


# Determine the fully-qualified binary directory when this module is first imported
# (`psdi_data_conversion.__path__[0]` is the installed package directory, so this points at the packaged
# `bin` folder, which contains one subdirectory per platform label)
BIN_DIR: str = os.path.join(psdi_data_conversion.__path__[0], "bin")
|
32
|
+
|
33
|
+
|
34
|
+
def get_dist():
    """Determine the label for the user's platform, or None if the platform is unsupported
    """
    for dist_label, platform_prefix in D_DIST_NAME_HEADS.items():
        # On each supported platform, `sys.platform` starts with a known prefix
        if sys.platform.startswith(platform_prefix):
            return dist_label
    return None
|
43
|
+
|
44
|
+
|
45
|
+
def _get_local_bin(bin_name: str) -> str | None:
|
46
|
+
"""Searches for a binary in the user's path
|
47
|
+
"""
|
48
|
+
bin_path = shutil.which(bin_name)
|
49
|
+
if bin_path:
|
50
|
+
return bin_path
|
51
|
+
return None
|
52
|
+
|
53
|
+
|
54
|
+
def get_bin_path(bin_name: str) -> str | None:
    """Find an appropriate binary for the user's platform, if one exists. The binaries bundled
    with this package are checked first, then the user's $PATH.

    Parameters
    ----------
    bin_name : str
        The unqualified name of the binary

    Returns
    -------
    str | None
        If an appropriate binary exists for the user's platform, a fully-qualified path to it. Otherwise, None
    """

    dist = get_dist()
    if dist:
        # The platform is supported, so check whether this package bundles a binary for it
        packaged_bin_path = os.path.join(BIN_DIR, dist, bin_name)
        if os.path.isfile(packaged_bin_path):
            return packaged_bin_path

    # Either the platform is unsupported or no binary is bundled for it, so fall back to the user's $PATH
    return _get_local_bin(bin_name)
|
81
|
+
|
82
|
+
|
83
|
+
def bin_exists(bin_name: str) -> bool:
    """Report whether a binary of the given name is available for the user's platform,
    either bundled with this package or on the user's $PATH
    """
    bin_path = get_bin_path(bin_name)
    return bin_path is not None
|
@@ -0,0 +1,216 @@
|
|
1
|
+
"""@file psdi_data_conversion/file_io.py
|
2
|
+
|
3
|
+
Created 2025-02-11 by Bryan Gillis.
|
4
|
+
|
5
|
+
Functions and classes related to general filesystem input/output
|
6
|
+
"""
|
7
|
+
|
8
|
+
import glob
|
9
|
+
import os
|
10
|
+
from shutil import copyfile, make_archive, unpack_archive
|
11
|
+
from tempfile import TemporaryDirectory
|
12
|
+
|
13
|
+
from psdi_data_conversion import constants as const
|
14
|
+
|
15
|
+
|
16
|
+
def is_archive(filename: str) -> bool:
    """Check a file's extension to determine whether or not it's an archive
    """
    # `str.endswith` accepts a tuple of candidate suffixes, testing them all in one call
    return filename.endswith(tuple(const.L_ALL_ARCHIVE_EXTENSIONS))
|
20
|
+
|
21
|
+
|
22
|
+
def is_supported_archive(filename: str) -> bool:
    """Check a file's extension to determine whether or not it's an archive of a supported type
    """
    # Iterating the dict yields its keys, which are the supported archive extensions
    return filename.endswith(tuple(const.D_SUPPORTED_ARCHIVE_FORMATS))
|
26
|
+
|
27
|
+
|
28
|
+
def split_archive_ext(filename: str) -> tuple[str, str]:
    """Split a filename into a base and an extension, treating compound ".tar.*"
    extensions as a single extension
    """
    base, ext = os.path.splitext(filename)
    if base.endswith(const.TAR_EXTENSION):
        # e.g. "foo.tar.gz" -> base "foo", ext ".tar.gz"
        base, tar_ext = os.path.splitext(base)
        return base, tar_ext + ext
    return base, ext
|
36
|
+
|
37
|
+
|
38
|
+
def unpack_zip_or_tar(archive_filename: str,
                      extract_dir: str = ".") -> list[str]:
    """Unpack a zip or tar archive and return a list of the extracted files

    Parameters
    ----------
    archive_filename : str
        Filename of the archive to unpack, either relative or fully-qualified
    extract_dir : str
        The directory to extract the contents of the archive to. By default, the current working directory will
        be used

    Returns
    -------
    list[str]
        Fully-qualified paths to the extracted files. These are found by comparing the directory contents before
        and after extraction, so this is NOT thread-safe unless each thread uses its own extraction directory

    Raises
    ------
    ValueError
        If the archive's extension marks it as an unsupported or unrecognised archive type
    """

    qual_archive_filename = os.path.realpath(archive_filename)

    def _ends_with_any(l_extensions):
        # Convenience check for whether the archive carries one of a set of extensions
        return any(qual_archive_filename.endswith(ext) for ext in l_extensions)

    # Classify the archive by extension and set up the unpacking arguments needed to do it securely
    unpack_kwargs: dict[str, str] = {}

    if _ends_with_any(const.L_UNSUPPORTED_ARCHIVE_EXTENSIONS):
        raise ValueError(f"The archive file '{qual_archive_filename}' is of an unsupported archive type")
    elif _ends_with_any(const.D_ZIP_FORMATS):
        # Zip extraction doesn't accept the "filter" kwarg but applies comparable path-safety measures by
        # default; this may prompt a warning, which can be ignored
        pass
    elif _ends_with_any(const.D_TAR_FORMATS):
        # For tar archives, the "data" filter prevents members from being unpacked outside the base directory
        unpack_kwargs["filter"] = "data"
    else:
        raise ValueError(f"The archive file '{qual_archive_filename}' is not recognised as a valid archive type")

    # Snapshot the target directory's contents before and after unpacking; the new entries are what the
    # archive produced
    s_contents_before = set(os.listdir(extract_dir))
    unpack_archive(qual_archive_filename, extract_dir=extract_dir, **unpack_kwargs)
    s_contents_after = set(os.listdir(extract_dir))

    # For each new entry, collect the file itself, or - if it's a directory - its contents via glob
    # NOTE(review): "**" without recursive=True only matches one level deep - confirm nested archives aren't expected
    l_new_files: list[str] = []
    for entry in s_contents_after.difference(s_contents_before):
        qual_entry = os.path.join(extract_dir, entry)
        if os.path.isfile(qual_entry):
            l_new_files.extend(glob.glob(qual_entry))
        else:
            l_new_files.extend(glob.glob(os.path.join(qual_entry, "**")))

    # Sort the file list for consistency in output
    l_new_files.sort(key=lambda s: s.lower())

    return l_new_files
|
105
|
+
|
106
|
+
|
107
|
+
def pack_zip_or_tar(archive_filename: str,
                    l_filenames: list[str],
                    archive_format: str | None = None,
                    source_dir: str = ".",
                    cleanup=False) -> str:
    """Pack a list of files into a zip or tar archive

    Parameters
    ----------
    archive_filename : str
        The desired name of the output archive to create, either fully-qualified or relative to the current directory
    l_filenames : list[str]
        List of files to be archived, either fully-qualified or relative to `source_dir`. If provided fully-qualified,
        they will be placed in the root directory of the archive
    archive_format : str | None
        The archive format (as understood by `shutil.make_archive`) or archive extension to use. If None (default),
        it will be determined from the extension of `archive_filename`
    source_dir : str, optional
        Path to directory containing the files to be archived (default current directory). If filenames are provided
        fully-qualified, this is ignored
    cleanup : bool, optional
        If True, source files will be deleted after the archive is successfully created

    Returns
    -------
    str
        The name of the created archive file

    Raises
    ------
    ValueError
        If `archive_filename` is not of a valid format
    FileNotFoundError
        If one of the listed files does not exist
    """

    if not archive_format and not is_supported_archive(archive_filename):
        raise ValueError(f"Desired archive filename '{archive_filename}' is not of a supported type. Supported types "
                         f"are: {const.D_SUPPORTED_ARCHIVE_FORMATS.keys()}")

    # It's supported, so determine the specific format, and provide it and the base of the filename in the forms that
    # `make_archive` wants
    if archive_format is None:
        for _ext, _format in const.D_SUPPORTED_ARCHIVE_FORMATS.items():
            if archive_filename.endswith(_ext):
                archive_format = _format
                archive_root_filename = split_archive_ext(archive_filename)[0]
                break
        # Check that the format was found
        if archive_format is None:
            raise AssertionError("Invalid execution path entered - filename wasn't found with a valid archive "
                                 "extension, but it did pass the `is_supported_archive` check")
    else:
        archive_root_filename = archive_filename

    # Check that the provided archive format is valid, and determine the appropriate extension for the filename
    archive_extension: str | None = None
    for _ext, _format in const.D_SUPPORTED_ARCHIVE_FORMATS.items():
        if archive_format == _ext:
            # Extension was provided instead of the format; we can work with that
            archive_extension = archive_format
            archive_format = _format
            break
        elif archive_format == _format:
            archive_extension = _ext
            break
    if archive_extension is None:
        raise ValueError(f"Invalid archive format '{archive_format}'. Valid formats are: "
                         f"{const.D_SUPPORTED_ARCHIVE_FORMATS.keys()}")

    # Check if the root filename already contained the extension so we don't add it again, and strip it from the
    # root
    if archive_root_filename.endswith(archive_extension):
        archive_filename = archive_root_filename
        archive_root_filename = archive_root_filename[:-len(archive_extension)]
    else:
        archive_filename = archive_root_filename+archive_extension

    with TemporaryDirectory() as root_dir:

        # Copy all files from the source dir to the root dir, which is what will be packed

        l_files_to_cleanup: list[str] = []

        for filename in l_filenames:

            # Check if the filename is fully-qualified, and copy it from wherever it's found
            if os.path.isfile(filename):
                copyfile(filename, os.path.join(root_dir, os.path.basename(filename)))
                l_files_to_cleanup.append(filename)
                continue

            qualified_filename = os.path.join(source_dir, filename)
            if os.path.isfile(qualified_filename):
                copyfile(qualified_filename, os.path.join(root_dir, os.path.basename(filename)))
                l_files_to_cleanup.append(qualified_filename)
            else:
                # Name the offending file in the error so the caller can tell which one was missing
                raise FileNotFoundError(f"File '{filename}' could not be found, either fully-qualified or relative to "
                                        f"{source_dir}")

        make_archive(archive_root_filename,
                     format=archive_format,
                     root_dir=root_dir)

        if cleanup:
            # Best-effort removal of the source files - failure to delete one is not fatal
            for filename in l_files_to_cleanup:
                try:
                    os.remove(filename)
                except OSError:
                    pass

    # Return the name of the created file
    return archive_filename
|
@@ -0,0 +1,241 @@
|
|
1
|
+
"""@file psdi-data-conversion/psdi_data_conversion/logging.py
|
2
|
+
|
3
|
+
Created 2024-12-09 by Bryan Gillis.
|
4
|
+
|
5
|
+
Functions and classes related to logging and other messaging for the user
|
6
|
+
"""
|
7
|
+
|
8
|
+
from datetime import datetime
|
9
|
+
import logging
|
10
|
+
import os
|
11
|
+
import re
|
12
|
+
import sys
|
13
|
+
|
14
|
+
from psdi_data_conversion import constants as const
|
15
|
+
|
16
|
+
D_LOG_LEVELS = {"notset": logging.NOTSET,
|
17
|
+
"debug": logging.DEBUG,
|
18
|
+
"info": logging.INFO,
|
19
|
+
"warn": logging.WARNING,
|
20
|
+
"warning": logging.WARNING,
|
21
|
+
"error": logging.ERROR,
|
22
|
+
"critical": logging.CRITICAL,
|
23
|
+
"fatal": logging.CRITICAL}
|
24
|
+
|
25
|
+
|
26
|
+
def get_log_level_from_str(log_level_str: str | None) -> int:
|
27
|
+
"""Gets a log level, as one of the literal ints defined in the `logging` module, from the string representation
|
28
|
+
of it.
|
29
|
+
"""
|
30
|
+
|
31
|
+
if not log_level_str:
|
32
|
+
return logging.NOTSET
|
33
|
+
try:
|
34
|
+
return D_LOG_LEVELS[log_level_str.lower()]
|
35
|
+
except KeyError:
|
36
|
+
raise ValueError(f"Unrecognised logging level: '{log_level_str}'. Allowed levels are (case-insensitive): "
|
37
|
+
f"{list(D_LOG_LEVELS.keys())}")
|
38
|
+
|
39
|
+
|
40
|
+
def set_up_data_conversion_logger(name=const.LOCAL_LOGGER_NAME,
                                  local_log_file=None,
                                  local_logger_level=const.DEFAULT_LOCAL_LOGGER_LEVEL,
                                  local_logger_raw_output=False,
                                  extra_loggers=None,
                                  suppress_global_handler=False,
                                  stdout_output_level=None,
                                  mode="a"):
    """Create a logger with the provided name and set it up with the desired options

    Parameters
    ----------
    name : str | None
        The desired logging channel for this logger. Should be a period-separated string such as "input.files" etc.
        By default "data-conversion"
    local_log_file : str | None
        The file to log to for local logs. If None, will not set up local logging
    local_logger_level : int
        The logging level for the local logger, using one of the levels defined in the base Python `logging`
        module, by default `logging.INFO`
    local_logger_raw_output : bool
        If set to True, output to the local logger will be logged with no formatting, exactly as input. Otherwise
        (default) it will include a timestamp and indicate the logging level
    extra_loggers : Iterable[Tuple[str, int, bool, str]]
        A list of one or more tuples of the format (`filename`, `level`, `raw_output`, `mode`) specifying these
        options (defined the same as for the local logger) for one or more additional logging channels.
    suppress_global_handler : bool
        If set to True, will not add the handler which sends all logs to the global log file, default False
    stdout_output_level : int | None
        The logging level (using one of the levels defined in the base Python `logging` module) at and above which to
        log output to stdout. If None (default), nothing will be sent to stdout
    mode : str
        Either "a" for append to existing log or "w" to overwrite existing log, default "a"

    Returns
    -------
    Logger
    """

    # Construct a standalone Logger directly (rather than via `logging.getLogger`) before attaching any handlers
    logger = logging.Logger(name)

    # Each channel spec is (filename, level, raw_output, write_mode); the global and local channels come first,
    # followed by any caller-supplied extras
    l_channel_specs = [(const.GLOBAL_LOG_FILENAME, const.GLOBAL_LOGGER_LEVEL, False, "a"),
                       (local_log_file, local_logger_level, local_logger_raw_output, mode)]
    if extra_loggers is not None:
        l_channel_specs.extend(extra_loggers)

    # Set up filehandlers for each requested channel
    for filename, level, raw_output, write_mode in l_channel_specs:
        # Skip channels with no level set, and the global channel when it's suppressed
        is_suppressed_global = suppress_global_handler and filename == const.GLOBAL_LOG_FILENAME
        if level is None or is_suppressed_global:
            continue
        _add_filehandler_to_logger(logger, filename, level, raw_output, write_mode)

    # Set up stdout output if desired
    if stdout_output_level is not None:

        # Reuse an existing stdout handler if the logger already has one, otherwise add a new one
        stream_handler = next((handler for handler in logger.handlers
                               if isinstance(handler, logging.StreamHandler) and handler.stream == sys.stdout),
                              None)
        if stream_handler is None:
            stream_handler = logging.StreamHandler(sys.stdout)
            logger.addHandler(stream_handler)

        stream_handler.setLevel(stdout_output_level)
        # Loosen the logger's own level if needed so messages actually reach this handler
        if stdout_output_level < logger.level or logger.level == logging.NOTSET:
            logger.setLevel(stdout_output_level)

        stream_handler.setFormatter(logging.Formatter(const.LOG_FORMAT, datefmt=const.TIMESTAMP_FORMAT))

    return logger
|
117
|
+
|
118
|
+
|
119
|
+
def _add_filehandler_to_logger(logger, filename, level, raw_output, mode):
|
120
|
+
"""Private function to add a file handler to a logger only if the logger doesn't already have a handler for that
|
121
|
+
file, and set the logging level for the handler
|
122
|
+
"""
|
123
|
+
# Skip if filename is None
|
124
|
+
if filename is None:
|
125
|
+
return
|
126
|
+
|
127
|
+
file_handler = logging.FileHandler(filename, mode)
|
128
|
+
|
129
|
+
# Check if the file to log to is already in the logger's filehandlers
|
130
|
+
handler_already_present = False
|
131
|
+
for handler in logger.handlers:
|
132
|
+
if (isinstance(handler, logging.FileHandler) and
|
133
|
+
handler.baseFilename == os.path.abspath(filename)):
|
134
|
+
handler_already_present = True
|
135
|
+
file_handler = handler
|
136
|
+
break
|
137
|
+
|
138
|
+
# Add a FileHandler for the file if it's not already present, make sure the path to the log file exists,
|
139
|
+
# and set the logging level
|
140
|
+
if not handler_already_present:
|
141
|
+
filename_loc = os.path.split(filename)[0]
|
142
|
+
if filename_loc != "":
|
143
|
+
os.makedirs(filename_loc, exist_ok=True)
|
144
|
+
|
145
|
+
file_handler = logging.FileHandler(filename)
|
146
|
+
|
147
|
+
logger.addHandler(file_handler)
|
148
|
+
|
149
|
+
# Set or update the logging level and formatter for the handler
|
150
|
+
if level is not None:
|
151
|
+
file_handler.setLevel(level)
|
152
|
+
if level < logger.level or logger.level == logging.NOTSET:
|
153
|
+
logger.setLevel(level)
|
154
|
+
if not raw_output:
|
155
|
+
file_handler.setFormatter(logging.Formatter(const.LOG_FORMAT, datefmt=const.TIMESTAMP_FORMAT))
|
156
|
+
|
157
|
+
return
|
158
|
+
|
159
|
+
|
160
|
+
def get_date():
    """Retrieve the current date as a string

    Returns
    -------
    str
        Current date in the format YYYY-MM-DD
    """
    now = datetime.today()
    # Zero-pad month and day to two digits
    return f"{now.year}-{now.month:02d}-{now.day:02d}"
|
170
|
+
|
171
|
+
|
172
|
+
def get_time():
    """Retrieve the current time as a string

    Returns
    -------
    str
        Current time in the format HH:MM:SS
    """
    now = datetime.today()
    # Zero-pad each component to two digits
    return f"{now.hour:02d}:{now.minute:02d}:{now.second:02d}"
|
182
|
+
|
183
|
+
|
184
|
+
def get_date_time():
    """Retrieve the current date and time as a string

    Returns
    -------
    str
        Current date and time in the format YYYY-MM-DD HH:MM:SS
    """
    return f"{get_date()} {get_time()}"
|
193
|
+
|
194
|
+
|
195
|
+
def format(time):
    """Zero-pad an element of a date or time (month, day, hours, minutes or seconds) to two digits.

    NOTE: This intentionally shadows the builtin `format` within this module.

    Parameters
    ----------
    time : str or int
        Digit(s) indicating an element of a date or time

    Returns
    -------
    str
        2-digit value indicating the element
    """
    as_str = str(time)
    # Only a single-character value gets padded; anything else is returned unchanged
    return "0" + as_str if len(as_str) == 1 else as_str
|
214
|
+
|
215
|
+
|
216
|
+
def string_with_placeholders_matches(test_pattern: str, parent_str: str) -> bool:
    """An advanced version of "`test_pattern` in `parent_str`" which allows `test_pattern` to contain
    placeholders (e.g. a string like "The file name is: {file}").

    The pattern is split into the literal segments between the placeholders, and the match succeeds if
    every segment appears somewhere in `parent_str`. This can produce false positives when the segments
    are all present but far apart or out of order, so don't use this method where false positives must
    be strictly avoided.

    Parameters
    ----------
    test_pattern : str
        The pattern to check if it's contained in `parent_str`
    parent_str : str
        The string to search in for `test_pattern`

    Returns
    -------
    bool
        True if `test_pattern` appears to be in `parent_str` with some placeholders filled in, False otherwise
    """

    # Non-greedy "{...}" matches strip out the placeholders, leaving only the literal segments
    l_literal_segments = re.split(r"\{.*?\}", test_pattern)

    return all(segment in parent_str for segment in l_literal_segments)
|