seabirdfilehandler 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of seabirdfilehandler might be problematic; see the advisory details on the package registry page for more information.
- seabirdfilehandler/__init__.py +5 -0
- seabirdfilehandler/dataframe_meta_accessor.py +184 -0
- seabirdfilehandler/datatablefiles.py +886 -0
- seabirdfilehandler/file_collection.py +269 -0
- seabirdfilehandler/logging.yaml +23 -0
- seabirdfilehandler/parameter.py +410 -0
- seabirdfilehandler/seabirdfiles.py +200 -0
- seabirdfilehandler/validation_modules.py +152 -0
- seabirdfilehandler/xmlfiles.py +87 -0
- seabirdfilehandler-0.4.0.dist-info/LICENSE +373 -0
- seabirdfilehandler-0.4.0.dist-info/METADATA +29 -0
- seabirdfilehandler-0.4.0.dist-info/RECORD +13 -0
- seabirdfilehandler-0.4.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
import xmltodict
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
logger = logging.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class SeaBirdFile:
|
|
12
|
+
"""Base class to describe any kind of file generated by the Seasoft
|
|
13
|
+
software. Such a file should be given as input to this class and the
|
|
14
|
+
information it contains should subsequently be extracted and structured
|
|
15
|
+
automatically. Various classes inherit from this one for a more file
|
|
16
|
+
specific behaviour
|
|
17
|
+
|
|
18
|
+
Parameters
|
|
19
|
+
----------
|
|
20
|
+
|
|
21
|
+
Returns
|
|
22
|
+
-------
|
|
23
|
+
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(
|
|
27
|
+
self,
|
|
28
|
+
path_to_file: Path | str,
|
|
29
|
+
only_header: bool = False,
|
|
30
|
+
):
|
|
31
|
+
self.path_to_file = Path(path_to_file)
|
|
32
|
+
self.file_name = self.path_to_file.stem
|
|
33
|
+
self.file_dir = self.path_to_file.parents[0]
|
|
34
|
+
self.timestamp = datetime.now(timezone.utc)
|
|
35
|
+
self.raw_file_data = [] # the text file input
|
|
36
|
+
self.header = [] # the full file header
|
|
37
|
+
self.sbe9_data = [] # device specific information
|
|
38
|
+
self.metadata = {} # non-SeaBird metadata
|
|
39
|
+
self.metadata_list = [] # unstructured metadata for easier export
|
|
40
|
+
self.data_table_description = [] # the column names and other info
|
|
41
|
+
self.data_table_stats = {}
|
|
42
|
+
self.data_table_names_and_spans = []
|
|
43
|
+
self.data_table_misc = {}
|
|
44
|
+
self.sensor_data = []
|
|
45
|
+
self.sensors = {} # xml-parsed sensor data
|
|
46
|
+
self.processing_info = [] # everything after the sensor data
|
|
47
|
+
self.data = [] # the data table
|
|
48
|
+
self.file_data = self.raw_file_data # variable file information
|
|
49
|
+
with self.path_to_file.open("r", encoding="latin-1") as file:
|
|
50
|
+
for line in file:
|
|
51
|
+
self.raw_file_data.append(line)
|
|
52
|
+
if only_header and line.startswith("*END*"):
|
|
53
|
+
break
|
|
54
|
+
self.extract_file_information(only_header)
|
|
55
|
+
if len(self.sensor_data) > 0:
|
|
56
|
+
self.sensors = self.sensor_xml_to_flattened_dict("".join(self.sensor_data))
|
|
57
|
+
|
|
58
|
+
def __str__(self) -> str:
|
|
59
|
+
return "/n".join(self.file_data)
|
|
60
|
+
|
|
61
|
+
def __repr__(self) -> str:
|
|
62
|
+
return str(self.path_to_file.absolute())
|
|
63
|
+
|
|
64
|
+
def __eq__(self, other) -> bool:
|
|
65
|
+
return self.file_data == other.file_data
|
|
66
|
+
|
|
67
|
+
def extract_file_information(self, only_header: bool = False):
|
|
68
|
+
"""Reads and structures all the different information present in the
|
|
69
|
+
file. Lists and Dictionaries are the data structures of choice. Uses
|
|
70
|
+
basic prefix checking to distinguish different header information.
|
|
71
|
+
|
|
72
|
+
Parameters
|
|
73
|
+
----------
|
|
74
|
+
|
|
75
|
+
Returns
|
|
76
|
+
-------
|
|
77
|
+
|
|
78
|
+
"""
|
|
79
|
+
self.metadata_list = []
|
|
80
|
+
past_sensors = False
|
|
81
|
+
for line in self.raw_file_data:
|
|
82
|
+
line_prefix = line[:2]
|
|
83
|
+
if line_prefix == "* ":
|
|
84
|
+
self.header.append(line)
|
|
85
|
+
self.sbe9_data.append(line[2:])
|
|
86
|
+
elif line_prefix == "**":
|
|
87
|
+
self.header.append(line)
|
|
88
|
+
self.metadata_list.append(line[3:])
|
|
89
|
+
elif line_prefix == "# ":
|
|
90
|
+
self.header.append(line)
|
|
91
|
+
if line[2:].strip()[0] == "<":
|
|
92
|
+
self.sensor_data.append(line[2:])
|
|
93
|
+
past_sensors = True
|
|
94
|
+
else:
|
|
95
|
+
if past_sensors:
|
|
96
|
+
self.processing_info.append(line[2:])
|
|
97
|
+
else:
|
|
98
|
+
self.data_table_description.append(line[2:])
|
|
99
|
+
elif line_prefix == "*E":
|
|
100
|
+
self.header.append(line)
|
|
101
|
+
if only_header:
|
|
102
|
+
break
|
|
103
|
+
else:
|
|
104
|
+
self.data.append(line)
|
|
105
|
+
|
|
106
|
+
self.metadata = self.structure_metadata(self.metadata_list)
|
|
107
|
+
self.differentiate_table_description()
|
|
108
|
+
|
|
109
|
+
def differentiate_table_description(self):
|
|
110
|
+
past_spans = False
|
|
111
|
+
pre = []
|
|
112
|
+
column_names = []
|
|
113
|
+
column_value_spans = []
|
|
114
|
+
post = []
|
|
115
|
+
for line in self.data_table_description:
|
|
116
|
+
if line.startswith("name"):
|
|
117
|
+
column_names.append(line.split("=")[1].strip())
|
|
118
|
+
elif line.startswith("span"):
|
|
119
|
+
past_spans = True
|
|
120
|
+
column_value_spans.append(line.split("=")[1].strip())
|
|
121
|
+
else:
|
|
122
|
+
if not past_spans:
|
|
123
|
+
pre.append(line)
|
|
124
|
+
else:
|
|
125
|
+
post.append(line)
|
|
126
|
+
assert len(column_names) == len(column_value_spans)
|
|
127
|
+
self.data_table_stats = {
|
|
128
|
+
line.split("=")[0].strip(): line.split("=")[1].strip() for line in pre
|
|
129
|
+
}
|
|
130
|
+
self.data_table_names_and_spans = [
|
|
131
|
+
(name, span) for name, span in zip(column_names, column_value_spans)
|
|
132
|
+
]
|
|
133
|
+
self.data_table_misc = {line.split("=")[0].strip(): line.split("=")[1].strip() for line in post}
|
|
134
|
+
|
|
135
|
+
def sensor_xml_to_flattened_dict(self, sensor_data: str) -> list[dict] | dict:
|
|
136
|
+
"""Reads the pure xml sensor input and creates a multilevel dictionary,
|
|
137
|
+
dropping the first two dictionaries, as they are single entry only
|
|
138
|
+
|
|
139
|
+
Parameters
|
|
140
|
+
----------
|
|
141
|
+
|
|
142
|
+
Returns
|
|
143
|
+
-------
|
|
144
|
+
|
|
145
|
+
"""
|
|
146
|
+
full_sensor_dict = xmltodict.parse(sensor_data, process_comments=True)
|
|
147
|
+
try:
|
|
148
|
+
sensors = full_sensor_dict["Sensors"]["sensor"]
|
|
149
|
+
except KeyError as error:
|
|
150
|
+
logger.error(f"XML is not formatted as expected: {error}")
|
|
151
|
+
return full_sensor_dict
|
|
152
|
+
else:
|
|
153
|
+
# create a tidied version of the xml-parsed sensor dict
|
|
154
|
+
tidied_sensor_list = []
|
|
155
|
+
for entry in sensors:
|
|
156
|
+
# use comment value as type descriptor
|
|
157
|
+
comment = entry["#comment"]
|
|
158
|
+
split_comment = comment.split(",")
|
|
159
|
+
new_entry = split_comment[1].strip()
|
|
160
|
+
if split_comment[-1] == " 2":
|
|
161
|
+
new_entry += " 2"
|
|
162
|
+
# remove second-level dict
|
|
163
|
+
calibration_info = list(entry.values())[-1]
|
|
164
|
+
try:
|
|
165
|
+
new_dict = {
|
|
166
|
+
"Channel": entry["@Channel"],
|
|
167
|
+
"SensorName": new_entry,
|
|
168
|
+
**calibration_info,
|
|
169
|
+
}
|
|
170
|
+
except TypeError:
|
|
171
|
+
new_dict = {
|
|
172
|
+
"Channel": entry["@Channel"],
|
|
173
|
+
"SensorName": new_entry,
|
|
174
|
+
"Info": calibration_info,
|
|
175
|
+
}
|
|
176
|
+
tidied_sensor_list.append(new_dict)
|
|
177
|
+
return tidied_sensor_list
|
|
178
|
+
|
|
179
|
+
def structure_metadata(self, metadata_list: list) -> dict:
|
|
180
|
+
"""Creates a dictionary to store the metadata that is added by using
|
|
181
|
+
werums dship API.
|
|
182
|
+
|
|
183
|
+
Parameters
|
|
184
|
+
----------
|
|
185
|
+
metadata_list: list :
|
|
186
|
+
a list of the individual lines of metadata found in the file
|
|
187
|
+
|
|
188
|
+
Returns
|
|
189
|
+
-------
|
|
190
|
+
a dictionary of the lines of metadata divided into key-value pairs
|
|
191
|
+
"""
|
|
192
|
+
out_dict = {}
|
|
193
|
+
for line in metadata_list:
|
|
194
|
+
try:
|
|
195
|
+
(key, val) = line.split("=")
|
|
196
|
+
except ValueError:
|
|
197
|
+
out_dict["text"] = line
|
|
198
|
+
else:
|
|
199
|
+
out_dict[key.strip()] = val.strip()
|
|
200
|
+
return out_dict
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
from collections import UserDict
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class CnvValidationList(UserDict):
    """A python representation of the individual validation steps conducted
    in the process of a cnv file creation. These modules are stored in
    a dictionary structure, together with all the variables/metadata/etc.
    given in the header of a cnv file.

    Parameters
    ----------
    cnv_header_val_modules : list
        the raw header lines describing the processing/validation modules

    Returns
    -------

    """

    def __init__(self, cnv_header_val_modules: list):
        self.cnv_header_val_modules = cnv_header_val_modules
        self.data = {}  # UserDict backing store: module name -> info dict
        self.modules = self.extract_individual_modules()
        for module in self.modules:
            module_data = self.create_dict_for_module(module)
            self.data[module] = module_data

    def extract_individual_modules(self) -> list:
        """Return the unique module names found in the header lines.

        A module name is the token before the first underscore; plain
        'file_type' lines do not describe a module and are skipped.
        """
        module_list = []
        for line in self.cnv_header_val_modules:
            module = line.split('_')[0]
            if (module not in module_list) and (line.split()[0] != 'file_type'):
                module_list.append(module)
        return module_list

    def create_dict_for_module(self, module) -> dict:
        """Collect all key/value pairs belonging to one module.

        Handles three special layouts: an 'action' keyword that opens a
        nested dict, a second bracketed value printed after a date, and
        the 'surface_bin' line holding several comma-separated pairs.

        Parameters
        ----------
        module : str
            the module whose header lines should be collected

        Returns
        -------
        dict of the module's key/value pairs
        """
        # TODO: probably need to split this into smaller bits
        out_dict = {}
        inner_action_dict = {}
        action_dict_present = False
        # extract lines corresponding to the module
        for line in self.cnv_header_val_modules:
            if module == line.split('_')[0]:
                # removing the module names from the lines
                shifting_index = len(module) + 1
                line_content = line[shifting_index:]
                # handle the case of the validation methods keyword being
                # 'action', which corresponds to an entire dict of values
                if line_content[:6] == 'action':
                    action_dict_present = True
                    inner_action_dict = self.module_dict_feeder(
                        line_content[6:], inner_action_dict)
                else:
                    # handle the cases where after some date value, another
                    # value is printed inside of [] brackets
                    double_value_list = line_content.split('[')
                    if len(double_value_list) > 1:
                        out_dict = self.module_dict_feeder(
                            double_value_list[1][shifting_index:-2], out_dict)
                        line_content = double_value_list[0]
                    if line_content[:11] == 'surface_bin':
                        # several comma-separated pairs on one line; use a
                        # dedicated variable instead of shadowing the outer
                        # loop variable 'line' (bug in the original)
                        surface_bin_dict = {}
                        for pair in line_content.split(','):
                            self.module_dict_feeder(pair, surface_bin_dict)
                        out_dict['surface_bin'] = surface_bin_dict
                        continue
                    # usual behavior, for 99% cases:
                    # assigning key and value to the module dict
                    out_dict = self.module_dict_feeder(line_content, out_dict)
        if action_dict_present:
            out_dict['action'] = inner_action_dict
        return out_dict

    def module_dict_feeder(self,
                           line: str,
                           dictionary: dict,
                           split_value: str = '='):
        """Split a header line at *split_value* and store the stripped
        key/value pair in *dictionary*.

        Parameters
        ----------
        line: str :
            the raw header line
        dictionary: dict :
            the dict receiving the pair (mutated in place)
        split_value: str :
            (Default value = '=')

        Returns
        -------
        the (possibly updated) dictionary
        """
        # adds the values of a specific header line into a dictionary
        try:
            key, value = line.split(split_value)
        except ValueError:
            # not exactly one separator: leave the dict untouched
            pass
        else:
            dictionary[key.strip()] = value.strip()
        # NOTE: the original returned from a 'finally' block, which also
        # swallowed unexpected exceptions; a plain return keeps the
        # intended behaviour without hiding errors
        return dictionary

    def get(self, module: str) -> dict:
        """Return the info dict for *module*, or an empty dict when the
        module is not present.

        NOTE(review): overrides ``UserDict.get`` without the usual
        ``default`` parameter; kept as-is for caller compatibility.

        Parameters
        ----------
        module: str :
            name of the module to look up

        Returns
        -------
        the module's info dict, or {}
        """
        for element in self.data:
            if str(element) == module:
                return self.data[element]
        else:
            return {}
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class ValidationModule:
    """A single SeaSoft validation module: its input parameters and
    settings, plus a description of its output.

    Intended as a base class — one subclass per concrete module — though
    it is an open question whether that effort is worthwhile.

    Parameters
    ----------
    name :
        the module's name

    Returns
    -------

    """

    def __init__(self, name):
        self.name = name

    def extract_information(self):
        """Placeholder; subclasses are expected to supply the logic."""
        pass
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from collections import UserDict
|
|
3
|
+
import xml.etree.ElementTree as ET
|
|
4
|
+
import json
|
|
5
|
+
import xmltodict
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class XMLFile(UserDict):
|
|
9
|
+
"""
|
|
10
|
+
Parent class for XML and psa representation that loads XML as a
|
|
11
|
+
python-internal tree and as a dict.
|
|
12
|
+
|
|
13
|
+
Parameters
|
|
14
|
+
----------
|
|
15
|
+
path_to_file : Path | str :
|
|
16
|
+
the path to the xml file
|
|
17
|
+
|
|
18
|
+
Returns
|
|
19
|
+
-------
|
|
20
|
+
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
def __init__(self, path_to_file: Path | str):
|
|
24
|
+
self.path_to_file = Path(path_to_file)
|
|
25
|
+
self.file_name = self.path_to_file.name
|
|
26
|
+
self.file_dir = self.path_to_file.parents[0]
|
|
27
|
+
self.input = ""
|
|
28
|
+
with open(self.path_to_file, "r") as file:
|
|
29
|
+
for line in file:
|
|
30
|
+
self.input += line
|
|
31
|
+
self.xml_tree = ET.fromstring(self.input)
|
|
32
|
+
self.data = xmltodict.parse(self.input)
|
|
33
|
+
|
|
34
|
+
def to_xml(self, file_name=None, file_path=None):
|
|
35
|
+
"""
|
|
36
|
+
Writes the dictionary to xml.
|
|
37
|
+
|
|
38
|
+
Parameters
|
|
39
|
+
----------
|
|
40
|
+
file_name : str :
|
|
41
|
+
the original files name (Default value = self.file_name)
|
|
42
|
+
file_path : pathlib.Path :
|
|
43
|
+
the directory of the file (Default value = self.file_dir)
|
|
44
|
+
|
|
45
|
+
Returns
|
|
46
|
+
-------
|
|
47
|
+
|
|
48
|
+
"""
|
|
49
|
+
file_path = self.file_dir if file_path is None else file_path
|
|
50
|
+
file_name = self.file_name if file_name is None else file_name
|
|
51
|
+
with open(Path(file_path).joinpath(file_name), "w") as file:
|
|
52
|
+
file.write(xmltodict.unparse(self.data, pretty=True))
|
|
53
|
+
|
|
54
|
+
def to_json(self, file_name=None, file_path=None):
|
|
55
|
+
"""
|
|
56
|
+
Writes the dictionary representation of the XML input to a json
|
|
57
|
+
file.
|
|
58
|
+
|
|
59
|
+
Parameters
|
|
60
|
+
----------
|
|
61
|
+
file_name : str :
|
|
62
|
+
the original files name (Default value = self.file_name)
|
|
63
|
+
file_path : pathlib.Path :
|
|
64
|
+
the directory of the file (Default value = self.file_dir)
|
|
65
|
+
|
|
66
|
+
Returns
|
|
67
|
+
-------
|
|
68
|
+
|
|
69
|
+
"""
|
|
70
|
+
file_path = self.file_dir if file_path is None else file_path
|
|
71
|
+
file_name = self.file_name if file_name is None else file_name
|
|
72
|
+
with open(Path(file_path).joinpath(file_name + ".json"), "w") as file:
|
|
73
|
+
json.dump(self.data, file, indent=4)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class XMLCONFile(XMLFile):
    """An .XMLCON instrument-configuration file.

    Behaves exactly like :class:`XMLFile`; the redundant ``__init__``
    override that merely delegated to ``super()`` was removed.
    """
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class PsaFile(XMLFile):
    """A Seasoft .psa program-setup file.

    Behaves exactly like :class:`XMLFile`; the redundant ``__init__``
    override that merely delegated to ``super()`` was removed.
    """
|