seabirdfilehandler 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of seabirdfilehandler might be problematic.

Files changed (17)
  1. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/PKG-INFO +1 -1
  2. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/pyproject.toml +1 -1
  3. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/__init__.py +2 -2
  4. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/cnvfile.py +20 -25
  5. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/datafiles.py +8 -8
  6. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/file_collection.py +26 -0
  7. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/parameter.py +15 -11
  8. seabirdfilehandler-0.7.0/src/seabirdfilehandler/processing_steps.py +196 -0
  9. seabirdfilehandler-0.6.0/src/seabirdfilehandler/validation_modules.py +0 -155
  10. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/LICENSE +0 -0
  11. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/README.md +0 -0
  12. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/bottlefile.py +0 -0
  13. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/bottlelogfile.py +0 -0
  14. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/geomar_ctd_file_parser.py +0 -0
  15. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/hexfile.py +0 -0
  16. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/utils.py +0 -0
  17. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/xmlfiles.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: seabirdfilehandler
- Version: 0.6.0
+ Version: 0.7.0
  Summary: Library of parsers to interact with SeaBird CTD files.
  Keywords: CTD,parser,seabird,data
  Author: Emil Michels

pyproject.toml
@@ -20,7 +20,7 @@ urls.homepage = "https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler
  urls.repository = "https://git.io-warnemuende.de/CTD-Software/SeabirdFileHandler"
  urls.documentation = "https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler"
  dynamic = []
- version = "0.6.0"
+ version = "0.7.0"

  [tool.poetry]

src/seabirdfilehandler/__init__.py
@@ -1,10 +1,10 @@
  from .datafiles import *
  from .bottlefile import *
  from .bottlelogfile import *
+ from .parameter import *
+ from .processing_steps import *
  from .cnvfile import *
  from .xmlfiles import *
  from .hexfile import *
- from .validation_modules import *
  from .geomar_ctd_file_parser import *
- from .parameter import *
  from .file_collection import *

src/seabirdfilehandler/cnvfile.py
@@ -3,9 +3,7 @@ from datetime import datetime, timedelta
  import pandas as pd
  import numpy as np
  import logging
- from seabirdfilehandler import DataFile
- from seabirdfilehandler.parameter import Parameters
- from seabirdfilehandler.validation_modules import CnvValidationList
+ from seabirdfilehandler import DataFile, Parameters, CnvProcessingSteps

  logger = logging.getLogger(__name__)

@@ -58,11 +56,11 @@ class CnvFile(DataFile):
          coordinate_columns: bool = False,
      ):
          super().__init__(path_to_file, only_header)
-         self.validation_modules = self.obtaining_validation_modules()
-         self.start_time = self.reading_start_time()
+         self.processing_steps = self.get_processing_step_infos()
          self.parameters = Parameters(
              self.data, self.data_table_description, only_header
          )
+         self.start_time = self.reading_start_time()
          if create_dataframe:
              self.df = self.create_dataframe()
          if absolute_time_calculation:
@@ -79,21 +77,15 @@ class CnvFile(DataFile):
          self.df = self.parameters.get_pandas_dataframe()
          return self.df

-     def reading_start_time(
-         self,
-         time_source: str = "System UTC",
-     ) -> datetime | None:
+     def reading_start_time(self) -> datetime | None:
          """
          Extracts the Cast start time from the metadata header.
          """
-         for line in self.sbe9_data:
-             if line.startswith(time_source):
-                 start_time = line.split("=")[1]
-                 start_time = datetime.strptime(
-                     start_time, " %b %d %Y %H:%M:%S "
-                 )
-                 return start_time
-         return None
+         start_time = (
+             self.parameters.data_table_misc["start_time"].split("[")[0].strip()
+         )
+         start_time = datetime.strptime(start_time, "%b %d %Y %H:%M:%S")
+         return start_time

      def absolute_time_calculation(self) -> bool:
          """
@@ -134,13 +126,12 @@ class CnvFile(DataFile):
              return True
          return False

-     def obtaining_validation_modules(self) -> CnvValidationList:
+     def get_processing_step_infos(self) -> CnvProcessingSteps:
          """
          Collects the individual validation modules and their respective
          information, usually present in key-value pairs.
          """
-         validation_modules = self.processing_info
-         return CnvValidationList(validation_modules)
+         return CnvProcessingSteps(self.processing_info)

      def df2cnv(self, df: pd.DataFrame | None = None) -> list:
          """
@@ -212,6 +203,7 @@ class CnvFile(DataFile):
      def _update_header(self):
          """Re-creates the cnv header."""
          self.data_table_description = self.parameters._form_data_table_info()
+         self.processing_info = self.processing_steps._form_processing_info()
          self.header = [
              *[f"* {data}" for data in self.sbe9_data[:-1]],
              *[f"** {data}" for data in self.metadata_list],
@@ -221,6 +213,8 @@ class CnvFile(DataFile):
              *[f"# {data}" for data in self.processing_info],
              "*END*\n",
          ]
+         self.data = self.array2cnv()
+         self.file_data = [*self.header, *self.data]

      def add_processing_metadata(self, addition: str | list):
          """
@@ -232,13 +226,14 @@ class CnvFile(DataFile):
              the new information line

          """
-         # TODO: use CnvprocessingList here
          if isinstance(addition, str):
              addition = [addition]
-         for line in addition:
-             self.file_data.append(line)
-             # add the new info line *before* the 'file_type = ascii' line
-             self.processing_info.insert(-1, line)
+         self.processing_steps.append(
+             self.processing_steps.create_step_instance(
+                 module=addition[0].split("_")[0], raw_info=addition
+             )
+         )
+         self._update_header()

      def add_station_and_event_column(self) -> bool:
          """

src/seabirdfilehandler/datafiles.py
@@ -77,27 +77,27 @@ class DataFile:
          file. Lists and Dictionaries are the data structures of choice. Uses
          basic prefix checking to distinguish different header information.
          """
-         past_sensors = False
+         past_bad_flag = False
          with self.path_to_file.open("r", encoding="latin-1") as file:
              for line in file:
                  self.raw_file_data.append(line)
-                 line_prefix = line[:2]
-                 if line_prefix == "* ":
+                 if line.startswith("* "):
                      self.header.append(line)
                      self.sbe9_data.append(line[2:])
-                 elif line_prefix == "**":
+                 elif line.startswith("**"):
                      self.header.append(line)
                      self.metadata_list.append(line[3:])
-                 elif line_prefix == "# ":
+                 elif line.startswith("#"):
                      self.header.append(line)
-                     if line[2:].strip()[0] == "<":
+                     if line[2:].strip().startswith("<"):
                          self.sensor_data.append(line[2:])
-                         past_sensors = True
                      else:
-                         if past_sensors:
+                         if past_bad_flag:
                              self.processing_info.append(line[2:])
                          else:
                              self.data_table_description.append(line[2:])
+                         if line.startswith("# bad_flag"):
+                             past_bad_flag = True
                  else:
                      if line.startswith("*END*"):
                          self.header.append(line)
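
For orientation, a cnv header routes through these prefixes roughly as in the sketch below (all values invented); the '# bad_flag' line is what now separates the data-table description from the later processing info:

    # Illustrative cnv header lines and where the parser sorts them:
    header_lines = [
        "* Sea-Bird SBE 9 Data File:",                 # -> sbe9_data
        "** Station: TF0271",                          # -> metadata_list
        "# name 0 = prDM: Pressure, Digiquartz [db]",  # -> data_table_description
        "# bad_flag = -9.990e-29",                     # -> flips past_bad_flag
        "# datcnv_date = Jan 07 2024 09:20:01",        # -> processing_info (after bad_flag)
        "*END*",
    ]
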

src/seabirdfilehandler/file_collection.py
@@ -3,6 +3,7 @@ from pathlib import Path
  import logging
  from collections import UserList
  from typing import Callable, Type
+ import warnings
  import pandas as pd
  import numpy as np
  from seabirdfilehandler import (
@@ -353,12 +354,15 @@ class CnvCollection(FileCollection):
          *args,
          **kwargs,
      ):
+         if len(args) < 3 and "file_suffix" not in kwargs:
+             kwargs["file_suffix"] = "cnv"
          super().__init__(*args, **kwargs)
          self.data_meta_info = self.get_data_table_meta_info()
          self.sensor_data = get_unique_sensor_data(
              [file.sensors for file in self.data]
          )
          self.array = self.get_array()
+         self.processing_steps = self.get_processing_steps()

      def get_dataframes(
          self,
@@ -430,6 +434,26 @@ class CnvCollection(FileCollection):
              [file.parameters.create_full_ndarray() for file in self.data]
          )

+     def get_processing_steps(self) -> list:
+         """
+         Checks the processing steps in the different files for consistency.
+         Returns the steps of the first file, which should be the same as for
+         all other files.
+ 
+         Returns
+         -------
+         A list of ProcessingSteps.
+         """
+         individual_processing_steps = [
+             file.processing_steps for file in self.data
+         ]
+         for index, step_info in enumerate(individual_processing_steps):
+             if step_info != individual_processing_steps[0]:
+                 message = f"The processing steps conducted on these files differ. First occurence between index 0 and {index}."
+                 warnings.warn(message)
+                 logger.warning(message)
+         return individual_processing_steps[0]
+ 

  class HexCollection(FileCollection):
      """
@@ -445,6 +469,8 @@ class HexCollection(FileCollection):
          path_to_xmlcons: Path | str = "",
          **kwargs,
      ):
+         if len(args) < 3 and "file_suffix" not in kwargs:
+             kwargs["file_suffix"] = "hex"
          # force only_metadata, as the hex data cannot be put into a DataFrame
          kwargs["only_metadata"] = True
          super().__init__(*args, **kwargs)

src/seabirdfilehandler/parameter.py
@@ -104,11 +104,19 @@ class Parameters(UserDict):
          )
          parameter_dict = {}
          list_of_metadata_shortnames = list(metadata.keys())
+         # if column number and metadata number is different, we are propably
+         # working with duplicate_columns and will drop the duplicates
          if self.full_data_array.shape[1] != len(list_of_metadata_shortnames):
-             raise AssertionError(
-                 f"Array column width {
-                     self.full_data_array.shape[1]
-                 } does not fit metadata length {len(metadata)}"
+             self.full_data_array = np.delete(
+                 self.full_data_array, self.duplicate_columns, 1
+             )
+             assert self.full_data_array.shape[1] == len(
+                 list_of_metadata_shortnames
+             )
+             # rewrite the column number in the metadata header
+             self.data_table_stats["nquan"] = str(
+                 int(self.data_table_stats["nquan"])
+                 - len(self.duplicate_columns)
              )
          for i in range(self.full_data_array.shape[1]):
              column_data = self.full_data_array[:, i]
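
The new branch drops duplicate columns with numpy's delete along axis 1 instead of raising. A standalone sketch of that operation; the array shape and the duplicate index are made up:

    import numpy as np

    data = np.arange(12).reshape(3, 4)
    duplicate_columns = [2]                       # hypothetical duplicate column index
    data = np.delete(data, duplicate_columns, 1)  # axis=1 removes whole columns
    assert data.shape == (3, 3)
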
@@ -286,14 +294,10 @@ class Parameters(UserDict):
          assert data.shape[1] == len(columns)
          df = pd.DataFrame(data=data, columns=columns)
          for column in df.columns:
-             if column.lower() not in [
-                 "latitude",
-                 "longitude",
-                 "event",
-                 "cast",
-                 "flag",
-             ]:
+             try:
                  df[column].astype("float64")
+             except (TypeError, ValueError):
+                 df[columns].astype("str")
          return df

      def with_name_type(self, name_type: str = "shortname"):

src/seabirdfilehandler/processing_steps.py (new file)
@@ -0,0 +1,196 @@
+ from __future__ import annotations
+ from collections import UserList
+ 
+ 
+ class CnvProcessingSteps(UserList):
+     """
+     A python representation of the individual processing steps conducted
+     in the process of a cnv file creation. These modules are stored in
+     a dictionary structure, together with all the variables/metadata/etc.
+     given in the header of a cnv file.
+ 
+     Parameters
+     ----------
+ 
+     Returns
+     -------
+ 
+     """
+ 
+     def __init__(self, raw_processing_info: list):
+         self.modules = self.extract_individual_modules(raw_processing_info)
+         self.data = []
+         for module in self.modules:
+             self.data.append(
+                 self.create_step_instance(module, raw_processing_info)
+             )
+ 
+     def _form_processing_info(self) -> list:
+         out_list = []
+         for module in self.data:
+             if "vars" in module.metadata and module.name != "wildedit":
+                 module.metadata["date"] = (
+                     module.metadata["date"]
+                     + f" [{module.name.lower()}_vars = {module.metadata.pop('vars')}]"
+                 )
+             if module.name == "binavg":
+                 collection_string = module.metadata["binavg_surface_bin"][
+                     "surface_bin"
+                 ]
+                 for k, v in module.metadata["binavg_surface_bin"].items():
+                     if k != "surface_bin":
+                         collection_string += f", {k} = {v}"
+                 module.metadata["binavg_surface_bin"] = collection_string
+             for key, value in module.metadata.items():
+                 if module.name == "wfilter" and key == "action":
+                     for action_key, action_value in value.items():
+                         out_list.append(
+                             f"wfilter_action {action_key} = {action_value}\n"
+                         )
+                 else:
+                     out_list.append(f"{module.name}_{key} = {value}\n")
+         out_list.append("file_type = ascii\n")
+         return out_list
+ 
+     def get_names(self) -> list[str]:
+         return [step.name for step in self.data]
+ 
+     def extract_individual_modules(self, raw_info: list[str]) -> list:
+         """ """
+         module_list = []
+         for line in raw_info:
+             module = line.split("_")[0]
+             if (module not in module_list) and (
+                 line.split()[0] != "file_type"
+             ):
+                 module_list.append(module)
+         return module_list
+ 
+     def create_step_instance(
+         self,
+         module: str,
+         raw_info: list[str],
+     ) -> ProcessingStep:
+         """
+ 
+         Parameters
+         ----------
+         module :
+ 
+ 
+         Returns
+         -------
+ 
+         """
+         # TODO: probably need to split this into smaller bits
+         out_dict = {}
+         inner_action_dict = {}
+         # extract lines corresponding to the module
+         for line in raw_info:
+             if module == line.split("_")[0]:
+                 # removing the module names from the lines
+                 shifting_index = len(module) + 1
+                 line_content = line[shifting_index:]
+                 # handle the case of the validation methods keyword being
+                 # 'action', which corresponds to an entire dict of values
+                 if line_content[:6] == "action":
+                     inner_action_dict = self._module_dict_feeder(
+                         line_content[6:], inner_action_dict
+                     )
+                 else:
+                     # handle the cases where after some date value, another value
+                     # is printed inside of [] brackets
+                     double_value_list = line_content.split("[")
+                     if len(double_value_list) > 1:
+                         out_dict = self._module_dict_feeder(
+                             double_value_list[1][shifting_index:-2], out_dict
+                         )
+                         line_content = double_value_list[0]
+                     if line_content[:11] == "surface_bin":
+                         surface_bin_dict = {}
+                         for line in line_content.split(","):
+                             self._module_dict_feeder(line, surface_bin_dict)
+                         out_dict["surface_bin"] = surface_bin_dict
+                         continue
+                     # usual behavior, for 99% cases:
+                     # assigning key and value to the module dict
+                     out_dict = self._module_dict_feeder(line_content, out_dict)
+         if inner_action_dict:
+             out_dict["action"] = inner_action_dict
+         return ProcessingStep(module, out_dict)
+ 
+     def _module_dict_feeder(
+         self,
+         line: str,
+         dictionary: dict,
+         split_value: str = "=",
+     ):
+         """
+ 
+         Parameters
+         ----------
+         line: str :
+ 
+         dictionary: dict :
+ 
+         split_value: str :
+             (Default value = '=')
+ 
+         Returns
+         -------
+ 
+         """
+         # adds the values of a specific header line into a dictionary
+         try:
+             key, value = line.split(split_value)
+         except ValueError:
+             pass
+         else:
+             dictionary[key.strip()] = value.strip()
+         finally:
+             return dictionary
+ 
+     def get_step(self, step: str) -> ProcessingStep | None:
+         """
+ 
+         Parameters
+         ----------
+         module: str :
+ 
+ 
+         Returns
+         -------
+ 
+         """
+         for index, element in enumerate(self.data):
+             if str(element) == step:
+                 return self.data[index]
+         return None
+ 
+ 
+ class ProcessingStep:
+     """
+     Class that is meant to represent one individual processing step, that lead
+     to the current status of the cnv file. Can be a custom processing step or
+     one of the original Sea-Bird ones.
+ 
+     Parameters
+     ----------
+ 
+     Returns
+     -------
+ 
+     """
+ 
+     def __init__(self, name: str, metadata: dict):
+         self.name = name
+         self.metadata = metadata
+ 
+     def __str__(self) -> str:
+         return self.name
+ 
+     def __repr__(self) -> str:
+         return self.__str__()
+ 
+     def __eq__(self, other) -> bool:
+         return self.metadata == other.metadata
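
A short usage sketch of the new classes, feeding in a few invented processing-header lines of the kind CnvFile hands over as processing_info (the line values are illustrative only):

    from seabirdfilehandler.processing_steps import CnvProcessingSteps

    raw_info = [
        "datcnv_date = Jan 07 2024 09:20:01, 7.26.7.129\n",   # invented values
        "datcnv_in = cast001.hex cast001.xmlcon\n",
        "binavg_date = Jan 07 2024 09:21:44, 7.26.7.129\n",
        "binavg_binsize = 1\n",
        "file_type = ascii\n",
    ]
    steps = CnvProcessingSteps(raw_info)
    print(steps.get_names())                    # ['datcnv', 'binavg']
    print(steps.get_step("binavg").metadata)    # {'date': '...', 'binsize': '1'}
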

src/seabirdfilehandler/validation_modules.py (deleted)
@@ -1,155 +0,0 @@
- from collections import UserDict
- 
- 
- class CnvValidationList(UserDict):
-     """A python representation of the individual validation steps conducted
-     in the process of a cnv file creation. These modules are stored in
-     a dictionary structure, together with all the variables/metadata/etc.
-     given in the header of a cnv file.
- 
-     Parameters
-     ----------
- 
-     Returns
-     -------
- 
-     """
- 
-     def __init__(self, cnv_header_val_modules: list):
-         self.cnv_header_val_modules = cnv_header_val_modules
-         self.data = {}
-         self.modules = self.extract_individual_modules()
-         for module in self.modules:
-             module_data = self.create_dict_for_module(module)
-             self.data[module] = module_data
- 
-     def extract_individual_modules(self) -> list:
-         """ """
-         module_list = []
-         for line in self.cnv_header_val_modules:
-             module = line.split("_")[0]
-             if (module not in module_list) and (
-                 line.split()[0] != "file_type"
-             ):
-                 module_list.append(module)
-         return module_list
- 
-     def create_dict_for_module(self, module) -> dict:
-         """
- 
-         Parameters
-         ----------
-         module :
- 
- 
-         Returns
-         -------
- 
-         """
-         # TODO: probably need to split this into smaller bits
-         out_dict = {}
-         inner_action_dict = {}
-         action_dict_present = False
-         # extract lines corresponding to the module
-         for line in self.cnv_header_val_modules:
-             if module == line.split("_")[0]:
-                 # removing the module names from the lines
-                 shifting_index = len(module) + 1
-                 line_content = line[shifting_index:]
-                 # handle the case of the validation methods keyword being
-                 # 'action', which corresponds to an entire dict of values
-                 if line_content[:6] == "action":
-                     action_dict_present = True
-                     inner_action_dict = self.module_dict_feeder(
-                         line_content[6:], inner_action_dict
-                     )
-                 else:
-                     # handle the cases where after some date value, another value
-                     # is printed inside of [] brackets
-                     double_value_list = line_content.split("[")
-                     if len(double_value_list) > 1:
-                         out_dict = self.module_dict_feeder(
-                             double_value_list[1][shifting_index:-2], out_dict
-                         )
-                         line_content = double_value_list[0]
-                     if line_content[:11] == "surface_bin":
-                         surface_bin_dict = {}
-                         for line in line_content.split(","):
-                             self.module_dict_feeder(line, surface_bin_dict)
-                         out_dict["surface_bin"] = surface_bin_dict
-                         continue
-                     # usual behavior, for 99% cases:
-                     # assigning key and value to the module dict
-                     out_dict = self.module_dict_feeder(line_content, out_dict)
-         if action_dict_present:
-             out_dict["action"] = inner_action_dict
-         return out_dict
- 
-     def module_dict_feeder(
-         self, line: str, dictionary: dict, split_value: str = "="
-     ):
-         """
- 
-         Parameters
-         ----------
-         line: str :
- 
-         dictionary: dict :
- 
-         split_value: str :
-             (Default value = '=')
- 
-         Returns
-         -------
- 
-         """
-         # adds the values of a specific header line into a dictionary
-         try:
-             key, value = line.split(split_value)
-         except ValueError:
-             pass
-         else:
-             dictionary[key.strip()] = value.strip()
-         finally:
-             return dictionary
- 
-     def get(self, module: str) -> dict:
-         """
- 
-         Parameters
-         ----------
-         module: str :
- 
- 
-         Returns
-         -------
- 
-         """
-         for element in self.data:
-             if str(element) == module:
-                 return self.data[element]
-         else:
-             return {}
- 
- 
- class ValidationModule:
-     """Class that is meant to represent the individual validation modules of
-     the SeaSoft software. This includes all the input parameters and settins,
-     as well as a description of the output.
-     The idea is to inherit from this class for each individual module. But I
-     am not sure if its worth the effort.
- 
-     Parameters
-     ----------
- 
-     Returns
-     -------
- 
-     """
- 
-     def __init__(self, name):
-         self.name = name
- 
-     def extract_information(self):
-         """ """
-         pass