pydartdiags 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (24)
  1. {pydartdiags-0.5.1/src/pydartdiags.egg-info → pydartdiags-0.6.0}/PKG-INFO +2 -2
  2. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/pyproject.toml +2 -2
  3. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/setup.py +1 -1
  4. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/obs_sequence.py +74 -82
  5. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/stats/stats.py +93 -15
  6. {pydartdiags-0.5.1 → pydartdiags-0.6.0/src/pydartdiags.egg-info}/PKG-INFO +2 -2
  7. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/tests/test_obs_sequence.py +48 -48
  8. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/LICENSE +0 -0
  9. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/MANIFEST.in +0 -0
  10. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/README.md +0 -0
  11. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/setup.cfg +0 -0
  12. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/__init__.py +0 -0
  13. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/matplots/__init__.py +0 -0
  14. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/matplots/matplots.py +0 -0
  15. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/__init__.py +0 -0
  16. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/obs_sequence/composite_types.yaml +0 -0
  17. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/plots/__init__.py +0 -0
  18. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/plots/plots.py +0 -0
  19. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags/stats/__init__.py +0 -0
  20. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/SOURCES.txt +0 -0
  21. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/dependency_links.txt +0 -0
  22. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/requires.txt +0 -0
  23. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/src/pydartdiags.egg-info/top_level.txt +0 -0
  24. {pydartdiags-0.5.1 → pydartdiags-0.6.0}/tests/test_stats.py +0 -0
PKG-INFO
@@ -1,15 +1,15 @@
  Metadata-Version: 2.4
  Name: pydartdiags
- Version: 0.5.1
+ Version: 0.6.0
  Summary: Observation Sequence Diagnostics for DART
  Home-page: https://github.com/NCAR/pyDARTdiags.git
  Author: Helen Kershaw
  Author-email: Helen Kershaw <hkershaw@ucar.edu>
+ License-Expression: Apache-2.0
  Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
  Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
  Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
  Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
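
This metadata hunk (and its egg-info twin further down) reflects one packaging change: the deprecated "License :: OSI Approved" Trove classifier is dropped in favor of a PEP 639 License-Expression field. A quick way to see which form an installed copy carries, assuming pydartdiags is installed (importlib.metadata is in the standard library from Python 3.8):

    from importlib.metadata import metadata

    md = metadata("pydartdiags")             # email.message.Message-like object
    print(md.get("License-Expression"))      # "Apache-2.0" on 0.6.0, None on 0.5.1
    print(md.get_all("Classifier"))          # license classifier present only on 0.5.1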
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "pydartdiags"
- version = "0.5.1"
+ version = "0.6.0"
  authors = [
  { name="Helen Kershaw", email="hkershaw@ucar.edu" },
  ]
@@ -13,9 +13,9 @@ readme = "README.md"
  requires-python = ">=3.8"
  classifiers = [
  "Programming Language :: Python :: 3",
- "License :: OSI Approved :: Apache Software License",
  "Operating System :: OS Independent",
  ]
+ license = "Apache-2.0"
  dependencies = [
  "pandas>=2.2.0",
  "numpy>=1.26",
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 
  setup(
  name="pydartdiags",
- version="0.5.1",
+ version="0.6.0",
  packages=find_packages(where="src"),
  package_dir={"": "src"},
  include_package_data=True,
src/pydartdiags/obs_sequence/obs_sequence.py
@@ -19,17 +19,46 @@ def requires_assimilation_info(func):
  return wrapper
 
 
- class obs_sequence:
+ class ObsSequence:
  """
- Initialize an obs_sequence object from an ASCII or binary observation sequence file,
- or create an empty obs_sequence object from scratch.
+ Initialize an ObsSequence object from an ASCII or binary observation sequence file,
+ or create an empty ObsSequence object from scratch.
+
+ 1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
+
+ 3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
 
  Args:
  file (str): The input observation sequence ASCII or binary file.
- If None, an empty obs_sequence object is created from scratch.
+ If None, an empty ObsSequence object is created from scratch.
+ synonyms (list, optional): List of additional synonyms for the observation column in the DataFrame.
+ The default list is
+
+ .. code-block:: python
+
+ ['NCEP BUFR observation',
+ 'AIRS observation',
+ 'GTSPP observation',
+ 'SST observation',
+ 'observations',
+ 'WOD observation']
+
+ You can add more synonyms by providing a list of strings when
+ creating the ObsSequence object.
+
+ .. code-block:: python
+
+ ObsSequence(file, synonyms=['synonym1', 'synonym2'])
+
+ Raises:
+ ValueError: If neither 'loc3d' nor 'loc1d' could be found in the observation sequence.
+
+ Examples:
+
+ .. code-block:: python
+
+ obs_seq = ObsSequence(file='obs_seq.final')
 
- Returns:
- An obs_sequence object
 
  Attributes:
  df (pandas.DataFrame): The DataFrame containing the observation sequence data.
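
The headline API change in 0.6.0 is this rename of the obs_sequence class to ObsSequence (PEP 8 class naming). A minimal migration sketch; the obsq module alias follows the convention used in the package's own tests and the commented-out import in stats.py:

    from pydartdiags.obs_sequence import obs_sequence as obsq

    # 0.5.1 (old class name):
    # obs_seq = obsq.obs_sequence("obs_seq.final")

    # 0.6.0 (renamed class; constructor arguments are unchanged):
    obs_seq = obsq.ObsSequence("obs_seq.final")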
@@ -54,34 +83,18 @@ class obs_sequence:
  - scale height: 'VERTISSCALEHEIGHT' (unitless)
  loc_mod (str): The location model, either 'loc3d' or 'loc1d'.
  For 3D sphere models: latitude and longitude are in degrees in the DataFrame.
- types (dict): Dictionary of types of observations the observation sequence,
+ types (dict): Dictionary of types of observations in the observation sequence,
  e.g. {23: 'ACARS_TEMPERATURE'},
  reverse_types (dict): Dictionary of types with keys and values reversed, e.g
  {'ACARS_TEMPERATURE': 23}
  synonyms_for_obs (list): List of synonyms for the observation column in the DataFrame.
- The default list is
 
- .. code-block:: python
-
- [ 'NCEP BUFR observation',
- 'AIRS observation',
- 'GTSPP observation',
- 'SST observation',
- 'observations',
- 'WOD observation']
-
- You can add more synonyms by providing a list of strings when
- creating the obs_sequence object.
-
- .. code-block:: python
-
- obs_sequence(file, synonyms=['synonym1', 'synonym2']).df
 
  seq (generator): Generator of observations from the observation sequence file.
  all_obs (list): List of all observations, each observation is a list.
- Valid when the obs_sequence is created from a file.
- Set to None when the obs_sequence is created from scratch or multiple
- obs_sequences are joined.
+ Valid when the ObsSequence is created from a file.
+ Set to None when the ObsSequence is created from scratch or multiple
+ ObsSequences are joined.
  """
 
  vert = {
@@ -96,27 +109,6 @@ class obs_sequence:
  reversed_vert = {value: key for key, value in vert.items()}
 
  def __init__(self, file, synonyms=None):
- """
- Create an obs_sequence object from an ASCII or binary observation sequence file,
- or create an empty obs_sequence object from scratch.
-
- Args:
- file (str): The input observation sequence ASCII or binary file.
- If None, an empty obs_sequence object is created from scratch.
- synonyms (list, optional): List of synonyms for the observation column in the DataFrame.
-
- Returns:
- an obs_sequence object
- 1D observations are given a datetime of days, seconds since 2000-01-01 00:00:00
- 3D observations are given a datetime of days, seconds since 1601-01-01 00:00:00 (DART Gregorian calendar)
-
- Examples:
-
- .. code-block:: python
-
- obs_seq = obs_sequence(file='obs_seq.final')
-
- """
 
  self.loc_mod = "None"
  self.file = file
@@ -214,7 +206,7 @@
  data.append(float(location[0])) # location x
  data.append(float(location[1])) # location y
  data.append(float(location[2])) # location z
- data.append(obs_sequence.vert[int(location[3])])
+ data.append(ObsSequence.vert[int(location[3])])
  self.loc_mod = "loc3d"
  except ValueError:
  try:
@@ -372,7 +364,7 @@
  ) # sort the DataFrame by time
  df_copy.reset_index(drop=True, inplace=True)
  df_copy["obs_num"] = df_copy.index + 1 # obs_num in time order
- df_copy["linked_list"] = obs_sequence.generate_linked_list_pattern(
+ df_copy["linked_list"] = ObsSequence.generate_linked_list_pattern(
  len(df_copy)
  ) # linked list pattern
 
@@ -594,7 +586,7 @@
  with open(file, "rb") as f:
  while True:
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
  record = f.read(record_length)
@@ -602,7 +594,7 @@
  break
 
  # Read the trailing record length (should match the leading one)
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  linecount += 1
 
@@ -620,7 +612,7 @@
  f.seek(0)
 
  for _ in range(2):
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -628,7 +620,7 @@
  if not record: # end of file
  break
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
  header.append(record.decode("utf-8").strip())
 
  header.append(str(obs_types_definitions))
@@ -636,7 +628,7 @@
  # obs_types_definitions
  for _ in range(3, 4 + obs_types_definitions):
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -645,7 +637,7 @@
  if not record: # end of file
  break
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  if _ == 3:
  continue # num obs_types_definitions
@@ -663,7 +655,7 @@
  5 + obs_types_definitions + num_copies + num_qcs + 1,
  ):
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -672,7 +664,7 @@
  if not record:
  break
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  if _ == 5 + obs_types_definitions:
  continue
@@ -683,12 +675,12 @@
 
  # first and last obs
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
 
  # Read the actual record
  record = f.read(record_length)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # Read the whole record as a two integers
  first, last = struct.unpack("ii", record)[:8]
@@ -813,7 +805,7 @@
  # Skip the first len(obs_seq.header) lines
  for _ in range(header_length - 1):
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None: # End of file
  break
 
@@ -830,7 +822,7 @@
  obs.append(f"OBS {obs_num}")
  for _ in range(n): # number of copies
  # Read the record length
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
  # Read the actual record (copie)
@@ -838,10 +830,10 @@
  obs.append(struct.unpack("d", record)[0])
 
  # Read the trailing record length (should match the leading one)
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # linked list info
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  if record_length is None:
  break
 
@@ -850,17 +842,17 @@
  linked_list_string = f"{int1:<12} {int2:<10} {int3:<12}"
  obs.append(linked_list_string)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # location (note no location header "loc3d" or "loc1d" for binary files)
  obs.append("loc3d")
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  record = f.read(record_length)
  x, y, z, vert = struct.unpack("dddi", record[:28])
  location_string = f"{x} {y} {z} {vert}"
  obs.append(location_string)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # kind (type of observation) value
  obs.append("kind")
@@ -870,23 +862,23 @@
  kind = f"{struct.unpack('i', record)[0]}"
  obs.append(kind)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # time (seconds, days)
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  record = f.read(record_length)
  seconds, days = struct.unpack("ii", record)[:8]
  time_string = f"{seconds} {days}"
  obs.append(time_string)
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  # obs error variance
- record_length = obs_sequence.read_record_length(f)
+ record_length = ObsSequence.read_record_length(f)
  record = f.read(record_length)
  obs.append(struct.unpack("d", record)[0])
 
- obs_sequence.check_trailing_record_length(f, record_length)
+ ObsSequence.check_trailing_record_length(f, record_length)
 
  yield obs
 
@@ -945,29 +937,29 @@
  """
  Join a list of observation sequences together.
 
- This method combines the headers and observations from a list of obs_sequence objects
- into a single obs_sequence object.
+ This method combines the headers and observations from a list of ObsSequence objects
+ into a single ObsSequence object.
 
  Args:
- obs_sequences (list of obs_sequences): The list of observation sequences objects to join.
+ obs_sequences (list of ObsSequences): The list of observation sequences objects to join.
  copies (list of str, optional): A list of copy names to include in the combined data.
  If not provided, all copies are included.
 
  Returns:
- A new obs_sequence object containing the combined data.
+ A new ObsSequence object containing the combined data.
 
  Example:
  .. code-block:: python
 
- obs_seq1 = obs_sequence(file='obs_seq1.final')
- obs_seq2 = obs_sequence(file='obs_seq2.final')
- obs_seq3 = obs_sequence(file='obs_seq3.final')
- combined = obs_sequence.join([obs_seq1, obs_seq2, obs_seq3])
+ obs_seq1 = ObsSequence(file='obs_seq1.final')
+ obs_seq2 = ObsSequence(file='obs_seq2.final')
+ obs_seq3 = ObsSequence(file='obs_seq3.final')
+ combined = ObsSequence.join([obs_seq1, obs_seq2, obs_seq3])
  """
  if not obs_sequences:
  raise ValueError("The list of observation sequences is empty.")
 
- # Create a new obs_sequnece object with the combined data
+ # Create a new ObsSequence object with the combined data
  combo = cls(file=None)
 
  # Check if all obs_sequences have compatible attributes
@@ -1092,7 +1084,7 @@
 
  # create linked list for obs
  combo.df = combined_df.sort_values(by="time").reset_index(drop=True)
- combo.df["linked_list"] = obs_sequence.generate_linked_list_pattern(
+ combo.df["linked_list"] = ObsSequence.generate_linked_list_pattern(
  len(combo.df)
  )
  combo.df["obs_num"] = combined_df.index + 1
@@ -1125,7 +1117,7 @@
  )
 
  def create_header(self, n):
- """Create a header for the obs_seq file from the obs_sequence object."""
+ """Create a header for the obs_seq file from the ObsSequence object."""
  assert (
  self.n_copies == self.n_non_qc + self.n_qc
  ), "n_copies must be equal to n_non_qc + n_qc"
src/pydartdiags/stats/stats.py
@@ -4,8 +4,6 @@ import numpy as np
  from functools import wraps
  from datetime import datetime, timedelta
 
- # from pydartdiags.obs_sequence import obs_sequence as obsq
-
 
  def apply_to_phases_in_place(func):
  """
@@ -93,6 +91,12 @@ def calculate_rank(df, phase):
  """
  Calculate the rank of observations within an ensemble.
 
+ Note:
+
+ This function is decorated with @apply_to_phases_by_obs, which modifies its usage.
+ You should call it as calculate_rank(df), and the decorator will automatically apply the
+ function to all relevant phases ('prior' and 'posterior').
+
  This function takes a DataFrame containing ensemble predictions and observed values,
  adds sampling noise to the ensemble predictions, and calculates the rank of the observed
  value within the perturbed ensemble for each observation. The rank indicates the position
@@ -103,8 +107,6 @@ def calculate_rank(df, phase):
  Parameters:
  df (pd.DataFrame): A DataFrame with columns for rank, and observation type.
 
- phase (str): The phase for which to calculate the statistics ('prior' or 'posterior')
-
  Returns:
  DataFrame containing columns for 'rank' and observation 'type'.
  """
@@ -158,15 +160,20 @@ def diag_stats(df, phase):
  """
  Calculate diagnostic statistics for a given phase and add them to the DataFrame.
 
+ Note:
+ This function is decorated with @apply_to_phases_in_place, which modifies its usage.
+ You should call it as diag_stats(df), and the decorator will automatically apply the
+ function to all relevant phases ('prior' and 'posterior') modifying the DataFrame
+ in place.
+
  Args:
  df (pandas.DataFrame): The input DataFrame containing observation data and ensemble statistics.
- The DataFrame must include the following columns:
- - 'observation': The actual observation values.
- - 'obs_err_var': The variance of the observation error.
- - 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
- - 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
+ The DataFrame must include the following columns:
 
- phase (str): The phase for which to calculate the statistics ('prior' or 'posterior')
+ - 'observation': The actual observation values.
+ - 'obs_err_var': The variance of the observation error.
+ - 'prior_ensemble_mean' and/or 'posterior_ensemble_mean': The mean of the ensemble.
+ - 'prior_ensemble_spread' and/or 'posterior_ensemble_spread': The spread of the ensemble.
 
  Returns:
  None: The function modifies the DataFrame in place by adding the following columns:
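
The new Note blocks in calculate_rank and diag_stats describe the same pattern: the phase argument is injected by a decorator, so callers pass only the DataFrame. The decorator bodies are not part of this diff; the following is an assumed sketch of how such an apply-to-phases wrapper could behave:

    from functools import wraps

    def apply_to_phases_in_place(func):
        # Sketch: call func(df, phase) once per phase present in the frame.
        @wraps(func)
        def wrapper(df, *args, **kwargs):
            for phase in ("prior", "posterior"):
                # Assumed detection: a phase counts as present if any column
                # carries its prefix, e.g. 'prior_ensemble_mean'.
                if any(col.startswith(phase) for col in df.columns):
                    func(df, phase, *args, **kwargs)
        return wrapper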
@@ -203,9 +210,12 @@ def bin_by_layer(df, levels, verticalUnit="pressure (Pa)"):
  vertical level bin. Only observations (row) with the specified vertical unit are binned.
 
  Args:
- df (pandas.DataFrame): The input DataFrame containing observation data. The DataFrame must include the following columns:
+ df (pandas.DataFrame): The input DataFrame containing observation data.
+ The DataFrame must include the following columns:
+
  - 'vertical': The vertical coordinate values of the observations.
  - 'vert_unit': The unit of the vertical coordinate values.
+
  levels (list): A list of bin edges for the vertical levels.
  verticalUnit (str, optional): The unit of the vertical axis (e.g., 'pressure (Pa)'). Default is 'pressure (Pa)'.
 
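A short usage sketch for the reflowed bin_by_layer docstring; the bin edges below are illustrative only (the tests use an hPa-based list), and the import path follows the package layout in the file list:

    from pydartdiags.stats import stats

    # Illustrative pressure bin edges in Pa; 'pressure (Pa)' is the documented default unit.
    levels = [0.0, 10000.0, 30000.0, 50000.0, 70000.0, 85000.0, 100000.0]
    stats.bin_by_layer(obs_seq.df, levels)  # obs_seq: an ObsSequence, as in the earlier sketch
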
@@ -261,6 +271,28 @@ def bin_by_time(df, time_value):
 
  @apply_to_phases_by_type_return_df
  def grand_statistics(df, phase):
+ """
+ Calculate grand statistics (RMSE, bias, total spread) for each observation type and phase.
+
+ This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
+ have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data by observation
+ type and computes the root mean square error (RMSE), mean bias, and total spread for the specified phase.
+
+ Note:
+ This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage.
+ You should call it as grand_statistics(df), and the decorator will automatically apply the function
+ to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
+
+ Args:
+ df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
+
+ Returns:
+ pandas.DataFrame: A DataFrame with columns:
+ - 'type': The observation type.
+ - '{phase}_rmse': The root mean square error for the phase.
+ - '{phase}_bias': The mean bias for the phase.
+ - '{phase}_totalspread': The total spread for the phase.
+ """
 
  # assuming diag_stats has been called
  grand = (
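
Combined with the diag_stats note above, the intended call sequence for the new grand_statistics docstring looks like this sketch (file name illustrative; diag_stats must run first so the error and spread columns exist):

    from pydartdiags.obs_sequence import obs_sequence as obsq
    from pydartdiags.stats import stats

    obs_seq = obsq.ObsSequence("obs_seq.final")   # illustrative file name
    stats.diag_stats(obs_seq.df)                  # adds the stats columns in place
    grand = stats.grand_statistics(obs_seq.df)    # per-type RMSE, bias, total spread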
@@ -283,6 +315,33 @@
 
  @apply_to_phases_by_type_return_df
  def layer_statistics(df, phase):
+ """
+ Calculate statistics (RMSE, bias, total spread) for each observation type and vertical layer.
+
+ This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
+ have already been computed with :func:`diag_stats` and are present in the DataFrame. It groups the data by
+ vertical layer midpoint and observation type, and computes the root mean square error (RMSE),
+ mean bias, and total spread for the specified phase for each vertical layer.
+
+ Note:
+ This function is decorated with @apply_to_phases_by_type_return_df, which modifies its usage
+ You should call it as layer_statistics(df), and the decorator will automatically apply the function
+ to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
+
+ Args:
+ df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
+ phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
+
+ Returns:
+ pandas.DataFrame: A DataFrame with columns:
+ - 'midpoint': The midpoint of the vertical layer.
+ - 'type': The observation type.
+ - '{phase}_rmse': The root mean square error for the phase.
+ - '{phase}_bias': The mean bias for the phase.
+ - '{phase}_totalspread': The total spread for the phase.
+ - 'vert_unit': The vertical unit.
+ - 'vlevels': The categorized vertical level.
+ """
 
  # assuming diag_stats has been called
  layer_stats = (
@@ -310,14 +369,31 @@
  @apply_to_phases_by_type_return_df
  def time_statistics(df, phase):
  """
- Calculate time-based statistics for a given phase and return a new DataFrame.
+ Calculate time-based statistics (RMSE, bias, total spread) for each observation type and time bin.
+
+ This function assumes that diagnostic statistics (such as squared error, bias, and total variance)
+ have already been computed by :func:`diag_stats` and are present in the DataFrame. It groups the data
+ by time bin midpoint and observation type, and computes the root mean square error (RMSE), mean bias,
+ and total spread for the specified phase for each time bin.
+
+ Note:
+ This function is decorated with @apply_to_phases_by_type_return_df.
+ You should call it as time_statistics(df), and the decorator will automatically apply the function
+ to all relevant phases ('prior' and 'posterior') and return a merged DataFrame.
 
  Args:
- df (pandas.DataFrame): The input DataFrame containing observation data and ensemble statistics.
+ df (pandas.DataFrame): The input DataFrame containing diagnostic statistics for observations.
  phase (str): The phase for which to calculate the statistics ('prior' or 'posterior').
 
  Returns:
- pandas.DataFrame: A DataFrame containing time-based statistics for the specified phase.
+ pandas.DataFrame: A DataFrame with columns:
+ - 'time_bin_midpoint': The midpoint of the time bin.
+ - 'type': The observation type.
+ - '{phase}_rmse': The root mean square error for the phase.
+ - '{phase}_bias': The mean bias for the phase.
+ - '{phase}_totalspread': The total spread for the phase.
+ - 'time_bin': The time bin interval.
+ - 'time': The first time value in the bin.
  """
  # Assuming diag_stats has been called
  time_stats = (
@@ -402,7 +478,9 @@ def possible_vs_used_by_time(df):
  Calculates the count of possible vs. used observations by type and time bin.
 
  Args:
- df (pd.DataFrame): The input DataFrame containing observation data. The DataFrame must include:
+ df (pd.DataFrame): The input DataFrame containing observation data.
+ The DataFrame must include:
+
  - 'type': The observation type.
  - 'time_bin_midpoint': The midpoint of the time bin.
  - 'observation': The observation values.
src/pydartdiags.egg-info/PKG-INFO
@@ -1,15 +1,15 @@
  Metadata-Version: 2.4
  Name: pydartdiags
- Version: 0.5.1
+ Version: 0.6.0
  Summary: Observation Sequence Diagnostics for DART
  Home-page: https://github.com/NCAR/pyDARTdiags.git
  Author: Helen Kershaw
  Author-email: Helen Kershaw <hkershaw@ucar.edu>
+ License-Expression: Apache-2.0
  Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
  Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
  Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
  Classifier: Programming Language :: Python :: 3
- Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
tests/test_obs_sequence.py
@@ -43,7 +43,7 @@ class TestSanitizeInput:
  ValueError,
  match="Neither 'loc3d' nor 'loc1d' could be found in the observation sequence.",
  ):
- obj = obsq.obs_sequence(bad_loc_file_path)
+ obj = obsq.ObsSequence(bad_loc_file_path)
 
 
  class TestOneDimensional:
@@ -53,7 +53,7 @@ class TestOneDimensional:
  return os.path.join(test_dir, "data", "obs_seq.1d.final")
 
  def test_read1d(self, obs_seq_file_path):
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
  assert obj.loc_mod == "loc1d"
  assert len(obj.df) == 40 # 40 obs in the file
  assert (
@@ -69,11 +69,11 @@ class TestSynonyms:
  return os.path.join(test_dir, "data", "obs_seq.final.ascii.syn")
 
  def test_single(self, synonym_file_path):
- obj1 = obsq.obs_sequence(synonym_file_path, synonyms="observationx")
+ obj1 = obsq.ObsSequence(synonym_file_path, synonyms="observationx")
  assert "observationx" in obj1.synonyms_for_obs
 
  def test_list(self, synonym_file_path):
- obj2 = obsq.obs_sequence(
+ obj2 = obsq.ObsSequence(
  synonym_file_path, synonyms=["synonym1", "synonym2", "observationx"]
  )
  assert "synonym1" in obj2.synonyms_for_obs
@@ -87,7 +87,7 @@ class TestBinaryObsSequence:
  return os.path.join(test_dir, "data", "obs_seq.final.binary.small")
 
  def test_read_binary(self, binary_obs_seq_file_path):
- obj = obsq.obs_sequence(binary_obs_seq_file_path)
+ obj = obsq.ObsSequence(binary_obs_seq_file_path)
  assert len(obj.df) > 0 # Ensure the DataFrame is not empty
 
 
@@ -172,7 +172,7 @@ class TestWriteAscii:
  temp_output_file_path = os.path.join(temp_dir, "obs_seq.final.ascii.write")
 
  # Create an instance of the obs_sequence class and write the output file
- obj = obsq.obs_sequence(ascii_obs_seq_file_path)
+ obj = obsq.ObsSequence(ascii_obs_seq_file_path)
  obj.write_obs_seq(temp_output_file_path)
 
  # Ensure the output file exists
@@ -199,7 +199,7 @@ class TestWriteAscii:
  )
 
  # Create an instance of the obs_sequence class and write the output file
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
  stats.diag_stats(obj.df) # add the stats columns
  obj.write_obs_seq(temp_output_file_path)
 
@@ -227,7 +227,7 @@ class TestWriteAscii:
  )
 
  # Create an instance of the obs_sequence class and write the output file
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
  hPalevels = [
  0.0,
  100.0,
@@ -264,7 +264,7 @@ class TestWriteAscii:
  obs_seq_file_path = os.path.join(
  os.path.dirname(__file__), "data", "obs_seq.final.ascii.small"
  )
- obj = obsq.obs_sequence(obs_seq_file_path)
+ obj = obsq.ObsSequence(obs_seq_file_path)
 
  # Remove obs except ACARS_TEMPERATURE
  obj.df = obj.df[(obj.df["type"] == "ACARS_TEMPERATURE")]
@@ -297,7 +297,7 @@ class TestObsDataframe:
  df = pd.DataFrame(data)
 
  # Create an instance of ObsSequence with the sample DataFrame
- obs_seq = obsq.obs_sequence(file=None)
+ obs_seq = obsq.ObsSequence(file=None)
  obs_seq.df = df
  return obs_seq
 
@@ -394,15 +394,15 @@ class TestJoin:
  with pytest.raises(
  ValueError, match="The list of observation sequences is empty."
  ):
- obsq.obs_sequence.join([])
+ obsq.ObsSequence.join([])
 
  def test_join_diff_locs(self, obs_seq1d_file_path, binary_obs_seq_file_path):
- obj1 = obsq.obs_sequence(obs_seq1d_file_path)
- obj2 = obsq.obs_sequence(binary_obs_seq_file_path)
+ obj1 = obsq.ObsSequence(obs_seq1d_file_path)
+ obj2 = obsq.ObsSequence(binary_obs_seq_file_path)
  with pytest.raises(
  ValueError, match="All observation sequences must have the same loc_mod."
  ):
- obsq.obs_sequence.join([obj1, obj2])
+ obsq.ObsSequence.join([obj1, obj2])
 
  def test_join_three_obs_seqs(
  self,
@@ -410,10 +410,10 @@ class TestJoin:
  ascii_obs_seq_file_path2,
  ascii_obs_seq_file_path3,
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj2 = obsq.obs_sequence(ascii_obs_seq_file_path2)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join([obj1, obj2, obj3])
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj2 = obsq.ObsSequence(ascii_obs_seq_file_path2)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join([obj1, obj2, obj3])
 
  assert obs_seq_mega.all_obs == None
  assert len(obs_seq_mega.df) == 16 # obs in the dataframe
@@ -457,9 +457,9 @@ class TestJoin:
  def test_join_list_sub_copies(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join(
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join(
  [obj1, obj3], ["prior_ensemble_mean", "observation", "Data_QC"]
  )
 
@@ -475,9 +475,9 @@ class TestJoin:
  def test_join_list_sub_copies_no_qc(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join(
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join(
  [obj1, obj3], ["observation", "prior_ensemble_spread"]
  )
 
@@ -489,29 +489,29 @@ class TestJoin:
  def test_join_copies_not_in_all(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path4
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj4 = obsq.obs_sequence(ascii_obs_seq_file_path4)
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj4 = obsq.ObsSequence(ascii_obs_seq_file_path4)
  with pytest.raises(
  ValueError, match="All observation sequences must have the same copies."
  ):
- obsq.obs_sequence.join([obj1, obj4])
+ obsq.ObsSequence.join([obj1, obj4])
 
  def test_join_copies_not_all_have_subset(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path4
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj4 = obsq.obs_sequence(ascii_obs_seq_file_path4)
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj4 = obsq.ObsSequence(ascii_obs_seq_file_path4)
  with pytest.raises(
  ValueError, match="All observation sequences must have the selected copies."
  ):
- obsq.obs_sequence.join([obj1, obj4], ["prior_ensemble_member_41"])
+ obsq.ObsSequence.join([obj1, obj4], ["prior_ensemble_member_41"])
 
  def test_join_list_sub_copies(
  self, ascii_obs_seq_file_path1, ascii_obs_seq_file_path3
  ):
- obj1 = obsq.obs_sequence(ascii_obs_seq_file_path1)
- obj3 = obsq.obs_sequence(ascii_obs_seq_file_path3)
- obs_seq_mega = obsq.obs_sequence.join(
+ obj1 = obsq.ObsSequence(ascii_obs_seq_file_path1)
+ obj3 = obsq.ObsSequence(ascii_obs_seq_file_path3)
+ obs_seq_mega = obsq.ObsSequence.join(
  [obj1, obj3], ["prior_ensemble_mean", "observation", "Data_QC"]
  )
  assert obs_seq_mega.has_assimilation_info() == False
@@ -520,7 +520,7 @@ class TestJoin:
 
  class TestCreateHeader:
  def test_create_header(self):
- obj = obsq.obs_sequence(file=None)
+ obj = obsq.ObsSequence(file=None)
 
  obj.types = {1: "ACARS_BELLYBUTTON", 2: "NCEP_TOES"}
  obj.n_non_qc = 2
@@ -551,7 +551,7 @@
  class TestSplitMetadata:
  def test_split_metadata_with_external_FO(self):
  metadata = ["meta1", "meta2", "external_FO1", "meta3", "meta4"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2"]
@@ -559,7 +559,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_without_external_FO(self):
  metadata = ["meta1", "meta2", "meta3", "meta4"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2", "meta3", "meta4"]
@@ -567,7 +567,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_multiple_external_FO(self):
  metadata = ["meta1", "external_FO1", "meta2", "external_FO2", "meta3"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1"]
@@ -575,7 +575,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_empty_list(self):
  metadata = []
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == []
@@ -583,7 +583,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_no_external_FO(self):
  metadata = ["meta1", "meta2", "meta3"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2", "meta3"]
@@ -591,7 +591,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_external_FO_at_start(self):
  metadata = ["external_FO1", "meta1", "meta2"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == []
@@ -599,7 +599,7 @@ class TestSplitMetadata:
 
  def test_split_metadata_external_FO_at_end(self):
  metadata = ["meta1", "meta2", "external_FO1"]
- before_external_FO, after_external_FO = obsq.obs_sequence.split_metadata(
+ before_external_FO, after_external_FO = obsq.ObsSequence.split_metadata(
  metadata
  )
  assert before_external_FO == ["meta1", "meta2"]
@@ -610,7 +610,7 @@ class TestGenerateLinkedListPattern:
  def test_generate_linked_list_pattern(self):
  n = 1
  expected_pattern = ["0 -1 -1"]
- result = obsq.obs_sequence.generate_linked_list_pattern(n)
+ result = obsq.ObsSequence.generate_linked_list_pattern(n)
  assert result == expected_pattern
 
  n = 3
@@ -619,7 +619,7 @@ class TestGenerateLinkedListPattern:
  "1 3 -1",
  "2 -1 -1",
  ]
- result = obsq.obs_sequence.generate_linked_list_pattern(n)
+ result = obsq.ObsSequence.generate_linked_list_pattern(n)
  assert result == expected_pattern
 
  n = 6
@@ -631,7 +631,7 @@ class TestGenerateLinkedListPattern:
  "4 6 -1",
  "5 -1 -1",
  ]
- result = obsq.obs_sequence.generate_linked_list_pattern(n)
+ result = obsq.ObsSequence.generate_linked_list_pattern(n)
  assert result == expected_pattern
 
 
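The expected patterns in this test pin down the linked-list encoding: each row stores "prev next cov_group", with -1 marking no link. A sketch consistent with the visible expectations (generate_linked_list_pattern itself is not shown in this diff):

    def generate_linked_list_pattern(n):
        # Entry k (1-based) points back to k-1 (0 for the first obs) and
        # forward to k+1; -1 marks the end of the chain (sketch).
        pattern = []
        for k in range(1, n + 1):
            next_obs = k + 1 if k < n else -1
            pattern.append(f"{k - 1} {next_obs} -1")
        return pattern

    assert generate_linked_list_pattern(1) == ["0 -1 -1"]  # matches the n = 1 case
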
@@ -665,7 +665,7 @@ class TestCreateHeaderFromDataFrame:
  df = pd.DataFrame(data)
 
  # Create an instance of obs_sequence with the sample DataFrame
- obs_seq = obsq.obs_sequence(file=None)
+ obs_seq = obsq.ObsSequence(file=None)
  obs_seq.df = df
  obs_seq.reverse_types = {
  "ACARS_TEMPERATURE": 1,
@@ -732,7 +732,7 @@ class TestUpdateTypesDicts:
  "52": "PINEAPPLE_COUNT",
  }
 
- updated_reverse_types, types = obsq.obs_sequence.update_types_dicts(
+ updated_reverse_types, types = obsq.ObsSequence.update_types_dicts(
  sample_df, reverse_types
  )
 
@@ -747,7 +747,7 @@ class TestCompositeTypes:
  file_path = os.path.join(test_dir, "data", "three-obs.final")
 
  # Create an instance of obs_sequence with the 'three-obs.final' file
- obs_seq = obsq.obs_sequence(file_path)
+ obs_seq = obsq.ObsSequence(file_path)
  return obs_seq
 
  @pytest.mark.parametrize(
@@ -850,7 +850,7 @@ class TestCompositeTypes:
  test_dir = os.path.dirname(__file__)
  file_path = os.path.join(test_dir, "data", "dups-obs.final")
 
- dup = obsq.obs_sequence(file_path)
+ dup = obsq.ObsSequence(file_path)
  # Test that composite_types raises an error
  with pytest.raises(Exception, match="There are duplicates in the components."):
  dup.composite_types()