pydartdiags 0.0.42__py3-none-any.whl → 0.0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of pydartdiags might be problematic.

pydartdiags/obs_sequence/obs_sequence.py
@@ -5,6 +5,23 @@ import os
 import yaml
 import struct

+def requires_assimilation_info(func):
+    def wrapper(self, *args, **kwargs):
+        if self.has_assimilation_info:
+            return func(self, *args, **kwargs)
+        else:
+            raise ValueError("Assimilation information is required to call this function.")
+    return wrapper
+
+def requires_posterior_info(func):
+    def wrapper(self, *args, **kwargs):
+        if self.has_posterior_info:
+            return func(self, *args, **kwargs)
+        else:
+            raise ValueError("Posterior information is required to call this function.")
+    return wrapper
+
+
 class obs_sequence:
     """Create an obs_sequence object from an ascii observation sequence file.

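The decorators added above gate methods on flags set while the file is read. A minimal sketch of the pattern (the `Demo` class is a hypothetical stand-in for `obs_sequence`; the decorator body is taken from this hunk):

```python
def requires_assimilation_info(func):
    def wrapper(self, *args, **kwargs):
        if self.has_assimilation_info:
            return func(self, *args, **kwargs)
        else:
            raise ValueError("Assimilation information is required to call this function.")
    return wrapper

class Demo:  # hypothetical stand-in for obs_sequence
    def __init__(self, has_info):
        self.has_assimilation_info = has_info

    @requires_assimilation_info
    def stats(self):
        return "ok"

print(Demo(True).stats())  # ok
try:
    Demo(False).stats()
except ValueError as err:
    print(err)  # Assimilation information is required to call this function.
```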
@@ -59,6 +76,8 @@ class obs_sequence:
 
     def __init__(self, file, synonyms=None):
         self.loc_mod = 'None'
+        self.has_assimilation_info = False
+        self.has_posterior_info = False
         self.file = file
         self.synonyms_for_obs = ['NCEP BUFR observation',
                                  'AIRS observation',
@@ -72,6 +91,17 @@ class obs_sequence:
         else:
             self.synonyms_for_obs.append(synonyms)
 
+        if file is None:
+            # Early exit for testing purposes
+            self.df = pd.DataFrame()
+            self.types = {}
+            self.reverse_types = {}
+            self.copie_names = []
+            self.n_copies = 0
+            self.seq = []
+            self.all_obs = []
+            return
+
         module_dir = os.path.dirname(__file__)
         self.default_composite_types = os.path.join(module_dir,"composite_types.yaml")
 
@@ -103,11 +133,16 @@ class obs_sequence:
         self.synonyms_for_obs = [synonym.replace(' ', '_') for synonym in self.synonyms_for_obs]
         rename_dict = {old: 'observation' for old in self.synonyms_for_obs if old in self.df.columns}
         self.df = self.df.rename(columns=rename_dict)
+
         # calculate bias and sq_err if the obs_seq is an obs_seq.final
         if 'prior_ensemble_mean'.casefold() in map(str.casefold, self.columns):
-            self.df['bias'] = (self.df['prior_ensemble_mean'] - self.df['observation'])
-            self.df['sq_err'] = self.df['bias']**2 # squared error
-
+            self.has_assimilation_info = True
+            self.df['prior_bias'] = (self.df['prior_ensemble_mean'] - self.df['observation'])
+            self.df['prior_sq_err'] = self.df['prior_bias']**2 # squared error
+            if 'posterior_ensemble_mean'.casefold() in map(str.casefold, self.columns):
+                self.has_posterior_info = True
+                self.df['posterior_bias'] = (self.df['posterior_ensemble_mean'] - self.df['observation'])
+                self.df['posterior_sq_err'] = self.df['posterior_bias']**2
 
     def create_all_obs(self):
         """ steps through the generator to create a
@@ -152,14 +187,38 @@ class obs_sequence:
         data.append(self.types[type_value]) # observation type
 
         # any observation specific obs def info is between here and the end of the list
+        # can be obs_def & external forward operator
+        metadata = obs[typeI+2:-2]
+        obs_def_metadata, external_metadata = self.split_metadata(metadata)
+        data.append(obs_def_metadata)
+        data.append(external_metadata)
+
         time = obs[-2].split()
         data.append(int(time[0])) # seconds
         data.append(int(time[1])) # days
         data.append(convert_dart_time(int(time[0]), int(time[1]))) # datetime # HK todo what is appropriate for 1d models?
         data.append(float(obs[-1])) # obs error variance ?convert to sd?
-
+
         return data
 
+    @staticmethod
+    def split_metadata(metadata):
+        """
+        Split the metadata list at the first occurrence of an element starting with 'external_FO'.
+
+        Args:
+            metadata (list of str): The metadata list to be split.
+
+        Returns:
+            tuple: Two sublists, the first containing elements before 'external_FO', and the second
+            containing 'external_FO' and all elements after it. If 'external_FO' is not found,
+            the first sublist contains the entire metadata list, and the second is empty.
+        """
+        for i, item in enumerate(metadata):
+            if item.startswith('external_FO'):
+                return metadata[:i], metadata[i:]
+        return metadata, []
+
     def list_to_obs(self, data):
         obs = []
         obs.append('OBS ' + str(data[0])) # obs_num lots of space
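Because `split_metadata` is a static method with no I/O, its contract is easy to see in isolation. A sketch with a made-up metadata list (real entries come from the obs_def section of an obs_seq file):

```python
from pydartdiags.obs_sequence import obs_sequence as obsq

# Hypothetical metadata entries; 'external_FO1' marks the external forward operator.
metadata = ['obs_def info 1', 'obs_def info 2', 'external_FO1', 'forward operator data']
obs_def_metadata, external_metadata = obsq.obs_sequence.split_metadata(metadata)
print(obs_def_metadata)   # ['obs_def info 1', 'obs_def info 2']
print(external_metadata)  # ['external_FO1', 'forward operator data']
```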
@@ -171,10 +230,16 @@ class obs_sequence:
             obs.append(' '.join(map(str, data[self.n_copies+2:self.n_copies+5])) + ' ' + str(self.reversed_vert[data[self.n_copies+5]]) ) # location x, y, z, vert
             obs.append('kind') # this is type of observation
             obs.append(self.reverse_types[data[self.n_copies + 6]]) # observation type
+            # Append the metadata
+            obs.extend(data[self.n_copies + 7]) # metadata
         elif self.loc_mod == 'loc1d':
             obs.append(data[self.n_copies+2]) # 1d location
             obs.append('kind') # this is type of observation
             obs.append(self.reverse_types[data[self.n_copies + 3]]) # observation type
+            # Convert metadata to a string and append
+            metadata = ' '.join(map(str, data[self.n_copies + 4:-4]))
+            if metadata:
+                obs.append(metadata) # metadata
         obs.append(' '.join(map(str, data[-4:-2]))) # seconds, days
         obs.append(data[-1]) # obs error variance
 
@@ -273,12 +338,70 @@ class obs_sequence:
         elif self.loc_mod == 'loc1d':
             heading.append('location')
             heading.append('type')
+        heading.append('metadata')
+        heading.append('external_FO')
         heading.append('seconds')
         heading.append('days')
         heading.append('time')
         heading.append('obs_err_var')
         return heading
 
+    @requires_assimilation_info
+    def select_by_dart_qc(self, dart_qc):
+        """
+        Selects rows from the DataFrame based on the DART quality control flag.
+
+        Parameters:
+            dart_qc (int): The DART quality control flag to select.
+
+        Returns:
+            DataFrame: A DataFrame containing only the rows with the specified DART quality control flag.
+
+        Raises:
+            ValueError: If the DART quality control flag is not present in the DataFrame.
+        """
+        if dart_qc not in self.df['DART_quality_control'].unique():
+            raise ValueError(f"DART quality control flag '{dart_qc}' not found in DataFrame.")
+        else:
+            return self.df[self.df['DART_quality_control'] == dart_qc]
+
+    @requires_assimilation_info
+    def select_failed_qcs(self):
+        """
+        Selects rows from the DataFrame where the DART quality control flag is greater than 0.
+
+        Returns:
+            pandas.DataFrame: A DataFrame containing only the rows with a DART quality control flag greater than 0.
+        """
+        return self.df[self.df['DART_quality_control'] > 0]
+
+    @requires_assimilation_info
+    def possible_vs_used(self):
+        """
+        Calculates the count of possible vs. used observations by type.
+
+        This method uses the object's DataFrame, which includes a 'type' column for the observation
+        type and an 'observation' column. The number of used observations ('used') is the total number
+        minus the observations that failed quality control checks (as determined by the `select_failed_qcs` method).
+        The result is a DataFrame with each observation type, the count of possible observations, and the count of
+        used observations.
+
+        Returns:
+            pd.DataFrame: A DataFrame with three columns: 'type', 'possible', and 'used'. 'type' is the observation type,
+            'possible' is the count of all observations of that type, and 'used' is the count of observations of that type
+            that passed quality control checks.
+        """
+        possible = self.df.groupby('type')['observation'].count()
+        possible.rename('possible', inplace=True)
+
+        failed_qcs = self.select_failed_qcs().groupby('type')['observation'].count()
+        used = possible - failed_qcs.reindex(possible.index, fill_value=0)
+        used.rename('used', inplace=True)
+
+        return pd.concat([possible, used], axis=1).reset_index()
+
+
     @staticmethod
     def is_binary(file):
         """Check if a file is a binary file."""
@@ -659,65 +782,6 @@ def convert_dart_time(seconds, days):
     """
     time = dt.datetime(1601,1,1) + dt.timedelta(days=days, seconds=seconds)
     return time
-
-def select_by_dart_qc(df, dart_qc):
-    """
-    Selects rows from a DataFrame based on the DART quality control flag.
-
-    Parameters:
-        df (DataFrame): A pandas DataFrame.
-        dart_qc (int): The DART quality control flag to select.
-
-    Returns:
-        DataFrame: A DataFrame containing only the rows with the specified DART quality control flag.
-
-    Raises:
-        ValueError: If the DART quality control flag is not present in the DataFrame.
-    """
-    if dart_qc not in df['DART_quality_control'].unique():
-        raise ValueError(f"DART quality control flag '{dart_qc}' not found in DataFrame.")
-    else:
-        return df[df['DART_quality_control'] == dart_qc]
-
-def select_failed_qcs(df):
-    """
-    Selects rows from a DataFrame where the DART quality control flag is greater than 0.
-
-    Parameters:
-        df (DataFrame): A pandas DataFrame.
-
-    Returns:
-        DataFrame: A DataFrame containing only the rows with a DART quality control flag greater than 0.
-    """
-    return df[df['DART_quality_control'] > 0]
-
-def possible_vs_used(df):
-    """
-    Calculates the count of possible vs. used observations by type.
-
-    This function takes a DataFrame containing observation data, including a 'type' column for the observation
-    type and an 'observation' column. The number of used observations ('used') is the total number
-    minus the observations that failed quality control checks (as determined by the `select_failed_qcs` function).
-    The result is a DataFrame with each observation type, the count of possible observations, and the count of
-    used observations.
-
-    Parameters:
-        df (pd.DataFrame): A DataFrame with at least two columns: 'type' for the observation type and 'observation'
-        for the observation data. It may also contain other columns required by the `select_failed_qcs` function
-        to determine failed quality control checks.
-
-    Returns:
-        pd.DataFrame: A DataFrame with three columns: 'type', 'possible', and 'used'. 'type' is the observation type,
-        'possible' is the count of all observations of that type, and 'used' is the count of observations of that type
-        that passed quality control checks.
-
-    """
-    possible = df.groupby('type')['observation'].count()
-    possible.rename('possible', inplace=True)
-    used = df.groupby('type')['observation'].count() - select_failed_qcs(df).groupby('type')['observation'].count()
-    used.rename('used', inplace=True)
-    return pd.concat([possible, used], axis=1).reset_index()
-
 
 def construct_composit(df_comp, composite, components):
     """

pydartdiags/plots/plots.py
@@ -1,6 +1,7 @@
 
 import numpy as np
 import plotly.express as px
+import plotly.graph_objects as go
 import pandas as pd
 
 def plot_rank_histogram(df):
@@ -62,55 +63,208 @@ def calculate_rank(df):
 
     return (rank, ens_size, result_df)
 
-def plot_profile(df, levels):
+def plot_profile(df, levels, verticalUnit="pressure (Pa)"):
     """
-    Plots RMSE and Bias profiles for different observation types across specified pressure levels.
+    Plots RMSE, bias, and total spread profiles for different observation types across specified vertical levels.
 
     This function takes a DataFrame containing observational data and model predictions, categorizes
-    the data into specified pressure levels, and calculates the RMSE and Bias for each level and
-    observation type. It then plots two line charts: one for RMSE and another for Bias, both as functions
-    of pressure level. The pressure levels are plotted on the y-axis in reversed order to represent
-    the vertical profile in the atmosphere correctly.
+    the data into specified vertical levels, and calculates the RMSE, bias, and total spread for each level and
+    observation type. It then plots three line charts: one for RMSE, one for bias, and one for total spread, as
+    functions of vertical level. The vertical levels are plotted on the y-axis in reversed order to represent
+    the vertical profile in the atmosphere correctly if the vertical units are pressure.
 
     Parameters:
-        df (pd.DataFrame): The input DataFrame containing at least the 'vertical' column for pressure levels,
-        and other columns required by the `rmse_bias` function for calculating RMSE and Bias.
-        levels (array-like): The bin edges for categorizing the 'vertical' column values into pressure levels.
+        df (pd.DataFrame): The input DataFrame containing at least the 'vertical' column for vertical levels,
+        the 'vert_unit' column, and other columns required for calculating RMSE, bias, and total spread.
+        levels (array-like): The bin edges for categorizing the 'vertical' column values into the desired
+        vertical levels.
+        verticalUnit (str, optional): The vertical unit to be used. Only observations in df which have this
+        string in the 'vert_unit' column will be plotted. Defaults to 'pressure (Pa)'.
 
     Returns:
-        tuple: A tuple containing the DataFrame with RMSE and Bias calculations, the RMSE plot figure, and the
-        Bias plot figure. The DataFrame includes a 'plevels' column representing the categorized pressure levels
-        and 'hPa' column representing the midpoint of each pressure level bin.
+        tuple: A tuple containing the DataFrame with the RMSE, bias, and total spread calculations, followed by
+        the three figures. The DataFrame includes a 'vlevels' column representing the categorized vertical levels
+        and a 'midpoint' column representing the midpoint of each vertical level bin.
 
     Raises:
         ValueError: If there are missing values in the 'vertical' column of the input DataFrame.
+        ValueError: If none of the input obs have 'verticalUnit' in the 'vert_unit' column of the input DataFrame.
 
     Note:
-        - The function modifies the input DataFrame by adding 'plevels' and 'hPa' columns.
-        - The 'hPa' values are calculated as half the midpoint of each pressure level bin, which may need
-        adjustment based on the specific requirements for pressure level representation.
+        - The function modifies the input DataFrame by adding 'vlevels' and 'midpoint' columns.
+        - The 'midpoint' values are the midpoint of each vertical level bin (converted to hPa when the unit is
+        pressure in Pa), which may need adjustment based on the specific requirements for vertical level representation.
         - The plots are generated using Plotly Express and are displayed inline. The y-axis of the plots is
-        reversed to align with standard atmospheric pressure level representation.
+        reversed to align with standard atmospheric pressure level representation if the vertical units
+        are atmospheric pressure.
     """
 
     pd.options.mode.copy_on_write = True
     if df['vertical'].isnull().values.any(): # what about horizontal observations?
         raise ValueError("Missing values in 'vertical' column.")
+    elif verticalUnit not in df['vert_unit'].values:
+        raise ValueError("No obs with expected vertical unit '"+verticalUnit+"'.")
     else:
-        df.loc[:,'plevels'] = pd.cut(df['vertical'], levels)
-        df.loc[:,'hPa'] = df['plevels'].apply(lambda x: x.mid / 1000.) # HK todo units
+        df = df[df["vert_unit"].isin({verticalUnit})] # Subset to only rows with the correct vertical unit
+        df.loc[:,'vlevels'] = pd.cut(df['vertical'], levels)
+        if verticalUnit == "pressure (Pa)":
+            df.loc[:,'midpoint'] = df['vlevels'].apply(lambda x: x.mid / 100.) # HK todo units
+        else:
+            df.loc[:,'midpoint'] = df['vlevels'].apply(lambda x: x.mid)
 
-    df_profile = rmse_bias(df)
-    fig_rmse = px.line(df_profile, y='hPa', x='rmse', title='RMSE by Level', markers=True, color='type', width=800, height=800)
-    fig_rmse.update_yaxes(autorange="reversed")
-    fig_rmse.show()
+    # Calculations
+    df_profile_prior = rmse_bias_totalspread(df, phase='prior')
+    df_profile_posterior = None
+    if 'posterior_ensemble_mean' in df.columns:
+        df_profile_posterior = rmse_bias_totalspread(df, phase='posterior')
 
-    fig_bias = px.line(df_profile, y='hPa', x='bias', title='Bias by Level', markers=True, color='type', width=800, height=800)
-    fig_bias.update_yaxes(autorange="reversed")
-    fig_bias.show()
+    # Merge prior and posterior dataframes
+    if df_profile_posterior is not None:
+        df_profile = pd.merge(df_profile_prior, df_profile_posterior, on=['midpoint', 'type'], suffixes=('_prior', '_posterior'))
+        fig_rmse = plot_profile_prior_post(df_profile, 'rmse', verticalUnit)
+        fig_rmse.show()
+        fig_bias = plot_profile_prior_post(df_profile, 'bias', verticalUnit)
+        fig_bias.show()
+        fig_ts = plot_profile_prior_post(df_profile, 'totalspread', verticalUnit)
+        fig_ts.show()
+    else:
+        df_profile = df_profile_prior
+        fig_rmse = plot_profile_prior(df_profile, 'rmse', verticalUnit)
+        fig_rmse.show()
+        fig_bias = plot_profile_prior(df_profile, 'bias', verticalUnit)
+        fig_bias.show()
+        fig_ts = plot_profile_prior(df_profile, 'totalspread', verticalUnit)
+        fig_ts.show()
 
-    return df_profile, fig_rmse, fig_bias
+    return df_profile, fig_rmse, fig_ts, fig_bias
 
+def plot_profile_prior_post(df_profile, stat, verticalUnit):
+    """
+    Plots prior and posterior statistics by vertical level for different observation types.
+
+    Parameters:
+        df_profile (pd.DataFrame): DataFrame containing the prior and posterior statistics.
+        stat (str): The statistic to plot (e.g., 'rmse', 'bias', 'totalspread').
+        verticalUnit (str): The unit of the vertical axis (e.g., 'pressure (Pa)').
+
+    Returns:
+        plotly.graph_objects.Figure: The generated Plotly figure.
+    """
+    # Reshape DataFrame to long format for easier plotting
+    df_long = pd.melt(
+        df_profile,
+        id_vars=["midpoint", "type"],
+        value_vars=["prior_"+stat, "posterior_"+stat],
+        var_name=stat+"_type",
+        value_name=stat+"_value"
+    )
+
+    # Define a color mapping for each observation type
+    unique_types = df_long["type"].unique()
+    colors = px.colors.qualitative.Plotly
+    color_mapping = {type_: colors[i % len(colors)] for i, type_ in enumerate(unique_types)}
+
+    # Create a mapping for line styles based on stat
+    line_styles = {"prior_"+stat: "solid", "posterior_"+stat: "dash"}
+
+    # Create the figure
+    fig_stat = go.Figure()
+
+    # Loop through each type and phase to add traces
+    for t in df_long["type"].unique():
+        for stat_type, dash_style in line_styles.items():
+            # Filter the DataFrame for this type and stat
+            df_filtered = df_long[(df_long[stat+"_type"] == stat_type) & (df_long["type"] == t)]
+
+            # Add a trace
+            fig_stat.add_trace(go.Scatter(
+                x=df_filtered[stat+"_value"],
+                y=df_filtered["midpoint"],
+                mode='lines+markers',
+                name='prior '+t if stat_type == "prior_"+stat else 'post '+t, # legend shows "prior TYPE" / "post TYPE"
+                line=dict(dash=dash_style, color=color_mapping[t]), # Same color for all traces in group
+                marker=dict(size=8, color=color_mapping[t]),
+                legendgroup=t # Group traces by type
+            ))
+
+    # Update layout
+    fig_stat.update_layout(
+        title=stat+' by Level',
+        xaxis_title=stat,
+        yaxis_title=verticalUnit,
+        width=800,
+        height=800,
+        template="plotly_white"
+    )
+
+    if verticalUnit == "pressure (Pa)":
+        fig_stat.update_yaxes(autorange="reversed")
+
+    return fig_stat
+
+
+def plot_profile_prior(df_profile, stat, verticalUnit):
+    """
+    Plots prior statistics by vertical level for different observation types.
+
+    Parameters:
+        df_profile (pd.DataFrame): DataFrame containing the prior statistics.
+        stat (str): The statistic to plot (e.g., 'rmse', 'bias', 'totalspread').
+        verticalUnit (str): The unit of the vertical axis (e.g., 'pressure (Pa)').
+
+    Returns:
+        plotly.graph_objects.Figure: The generated Plotly figure.
+    """
+    # Reshape DataFrame to long format for easier plotting - not needed for prior only, but
+    # leaving it in for consistency with the plot_profile_prior_post function for now
+    df_long = pd.melt(
+        df_profile,
+        id_vars=["midpoint", "type"],
+        value_vars=["prior_"+stat],
+        var_name=stat+"_type",
+        value_name=stat+"_value"
+    )
+
+    # Define a color mapping for each observation type
+    unique_types = df_long["type"].unique()
+    colors = px.colors.qualitative.Plotly
+    color_mapping = {type_: colors[i % len(colors)] for i, type_ in enumerate(unique_types)}
+
+    # Create the figure
+    fig_stat = go.Figure()
+
+    # Loop through each type to add traces
+    for t in df_long["type"].unique():
+        # Filter the DataFrame for this type and stat
+        df_filtered = df_long[(df_long["type"] == t)]
+
+        # Add a trace
+        fig_stat.add_trace(go.Scatter(
+            x=df_filtered[stat+"_value"],
+            y=df_filtered["midpoint"],
+            mode='lines+markers',
+            name='prior ' + t,
+            line=dict(color=color_mapping[t]), # Same color for all traces in group
+            marker=dict(size=8, color=color_mapping[t]),
+            legendgroup=t # Group traces by type
+        ))
+
+    # Update layout
+    fig_stat.update_layout(
+        title=stat + ' by Level',
+        xaxis_title=stat,
+        yaxis_title=verticalUnit,
+        width=800,
+        height=800,
+        template="plotly_white"
+    )
+
+    if verticalUnit == "pressure (Pa)":
+        fig_stat.update_yaxes(autorange="reversed")
+
+    return fig_stat
+
 
 def mean_then_sqrt(x):
     """
@@ -130,12 +284,36 @@ def mean_then_sqrt(x):
 
     return np.sqrt(np.mean(x))
 
-def rmse_bias(df):
-    rmse_bias_df = df.groupby(['hPa', 'type']).agg({'sq_err':mean_then_sqrt, 'bias':'mean'}).reset_index()
-    rmse_bias_df.rename(columns={'sq_err':'rmse'}, inplace=True)
-
-    return rmse_bias_df
+def rmse_bias_totalspread(df, phase='prior'):
+    if phase == 'prior':
+        sq_err_column = 'prior_sq_err'
+        bias_column = 'prior_bias'
+        rmse_column = 'prior_rmse'
+        spread_column = 'prior_ensemble_spread'
+        totalspread_column = 'prior_totalspread'
+    elif phase == 'posterior':
+        sq_err_column = 'posterior_sq_err'
+        bias_column = 'posterior_bias'
+        rmse_column = 'posterior_rmse'
+        spread_column = 'posterior_ensemble_spread'
+        totalspread_column = 'posterior_totalspread'
+    else:
+        raise ValueError("Invalid phase. Must be 'prior' or 'posterior'.")
+
+    rmse_bias_ts_df = df.groupby(['midpoint', 'type'], observed=False).agg({
+        sq_err_column: mean_then_sqrt,
+        bias_column: 'mean',
+        spread_column: mean_then_sqrt,
+        'obs_err_var': mean_then_sqrt
+    }).reset_index()
 
+    # Add column for totalspread
+    rmse_bias_ts_df[totalspread_column] = np.sqrt(rmse_bias_ts_df[spread_column] + rmse_bias_ts_df['obs_err_var'])
+
+    # Rename square error to root mean square error
+    rmse_bias_ts_df.rename(columns={sq_err_column: rmse_column}, inplace=True)
+
+    return rmse_bias_ts_df
 
 def rmse_bias_by_obs_type(df, obs_type):
     """
@@ -155,7 +333,7 @@ def rmse_bias_by_obs_type(df, obs_type):
         raise ValueError(f"Observation type '{obs_type}' not found in DataFrame.")
     else:
         obs_type_df = df[df['type'] == obs_type]
-        obs_type_agg = obs_type_df.groupby('plevels').agg({'sq_err':mean_then_sqrt, 'bias':'mean'}).reset_index()
+        obs_type_agg = obs_type_df.groupby('vlevels', observed=False).agg({'sq_err':mean_then_sqrt, 'bias':'mean'}).reset_index()
         obs_type_agg.rename(columns={'sq_err':'rmse'}, inplace=True)
         return obs_type_agg
 
pydartdiags-0.0.43.dist-info/METADATA
@@ -0,0 +1,45 @@
+Metadata-Version: 2.1
+Name: pydartdiags
+Version: 0.0.43
+Summary: Observation Sequence Diagnostics for DART
+Home-page: https://github.com/NCAR/pyDARTdiags.git
+Author: Helen Kershaw
+Author-email: Helen Kershaw <hkershaw@ucar.edu>
+Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
+Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
+Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pandas>=2.2.0
+Requires-Dist: numpy>=1.26
+Requires-Dist: plotly>=5.22.0
+Requires-Dist: pyyaml>=6.0.2
+
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+[![codecov](https://codecov.io/gh/NCAR/pyDARTdiags/graph/badge.svg?token=VK55SQZSVD)](https://codecov.io/gh/NCAR/pyDARTdiags)
+[![PyPI version](https://badge.fury.io/py/pydartdiags.svg)](https://pypi.org/project/pydartdiags/)
+
+
+# pyDARTdiags
+
+pyDARTdiags is a Python library for observation space diagnostics for the Data Assimilation Research Testbed ([DART](https://github.com/NCAR/DART)).
+
+pyDARTdiags is under initial development, so please use caution.
+The MATLAB [observation space diagnostics](https://docs.dart.ucar.edu/en/latest/guide/matlab-observation-space.html) are available through [DART](https://github.com/NCAR/DART).
+
+
+pyDARTdiags can be installed through pip: https://pypi.org/project/pydartdiags/
+Documentation: https://ncar.github.io/pyDARTdiags/
+
+## Contributing
+Contributions are welcome! If you have a feature request, bug report, or a suggestion, please open an issue on our GitHub repository.
+Please read our [Contributors Guide](https://github.com/NCAR/pyDARTdiags/blob/main/CONTRIBUTING.md) if you would like to contribute to
+pyDARTdiags.
+
+## License
+
+pyDARTdiags is released under the Apache License 2.0. For more details, see the LICENSE file in the root directory of this source tree or visit [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).

pydartdiags-0.0.43.dist-info/RECORD
@@ -0,0 +1,10 @@
+pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pydartdiags/obs_sequence/obs_sequence.py,sha256=2pddiJ6VRFkaDizYq8HvGUpC4rw7TTV14XjmemjqCNg,34187
+pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+pydartdiags/plots/plots.py,sha256=UecLgWauO9L_EaGhEVxW3IuKcSU95uRA2mptsxh4-0E,13901
+pydartdiags-0.0.43.dist-info/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
+pydartdiags-0.0.43.dist-info/METADATA,sha256=udwmddMTrqFpyj0tjOffWVf2xbTI_3IwQCS4ZVvnnuU,2185
+pydartdiags-0.0.43.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+pydartdiags-0.0.43.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
+pydartdiags-0.0.43.dist-info/RECORD,,

pydartdiags-0.0.43.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.1.0)
+Generator: setuptools (75.7.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
pydartdiags-0.0.42.dist-info/METADATA
@@ -1,404 +0,0 @@
-Metadata-Version: 2.1
-Name: pydartdiags
-Version: 0.0.42
-Summary: Observation Sequence Diagnostics for DART
-Home-page: https://github.com/NCAR/pyDARTdiags.git
-Author: Helen Kershaw
-Author-email: Helen Kershaw <hkershaw@ucar.edu>
-Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
-Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
-Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
-Classifier: Programming Language :: Python :: 3
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Operating System :: OS Independent
-Requires-Python: >=3.8
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: pandas>=2.2.0
-Requires-Dist: numpy>=1.26
-Requires-Dist: plotly>=5.22.0
-Requires-Dist: pyyaml>=6.0.2
-
-[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
-[![codecov](https://codecov.io/gh/NCAR/pyDARTdiags/graph/badge.svg?token=VK55SQZSVD)](https://codecov.io/gh/NCAR/pyDARTdiags)
-[![PyPI version](https://badge.fury.io/py/pydartdiags.svg)](https://pypi.org/project/pydartdiags/)
-
-
-# pyDARTdiags
-
-pyDARTdiags is a Python library for observation space diagnostics for the Data Assimilation Research Testbed ([DART](https://github.com/NCAR/DART)).
-
-pyDARTdiags is under initial development, so please use caution.
-The MATLAB [observation space diagnostics](https://docs.dart.ucar.edu/en/latest/guide/matlab-observation-space.html) are available through [DART](https://github.com/NCAR/DART).
-
-
-pyDARTdiags can be installed through pip: https://pypi.org/project/pydartdiags/
-Documentation: https://ncar.github.io/pyDARTdiags/
-
-We recommend installing pydartdiags in a virtual environment:
-
-
-```
-python3 -m venv dartdiags
-source dartdiags/bin/activate
-pip install pydartdiags
-```
-
-## Example importing the obs\_sequence and plots modules
-
-```python
-from pydartdiags.obs_sequence import obs_sequence as obsq
-from pydartdiags.plots import plots
-```
-
-## Examining the dataframe
-
-```python
-obs_seq = obsq.obs_sequence('obs_seq.final.ascii')
-obs_seq.df.head()
-```
-
-<table border="1" class="dataframe">
-  <thead>
-    <tr style="text-align: right;">
-      <th></th>
-      <th>obs_num</th>
-      <th>observation</th>
-      <th>prior_ensemble_mean</th>
-      <th>prior_ensemble_spread</th>
-      <th>prior_ensemble_member_1</th>
-      <th>prior_ensemble_member_2</th>
-      <th>prior_ensemble_member_3</th>
-      <th>prior_ensemble_member_4</th>
-      <th>prior_ensemble_member_5</th>
-      <th>prior_ensemble_member_6</th>
-      <th>...</th>
-      <th>latitude</th>
-      <th>vertical</th>
-      <th>vert_unit</th>
-      <th>type</th>
-      <th>seconds</th>
-      <th>days</th>
-      <th>time</th>
-      <th>obs_err_var</th>
-      <th>bias</th>
-      <th>sq_err</th>
-    </tr>
-  </thead>
-  <tbody>
-    <tr>
-      <th>0</th>
-      <td>1</td>
-      <td>230.16</td>
-      <td>231.310652</td>
-      <td>0.405191</td>
-      <td>231.304725</td>
-      <td>231.562874</td>
-      <td>231.333915</td>
-      <td>231.297690</td>
-      <td>232.081416</td>
-      <td>231.051063</td>
-      <td>...</td>
-      <td>0.012188</td>
-      <td>23950.0</td>
-      <td>pressure (Pa)</td>
-      <td>ACARS_TEMPERATURE</td>
-      <td>75603</td>
-      <td>153005</td>
-      <td>2019-12-01 21:00:03</td>
-      <td>1.00</td>
-      <td>1.150652</td>
-      <td>1.324001</td>
-    </tr>
-    <tr>
-      <th>1</th>
-      <td>2</td>
-      <td>18.40</td>
-      <td>15.720527</td>
-      <td>0.630827</td>
-      <td>14.217207</td>
-      <td>15.558196</td>
-      <td>15.805599</td>
-      <td>16.594644</td>
-      <td>14.877743</td>
-      <td>16.334438</td>
-      <td>...</td>
-      <td>0.012188</td>
-      <td>23950.0</td>
-      <td>pressure (Pa)</td>
-      <td>ACARS_U_WIND_COMPONENT</td>
-      <td>75603</td>
-      <td>153005</td>
-      <td>2019-12-01 21:00:03</td>
-      <td>6.25</td>
-      <td>-2.679473</td>
-      <td>7.179578</td>
-    </tr>
-    <tr>
-      <th>2</th>
-      <td>3</td>
-      <td>1.60</td>
-      <td>-4.932073</td>
-      <td>0.825899</td>
-      <td>-5.270562</td>
-      <td>-5.955998</td>
-      <td>-4.209766</td>
-      <td>-5.105016</td>
-      <td>-4.669405</td>
-      <td>-4.365305</td>
-      <td>...</td>
-      <td>0.012188</td>
-      <td>23950.0</td>
-      <td>pressure (Pa)</td>
-      <td>ACARS_V_WIND_COMPONENT</td>
-      <td>75603</td>
-      <td>153005</td>
-      <td>2019-12-01 21:00:03</td>
-      <td>6.25</td>
-      <td>-6.532073</td>
-      <td>42.667980</td>
-    </tr>
-    <tr>
-      <th>3</th>
-      <td>4</td>
-      <td>264.16</td>
-      <td>264.060532</td>
-      <td>0.035584</td>
-      <td>264.107192</td>
-      <td>264.097270</td>
-      <td>264.073212</td>
-      <td>264.047718</td>
-      <td>264.074140</td>
-      <td>264.019895</td>
-      <td>...</td>
-      <td>0.010389</td>
-      <td>56260.0</td>
-      <td>pressure (Pa)</td>
-      <td>ACARS_TEMPERATURE</td>
-      <td>75603</td>
-      <td>153005</td>
-      <td>2019-12-01 21:00:03</td>
-      <td>1.00</td>
-      <td>-0.099468</td>
-      <td>0.009894</td>
-    </tr>
-    <tr>
-      <th>4</th>
-      <td>5</td>
-      <td>11.60</td>
-      <td>10.134115</td>
-      <td>0.063183</td>
-      <td>10.067956</td>
-      <td>10.078798</td>
-      <td>10.120263</td>
-      <td>10.084885</td>
-      <td>10.135112</td>
-      <td>10.140610</td>
-      <td>...</td>
-      <td>0.010389</td>
-      <td>56260.0</td>
-      <td>pressure (Pa)</td>
-      <td>ACARS_U_WIND_COMPONENT</td>
-      <td>75603</td>
-      <td>153005</td>
-      <td>2019-12-01 21:00:03</td>
-      <td>6.25</td>
-      <td>-1.465885</td>
-      <td>2.148818</td>
-    </tr>
-  </tbody>
-</table>
-<p>5 rows × 97 columns</p>
-
-
-Find the number of assimilated (used) observations vs. possible observations by type
-
-```python
-obsq.possible_vs_used(obs_seq.df)
-```
-
-<table border="1" class="dataframe">
-  <thead>
-    <tr style="text-align: right;">
-      <th></th>
-      <th>type</th>
-      <th>possible</th>
-      <th>used</th>
-    </tr>
-  </thead>
-  <tbody>
-    <tr>
-      <th>0</th>
-      <td>ACARS_TEMPERATURE</td>
-      <td>175429</td>
-      <td>128040</td>
-    </tr>
-    <tr>
-      <th>1</th>
-      <td>ACARS_U_WIND_COMPONENT</td>
-      <td>176120</td>
-      <td>126946</td>
-    </tr>
-    <tr>
-      <th>2</th>
-      <td>ACARS_V_WIND_COMPONENT</td>
-      <td>176120</td>
-      <td>127834</td>
-    </tr>
-    <tr>
-      <th>3</th>
-      <td>AIRCRAFT_TEMPERATURE</td>
-      <td>21335</td>
-      <td>13663</td>
-    </tr>
-    <tr>
-      <th>4</th>
-      <td>AIRCRAFT_U_WIND_COMPONENT</td>
-      <td>21044</td>
-      <td>13694</td>
-    </tr>
-    <tr>
-      <th>5</th>
-      <td>AIRCRAFT_V_WIND_COMPONENT</td>
-      <td>21044</td>
-      <td>13642</td>
-    </tr>
-    <tr>
-      <th>6</th>
-      <td>AIRS_SPECIFIC_HUMIDITY</td>
-      <td>6781</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>7</th>
-      <td>AIRS_TEMPERATURE</td>
-      <td>19583</td>
-      <td>7901</td>
-    </tr>
-    <tr>
-      <th>8</th>
-      <td>GPSRO_REFRACTIVITY</td>
-      <td>81404</td>
-      <td>54626</td>
-    </tr>
-    <tr>
-      <th>9</th>
-      <td>LAND_SFC_ALTIMETER</td>
-      <td>21922</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>10</th>
-      <td>MARINE_SFC_ALTIMETER</td>
-      <td>9987</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>11</th>
-      <td>MARINE_SFC_SPECIFIC_HUMIDITY</td>
-      <td>4196</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>12</th>
-      <td>MARINE_SFC_TEMPERATURE</td>
-      <td>8646</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>13</th>
-      <td>MARINE_SFC_U_WIND_COMPONENT</td>
-      <td>8207</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>14</th>
-      <td>MARINE_SFC_V_WIND_COMPONENT</td>
-      <td>8207</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>15</th>
-      <td>RADIOSONDE_SPECIFIC_HUMIDITY</td>
-      <td>14272</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>16</th>
-      <td>RADIOSONDE_SURFACE_ALTIMETER</td>
-      <td>601</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <th>17</th>
-      <td>RADIOSONDE_TEMPERATURE</td>
-      <td>29275</td>
-      <td>22228</td>
-    </tr>
-    <tr>
-      <th>18</th>
-      <td>RADIOSONDE_U_WIND_COMPONENT</td>
-      <td>36214</td>
-      <td>27832</td>
-    </tr>
-    <tr>
-      <th>19</th>
-      <td>RADIOSONDE_V_WIND_COMPONENT</td>
-      <td>36214</td>
-      <td>27975</td>
-    </tr>
-    <tr>
-      <th>20</th>
-      <td>SAT_U_WIND_COMPONENT</td>
-      <td>107212</td>
-      <td>82507</td>
-    </tr>
-    <tr>
-      <th>21</th>
-      <td>SAT_V_WIND_COMPONENT</td>
-      <td>107212</td>
-      <td>82647</td>
-    </tr>
-  </tbody>
-</table>
-
-
-## Example plotting
-
-### rank histogram
-
-* Select only observations that were assimilated (QC == 0).
-* plot the rank histogram
-
-```python
-df_qc0 = obsq.select_by_dart_qc(obs_seq.df, 0)
-plots.plot_rank_histogram(df_qc0)
-```
-![Rank Histogram](https://raw.githubusercontent.com/NCAR/pydartdiags/main/docs/images/rankhist.png)
-
-
-### plot profile of RMSE and Bias
-
-* Choose levels
-* Select only observations that were assimilated (QC == 0).
-* plot the profiles
-
-```python
-hPalevels = [0.0, 100.0, 150.0, 200.0, 250.0, 300.0, 400.0, 500.0, 700, 850, 925, 1000]  # hPa; optionally append float("inf")
-plevels = [i * 100 for i in hPalevels]
-
-df_qc0 = obsq.select_by_dart_qc(obs_seq.df, 0) # only qc 0
-df_profile, figrmse, figbias = plots.plot_profile(df_qc0, plevels)
-```
-
-![RMSE Plot](https://raw.githubusercontent.com/NCAR/pydartdiags/main/docs/images/rmse.png)
-
-![Bias Plot](https://raw.githubusercontent.com/NCAR/pydartdiags/main/docs/images/bias.png)
-
-## Contributing
-Contributions are welcome! If you have a feature request, bug report, or a suggestion, please open an issue on our GitHub repository.
-
-## License
-
-pyDARTdiags is released under the Apache License 2.0. For more details, see the LICENSE file in the root directory of this source tree or visit [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).

pydartdiags-0.0.42.dist-info/RECORD
@@ -1,10 +0,0 @@
-pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pydartdiags/obs_sequence/obs_sequence.py,sha256=UBaNMJR3MOro47KyJMdgJ0p-aEqcrp817-SOtpFQ1bg,31511
-pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pydartdiags/plots/plots.py,sha256=_vZFgQ9qrmtwE_HAP6_nx3pV4JHRdnYckZ5xUxUH4io,6753
-pydartdiags-0.0.42.dist-info/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
-pydartdiags-0.0.42.dist-info/METADATA,sha256=oJe2bv1cjoIhlpJQF2tLIIGhS5zu5qJJzFYEZw9ffgQ,9857
-pydartdiags-0.0.42.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-pydartdiags-0.0.42.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
-pydartdiags-0.0.42.dist-info/RECORD,,