py-ewr 2.2.0__tar.gz → 2.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {py_ewr-2.2.0 → py_ewr-2.2.3}/PKG-INFO +15 -3
- {py_ewr-2.2.0 → py_ewr-2.2.3}/README.md +12 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/data_inputs.py +13 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/evaluate_EWRs.py +1 -4
- py_ewr-2.2.3/py_ewr/io.py +24 -0
- py_ewr-2.2.3/py_ewr/model_metadata/iqqm_stations.csv +11 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/observed_handling.py +0 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/scenario_handling.py +123 -9
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/PKG-INFO +15 -3
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/SOURCES.txt +2 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/requires.txt +2 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/setup.py +3 -2
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_data_inputs.py +15 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_scenario_handling.py +91 -2
- {py_ewr-2.2.0 → py_ewr-2.2.3}/LICENSE +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/__init__.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/model_metadata/SiteID_MDBA.csv +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/model_metadata/SiteID_NSW.csv +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/parameter_metadata/ewr_calc_config.json +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/parameter_metadata/parameter_sheet.csv +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/summarise_results.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/dependency_links.txt +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/top_level.txt +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/pyproject.toml +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/setup.cfg +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_evaluate_ewr_rest.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_evaluate_ewrs.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_observed_handling.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_summarise_results.py +0 -0
--- py_ewr-2.2.0/PKG-INFO
+++ py_ewr-2.2.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py_ewr
-Version: 2.2.0
+Version: 2.2.3
 Summary: Environmental Water Requirement calculator
 Home-page: https://github.com/MDBAuth/EWR_tool
 Author: Martin Job
@@ -23,9 +23,10 @@ Requires-Dist: ipython==8.8.0
 Requires-Dist: ipywidgets==7.7.0
 Requires-Dist: pandas==2.0.3
 Requires-Dist: requests==2.25.1
-Requires-Dist: tqdm==4.64.0
 Requires-Dist: mdba-gauge-getter==0.5.1
 Requires-Dist: cachetools==5.2.0
+Requires-Dist: xarray==2023.01.0
+Requires-Dist: netCDF4==1.6.4
 Requires-Dist: numpy<2
 
 []()
@@ -33,9 +34,12 @@ Requires-Dist: numpy<2
 [](https://pypi.org/project/py-ewr/)
 [](https://zenodo.org/badge/latestdoi/342122359)
 
-### **EWR tool version 2.2.0 README**
+### **EWR tool version 2.2.3 README**
 
 ### **Notes on recent version update**
+- Remove TQDM loading bars
+- Handle duplicate sites in MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped over
+- Add new model format handling - 'IQQM - netcdf'
 - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
 - ten thousand year handling - this has been temporarily taken offline for this version.
 - bug fixes: spells of length equal to the minimum required spell length were getting filtered out of the successful events table and successful interevents table; fixed misclassification of some gauges to flow, level, and lake level categories
@@ -222,3 +226,11 @@ NSW:
 
 Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.
 
+To disable progress bars, for example when running remote scripted runs, set
+
+``` python
+import os
+os.environ["TQDM_DISABLE"] = "1"
+```
+*before* importing py-ewr in your script.
+
--- py_ewr-2.2.0/README.md
+++ py_ewr-2.2.3/README.md
@@ -3,9 +3,12 @@
 [](https://pypi.org/project/py-ewr/)
 [](https://zenodo.org/badge/latestdoi/342122359)
 
-### **EWR tool version 2.2.0 README**
+### **EWR tool version 2.2.3 README**
 
 ### **Notes on recent version update**
+- Remove TQDM loading bars
+- Handle duplicate sites in MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped over
+- Add new model format handling - 'IQQM - netcdf'
 - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
 - ten thousand year handling - this has been temporarily taken offline for this version.
 - bug fixes: spells of length equal to the minimum required spell length were getting filtered out of the successful events table and successful interevents table; fixed misclassification of some gauges to flow, level, and lake level categories
@@ -192,3 +195,11 @@ NSW:
 
 Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.
 
+To disable progress bars, for example when running remote scripted runs, set
+
+``` python
+import os
+os.environ["TQDM_DISABLE"] = "1"
+```
+*before* importing py-ewr in your script.
+
--- py_ewr-2.2.0/py_ewr/data_inputs.py
+++ py_ewr-2.2.3/py_ewr/data_inputs.py
@@ -138,6 +138,19 @@ def get_NSW_codes() -> pd.DataFrame:
 
     return metadata
 
+def get_iqqm_codes() -> dict:
+    '''
+    Load the metadata file for the Macquarie containing model nodes
+    and the gauges they correspond to.
+
+    Returns:
+        dict: dict linking model nodes to gauges
+    '''
+
+    metadf = pd.read_csv(BASE_PATH / 'model_metadata/iqqm_stations.csv', dtype=str)
+    metadata = metadf.set_index(metadf.columns[0]).to_dict()[metadf.columns[1]]
+    return metadata
+
 def get_level_gauges() -> tuple:
     '''Returning level gauges with EWRs
 
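The new helper returns a plain dict keyed by model node. A minimal sketch of its use, with one real node/gauge pair taken from the test_get_iqqm_codes test added further down (the assert itself is illustrative):

``` python
from py_ewr import data_inputs

# Node IDs and gauge numbers are both strings
iqqm = data_inputs.get_iqqm_codes()
assert iqqm['42'] == '421001'  # node 42 maps to gauge 421001, per the unit test below
```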
--- py_ewr-2.2.0/py_ewr/evaluate_EWRs.py
+++ py_ewr-2.2.3/py_ewr/evaluate_EWRs.py
@@ -10,7 +10,6 @@ import logging
 
 import pandas as pd
 import numpy as np
-from tqdm import tqdm
 
 from . import data_inputs
 
@@ -5086,9 +5085,7 @@ def calc_sorter(df_F:pd.DataFrame, df_L:pd.DataFrame, gauge:str, EWR_table:pd.DataFrame,
     EWR_codes = PU_table['Code']
     PU_df = pd.DataFrame()
     PU_events = {}
-    for i, EWR in enumerate(tqdm(EWR_codes,
-                                 bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
-                                 desc= str('Evaluating ewrs for '+ gauge))):
+    for i, EWR in enumerate(EWR_codes):
         events = {}
 
         MULTIGAUGE = is_multigauge(EWR_table, gauge, EWR, PU)
--- /dev/null
+++ py_ewr-2.2.3/py_ewr/io.py
@@ -0,0 +1,24 @@
+import xarray as xr
+from pandas import DataFrame as Dataframe
+
+
+def read_netcdf_as_dataframe(netcdf_path: str) -> Dataframe:
+    dataset = xr.open_dataset(netcdf_path, engine='netcdf4')
+    df = dataset.to_dataframe()
+    dataset.close()
+
+    return df
+
+
+def save_dataframe_as_netcdf(df, output_path: str) -> None:
+    # Convert the DataFrame to an xarray Dataset
+    ds = xr.Dataset.from_dataframe(df)
+
+    # Modify variable names to ensure they are valid for NetCDF
+    for var_name in ds.variables:
+        new_var_name = var_name.replace(" ", "_")  # replace spaces with underscores
+        new_var_name = ''.join(c for c in new_var_name if c.isalnum() or c == "_")  # drop other non-alphanumeric characters
+        ds = ds.rename({var_name: new_var_name})
+
+    # Save the modified xarray Dataset as a NetCDF file
+    ds.to_netcdf(output_path)
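A minimal round-trip sketch for the new io.py helpers; the file name and the toy frame are illustrative, not part of the package:

``` python
import pandas as pd
from py_ewr.io import read_netcdf_as_dataframe, save_dataframe_as_netcdf

# Toy frame with a space in the column name, which the saver sanitises
df = pd.DataFrame({'Simulated flow': [1.0, 2.0, 3.0]},
                  index=pd.date_range('2020-07-01', periods=3, name='time'))

save_dataframe_as_netcdf(df, 'example.nc')       # written back as variable 'Simulated_flow'
df_back = read_netcdf_as_dataframe('example.nc')
```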
--- py_ewr-2.2.0/py_ewr/scenario_handling.py
+++ py_ewr-2.2.3/py_ewr/scenario_handling.py
@@ -7,7 +7,8 @@ from datetime import datetime, date
 import logging
 
 import pandas as pd
-
+import xarray as xr
+import netCDF4
 
 log = logging.getLogger(__name__)
 log.addHandler(logging.NullHandler())
@@ -15,6 +16,61 @@ log.addHandler(logging.NullHandler())
 
 from . import data_inputs, evaluate_EWRs, summarise_results
 #----------------------------------- Scenario testing handling functions--------------------------#
+def is_valid_netcdf_file(file_path: str) -> bool:
+    try:
+        with netCDF4.Dataset(file_path, 'r'):
+            # If the file opens successfully, it's a valid NetCDF file
+            return True
+    except Exception as e:
+        # If an exception is raised, it's not a valid NetCDF file
+        return False
+
+
+def unpack_netcdf_as_dataframe(netcdf_file: str) -> pd.DataFrame:
+    '''Ingesting netCDF files and outputting as dataframes in memory.
+    # Example usage:
+    # df = unpack_netcdf_as_dataframe('your_file.nc')
+
+    Args:
+        netcdf_file (str): location of netCDF file
+
+    Returns:
+        pd.DataFrame: netCDF file converted to dataframe
+    '''
+    try:
+        # Check if the file is a valid NetCDF file
+        if not is_valid_netcdf_file(netcdf_file):
+            raise ValueError("Not a valid NetCDF file.")
+
+        # Open the NetCDF file
+        dataset = xr.open_dataset(netcdf_file, engine='netcdf4')
+
+        # Check if the dataset is empty
+        if dataset is None:
+            raise ValueError("NetCDF dataset is empty.")
+
+        # extract the bits we actually can use
+        # Some of this needs to move/get cleaned up
+        iqqm_dict = data_inputs.get_iqqm_codes()
+        # the nodes are ints, but the above is str
+        ints_list = list(map(int, list(iqqm_dict)))
+
+        # Is there any reason to do these in one step?
+        dataset = dataset.sel(node=dataset['node'].isin(ints_list))
+        dataset = dataset[['Simulated flow']]
+
+        # Convert to DataFrame
+        df = dataset.to_dataframe()
+
+        # Close the dataset
+        dataset.close()
+
+        return df
+    except Exception as e:
+        # Handle any exceptions that may occur
+        print(f"Error: {str(e)}")
+        return None
+
 
 def unpack_model_file(csv_file: str, main_key: str, header_key: str) -> tuple:
     '''Ingesting scenario file locations of model files with all formats (excluding standard timeseries format), separates the flow data and header data
@@ -280,6 +336,52 @@ def cleaner_standard_timeseries(input_df: pd.DataFrame, ewr_table_path: str = None) -> pd.DataFrame:
         log.info('Could not identify gauge in column name:', gauge, ', skipping analysis of data in this column.')
     return df_flow, df_level
 
+def cleaner_netcdf_werp(input_df: pd.DataFrame, stations: dict) -> tuple:
+
+    '''Ingests dataframe, cleans up into a format matching IQQM csv
+
+    Args:
+        input_df (pd.DataFrame): raw xarray dataframe read-in
+
+        stations (dict): dict mapping IQQM stations to gauge numbers
+
+    Returns:
+        tuple[pd.DataFrame, pd.DataFrame]: cleaned flow dataframe; cleaned water level dataframe
+
+    '''
+
+    # organise like the rest of the dataframes - make this look just like we've read it in from an IQQM csv
+    cleaned_df = input_df.reset_index(level = 'node')
+    cleaned_df['node'] = cleaned_df['node'].astype(str)
+
+    cleaned_df['gauge'] = cleaned_df['node'].map(stations)
+    cleaned_df = cleaned_df.drop('node', axis = 1)
+
+    # drop the values that don't map to a gauge (lots of nodes in iqqm don't)
+    # This should be deprecated with the new way of choosing nodes on read-in, but being careful
+    cleaned_df = cleaned_df.query('gauge.notna()')
+
+    # give each gauge its own column - that's what the tool expects
+    cleaned_df = cleaned_df.pivot(columns = 'gauge', values = 'Simulated flow')
+    cleaned_df.columns.name = None
+
+    # the csvs return an 'object' type, not a datetime in the index,
+    # but it gets converted to datetime in cleaner_***, so leave it.
+    cleaned_df.index.names = ['Date']
+
+    # Split gauges into flow and level, allocate to respective dataframe
+    flow_gauges = data_inputs.get_gauges('flow gauges')
+    level_gauges = data_inputs.get_gauges('level gauges')
+    df_flow = pd.DataFrame(index = cleaned_df.index)
+    df_level = pd.DataFrame(index = cleaned_df.index)
+    for gauge in cleaned_df.columns:
+        if gauge in flow_gauges:
+            df_flow[gauge] = cleaned_df[gauge].copy(deep=True)
+        if gauge in level_gauges:
+            df_level[gauge] = cleaned_df[gauge].copy(deep=True)
+
+    return df_flow, df_level
+
 
 def cleaner_ten_thousand_year(input_df: pd.DataFrame, ewr_table_path: str = None) -> pd.DataFrame:
     '''Ingests dataframe, removes junk columns, fixes date, allocates gauges to either flow/level
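What cleaner_netcdf_werp does is essentially a node-to-gauge relabel plus a long-to-wide pivot. A standalone pandas sketch with toy values (only the two node/gauge pairs are real, taken from the metadata test):

``` python
import pandas as pd

# Long format, as xarray's to_dataframe() produces: a (time, node) MultiIndex
idx = pd.MultiIndex.from_product(
    [pd.date_range('2020-07-01', periods=2), [42, 229]], names=['time', 'node'])
long_df = pd.DataFrame({'Simulated flow': [1.0, 2.0, 3.0, 4.0]}, index=idx)

stations = {'42': '421001', '229': '421023'}  # node -> gauge mapping

wide = long_df.reset_index(level='node')
wide['gauge'] = wide['node'].astype(str).map(stations)
wide = wide.drop('node', axis=1)
wide = wide[wide['gauge'].notna()]            # drop nodes with no gauge
wide = wide.pivot(columns='gauge', values='Simulated flow')  # one column per gauge
```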
@@ -334,6 +436,8 @@ def extract_gauge_from_string(input_string: str) -> str:
         gauge = input_string.split('_')[0]
         return gauge
 
+
+
 def match_MDBA_nodes(input_df: pd.DataFrame, model_metadata: pd.DataFrame, ewr_table_path: str) -> tuple:
     '''Checks if the source file columns have EWRs available, returns a flow and level dataframe with only
     the columns with EWRs available. Renames columns to gauges
@@ -358,11 +462,19 @@ def match_MDBA_nodes(input_df: pd.DataFrame, model_metadata: pd.DataFrame, ewr_table_path: str) -> tuple:
             measure = col_clean.split('-')[1]
             if ((measure in measurands) and (model_metadata['SITEID'] == site).any()):
                 subset = model_metadata.query("SITEID==@site")
-
-
-
-
-
+                for iset in range(len(subset)):
+                    gauge = subset["AWRC"].iloc[iset]
+                    if gauge in flow_gauges and measure == '1':
+                        df_flow[gauge] = input_df[col]
+                    if gauge in level_gauges and measure == '35':
+                        aa = input_df[[col]]
+                        if (len(aa.columns) > 1):
+                            print('More than one site has been identified, the first site is used')
+                            print('Site info: ', col)
+                            df_level[gauge] = aa.iloc[:, 0]
+                        else:
+                            df_level[gauge] = input_df[col]
+
     if df_flow.empty:
         raise ValueError('No relevant gauges and or measurands found in dataset, the EWR tool cannot evaluate this model output file')
     return df_flow, df_level
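The duplicate handling above leans on a pandas behaviour worth spelling out: selecting a single label with df[[col]] returns every column that shares that name, so len(aa.columns) > 1 detects duplicates. A toy illustration (the data is made up):

``` python
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=['EUSTUS-35-8', 'EUSTUS-35-8'])
aa = df[['EUSTUS-35-8']]   # both columns share the label, so both come back
assert len(aa.columns) == 2
first = aa.iloc[:, 0]      # the first match is used, as the release notes state
```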
@@ -435,9 +547,7 @@ class ScenarioHandler:
         # Analyse all scenarios for EWRs
         detailed_results = {}
         detailed_events = {}
-        for scenario in tqdm(scenarios,
-                             bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
-                             desc= 'Evaluating scenarios'):
+        for scenario in scenarios:
             if self.model_format == 'Bigmod - MDBA':
 
                 data, header = unpack_model_file(scenarios[scenario], 'Dy', 'Field')
@@ -455,6 +565,10 @@ class ScenarioHandler:
                 df_clean = cleaner_NSW(data)
                 df_F, df_L = match_NSW_nodes(df_clean, data_inputs.get_NSW_codes())
 
+            elif self.model_format == 'IQQM - netcdf':
+                df_unpacked = unpack_netcdf_as_dataframe(scenarios[scenario])
+                df_F, df_L = cleaner_netcdf_werp(df_unpacked, data_inputs.get_iqqm_codes())
+
             elif self.model_format == 'ten thousand year':
                 df = pd.read_csv(scenarios[scenario], index_col = 'Date')
                 df_F, df_L = cleaner_ten_thousand_year(df, self.parameter_sheet)
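End to end, the new format is selected the same way as the existing ones. This mirrors the test_netcdf_processes test added at the bottom of this diff; the .nc path here is illustrative:

``` python
from py_ewr import scenario_handling

# 'IQQM - netcdf' routes the file through unpack_netcdf_as_dataframe()
# and cleaner_netcdf_werp() before the usual EWR evaluation
ewr_sh = scenario_handling.ScenarioHandler('scenario.nc', 'IQQM - netcdf')
ewr_summary = ewr_sh.get_ewr_results()
```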
--- py_ewr-2.2.0/py_ewr.egg-info/PKG-INFO
+++ py_ewr-2.2.3/py_ewr.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py_ewr
-Version: 2.2.0
+Version: 2.2.3
 Summary: Environmental Water Requirement calculator
 Home-page: https://github.com/MDBAuth/EWR_tool
 Author: Martin Job
@@ -23,9 +23,10 @@ Requires-Dist: ipython==8.8.0
 Requires-Dist: ipywidgets==7.7.0
 Requires-Dist: pandas==2.0.3
 Requires-Dist: requests==2.25.1
-Requires-Dist: tqdm==4.64.0
 Requires-Dist: mdba-gauge-getter==0.5.1
 Requires-Dist: cachetools==5.2.0
+Requires-Dist: xarray==2023.01.0
+Requires-Dist: netCDF4==1.6.4
 Requires-Dist: numpy<2
 
 []()
@@ -33,9 +34,12 @@ Requires-Dist: numpy<2
 [](https://pypi.org/project/py-ewr/)
 [](https://zenodo.org/badge/latestdoi/342122359)
 
-### **EWR tool version 2.2.0 README**
+### **EWR tool version 2.2.3 README**
 
 ### **Notes on recent version update**
+- Remove TQDM loading bars
+- Handle duplicate sites in MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped over
+- Add new model format handling - 'IQQM - netcdf'
 - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
 - ten thousand year handling - this has been temporarily taken offline for this version.
 - bug fixes: spells of length equal to the minimum required spell length were getting filtered out of the successful events table and successful interevents table; fixed misclassification of some gauges to flow, level, and lake level categories
@@ -222,3 +226,11 @@ NSW:
 
 Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.
 
+To disable progress bars, for example when running remote scripted runs, set
+
+``` python
+import os
+os.environ["TQDM_DISABLE"] = "1"
+```
+*before* importing py-ewr in your script.
+
--- py_ewr-2.2.0/py_ewr.egg-info/SOURCES.txt
+++ py_ewr-2.2.3/py_ewr.egg-info/SOURCES.txt
@@ -5,6 +5,7 @@ setup.py
 py_ewr/__init__.py
 py_ewr/data_inputs.py
 py_ewr/evaluate_EWRs.py
+py_ewr/io.py
 py_ewr/observed_handling.py
 py_ewr/scenario_handling.py
 py_ewr/summarise_results.py
@@ -15,6 +16,7 @@ py_ewr.egg-info/requires.txt
 py_ewr.egg-info/top_level.txt
 py_ewr/model_metadata/SiteID_MDBA.csv
 py_ewr/model_metadata/SiteID_NSW.csv
+py_ewr/model_metadata/iqqm_stations.csv
 py_ewr/parameter_metadata/ewr_calc_config.json
 py_ewr/parameter_metadata/parameter_sheet.csv
 tests/test_data_inputs.py
--- py_ewr-2.2.0/setup.py
+++ py_ewr-2.2.3/setup.py
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="py_ewr",
-    version="2.2.0",
+    version="2.2.3",
     author="Martin Job",
     author_email="Martin.Job@mdba.gov.au",
     description="Environmental Water Requirement calculator",
@@ -35,9 +35,10 @@ setup(
         "ipywidgets==7.7.0",
         "pandas==2.0.3",
         "requests==2.25.1",
-        "tqdm==4.64.0",
         "mdba-gauge-getter==0.5.1",
         "cachetools==5.2.0",
+        "xarray==2023.01.0",
+        "netCDF4==1.6.4",
         "numpy<2"
     ],
    package_data={'': ["model_metadata/*.csv", "parameter_metadata/*.csv","parameter_metadata/*.json"]},
--- py_ewr-2.2.0/tests/test_data_inputs.py
+++ py_ewr-2.2.3/tests/test_data_inputs.py
@@ -105,4 +105,18 @@ def test_get_cllmm_gauges():
 def test_get_scenario_gauges(gauge_results, expected_results):
     result = data_inputs.get_scenario_gauges(gauge_results)
     assert sorted(result) == expected_results
-
+
+def test_get_iqqm_codes():
+    result = data_inputs.get_iqqm_codes()
+    stations = {
+        '229': '421023',
+        '42': '421001',
+        '464': '421011',
+        '240': '421019',
+        '266': '421146',
+        '951': '421090',
+        '487': '421022',
+        '130': '421012',
+        '171': '421004',
+    }
+    assert stations == result
--- py_ewr-2.2.0/tests/test_scenario_handling.py
+++ py_ewr-2.2.3/tests/test_scenario_handling.py
@@ -12,7 +12,9 @@ from py_ewr import scenario_handling, data_inputs
 def test_match_MDBA_nodes():
     '''
     1. Ensure dataframe with flows and levels is split into two dataframes (one flow and one level dataframe)
+    2. Ensure first column is used when duplicate columns are loaded
     '''
+    # TEST 1 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
     # Set up input data and pass to test function:
     model_metadata = data_inputs.get_MDBA_codes()
     data_df = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
@@ -39,6 +41,36 @@ def test_match_MDBA_nodes():
     assert_frame_equal(df_F, expected_df_F)
     assert_frame_equal(df_L, expected_df_L)
 
+    # TEST 2 #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    # Set up input data and pass to test function:
+    model_metadata = data_inputs.get_MDBA_codes()
+    data_df = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+               'EUSTDS-1-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Use
+               'EUSTDS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Skip
+               'EUSTUS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Skip
+               'EUSTUS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Use
+               'EUSTUS-1-8': [1]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Skip
+               }
+    df = pd.DataFrame(data = data_df)
+    df = df.set_index('Date')
+
+    df_F, df_L = scenario_handling.match_MDBA_nodes(df, model_metadata, 'py_ewr/parameter_metadata/parameter_sheet.csv')
+
+    # Set up expected outputs and test:
+    data_expected_df_L = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                          '414209': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10
+                          }
+    expected_df_L = pd.DataFrame(data_expected_df_L)
+    expected_df_L = expected_df_L.set_index('Date')
+    data_expected_df_F = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                          '414203': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10
+                          }
+    expected_df_F = pd.DataFrame(data_expected_df_F)
+    expected_df_F = expected_df_F.set_index('Date')
+
+    assert_frame_equal(df_F, expected_df_F)
+    assert_frame_equal(df_L, expected_df_L)
+
 def test_match_NSW_nodes():
     '''
     1. Check NSW model nodes are mapped correctly to their gauges
@@ -197,6 +229,30 @@ def test_cleaner_MDBA():
 
     assert_frame_equal(df_clean, expected_df)
 
+def test_cleaner_netcdf_werp():
+    '''
+    1. check ncdf is unpacked correctly
+    '''
+    df = scenario_handling.unpack_netcdf_as_dataframe('unit_testing_files/werp_ncdf.nc')
+    df_F, df_L = scenario_handling.cleaner_netcdf_werp(df, data_inputs.get_iqqm_codes())
+
+    # the test ncdf is too big to mock, so check properties
+    assert df_F.dtypes.iloc[0] == 'float32'
+    assert isinstance(df_F.index, pd.DatetimeIndex)
+    assert all(df_F.columns == ['421001', '421004', '421012', '421019', '421022', '421023', '421090', '421146'])
+
+
+def test_csv_input():
+    '''
+    1. check we can feed scenario_handling a csv that looks like gauge data
+    '''
+
+    # Can we use standard time-series to feed csv scenarios?
+    ewr_sh_standard = scenario_handling.ScenarioHandler('unit_testing_files/multi_gauge_input_label.csv', 'Standard time-series')
+    standardout = ewr_sh_standard.get_ewr_results()
+
+    assert isinstance(standardout, pd.DataFrame)
+
 
 def test_build_NSW_columns():
     '''
@@ -266,6 +322,7 @@ def test_unpack_model_file():
 
     # assert_frame_equal(flow, expected_flow)
 
+
 def test_scenario_handler_class(scenario_handler_expected_detail, scenario_handler_instance):
 
     detailed = scenario_handler_instance.pu_ewr_statistics
@@ -325,11 +382,43 @@ def test_get_ewr_results(scenario_handler_instance):
     assert ewr_results.columns.to_list() == ['Scenario', 'Gauge', 'PlanningUnit', 'EwrCode', 'Multigauge','EventYears',
                                              'Frequency', 'TargetFrequency', 'AchievementCount',
                                              'AchievementPerYear', 'EventCount', 'EventCountAll', 'EventsPerYear', 'EventsPerYearAll',
-                                             'AverageEventLength', 'ThresholdDays',
+                                             'AverageEventLength', 'ThresholdDays', #'InterEventExceedingCount',
                                              'MaxInterEventYears', 'NoDataDays', 'TotalDays']
 
+
+def test_unpack_netcdf_as_dataframe():
+    test_flowcdf = 'unit_testing_files/werp_ncdf.nc'
+    result_flow = scenario_handling.unpack_netcdf_as_dataframe(test_flowcdf)
+    expected_flow_shape = (16000, 1)
+    assert result_flow.shape == expected_flow_shape
+
+
+def test_unpack_netcdf_as_dataframe_invalid_file():
+    test_invalid_file = 'unit_testing_files/NSW_source_res_test_file_header_result.csv'
+    try:
+        result_df = scenario_handling.unpack_netcdf_as_dataframe(test_invalid_file)
+    except ValueError as e:
+        assert "Not a valid NetCDF file." in str(e)
+
+
 def test_any_cllmm_to_process(gauge_results):
     result = scenario_handling.any_cllmm_to_process(gauge_results)
     assert result == True
 
-
+# This *should* likely use something like conftest.scenario_handler_instance, but that seems to be locked to bigmod.
+def test_netcdf_processes():
+    # Testing the netcdf format:
+    # Input params
+    # scenarios = 'unit_testing_files/ex_tasker.nc'
+    scenarios = 'unit_testing_files/werp_ncdf.nc'
+    model_format = 'IQQM - netcdf'
+    # allowance = {'minThreshold': 1.0, 'maxThreshold': 1.0, 'duration': 1.0, 'drawdown': 1.0}
+    # climate = 'Standard - 1911 to 2018 climate categorisation'
+
+    # Pass to the class
+
+    ewr_sh = scenario_handling.ScenarioHandler(scenarios, model_format)
+
+    ewr_summary = ewr_sh.get_ewr_results()
+
+    assert ewr_summary.shape == (202, 19)