py-ewr 2.2.0.tar.gz → 2.2.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {py_ewr-2.2.0 → py_ewr-2.2.3}/PKG-INFO +15 -3
  2. {py_ewr-2.2.0 → py_ewr-2.2.3}/README.md +12 -1
  3. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/data_inputs.py +13 -0
  4. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/evaluate_EWRs.py +1 -4
  5. py_ewr-2.2.3/py_ewr/io.py +24 -0
  6. py_ewr-2.2.3/py_ewr/model_metadata/iqqm_stations.csv +11 -0
  7. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/observed_handling.py +0 -1
  8. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/scenario_handling.py +123 -9
  9. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/PKG-INFO +15 -3
  10. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/SOURCES.txt +2 -0
  11. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/requires.txt +2 -1
  12. {py_ewr-2.2.0 → py_ewr-2.2.3}/setup.py +3 -2
  13. {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_data_inputs.py +15 -1
  14. {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_scenario_handling.py +91 -2
  15. {py_ewr-2.2.0 → py_ewr-2.2.3}/LICENSE +0 -0
  16. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/__init__.py +0 -0
  17. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/model_metadata/SiteID_MDBA.csv +0 -0
  18. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/model_metadata/SiteID_NSW.csv +0 -0
  19. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/parameter_metadata/ewr_calc_config.json +0 -0
  20. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/parameter_metadata/parameter_sheet.csv +0 -0
  21. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/summarise_results.py +0 -0
  22. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/dependency_links.txt +0 -0
  23. {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/top_level.txt +0 -0
  24. {py_ewr-2.2.0 → py_ewr-2.2.3}/pyproject.toml +0 -0
  25. {py_ewr-2.2.0 → py_ewr-2.2.3}/setup.cfg +0 -0
  26. {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_evaluate_ewr_rest.py +0 -0
  27. {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_evaluate_ewrs.py +0 -0
  28. {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_observed_handling.py +0 -0
  29. {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_summarise_results.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: py_ewr
- Version: 2.2.0
+ Version: 2.2.3
  Summary: Environmental Water Requirement calculator
  Home-page: https://github.com/MDBAuth/EWR_tool
  Author: Martin Job
@@ -23,9 +23,10 @@ Requires-Dist: ipython==8.8.0
  Requires-Dist: ipywidgets==7.7.0
  Requires-Dist: pandas==2.0.3
  Requires-Dist: requests==2.25.1
- Requires-Dist: tqdm==4.64.0
  Requires-Dist: mdba-gauge-getter==0.5.1
  Requires-Dist: cachetools==5.2.0
+ Requires-Dist: xarray==2023.01.0
+ Requires-Dist: netCDF4==1.6.4
  Requires-Dist: numpy<2

  [![CI](https://github.com/MDBAuth/EWR_tool/actions/workflows/test-release.yml/badge.svg)]()
@@ -33,9 +34,12 @@ Requires-Dist: numpy<2
  [![PyPI](https://img.shields.io/pypi/v/py-ewr)](https://pypi.org/project/py-ewr/)
  [![DOI](https://zenodo.org/badge/342122359.svg)](https://zenodo.org/badge/latestdoi/342122359)

- ### **EWR tool version 2.2.0 README**
+ ### **EWR tool version 2.2.3 README**

  ### **Notes on recent version update**
+ - Removed TQDM loading bars
+ - Handle duplicate sites in the MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped
+ - Added new model format handling - 'IQQM - netcdf'
  - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow - see the sketch below this list). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
  - ten thousand year handling - this has been briefly taken offline for this version.
  - bug fixes: spells of length equal to the minimum required spell length were being filtered out of the successful events and successful interevents tables; fixed misclassification of some gauges to the flow, level, and lake level categories
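
For illustration, a minimal input in the standard time-series format described above might look like this (the second gauge number and all values here are hypothetical, not from the package):

```python
# Sketch of the standard time-series format: one Date column plus
# '<gauge>_<flow|level>' columns. Values and the level gauge are made up.
import pandas as pd

df = pd.DataFrame({
    "Date": pd.date_range("2012-07-01", periods=3, freq="D"),
    "409025_flow": [120.0, 135.5, 98.2],   # flow series for gauge 409025
    "425012_level": [1.42, 1.45, 1.47],    # level series for a second gauge
})
df.to_csv("standard_timeseries.csv", index=False)
```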
@@ -222,3 +226,11 @@ NSW:

  Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.

+ To disable progress bars, for example when running scripted remote runs, set
+
+ ``` python
+ import os
+ os.environ["TQDM_DISABLE"] = "1"
+ ```
+ *before* importing py-ewr in your script.
+
@@ -3,9 +3,12 @@
  [![PyPI](https://img.shields.io/pypi/v/py-ewr)](https://pypi.org/project/py-ewr/)
  [![DOI](https://zenodo.org/badge/342122359.svg)](https://zenodo.org/badge/latestdoi/342122359)

- ### **EWR tool version 2.2.0 README**
+ ### **EWR tool version 2.2.3 README**

  ### **Notes on recent version update**
+ - Removed TQDM loading bars
+ - Handle duplicate sites in the MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped
+ - Added new model format handling - 'IQQM - netcdf'
  - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
  - ten thousand year handling - this has been briefly taken offline for this version.
  - bug fixes: spells of length equal to the minimum required spell length were being filtered out of the successful events and successful interevents tables; fixed misclassification of some gauges to the flow, level, and lake level categories
@@ -192,3 +195,11 @@ NSW:

  Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.

+ To disable progress bars, for example when running scripted remote runs, set
+
+ ``` python
+ import os
+ os.environ["TQDM_DISABLE"] = "1"
+ ```
+ *before* importing py-ewr in your script.
+
@@ -138,6 +138,19 @@ def get_NSW_codes() -> pd.DataFrame:

      return metadata

+ def get_iqqm_codes() -> dict:
+     '''
+     Load the metadata file for the Macquarie containing model nodes
+     and the gauges they correspond to.
+
+     Returns:
+         dict: dict linking model nodes to gauges
+     '''
+     metadf = pd.read_csv(BASE_PATH / 'model_metadata/iqqm_stations.csv', dtype=str)
+     metadata = metadf.set_index(metadf.columns[0]).to_dict()[metadf.columns[1]]
+     return metadata
+
  def get_level_gauges() -> tuple:
      '''Returning level gauges with EWRs

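
As a rough sketch (not part of the diff), the new helper collapses the two-column CSV added later in this diff into a plain node-to-gauge dict, with both keys and values kept as strings:

```python
# Minimal illustration of what get_iqqm_codes() returns, assuming the
# packaged iqqm_stations.csv shown further down in this diff.
import pandas as pd

metadf = pd.DataFrame({"IQQM": ["229", "42"], "gauge": ["421023", "421001"]})
metadata = metadf.set_index(metadf.columns[0]).to_dict()[metadf.columns[1]]
assert metadata == {"229": "421023", "42": "421001"}
```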
@@ -10,7 +10,6 @@ import logging

  import pandas as pd
  import numpy as np
- from tqdm import tqdm

  from . import data_inputs

@@ -5086,9 +5085,7 @@ def calc_sorter(df_F:pd.DataFrame, df_L:pd.DataFrame, gauge:str, EWR_table:pd.Da
      EWR_codes = PU_table['Code']
      PU_df = pd.DataFrame()
      PU_events = {}
-     for i, EWR in enumerate(tqdm(EWR_codes, position = 0, leave = False,
-                                  bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
-                                  desc= str('Evaluating ewrs for '+ gauge))):
+     for i, EWR in enumerate(EWR_codes):
          events = {}

          MULTIGAUGE = is_multigauge(EWR_table, gauge, EWR, PU)
@@ -0,0 +1,24 @@
+ import xarray as xr
+ from pandas import DataFrame as Dataframe
+
+
+ def read_netcdf_as_dataframe(netcdf_path: str) -> Dataframe:
+     dataset = xr.open_dataset(netcdf_path, engine='netcdf4')
+     df = dataset.to_dataframe()
+     dataset.close()
+
+     return df
+
+
+ def save_dataframe_as_netcdf(df, output_path: str) -> None:
+     # Convert DataFrame to Xarray Dataset
+     ds = xr.Dataset.from_dataframe(df)
+
+     # Modify variable names to ensure they are valid for NetCDF
+     for var_name in ds.variables:
+         new_var_name = var_name.replace(" ", "_")  # Replace spaces with underscores
+         new_var_name = ''.join(c for c in new_var_name if c.isalnum() or c == "_")  # Remove non-alphanumeric characters
+         ds = ds.rename({var_name: new_var_name})
+
+     # Save the modified Xarray Dataset as a NetCDF file
+     ds.to_netcdf(output_path)
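
A hedged round-trip sketch of the two helpers in the new io.py (assumes a writable working directory; note that the name sanitisation means a column name containing a space comes back altered):

```python
# Round-trip sketch for py_ewr.io: 'Simulated flow' is renamed to
# 'Simulated_flow' on save, so the round trip changes the column name.
import pandas as pd
from py_ewr import io

df = pd.DataFrame(
    {"Simulated flow": [1.0, 2.0, 3.0]},
    index=pd.date_range("2012-07-01", periods=3, name="Date"),
)
io.save_dataframe_as_netcdf(df, "example.nc")
df_back = io.read_netcdf_as_dataframe("example.nc")
print(df_back.columns)  # Index(['Simulated_flow'], dtype='object')
```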
@@ -0,0 +1,11 @@
+ IQQM,gauge
+ 229,421023
+ 42,421001
+ 464,421011
+ 240,421019
+ 266,421146
+ 951,421090
+ 487,421022
+ 130,421012
+ 171,421004
+
@@ -3,7 +3,6 @@ from typing import Dict, List
  import logging

  import pandas as pd
- from tqdm import tqdm
  import numpy as np

  from . import data_inputs, evaluate_EWRs, summarise_results, scenario_handling
@@ -7,7 +7,8 @@ from datetime import datetime, date
  import logging

  import pandas as pd
- from tqdm import tqdm
+ import xarray as xr
+ import netCDF4

  log = logging.getLogger(__name__)
  log.addHandler(logging.NullHandler())
@@ -15,6 +16,61 @@ log.addHandler(logging.NullHandler())

  from . import data_inputs, evaluate_EWRs, summarise_results
  #----------------------------------- Scenario testing handling functions--------------------------#
+ def is_valid_netcdf_file(file_path: str) -> bool:
+     try:
+         with netCDF4.Dataset(file_path, 'r'):
+             # If the file opens successfully, it's a valid NetCDF file
+             return True
+     except Exception as e:
+         # If an exception is raised, it's not a valid NetCDF file
+         return False
+
+
+ def unpack_netcdf_as_dataframe(netcdf_file: str) -> pd.DataFrame:
+     '''Ingest a netCDF file and return it as a dataframe in memory.
+
+     Example usage:
+         df = unpack_netcdf_as_dataframe('your_file.nc')
+
+     Args:
+         netcdf_file (str): location of the netCDF file
+
+     Returns:
+         pd.DataFrame: netCDF file converted to a dataframe
+     '''
+     try:
+         # Check if the file is a valid NetCDF file
+         if not is_valid_netcdf_file(netcdf_file):
+             raise ValueError("Not a valid NetCDF file.")
+
+         # Open the NetCDF file
+         dataset = xr.open_dataset(netcdf_file, engine='netcdf4')
+
+         # Check if the dataset is empty
+         if dataset is None:
+             raise ValueError("NetCDF dataset is empty.")
+
+         # extract the bits we actually can use
+         # Some of this needs to move/get cleaned up
+         iqqm_dict = data_inputs.get_iqqm_codes()
+         # the nodes are ints, but the mapping above uses str
+         ints_list = list(map(int, list(iqqm_dict)))
+
+         # Is there any reason to do these in one step?
+         dataset = dataset.sel(node=dataset['node'].isin(ints_list))
+         dataset = dataset[['Simulated flow']]
+
+         # Convert to DataFrame
+         df = dataset.to_dataframe()
+
+         # Close the dataset
+         dataset.close()
+
+         return df
+     except Exception as e:
+         # Handle any exceptions that may occur
+         print(f"Error: {str(e)}")
+         return None
+

  def unpack_model_file(csv_file: str, main_key: str, header_key: str) -> tuple:
      '''Ingesting scenario file locations of model files with all formats (excluding the standard timeseries format); separates the flow data and header data
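
The select-then-convert pattern in unpack_netcdf_as_dataframe can be illustrated on a synthetic dataset (variable and dimension names follow the diff; the data here are invented):

```python
# Synthetic illustration: keep only nodes with a gauge mapping, then
# convert the remaining 'Simulated flow' variable to a (time, node) dataframe.
import numpy as np
import pandas as pd
import xarray as xr

ds = xr.Dataset(
    {"Simulated flow": (("time", "node"), np.random.rand(4, 3).astype("float32"))},
    coords={"time": pd.date_range("2012-07-01", periods=4), "node": [229, 42, 999]},
)
ds = ds.sel(node=ds["node"].isin([229, 42]))  # node 999 has no gauge, drop it
df = ds[["Simulated flow"]].to_dataframe()    # MultiIndex (time, node) dataframe
print(df.head())
```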
@@ -280,6 +336,52 @@ def cleaner_standard_timeseries(input_df: pd.DataFrame, ewr_table_path: str = No
      log.info('Could not identify gauge in column name:', gauge, ', skipping analysis of data in this column.')
      return df_flow, df_level

+ def cleaner_netcdf_werp(input_df: pd.DataFrame, stations: dict) -> tuple:
+     '''Ingests a dataframe and cleans it up into a format matching the IQQM csv
+
+     Args:
+         input_df (pd.DataFrame): raw xarray dataframe read-in
+         stations (dict): dict mapping IQQM stations to gauge numbers
+
+     Returns:
+         tuple[pd.DataFrame, pd.DataFrame]: cleaned flow dataframe; cleaned water level dataframe
+     '''
+     # organise like the rest of the dataframes - make this look just like we've read it in from an IQQM csv
+     cleaned_df = input_df.reset_index(level = 'node')
+     cleaned_df['node'] = cleaned_df['node'].astype(str)
+
+     cleaned_df['gauge'] = cleaned_df['node'].map(stations)
+     cleaned_df = cleaned_df.drop('node', axis = 1)
+
+     # drop the values that don't map to a gauge (lots of nodes in iqqm don't)
+     # This should be deprecated with the new way of choosing nodes on read-in, but being careful
+     cleaned_df = cleaned_df.query('gauge.notna()')
+
+     # give each gauge its own column - that's what the tool expects
+     cleaned_df = cleaned_df.pivot(columns = 'gauge', values = 'Simulated flow')
+     cleaned_df.columns.name = None
+
+     # the csvs return an 'object' type, not a datetime, in the index,
+     # but it gets converted to datetime in cleaner_***, so leave it.
+     cleaned_df.index.names = ['Date']
+
+     # Split gauges into flow and level, allocate to respective dataframe
+     flow_gauges = data_inputs.get_gauges('flow gauges')
+     level_gauges = data_inputs.get_gauges('level gauges')
+     df_flow = pd.DataFrame(index = cleaned_df.index)
+     df_level = pd.DataFrame(index = cleaned_df.index)
+     for gauge in cleaned_df.columns:
+         if gauge in flow_gauges:
+             df_flow[gauge] = cleaned_df[gauge].copy(deep=True)
+         if gauge in level_gauges:
+             df_level[gauge] = cleaned_df[gauge].copy(deep=True)
+
+     return df_flow, df_level
+

  def cleaner_ten_thousand_year(input_df: pd.DataFrame, ewr_table_path: str = None) -> pd.DataFrame:
      '''Ingests dataframe, removes junk columns, fixes date, allocates gauges to either flow/level
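
The reshaping inside cleaner_netcdf_werp, from a (Date, node) MultiIndex to one column per gauge, can be sketched with toy data (values and mapping invented):

```python
# Toy version of the node-to-gauge pivot performed by cleaner_netcdf_werp.
import pandas as pd

idx = pd.MultiIndex.from_product(
    [pd.date_range("2012-07-01", periods=2), [229, 42]], names=["time", "node"]
)
raw = pd.DataFrame({"Simulated flow": [1.0, 2.0, 3.0, 4.0]}, index=idx)

out = raw.reset_index(level="node")
out["node"] = out["node"].astype(str)
out["gauge"] = out["node"].map({"229": "421023", "42": "421001"})
out = out.drop("node", axis=1).pivot(columns="gauge", values="Simulated flow")
print(out)  # one column per gauge: 421001, 421023
```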
@@ -334,6 +436,8 @@ def extract_gauge_from_string(input_string: str) -> str:
      gauge = input_string.split('_')[0]
      return gauge

+
+
  def match_MDBA_nodes(input_df: pd.DataFrame, model_metadata: pd.DataFrame, ewr_table_path: str) -> tuple:
      '''Checks if the source file columns have EWRs available, returns a flow and level dataframe with only
      the columns with EWRs available. Renames columns to gauges
@@ -358,11 +462,19 @@ def match_MDBA_nodes(input_df: pd.DataFrame, model_metadata: pd.DataFrame, ewr_t
          measure = col_clean.split('-')[1]
          if ((measure in measurands) and (model_metadata['SITEID'] == site).any()):
              subset = model_metadata.query("SITEID==@site")
-             gauge = subset["AWRC"].iloc[0]
-             if gauge in flow_gauges and measure == '1':
-                 df_flow[gauge] = input_df[col]
-             if gauge in level_gauges and measure == '35':
-                 df_level[gauge] = input_df[col]
+             for iset in range(len(subset)):
+                 gauge = subset["AWRC"].iloc[iset]
+                 if gauge in flow_gauges and measure == '1':
+                     df_flow[gauge] = input_df[col]
+                 if gauge in level_gauges and measure == '35':
+                     aa = input_df[[col]]
+                     if len(aa.columns) > 1:
+                         print('More than one site has been identified, the first site is used')
+                         print('Site info: ', col)
+                         df_level[gauge] = aa.iloc[:, 0]
+                     else:
+                         df_level[gauge] = input_df[col]
+
      if df_flow.empty:
          raise ValueError('No relevant gauges and/or measurands found in dataset, the EWR tool cannot evaluate this model output file')
      return df_flow, df_level
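
The `len(aa.columns) > 1` guard above relies on a pandas behaviour worth spelling out: when column labels are duplicated, selecting with a list returns every matching column. A small illustration (data invented):

```python
# With duplicated column labels, df[['X']] returns all columns named 'X',
# which is what the duplicate-site guard in match_MDBA_nodes detects.
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=["EUSTUS-35-8", "EUSTUS-35-8"])
aa = df[["EUSTUS-35-8"]]
print(len(aa.columns))  # 2 -> duplicate detected; only the first is used
print(aa.iloc[:, 0])    # the first matching column
```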
@@ -435,9 +547,7 @@ class ScenarioHandler:
          # Analyse all scenarios for EWRs
          detailed_results = {}
          detailed_events = {}
-         for scenario in tqdm(scenarios, position = 0, leave = True,
-                              bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
-                              desc= 'Evaluating scenarios'):
+         for scenario in scenarios:
              if self.model_format == 'Bigmod - MDBA':

                  data, header = unpack_model_file(scenarios[scenario], 'Dy', 'Field')
@@ -455,6 +565,10 @@ class ScenarioHandler:
                  df_clean = cleaner_NSW(data)
                  df_F, df_L = match_NSW_nodes(df_clean, data_inputs.get_NSW_codes())

+             elif self.model_format == 'IQQM - netcdf':
+                 df_unpacked = unpack_netcdf_as_dataframe(scenarios[scenario])
+                 df_F, df_L = cleaner_netcdf_werp(df_unpacked, data_inputs.get_iqqm_codes())
+
              elif self.model_format == 'ten thousand year':
                  df = pd.read_csv(scenarios[scenario], index_col = 'Date')
                  df_F, df_L = cleaner_ten_thousand_year(df, self.parameter_sheet)
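
Putting the new branch together, running the 'IQQM - netcdf' format end to end should look like the other formats (the file path here is hypothetical; the call pattern mirrors the tests later in this diff):

```python
# End-to-end sketch of the new 'IQQM - netcdf' model format.
from py_ewr import scenario_handling

ewr_sh = scenario_handling.ScenarioHandler("scenarios/macquarie_run.nc", "IQQM - netcdf")
ewr_results = ewr_sh.get_ewr_results()  # summary dataframe, as for other formats
print(ewr_results.head())
```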
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: py_ewr
- Version: 2.2.0
+ Version: 2.2.3
  Summary: Environmental Water Requirement calculator
  Home-page: https://github.com/MDBAuth/EWR_tool
  Author: Martin Job
@@ -23,9 +23,10 @@ Requires-Dist: ipython==8.8.0
  Requires-Dist: ipywidgets==7.7.0
  Requires-Dist: pandas==2.0.3
  Requires-Dist: requests==2.25.1
- Requires-Dist: tqdm==4.64.0
  Requires-Dist: mdba-gauge-getter==0.5.1
  Requires-Dist: cachetools==5.2.0
+ Requires-Dist: xarray==2023.01.0
+ Requires-Dist: netCDF4==1.6.4
  Requires-Dist: numpy<2

  [![CI](https://github.com/MDBAuth/EWR_tool/actions/workflows/test-release.yml/badge.svg)]()
@@ -33,9 +34,12 @@ Requires-Dist: numpy<2
  [![PyPI](https://img.shields.io/pypi/v/py-ewr)](https://pypi.org/project/py-ewr/)
  [![DOI](https://zenodo.org/badge/342122359.svg)](https://zenodo.org/badge/latestdoi/342122359)

- ### **EWR tool version 2.2.0 README**
+ ### **EWR tool version 2.2.3 README**

  ### **Notes on recent version update**
+ - Removed TQDM loading bars
+ - Handle duplicate sites in the MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped
+ - Added new model format handling - 'IQQM - netcdf'
  - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
  - ten thousand year handling - this has been briefly taken offline for this version.
  - bug fixes: spells of length equal to the minimum required spell length were being filtered out of the successful events and successful interevents tables; fixed misclassification of some gauges to the flow, level, and lake level categories
@@ -222,3 +226,11 @@ NSW:

  Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.

+ To disable progress bars, for example when running scripted remote runs, set
+
+ ``` python
+ import os
+ os.environ["TQDM_DISABLE"] = "1"
+ ```
+ *before* importing py-ewr in your script.
+
@@ -5,6 +5,7 @@ setup.py
  py_ewr/__init__.py
  py_ewr/data_inputs.py
  py_ewr/evaluate_EWRs.py
+ py_ewr/io.py
  py_ewr/observed_handling.py
  py_ewr/scenario_handling.py
  py_ewr/summarise_results.py
@@ -15,6 +16,7 @@ py_ewr.egg-info/requires.txt
  py_ewr.egg-info/top_level.txt
  py_ewr/model_metadata/SiteID_MDBA.csv
  py_ewr/model_metadata/SiteID_NSW.csv
+ py_ewr/model_metadata/iqqm_stations.csv
  py_ewr/parameter_metadata/ewr_calc_config.json
  py_ewr/parameter_metadata/parameter_sheet.csv
  tests/test_data_inputs.py
@@ -2,7 +2,8 @@ ipython==8.8.0
  ipywidgets==7.7.0
  pandas==2.0.3
  requests==2.25.1
- tqdm==4.64.0
  mdba-gauge-getter==0.5.1
  cachetools==5.2.0
+ xarray==2023.01.0
+ netCDF4==1.6.4
  numpy<2
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:

  setup(
      name="py_ewr",
-     version="2.2.0",
+     version="2.2.3",
      author="Martin Job",
      author_email="Martin.Job@mdba.gov.au",
      description="Environmental Water Requirement calculator",
@@ -35,9 +35,10 @@ setup(
          "ipywidgets==7.7.0",
          "pandas==2.0.3",
          "requests==2.25.1",
-         "tqdm==4.64.0",
          "mdba-gauge-getter==0.5.1",
          "cachetools==5.2.0",
+         "xarray==2023.01.0",
+         "netCDF4==1.6.4",
          "numpy<2"
      ],
      package_data={'': ["model_metadata/*.csv", "parameter_metadata/*.csv", "parameter_metadata/*.json"]},
@@ -105,4 +105,18 @@ def test_get_cllmm_gauges():
  def test_get_scenario_gauges(gauge_results, expected_results):
      result = data_inputs.get_scenario_gauges(gauge_results)
      assert sorted(result) == expected_results
-
+
+ def test_get_iqqm_codes():
+     result = data_inputs.get_iqqm_codes()
+     stations = {
+         '229': '421023',
+         '42': '421001',
+         '464': '421011',
+         '240': '421019',
+         '266': '421146',
+         '951': '421090',
+         '487': '421022',
+         '130': '421012',
+         '171': '421004',
+     }
+     assert stations == result
@@ -12,7 +12,9 @@ from py_ewr import scenario_handling, data_inputs
  def test_match_MDBA_nodes():
      '''
      1. Ensure dataframe with flows and levels is split into two dataframes (one flow and one level dataframe)
+     2. Ensure the first column is used when duplicate columns are loaded
      '''
+     # TEST 1 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
      # Set up input data and pass to test function:
      model_metadata = data_inputs.get_MDBA_codes()
      data_df = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
@@ -39,6 +41,36 @@ def test_match_MDBA_nodes():
      assert_frame_equal(df_F, expected_df_F)
      assert_frame_equal(df_L, expected_df_L)

+     # TEST 2 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+     # Set up input data and pass to test function:
+     model_metadata = data_inputs.get_MDBA_codes()
+     data_df = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                'EUSTDS-1-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10,  # Use
+                'EUSTDS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10,  # Skip
+                'EUSTUS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10,  # Skip
+                'EUSTUS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10,  # Use
+                'EUSTUS-1-8': [1]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10,  # Skip
+                }
+     df = pd.DataFrame(data = data_df)
+     df = df.set_index('Date')
+
+     df_F, df_L = scenario_handling.match_MDBA_nodes(df, model_metadata, 'py_ewr/parameter_metadata/parameter_sheet.csv')
+
+     # Set up expected outputs and test:
+     data_expected_df_L = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                           '414209': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10
+                           }
+     expected_df_L = pd.DataFrame(data_expected_df_L)
+     expected_df_L = expected_df_L.set_index('Date')
+     data_expected_df_F = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                           '414203': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10
+                           }
+     expected_df_F = pd.DataFrame(data_expected_df_F)
+     expected_df_F = expected_df_F.set_index('Date')
+
+     assert_frame_equal(df_F, expected_df_F)
+     assert_frame_equal(df_L, expected_df_L)
+
  def test_match_NSW_nodes():
      '''
      1. Check NSW model nodes are mapped correctly to their gauges
@@ -197,6 +229,30 @@ def test_cleaner_MDBA():

      assert_frame_equal(df_clean, expected_df)

+ def test_cleaner_netcdf_werp():
+     '''
+     1. check the ncdf is unpacked correctly
+     '''
+     df = scenario_handling.unpack_netcdf_as_dataframe('unit_testing_files/werp_ncdf.nc')
+     df_F, df_L = scenario_handling.cleaner_netcdf_werp(df, data_inputs.get_iqqm_codes())
+
+     # the test ncdf is too big to mock, so check properties
+     assert df_F.dtypes.iloc[0] == 'float32'
+     assert isinstance(df_F.index, pd.DatetimeIndex)
+     assert all(df_F.columns == ['421001', '421004', '421012', '421019', '421022', '421023', '421090', '421146'])
+
+
+ def test_csv_input():
+     '''
+     1. check we can feed scenario_handling a csv that looks like gauge data
+     '''
+     # Can we use the standard time-series format to feed csv scenarios?
+     ewr_sh_standard = scenario_handling.ScenarioHandler('unit_testing_files/multi_gauge_input_label.csv', 'Standard time-series')
+     standardout = ewr_sh_standard.get_ewr_results()
+
+     assert isinstance(standardout, pd.DataFrame)
+

  def test_build_NSW_columns():
      '''
@@ -266,6 +322,7 @@ def test_unpack_model_file():

      # assert_frame_equal(flow, expected_flow)

+
  def test_scenario_handler_class(scenario_handler_expected_detail, scenario_handler_instance):

      detailed = scenario_handler_instance.pu_ewr_statistics
@@ -325,11 +382,43 @@ def test_get_ewr_results(scenario_handler_instance):
      assert ewr_results.columns.to_list() == ['Scenario', 'Gauge', 'PlanningUnit', 'EwrCode', 'Multigauge', 'EventYears',
                                               'Frequency', 'TargetFrequency', 'AchievementCount',
                                               'AchievementPerYear', 'EventCount', 'EventCountAll', 'EventsPerYear', 'EventsPerYearAll',
-                                              'AverageEventLength', 'ThresholdDays',
+                                              'AverageEventLength', 'ThresholdDays',  # 'InterEventExceedingCount',
                                               'MaxInterEventYears', 'NoDataDays', 'TotalDays']

+
+ def test_unpack_netcdf_as_dataframe():
+     test_flowcdf = 'unit_testing_files/werp_ncdf.nc'
+     result_flow = scenario_handling.unpack_netcdf_as_dataframe(test_flowcdf)
+     expected_flow_shape = (16000, 1)
+     assert result_flow.shape == expected_flow_shape
+
+
+ def test_unpack_netcdf_as_dataframe_invalid_file():
+     test_invalid_file = 'unit_testing_files/NSW_source_res_test_file_header_result.csv'
+     try:
+         result_df = scenario_handling.unpack_netcdf_as_dataframe(test_invalid_file)
+     except ValueError as e:
+         assert "Not a valid NetCDF file." in str(e)
+
+
  def test_any_cllmm_to_process(gauge_results):
      result = scenario_handling.any_cllmm_to_process(gauge_results)
      assert result == True

-
+ # This *should* likely use something like conftest.scenario_handler_instance, but that seems to be locked to bigmod.
+ def test_netcdf_processes():
+     # Testing the netcdf format:
+     # Input params
+     # scenarios = 'unit_testing_files/ex_tasker.nc'
+     scenarios = 'unit_testing_files/werp_ncdf.nc'
+     model_format = 'IQQM - netcdf'
+     # allowance = {'minThreshold': 1.0, 'maxThreshold': 1.0, 'duration': 1.0, 'drawdown': 1.0}
+     # climate = 'Standard - 1911 to 2018 climate categorisation'
+
+     # Pass to the class
+     ewr_sh = scenario_handling.ScenarioHandler(scenarios, model_format)
+
+     ewr_summary = ewr_sh.get_ewr_results()
+
+     assert ewr_summary.shape == (202, 19)