py-ewr 2.2.0__tar.gz → 2.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {py_ewr-2.2.0 → py_ewr-2.2.3}/PKG-INFO +15 -3
- {py_ewr-2.2.0 → py_ewr-2.2.3}/README.md +12 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/data_inputs.py +13 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/evaluate_EWRs.py +1 -4
- py_ewr-2.2.3/py_ewr/io.py +24 -0
- py_ewr-2.2.3/py_ewr/model_metadata/iqqm_stations.csv +11 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/observed_handling.py +0 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/scenario_handling.py +123 -9
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/PKG-INFO +15 -3
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/SOURCES.txt +2 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/requires.txt +2 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/setup.py +3 -2
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_data_inputs.py +15 -1
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_scenario_handling.py +91 -2
- {py_ewr-2.2.0 → py_ewr-2.2.3}/LICENSE +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/__init__.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/model_metadata/SiteID_MDBA.csv +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/model_metadata/SiteID_NSW.csv +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/parameter_metadata/ewr_calc_config.json +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/parameter_metadata/parameter_sheet.csv +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr/summarise_results.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/dependency_links.txt +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/py_ewr.egg-info/top_level.txt +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/pyproject.toml +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/setup.cfg +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_evaluate_ewr_rest.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_evaluate_ewrs.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_observed_handling.py +0 -0
- {py_ewr-2.2.0 → py_ewr-2.2.3}/tests/test_summarise_results.py +0 -0
--- py_ewr-2.2.0/PKG-INFO
+++ py_ewr-2.2.3/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py_ewr
-Version: 2.2.0
+Version: 2.2.3
 Summary: Environmental Water Requirement calculator
 Home-page: https://github.com/MDBAuth/EWR_tool
 Author: Martin Job
@@ -23,9 +23,10 @@ Requires-Dist: ipython==8.8.0
 Requires-Dist: ipywidgets==7.7.0
 Requires-Dist: pandas==2.0.3
 Requires-Dist: requests==2.25.1
-Requires-Dist: tqdm==4.64.0
 Requires-Dist: mdba-gauge-getter==0.5.1
 Requires-Dist: cachetools==5.2.0
+Requires-Dist: xarray==2023.01.0
+Requires-Dist: netCDF4==1.6.4
 Requires-Dist: numpy<2
 
 []()
@@ -33,9 +34,12 @@ Requires-Dist: numpy<2
 [](https://pypi.org/project/py-ewr/)
 [](https://zenodo.org/badge/latestdoi/342122359)
 
-### **EWR tool version 2.2.0 README**
+### **EWR tool version 2.2.3 README**
 
 ### **Notes on recent version update**
+- Remove TQDM loading bars
+- Handle duplicate sites in MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped over
+- Add new model format handling - 'IQQM - netcdf'
 - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
 - ten thousand year handling - this has been temporarily taken offline for this version.
 - bug fixes: spells of length equal to the minimum required spell length were getting filtered out of the successful events table and successful interevents table; fixed misclassification of some gauges to flow, level, and lake level categories
@@ -222,3 +226,11 @@ NSW:
 
 Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.
 
+To disable progress bars, for example when running remote scripted runs, set
+
+``` python
+import os
+os.environ["TQDM_DISABLE"] = "1"
+```
+*before* importing py-ewr in your script.
+
--- py_ewr-2.2.0/README.md
+++ py_ewr-2.2.3/README.md
@@ -3,9 +3,12 @@
 [](https://pypi.org/project/py-ewr/)
 [](https://zenodo.org/badge/latestdoi/342122359)
 
-### **EWR tool version 2.2.0 README**
+### **EWR tool version 2.2.3 README**
 
 ### **Notes on recent version update**
+- Remove TQDM loading bars
+- Handle duplicate sites in MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped over
+- Add new model format handling - 'IQQM - netcdf'
 - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
 - ten thousand year handling - this has been temporarily taken offline for this version.
 - bug fixes: spells of length equal to the minimum required spell length were getting filtered out of the successful events table and successful interevents table; fixed misclassification of some gauges to flow, level, and lake level categories
@@ -192,3 +195,11 @@ NSW:
 
 Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.
 
+To disable progress bars, for example when running remote scripted runs, set
+
+``` python
+import os
+os.environ["TQDM_DISABLE"] = "1"
+```
+*before* importing py-ewr in your script.
+
--- py_ewr-2.2.0/py_ewr/data_inputs.py
+++ py_ewr-2.2.3/py_ewr/data_inputs.py
@@ -138,6 +138,19 @@ def get_NSW_codes() -> pd.DataFrame:
 
     return metadata
 
+def get_iqqm_codes() -> dict:
+    '''
+    Load the metadata file for the Macquarie containing model nodes
+    and the gauges they correspond to.
+
+    Returns:
+        dict: dict linking model nodes to gauges
+    '''
+
+    metadf = pd.read_csv(BASE_PATH / 'model_metadata/iqqm_stations.csv', dtype=str)
+    metadata = metadf.set_index(metadf.columns[0]).to_dict()[metadf.columns[1]]
+    return metadata
+
 def get_level_gauges() -> tuple:
     '''Returning level gauges with EWRs
 
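The new helper returns a plain dict keyed by model node. A minimal sketch of its use, with one real node/gauge pair taken from the test_get_iqqm_codes test added further down (the assert itself is illustrative):

``` python
from py_ewr import data_inputs

# Node IDs and gauge numbers are both strings
iqqm = data_inputs.get_iqqm_codes()
assert iqqm['42'] == '421001'  # node 42 maps to gauge 421001, per the unit test below
```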
--- py_ewr-2.2.0/py_ewr/evaluate_EWRs.py
+++ py_ewr-2.2.3/py_ewr/evaluate_EWRs.py
@@ -10,7 +10,6 @@ import logging
 
 import pandas as pd
 import numpy as np
-from tqdm import tqdm
 
 from . import data_inputs
 
@@ -5086,9 +5085,7 @@ def calc_sorter(df_F:pd.DataFrame, df_L:pd.DataFrame, gauge:str, EWR_table:pd.DataFrame,
     EWR_codes = PU_table['Code']
     PU_df = pd.DataFrame()
     PU_events = {}
-    for i, EWR in enumerate(tqdm(EWR_codes,
-                                 bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
-                                 desc= str('Evaluating ewrs for '+ gauge))):
+    for i, EWR in enumerate(EWR_codes):
         events = {}
 
         MULTIGAUGE = is_multigauge(EWR_table, gauge, EWR, PU)
--- /dev/null
+++ py_ewr-2.2.3/py_ewr/io.py
@@ -0,0 +1,24 @@
+import xarray as xr
+from pandas import DataFrame as Dataframe
+
+
+def read_netcdf_as_dataframe(netcdf_path: str) -> Dataframe:
+    dataset = xr.open_dataset(netcdf_path, engine='netcdf4')
+    df = dataset.to_dataframe()
+    dataset.close()
+
+    return df
+
+
+def save_dataframe_as_netcdf(df, output_path: str) -> None:
+    # Convert the DataFrame to an xarray Dataset
+    ds = xr.Dataset.from_dataframe(df)
+
+    # Modify variable names to ensure they are valid for NetCDF
+    for var_name in ds.variables:
+        new_var_name = var_name.replace(" ", "_")  # replace spaces with underscores
+        new_var_name = ''.join(c for c in new_var_name if c.isalnum() or c == "_")  # drop other non-alphanumeric characters
+        ds = ds.rename({var_name: new_var_name})
+
+    # Save the modified xarray Dataset as a NetCDF file
+    ds.to_netcdf(output_path)
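A minimal round-trip sketch for the new io.py helpers; the file name and the toy frame are illustrative, not part of the package:

``` python
import pandas as pd
from py_ewr.io import read_netcdf_as_dataframe, save_dataframe_as_netcdf

# Toy frame with a space in the column name, which the saver sanitises
df = pd.DataFrame({'Simulated flow': [1.0, 2.0, 3.0]},
                  index=pd.date_range('2020-07-01', periods=3, name='time'))

save_dataframe_as_netcdf(df, 'example.nc')       # written back as variable 'Simulated_flow'
df_back = read_netcdf_as_dataframe('example.nc')
```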
--- py_ewr-2.2.0/py_ewr/scenario_handling.py
+++ py_ewr-2.2.3/py_ewr/scenario_handling.py
@@ -7,7 +7,8 @@ from datetime import datetime, date
 import logging
 
 import pandas as pd
-
+import xarray as xr
+import netCDF4
 
 log = logging.getLogger(__name__)
 log.addHandler(logging.NullHandler())
@@ -15,6 +16,61 @@ log.addHandler(logging.NullHandler())
 
 from . import data_inputs, evaluate_EWRs, summarise_results
 #----------------------------------- Scenario testing handling functions--------------------------#
+def is_valid_netcdf_file(file_path: str) -> bool:
+    try:
+        with netCDF4.Dataset(file_path, 'r'):
+            # If the file opens successfully, it's a valid NetCDF file
+            return True
+    except Exception as e:
+        # If an exception is raised, it's not a valid NetCDF file
+        return False
+
+
+def unpack_netcdf_as_dataframe(netcdf_file: str) -> pd.DataFrame:
+    '''Ingesting netCDF files and outputting as dataframes in memory.
+    # Example usage:
+    # df = unpack_netcdf_as_dataframe('your_file.nc')
+
+    Args:
+        netcdf_file (str): location of netCDF file
+
+    Returns:
+        pd.DataFrame: netCDF file converted to dataframe
+    '''
+    try:
+        # Check if the file is a valid NetCDF file
+        if not is_valid_netcdf_file(netcdf_file):
+            raise ValueError("Not a valid NetCDF file.")
+
+        # Open the NetCDF file
+        dataset = xr.open_dataset(netcdf_file, engine='netcdf4')
+
+        # Check if the dataset is empty
+        if dataset is None:
+            raise ValueError("NetCDF dataset is empty.")
+
+        # extract the bits we actually can use
+        # Some of this needs to move/get cleaned up
+        iqqm_dict = data_inputs.get_iqqm_codes()
+        # the nodes are ints, but the above is str
+        ints_list = list(map(int, list(iqqm_dict)))
+
+        # Is there any reason to do these in one step?
+        dataset = dataset.sel(node=dataset['node'].isin(ints_list))
+        dataset = dataset[['Simulated flow']]
+
+        # Convert to DataFrame
+        df = dataset.to_dataframe()
+
+        # Close the dataset
+        dataset.close()
+
+        return df
+    except Exception as e:
+        # Handle any exceptions that may occur
+        print(f"Error: {str(e)}")
+        return None
+
 
 def unpack_model_file(csv_file: str, main_key: str, header_key: str) -> tuple:
     '''Ingesting scenario file locations of model files with all formats (excluding standard timeseries format), separates the flow data and header data
@@ -280,6 +336,52 @@ def cleaner_standard_timeseries(input_df: pd.DataFrame, ewr_table_path: str = None) -> pd.DataFrame:
         log.info('Could not identify gauge in column name:', gauge, ', skipping analysis of data in this column.')
     return df_flow, df_level
 
+def cleaner_netcdf_werp(input_df: pd.DataFrame, stations: dict) -> tuple:
+
+    '''Ingests dataframe, cleans up into a format matching IQQM csv
+
+    Args:
+        input_df (pd.DataFrame): raw xarray dataframe read-in
+
+        stations (dict): dict mapping IQQM stations to gauge numbers
+
+    Returns:
+        tuple[pd.DataFrame, pd.DataFrame]: cleaned flow dataframe; cleaned water level dataframe
+
+    '''
+
+    # organise like the rest of the dataframes - make this look just like we've read it in from an IQQM csv
+    cleaned_df = input_df.reset_index(level = 'node')
+    cleaned_df['node'] = cleaned_df['node'].astype(str)
+
+    cleaned_df['gauge'] = cleaned_df['node'].map(stations)
+    cleaned_df = cleaned_df.drop('node', axis = 1)
+
+    # drop the values that don't map to a gauge (lots of nodes in iqqm don't)
+    # This should be deprecated with the new way of choosing nodes on read-in, but being careful
+    cleaned_df = cleaned_df.query('gauge.notna()')
+
+    # give each gauge its own column - that's what the tool expects
+    cleaned_df = cleaned_df.pivot(columns = 'gauge', values = 'Simulated flow')
+    cleaned_df.columns.name = None
+
+    # the csvs return an 'object' type, not a datetime in the index,
+    # but it gets converted to datetime in cleaner_***, so leave it.
+    cleaned_df.index.names = ['Date']
+
+    # Split gauges into flow and level, allocate to respective dataframe
+    flow_gauges = data_inputs.get_gauges('flow gauges')
+    level_gauges = data_inputs.get_gauges('level gauges')
+    df_flow = pd.DataFrame(index = cleaned_df.index)
+    df_level = pd.DataFrame(index = cleaned_df.index)
+    for gauge in cleaned_df.columns:
+        if gauge in flow_gauges:
+            df_flow[gauge] = cleaned_df[gauge].copy(deep=True)
+        if gauge in level_gauges:
+            df_level[gauge] = cleaned_df[gauge].copy(deep=True)
+
+    return df_flow, df_level
+
 
 def cleaner_ten_thousand_year(input_df: pd.DataFrame, ewr_table_path: str = None) -> pd.DataFrame:
     '''Ingests dataframe, removes junk columns, fixes date, allocates gauges to either flow/level
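What cleaner_netcdf_werp does is essentially a node-to-gauge relabel plus a long-to-wide pivot. A standalone pandas sketch with toy values (only the two node/gauge pairs are real, taken from the metadata test):

``` python
import pandas as pd

# Long format, as xarray's to_dataframe() produces: a (time, node) MultiIndex
idx = pd.MultiIndex.from_product(
    [pd.date_range('2020-07-01', periods=2), [42, 229]], names=['time', 'node'])
long_df = pd.DataFrame({'Simulated flow': [1.0, 2.0, 3.0, 4.0]}, index=idx)

stations = {'42': '421001', '229': '421023'}  # node -> gauge mapping

wide = long_df.reset_index(level='node')
wide['gauge'] = wide['node'].astype(str).map(stations)
wide = wide.drop('node', axis=1)
wide = wide[wide['gauge'].notna()]            # drop nodes with no gauge
wide = wide.pivot(columns='gauge', values='Simulated flow')  # one column per gauge
```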
@@ -334,6 +436,8 @@ def extract_gauge_from_string(input_string: str) -> str:
         gauge = input_string.split('_')[0]
         return gauge
 
+
+
 def match_MDBA_nodes(input_df: pd.DataFrame, model_metadata: pd.DataFrame, ewr_table_path: str) -> tuple:
     '''Checks if the source file columns have EWRs available, returns a flow and level dataframe with only
     the columns with EWRs available. Renames columns to gauges
@@ -358,11 +462,19 @@ def match_MDBA_nodes(input_df: pd.DataFrame, model_metadata: pd.DataFrame, ewr_table_path: str) -> tuple:
             measure = col_clean.split('-')[1]
             if ((measure in measurands) and (model_metadata['SITEID'] == site).any()):
                 subset = model_metadata.query("SITEID==@site")
-
-
-
-
-
+                for iset in range(len(subset)):
+                    gauge = subset["AWRC"].iloc[iset]
+                    if gauge in flow_gauges and measure == '1':
+                        df_flow[gauge] = input_df[col]
+                    if gauge in level_gauges and measure == '35':
+                        aa = input_df[[col]]
+                        if (len(aa.columns) > 1):
+                            print('More than one site has been identified, the first site is used')
+                            print('Site info: ', col)
+                            df_level[gauge] = aa.iloc[:, 0]
+                        else:
+                            df_level[gauge] = input_df[col]
+
     if df_flow.empty:
         raise ValueError('No relevant gauges and or measurands found in dataset, the EWR tool cannot evaluate this model output file')
     return df_flow, df_level
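The duplicate handling above leans on a pandas behaviour worth spelling out: selecting a single label with df[[col]] returns every column that shares that name, so len(aa.columns) > 1 detects duplicates. A toy illustration (the data is made up):

``` python
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=['EUSTUS-35-8', 'EUSTUS-35-8'])
aa = df[['EUSTUS-35-8']]   # both columns share the label, so both come back
assert len(aa.columns) == 2
first = aa.iloc[:, 0]      # the first match is used, as the release notes state
```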
@@ -435,9 +547,7 @@ class ScenarioHandler:
         # Analyse all scenarios for EWRs
         detailed_results = {}
         detailed_events = {}
-        for scenario in tqdm(scenarios,
-                             bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
-                             desc= 'Evaluating scenarios'):
+        for scenario in scenarios:
             if self.model_format == 'Bigmod - MDBA':
 
                 data, header = unpack_model_file(scenarios[scenario], 'Dy', 'Field')
@@ -455,6 +565,10 @@ class ScenarioHandler:
                 df_clean = cleaner_NSW(data)
                 df_F, df_L = match_NSW_nodes(df_clean, data_inputs.get_NSW_codes())
 
+            elif self.model_format == 'IQQM - netcdf':
+                df_unpacked = unpack_netcdf_as_dataframe(scenarios[scenario])
+                df_F, df_L = cleaner_netcdf_werp(df_unpacked, data_inputs.get_iqqm_codes())
+
             elif self.model_format == 'ten thousand year':
                 df = pd.read_csv(scenarios[scenario], index_col = 'Date')
                 df_F, df_L = cleaner_ten_thousand_year(df, self.parameter_sheet)
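End to end, the new format is selected the same way as the existing ones. This mirrors the test_netcdf_processes test added at the bottom of this diff; the .nc path here is illustrative:

``` python
from py_ewr import scenario_handling

# 'IQQM - netcdf' routes the file through unpack_netcdf_as_dataframe()
# and cleaner_netcdf_werp() before the usual EWR evaluation
ewr_sh = scenario_handling.ScenarioHandler('scenario.nc', 'IQQM - netcdf')
ewr_summary = ewr_sh.get_ewr_results()
```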
--- py_ewr-2.2.0/py_ewr.egg-info/PKG-INFO
+++ py_ewr-2.2.3/py_ewr.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: py_ewr
-Version: 2.2.0
+Version: 2.2.3
 Summary: Environmental Water Requirement calculator
 Home-page: https://github.com/MDBAuth/EWR_tool
 Author: Martin Job
@@ -23,9 +23,10 @@ Requires-Dist: ipython==8.8.0
 Requires-Dist: ipywidgets==7.7.0
 Requires-Dist: pandas==2.0.3
 Requires-Dist: requests==2.25.1
-Requires-Dist: tqdm==4.64.0
 Requires-Dist: mdba-gauge-getter==0.5.1
 Requires-Dist: cachetools==5.2.0
+Requires-Dist: xarray==2023.01.0
+Requires-Dist: netCDF4==1.6.4
 Requires-Dist: numpy<2
 
 []()
@@ -33,9 +34,12 @@ Requires-Dist: numpy<2
 [](https://pypi.org/project/py-ewr/)
 [](https://zenodo.org/badge/latestdoi/342122359)
 
-### **EWR tool version 2.2.0 README**
+### **EWR tool version 2.2.3 README**
 
 ### **Notes on recent version update**
+- Remove TQDM loading bars
+- Handle duplicate sites in MDBA siteID file - where a duplicate exists, the first match is used and the rest are skipped over
+- Add new model format handling - 'IQQM - netcdf'
 - Standard time-series handling added - each column needs a gauge, followed by an underscore, followed by either flow or level (e.g. 409025_flow). This handling also fills missing dates - any missing dates will be filled with NaN values in all columns.
 - ten thousand year handling - this has been temporarily taken offline for this version.
 - bug fixes: spells of length equal to the minimum required spell length were getting filtered out of the successful events table and successful interevents table; fixed misclassification of some gauges to flow, level, and lake level categories
@@ -222,3 +226,11 @@ NSW:
 
 Consult the user manual for instructions on how to run the tool. Please email the above email addresses for a copy of the user manual.
 
+To disable progress bars, for example when running remote scripted runs, set
+
+``` python
+import os
+os.environ["TQDM_DISABLE"] = "1"
+```
+*before* importing py-ewr in your script.
+
--- py_ewr-2.2.0/py_ewr.egg-info/SOURCES.txt
+++ py_ewr-2.2.3/py_ewr.egg-info/SOURCES.txt
@@ -5,6 +5,7 @@ setup.py
 py_ewr/__init__.py
 py_ewr/data_inputs.py
 py_ewr/evaluate_EWRs.py
+py_ewr/io.py
 py_ewr/observed_handling.py
 py_ewr/scenario_handling.py
 py_ewr/summarise_results.py
@@ -15,6 +16,7 @@ py_ewr.egg-info/requires.txt
 py_ewr.egg-info/top_level.txt
 py_ewr/model_metadata/SiteID_MDBA.csv
 py_ewr/model_metadata/SiteID_NSW.csv
+py_ewr/model_metadata/iqqm_stations.csv
 py_ewr/parameter_metadata/ewr_calc_config.json
 py_ewr/parameter_metadata/parameter_sheet.csv
 tests/test_data_inputs.py
--- py_ewr-2.2.0/setup.py
+++ py_ewr-2.2.3/setup.py
@@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="py_ewr",
-    version="2.2.0",
+    version="2.2.3",
     author="Martin Job",
     author_email="Martin.Job@mdba.gov.au",
     description="Environmental Water Requirement calculator",
@@ -35,9 +35,10 @@ setup(
         "ipywidgets==7.7.0",
         "pandas==2.0.3",
         "requests==2.25.1",
-        "tqdm==4.64.0",
         "mdba-gauge-getter==0.5.1",
         "cachetools==5.2.0",
+        "xarray==2023.01.0",
+        "netCDF4==1.6.4",
         "numpy<2"
     ],
    package_data={'': ["model_metadata/*.csv", "parameter_metadata/*.csv","parameter_metadata/*.json"]},
--- py_ewr-2.2.0/tests/test_data_inputs.py
+++ py_ewr-2.2.3/tests/test_data_inputs.py
@@ -105,4 +105,18 @@ def test_get_cllmm_gauges():
 def test_get_scenario_gauges(gauge_results, expected_results):
     result = data_inputs.get_scenario_gauges(gauge_results)
     assert sorted(result) == expected_results
-
+
+def test_get_iqqm_codes():
+    result = data_inputs.get_iqqm_codes()
+    stations = {
+        '229': '421023',
+        '42': '421001',
+        '464': '421011',
+        '240': '421019',
+        '266': '421146',
+        '951': '421090',
+        '487': '421022',
+        '130': '421012',
+        '171': '421004',
+    }
+    assert stations == result
--- py_ewr-2.2.0/tests/test_scenario_handling.py
+++ py_ewr-2.2.3/tests/test_scenario_handling.py
@@ -12,7 +12,9 @@ from py_ewr import scenario_handling, data_inputs
 def test_match_MDBA_nodes():
     '''
     1. Ensure dataframe with flows and levels is split into two dataframes (one flow and one level dataframe)
+    2. Ensure first column is used when duplicate columns are loaded
     '''
+    # TEST 1 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
     # Set up input data and pass to test function:
     model_metadata = data_inputs.get_MDBA_codes()
     data_df = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
@@ -39,6 +41,36 @@ def test_match_MDBA_nodes():
     assert_frame_equal(df_F, expected_df_F)
     assert_frame_equal(df_L, expected_df_L)
 
+    # TEST 2 #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    # Set up input data and pass to test function:
+    model_metadata = data_inputs.get_MDBA_codes()
+    data_df = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+               'EUSTDS-1-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Use
+               'EUSTDS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Skip
+               'EUSTUS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Skip
+               'EUSTUS-35-8': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Use
+               'EUSTUS-1-8': [1]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10, # Skip
+               }
+    df = pd.DataFrame(data = data_df)
+    df = df.set_index('Date')
+
+    df_F, df_L = scenario_handling.match_MDBA_nodes(df, model_metadata, 'py_ewr/parameter_metadata/parameter_sheet.csv')
+
+    # Set up expected outputs and test:
+    data_expected_df_L = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                          '414209': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10
+                          }
+    expected_df_L = pd.DataFrame(data_expected_df_L)
+    expected_df_L = expected_df_L.set_index('Date')
+    data_expected_df_F = {'Date': pd.date_range(start= datetime.strptime('2012-07-01', '%Y-%m-%d'), end = datetime.strptime('2016-06-30', '%Y-%m-%d')),
+                          '414203': [0]*1+[250]*350+[0]*9+[0]*5 + [0]*360+[0]*5 + [0]*2+[250]*345+[0]*1+[250]*17 + [0]*5+[250]*351+[250]*10
+                          }
+    expected_df_F = pd.DataFrame(data_expected_df_F)
+    expected_df_F = expected_df_F.set_index('Date')
+
+    assert_frame_equal(df_F, expected_df_F)
+    assert_frame_equal(df_L, expected_df_L)
+
 def test_match_NSW_nodes():
     '''
     1. Check NSW model nodes are mapped correctly to their gauges
@@ -197,6 +229,30 @@ def test_cleaner_MDBA():
 
     assert_frame_equal(df_clean, expected_df)
 
+def test_cleaner_netcdf_werp():
+    '''
+    1. check ncdf is unpacked correctly
+    '''
+    df = scenario_handling.unpack_netcdf_as_dataframe('unit_testing_files/werp_ncdf.nc')
+    df_F, df_L = scenario_handling.cleaner_netcdf_werp(df, data_inputs.get_iqqm_codes())
+
+    # the test ncdf is too big to mock, so check properties
+    assert df_F.dtypes.iloc[0] == 'float32'
+    assert isinstance(df_F.index, pd.DatetimeIndex)
+    assert all(df_F.columns == ['421001', '421004', '421012', '421019', '421022', '421023', '421090', '421146'])
+
+
+def test_csv_input():
+    '''
+    1. check we can feed scenario_handling a csv that looks like gauge data
+    '''
+
+    # Can we use standard time-series to feed csv scenarios?
+    ewr_sh_standard = scenario_handling.ScenarioHandler('unit_testing_files/multi_gauge_input_label.csv', 'Standard time-series')
+    standardout = ewr_sh_standard.get_ewr_results()
+
+    assert isinstance(standardout, pd.DataFrame)
+
 
 def test_build_NSW_columns():
     '''
@@ -266,6 +322,7 @@ def test_unpack_model_file():
 
     # assert_frame_equal(flow, expected_flow)
 
+
 def test_scenario_handler_class(scenario_handler_expected_detail, scenario_handler_instance):
 
     detailed = scenario_handler_instance.pu_ewr_statistics
@@ -325,11 +382,43 @@ def test_get_ewr_results(scenario_handler_instance):
     assert ewr_results.columns.to_list() == ['Scenario', 'Gauge', 'PlanningUnit', 'EwrCode', 'Multigauge','EventYears',
                                              'Frequency', 'TargetFrequency', 'AchievementCount',
                                              'AchievementPerYear', 'EventCount', 'EventCountAll', 'EventsPerYear', 'EventsPerYearAll',
-                                             'AverageEventLength', 'ThresholdDays',
+                                             'AverageEventLength', 'ThresholdDays', #'InterEventExceedingCount',
                                              'MaxInterEventYears', 'NoDataDays', 'TotalDays']
 
+
+def test_unpack_netcdf_as_dataframe():
+    test_flowcdf = 'unit_testing_files/werp_ncdf.nc'
+    result_flow = scenario_handling.unpack_netcdf_as_dataframe(test_flowcdf)
+    expected_flow_shape = (16000, 1)
+    assert result_flow.shape == expected_flow_shape
+
+
+def test_unpack_netcdf_as_dataframe_invalid_file():
+    test_invalid_file = 'unit_testing_files/NSW_source_res_test_file_header_result.csv'
+    try:
+        result_df = scenario_handling.unpack_netcdf_as_dataframe(test_invalid_file)
+    except ValueError as e:
+        assert "Not a valid NetCDF file." in str(e)
+
+
 def test_any_cllmm_to_process(gauge_results):
     result = scenario_handling.any_cllmm_to_process(gauge_results)
     assert result == True
 
-
+# This *should* likely use something like conftest.scenario_handler_instance, but that seems to be locked to bigmod.
+def test_netcdf_processes():
+    # Testing the netcdf format:
+    # Input params
+    # scenarios = 'unit_testing_files/ex_tasker.nc'
+    scenarios = 'unit_testing_files/werp_ncdf.nc'
+    model_format = 'IQQM - netcdf'
+    # allowance = {'minThreshold': 1.0, 'maxThreshold': 1.0, 'duration': 1.0, 'drawdown': 1.0}
+    # climate = 'Standard - 1911 to 2018 climate categorisation'
+
+    # Pass to the class
+
+    ewr_sh = scenario_handling.ScenarioHandler(scenarios, model_format)
+
+    ewr_summary = ewr_sh.get_ewr_results()
+
+    assert ewr_summary.shape == (202, 19)