bulum 0.2.10__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bulum-0.2.10/src/bulum.egg-info → bulum-0.3.0}/PKG-INFO +7 -5
- {bulum-0.2.10 → bulum-0.3.0}/README.md +6 -4
- {bulum-0.2.10 → bulum-0.3.0}/setup.py +1 -1
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/clim/clim.py +39 -41
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/__init__.py +1 -1
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/csv_io.py +31 -19
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/general_io.py +11 -2
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/idx_io.py +22 -21
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/idx_io_native.py +56 -33
- bulum-0.3.0/src/bulum/io/iqqm_out_reader.py +337 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/lqn_io.py +24 -9
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/res_csv_io.py +25 -13
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/maps/station_maps.py +51 -27
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/altair_plots.py +274 -150
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/ensemble_altair_plots.py +50 -35
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/node_diagrams.py +3 -1
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/plot_functions.py +37 -21
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/tests/test_plot_functions.py +1 -1
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/__init__.py +2 -1
- bulum-0.3.0/src/bulum/stats/aggregate_stats.py +196 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/ensemble_stats.py +75 -30
- bulum-0.3.0/src/bulum/stats/negflo.py +573 -0
- bulum-0.3.0/src/bulum/stats/negflo_helpers.py +154 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/reliability_stats_class.py +82 -45
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/stochastic_data_check.py +9 -4
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/storage_level_assessment.py +51 -25
- bulum-0.3.0/src/bulum/stoch/generate.py +91 -0
- bulum-0.3.0/src/bulum/trans/transformers.py +100 -0
- bulum-0.3.0/src/bulum/utils/__init__.py +16 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/utils/dataframe_extensions.py +54 -71
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/utils/dataframe_functions.py +102 -111
- bulum-0.3.0/src/bulum/utils/datetime_functions.py +327 -0
- bulum-0.3.0/src/bulum/utils/interpolation.py +15 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/version.py +1 -1
- {bulum-0.2.10 → bulum-0.3.0/src/bulum.egg-info}/PKG-INFO +7 -5
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/SOURCES.txt +1 -1
- bulum-0.2.10/src/bulum/io/iqqm_out_reader.py +0 -127
- bulum-0.2.10/src/bulum/stats/aggregate_stats.py +0 -116
- bulum-0.2.10/src/bulum/stats/negflo.py +0 -652
- bulum-0.2.10/src/bulum/stats/negflo_scratch.gitignore.py +0 -8
- bulum-0.2.10/src/bulum/stoch/generate.py +0 -58
- bulum-0.2.10/src/bulum/trans/transformers.py +0 -83
- bulum-0.2.10/src/bulum/utils/__init__.py +0 -4
- bulum-0.2.10/src/bulum/utils/datetime_functions.py +0 -307
- bulum-0.2.10/src/bulum/utils/interpolation.py +0 -15
- {bulum-0.2.10 → bulum-0.3.0}/LICENSE.md +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/setup.cfg +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/clim/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/demo.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/maps/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/plotly_helpers.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/tests/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/swflo2s/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/swflo2s/swflo2s.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stoch/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stoch/analyse.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/test_circular_import.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum/trans/__init__.py +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/dependency_links.txt +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/requires.txt +0 -0
- {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/top_level.txt +0 -0
{bulum-0.2.10/src/bulum.egg-info → bulum-0.3.0}/PKG-INFO

@@ -1,8 +1,8 @@
 Metadata-Version: 2.2
 Name: bulum
-Version: 0.2.10
+Version: 0.3.0
 Summary: Open source python library for assessing hydrologic model results in Queensland
-Home-page: https://
+Home-page: https://github.com/odhydrology/bulum
 Author: Chas Egan
 Author-email: chas@odhydrology.com
 Classifier: Programming Language :: Python :: 3
@@ -56,6 +56,8 @@ bulum.__version__
 bulum.hello_world()
 ```
 
+API documentation is available at [odhydrology.github.io/bulum](https://odhydrology.github.io/bulum/).
+
 ## Build and Upload to PyPi
 
 First build a distribution from an anaconda prompt in the root of your project, and then upload the dist to PyPi using Twine.
@@ -68,7 +70,7 @@ python setup.py sdist
 twine upload dist\bulum-0.0.32.tar.gz
 ```
 
-As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "
+As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "\_\_token__", and password is the API token which is very long string starting with "pypi-".
 
 ``` bash
 username = __token__
@@ -81,7 +83,7 @@ How do I make a new API token? Go to your PyPi account settings, and click on "A
 
 ## Unit Tests
 
-WARNING: Run unit tests from an anaconda environment with
+WARNING: Run unit tests from an anaconda environment with compatible dependencies!
 
 Install the nose2 test-runner framework.
 
@@ -89,7 +91,7 @@ Install the nose2 test-runner framework.
 pip install nose2
 ```
 
-Then from the root project folder run the nose2 module. You can do this as a python modules, or just
+Then from the root project folder run the nose2 module. You can do this as a python modules, or just directly from the anaconda prompt (both examples given below). This will automatically find and run tests in any modules named "test_*".
 
 ```bash
 python -m nose2
{bulum-0.2.10 → bulum-0.3.0}/README.md

@@ -28,6 +28,8 @@ bulum.__version__
 bulum.hello_world()
 ```
 
+API documentation is available at [odhydrology.github.io/bulum](https://odhydrology.github.io/bulum/).
+
 ## Build and Upload to PyPi
 
 First build a distribution from an anaconda prompt in the root of your project, and then upload the dist to PyPi using Twine.
@@ -40,7 +42,7 @@ python setup.py sdist
 twine upload dist\bulum-0.0.32.tar.gz
 ```
 
-As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "
+As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "\_\_token__", and password is the API token which is very long string starting with "pypi-".
 
 ``` bash
 username = __token__
@@ -53,7 +55,7 @@ How do I make a new API token? Go to your PyPi account settings, and click on "A
 
 ## Unit Tests
 
-WARNING: Run unit tests from an anaconda environment with
+WARNING: Run unit tests from an anaconda environment with compatible dependencies!
 
 Install the nose2 test-runner framework.
 
@@ -61,7 +63,7 @@ Install the nose2 test-runner framework.
 pip install nose2
 ```
 
-Then from the root project folder run the nose2 module. You can do this as a python modules, or just
+Then from the root project folder run the nose2 module. You can do this as a python modules, or just directly from the anaconda prompt (both examples given below). This will automatically find and run tests in any modules named "test_*".
 
 ```bash
 python -m nose2
@@ -79,4 +81,4 @@ nose2 src.bulum.stats.tests
 
 ## License
 
-Refer to LICENCE.md
+Refer to LICENCE.md
{bulum-0.2.10 → bulum-0.3.0}/setup.py

@@ -14,7 +14,7 @@ setuptools.setup(
     description="Open source python library for assessing hydrologic model results in Queensland",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url="https://
+    url="https://github.com/odhydrology/bulum",
     package_dir={'': 'src'},
     packages=setuptools.find_packages('src'),
     classifiers=[
{bulum-0.2.10 → bulum-0.3.0}/src/bulum/clim/clim.py

@@ -3,20 +3,20 @@ import pandas as pd
 
 
 def derive_transformation_curves(original_ts: pd.Series, augmented_ts: pd.Series, season_start_months=[1,2,3,4,5,6,7,8,9,10,11,12], epsilon=1e-3) -> dict:
-    """Returns a dictionary of exceedence-based transformation curves - one for
-    with the season's start month as the key. These are tables that
-    (cunnane plotting position as a fraction) to a scaling
-    be used to
-
-
+    """Returns a dictionary of exceedence-based transformation curves - one for
+    each season with the season's start month as the key. These are tables that
+    map from exceedance (cunnane plotting position as a fraction) to a scaling
+    factor. These are intended to be used to effectively summarise
+    climate-change adjustments, and allow them to be transported from one
+    timeseries to another.
 
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    original_ts : pd.Series
+    augmented_ts : pd.Series
+    season_start_months : list, optional
+        Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
+
     """
     df = pd.DataFrame()
     df["x"] = original_ts
@@ -43,17 +43,19 @@ def derive_transformation_curves(original_ts: pd.Series, augmented_ts: pd.Series
 
 def apply_transformation_curves(tranformation_curves: dict, series: pd.Series) -> pd.Series:
     """Applies seasonal transformation curves to an input series.
-    Refer to the function
+    Refer to the function `derive_transformation_curves`.
 
-
-
-
+    Parameters
+    ----------
+    tranformation_curves : dict
+    series : pd.Series
 
-    Returns
-
+    Returns
+    -------
+    pd.Series
     """
     dates = series.index
-    answer = series.copy()
+    answer = series.copy()
     # Apply each transformation curves to the whole series. Splice the appropriate
     # parts (seasons) into the 'answer' series as we go.
     season_start_months = sorted(tranformation_curves.keys())
@@ -73,7 +75,7 @@ def apply_transformation_curves(tranformation_curves: dict, series: pd.Series) -
         # And get their ranks and plotting positions
         rank_starting_at_one = values.rank(ascending=True) # This function is nice because equal values are assigned the same (averaged) rank.
         n = len(values)
-        p = [(r - 0.4)/(n + 0.2) for r in rank_starting_at_one] # plotting position
+        p = [(r - 0.4)/(n + 0.2) for r in rank_starting_at_one] # Cunnane plotting position
         f = np.interp(p, xp, fp) # interpolated scaling factors
         # Calcualte new values and update the answer
         new_values = pd.Series([values.iloc[i] * f[i] for i in range(n)], index=season_dates)
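The restored docstrings describe an exceedance-curve workflow: derive per-season curves from an original/augmented pair, then transport the adjustment to another series. Below is a minimal usage sketch under stated assumptions: the functions are imported from the `bulum.clim.clim` module shown in this diff, the series use bulum's string "%Y-%m-%d" date index, and the data are invented for illustration.

```python
import numpy as np
import pandas as pd

from bulum.clim.clim import (apply_transformation_curves,
                             derive_transformation_curves)

# Invented daily series with bulum's string "%Y-%m-%d" date index.
dates = pd.date_range("2000-01-01", "2009-12-31").strftime("%Y-%m-%d")
rng = np.random.default_rng(42)
original = pd.Series(rng.gamma(2.0, 50.0, size=len(dates)), index=dates, name="Flow")
augmented = original * 0.85  # stand-in for a climate-adjusted version of the record

# One curve per season; here two six-month seasons starting in Jan and Jul.
curves = derive_transformation_curves(original, augmented, season_start_months=[1, 7])

# Transport the adjustment to a series (here the original itself).
adjusted = apply_transformation_curves(curves, original)
```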
@@ -88,14 +90,15 @@ def derive_transformation_factors(original_ts: pd.Series, augmented_ts: pd.Serie
     be used to effectively summarise climate-change adjustments, and allow them to be
     transported from one timeseries to another.
 
-
-
-
-
-
+    Parameters
+    ----------
+    original_ts : pd.Series
+    augmented_ts : pd.Series
+    season_start_months : list, optional
+        [1,2,3,4,5,6,7,8,9,10,11,12].
+    epsilon : float
+        Threshold below which values are treated as zero, and the associated factor defaults to 1.
 
-    Returns:
-        dict: _description_
     """
     # Create a map of month -> season_start_month (for all months)
     month_to_season_map = {}
@@ -116,25 +119,24 @@ def derive_transformation_factors(original_ts: pd.Series, augmented_ts: pd.Serie
     return df2['f'].to_dict()
 
 
-def apply_transformation_factors(
+def apply_transformation_factors(transformation_factors: dict, series: pd.Series) -> pd.Series:
     """Applies seasonal transformation factors to an input series.
-    Refer to the function
+    Refer to the function `derive_transformation_curves`.
 
-
-
-
+    Parameters
+    ----------
+    transformation_curves : dict
+    series : pd.Series
 
-    Returns:
-        pd.Series: _description_
     """
     # Create a map of month -> factor (containing all months)
-    season_start_months = sorted(
+    season_start_months = sorted(transformation_factors.keys())
     month_to_factor_map = {}
     key = max(season_start_months)
     for m in [1,2,3,4,5,6,7,8,9,10,11,12]:
         if m in season_start_months:
             key = m
-        month_to_factor_map[m] =
+        month_to_factor_map[m] = transformation_factors[key]
     # Apply transformation factors to the whole series. Splice the appropriate
     df = pd.DataFrame()
     df['x'] = series
@@ -144,7 +146,3 @@ def apply_transformation_factors(tranformation_factors: dict, series: pd.Series)
     answer = df['y']
     answer.name = series.name
     return answer
-
-
-
-
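The factor-based variant condenses each season to a single scaling factor rather than a full exceedance curve. A companion sketch under the same assumptions, reusing `original` and `augmented` from the curve example above:

```python
from bulum.clim.clim import (apply_transformation_factors,
                             derive_transformation_factors)

# One scaling factor per season; epsilon guards the near-zero values the
# restored docstring describes (their factor defaults to 1).
factors = derive_transformation_factors(original, augmented,
                                        season_start_months=[1, 7],
                                        epsilon=1e-3)

scaled = apply_transformation_factors(factors, original)
```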
{bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/csv_io.py

@@ -1,20 +1,36 @@
+"""
+Functions for reading CSVs, particularly time-series CSVs.
+"""
+
 import numpy as np
 import pandas as pd
 from bulum import utils
+import os
+
 na_values = ['', ' ', 'null', 'NULL', 'NAN', 'NaN', 'nan', 'NA', 'na', 'N/A' 'n/a', '#N/A', '#NA', '-NaN', '-nan']
 
 
-def read_ts_csv(filename
-
-
+def read_ts_csv(filename: str | os.PathLike, date_format=None,
+                df=None, colprefix=None, allow_nonnumeric=False,
+                assert_date=True, **kwargs) -> utils.TimeseriesDataframe:
+    """
+    Reads a daily timeseries csv into a DataFrame, and sets the index to string
+    dates in the "%Y-%m-%d" format. The method assumes the first column are
+    dates.
 
-
-
-
-
-
-
-
+    Parameters
+    ----------
+    filename : str | PathLike
+    date_format : str, optional
+        defaults to "%d/%m/%Y" as per Fors. Other common formats include "%Y-%m-%d", "%Y/%m/%d".
+    df : pd.DataFrame, optional
+        If provided, the reader will append columns to this dataframe. Defaults to None.
+    colprefix : str, optional
+        If provided, the reader will append this prefix to the start of each column name. Defaults to None.
+    allow_nonnumeric : bool, optional
+        If false, the method will assert that all columns are numerical. Defaults to False.
+    assert_date : bool, optional
+        If true, the method will assert that date index meets "%Y-%m-%d" format. Defaults to True.
 
     Returns:
         pd.DataFrame: Dataframe containing the data from the csv file.
@@ -34,7 +50,7 @@ def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnu
     # Rename columns if required
     if colprefix is not None:
         for c in new_df.columns:
-            new_df.rename(columns
+            new_df.rename(columns={c: f"{colprefix}{c}"}, inplace=True)
     # Join to existing dataframe if required
     if df is None:
         df = new_df
@@ -49,11 +65,7 @@ def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnu
     return utils.TimeseriesDataframe.from_dataframe(df)
 
 
-def write_ts_csv(df: pd.DataFrame, filename: str
-
-
-
-        df (pd.DataFrame): _description_
-        filename (str): _description_
-    """
-    df.to_csv(filename)
+def write_ts_csv(df: pd.DataFrame, filename: str,
+                 *args, **kwargs):
+    """Wrapper around ``pandas.DataFrame.to_csv()``."""
+    df.to_csv(filename, *args, **kwargs)
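The expanded `read_ts_csv` signature documents prefixing, appending, and validation, and `write_ts_csv` is now a thin `to_csv` wrapper. A round-trip sketch with an invented file name and gauge prefix, importing directly from the `csv_io` module shown in the diff:

```python
from bulum.io.csv_io import read_ts_csv, write_ts_csv

# Hypothetical daily file whose first column holds dates in the documented
# default "%d/%m/%Y" format; columns come back with the given prefix.
df = read_ts_csv("flows.csv", colprefix="GS4032_")

# df is a utils.TimeseriesDataframe indexed by "%Y-%m-%d" string dates.
print(df.columns)

# Extra positional/keyword args now pass straight through to DataFrame.to_csv().
write_ts_csv(df, "flows_out.csv")
```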
{bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/general_io.py

@@ -1,10 +1,19 @@
-
+"""
+General use IO functions.
+"""
+import re
+
 import bulum.io as bio
 from bulum import utils
-import re
 
 
 def read(filename: str, **kwargs) -> utils.TimeseriesDataframe:
+    """
+    Read the input file.
+
+    It will attempt to determine the filetype and dispatch to the appropriate
+    function in `bulum.io`.
+    """
     filename_lower = filename.lower()
     df = None
     if filename_lower.endswith(".res.csv"):
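The new docstring makes the dispatch behaviour explicit: `read` inspects the filename and routes to the matching reader. A one-line sketch, assuming `read` is exposed at the package level (as `general_io`'s own `import bulum.io as bio` suggests) and using an invented file name:

```python
import bulum.io as bio

# Dispatches on extension; ".res.csv" is the case visible in this hunk, and
# other bulum.io formats (e.g. IDX) are assumed to route similarly.
df = bio.read("scenario1.res.csv")
```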
{bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/idx_io.py

@@ -1,20 +1,20 @@
+"""
+IO functions for IDX files.
+
+See also :py:mod:`bulum.op.idx_io_native`.
+"""
 import os
-import pandas as pd
-import uuid
 import shutil
 import subprocess
-
-from .csv_io import *
+import uuid
 
+from bulum import utils
 
+from .csv_io import *
 
-def write_idx(df, filename, cleanup_tempfile=True):
-    """_summary_
 
-
-
-        filename (_type_): _description_
-    """
+def write_idx(df: pd.DataFrame, filename, cleanup_tempfile=True):
+    """Write IDX file from dataframe, requires csvidx.exe."""
     if shutil.which('csvidx') is None:
         raise Exception("This method relies on the external program 'csvidx.exe'. Please ensure it is in your path.")
     temp_filename = f"{uuid.uuid4().hex}.tempfile.csv"
@@ -26,17 +26,20 @@ def write_idx(df, filename, cleanup_tempfile=True):
         os.remove(temp_filename)
 
 
-
-def write_area_ts_csv(df, filename, units = "(mm.d^-1)"):
+def write_area_ts_csv(df, filename, units="(mm.d^-1)"):
     """_summary_
 
-
-
-
-
+    Parameters
+    ----------
+    df : DataFrame
+    filename
+    units : str, optional
+        Defaults to "(mm.d^-1)".
 
-    Raises
-
+    Raises
+    ------
+    Exception
+        If shortened field names are going to clash in output file.
     """
     # ensures dataframe adheres to standards
    utils.assert_df_format_standards(df)
@@ -45,7 +48,7 @@ def write_area_ts_csv(df, filename, units = "(mm.d^-1)"):
     for c in df.columns:
         c12 = f"{c[:12]:<12}"
         if c12 in fields.keys():
-            raise Exception(f"Field names clash when
+            raise Exception(f"Field names clash when shortened to 12 chars: {c} and {fields[c12]}")
         fields[c12] = c
     # create the header text
     header = f"{units}"
@@ -60,5 +63,3 @@ def write_area_ts_csv(df, filename, units = "(mm.d^-1)"):
     with open(filename, "w+", newline='', encoding='utf-8') as file:
         file.write(header)
         df.to_csv(file, header=False, na_rep=' NaN')
-
-
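`write_idx` still shells out to the external `csvidx.exe` via a temporary CSV, which the tidied docstring now states up front. A guarded sketch with an invented input file; the availability check mirrors the function's own:

```python
import shutil

from bulum.io.csv_io import read_ts_csv
from bulum.io.idx_io import write_idx

df = read_ts_csv("flows.csv")  # hypothetical bulum-standard daily dataframe

# write_idx round-trips through a temp CSV and calls 'csvidx', so check for
# the external program first rather than catching the raised Exception.
if shutil.which("csvidx") is not None:
    write_idx(df, "flows.idx")
else:
    print("csvidx.exe not on PATH; consider the native writer instead")
```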
{bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/idx_io_native.py

@@ -1,18 +1,26 @@
+"""
+IO functions for IDX format (binary) written in native Python.
+"""
 import os
-
+from typing import Optional
+
 import numpy as np
+import pandas as pd
+
 from bulum import utils
 
 
 def _detect_header_bytes(b_data: np.ndarray) -> bool:
     """
-    Helper function for read_idx
-    a version of IQQM with an old compiler with metadata/junk data
-
-
+    Helper function for :func:`read_idx`. Detects whether the .OUT file was
+    written with a version of IQQM with an old compiler with metadata/junk data
+    as a header. Fails if the run was undertaken with only one source of data,
+    i.e. the .idx file has only one entry.
 
-
-
+    Parameters
+    ----------
+    b_data : np.ndarray
+        2d array of binary data filled with float32 data
     """
     b_data_slice: tuple[np.float32] = b_data[0]
     first_non_zero = b_data_slice[0] != 0.0
@@ -20,17 +28,22 @@ def _detect_header_bytes(b_data: np.ndarray) -> bool:
     return first_non_zero and rest_zeroes
 
 
-def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
-    """
+def read_idx(filename, skip_header_bytes: Optional[bool] = None) -> utils.TimeseriesDataframe:
+    """
+    Read IDX file.
 
-
-
-
-
-
+    Parameters
+    ----------
+    filename
+        Name of the IDX file.
+    skip_header_bytes : bool, optional
+        Whether to skip header bytes in the corresponding OUTs file (related to
+        the compiler used for IQQM). If set to None, attempt to detect the
+        presence of header bytes automatically.
 
-    Returns
-
+    Returns
+    -------
+    utils.TimeseriesDataframe
     """
     if not os.path.exists(filename):
         raise FileNotFoundError(f"File does not exist: {filename}")
@@ -41,7 +54,7 @@ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
         # Start date, end date, date interval
         stmp = f.readline().split()
         date_start = utils.standardize_datestring_format([stmp[0]])[0]
-        date_end = utils.standardize_datestring_format([stmp[1]])[0]
+        date_end = utils.standardize_datestring_format([stmp[1]])[0]
         date_flag = int(stmp[2])
         snames = []
         for n, line in enumerate(f):
@@ -65,14 +78,14 @@ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
     # Read data
     if date_flag == 0:
         daily_date_values = utils.datetime_functions.get_dates(
-            date_start, end_date=date_end, include_end_date=True)
+            date_start, end_date=date_end, include_end_date=True)
         df = pd.DataFrame.from_records(b_data, index=daily_date_values)
         df.columns = snames
         df.index.name = "Date"
         # Check data types. If not 'float64' or 'int64', convert to 'float64'
-        x = df.select_dtypes(exclude=['int64','float64']).columns
-        if x.__len__()>0:
-            df=df.astype({i: 'float64' for i in x})
+        x = df.select_dtypes(exclude=['int64', 'float64']).columns
+        if x.__len__() > 0:
+            df = df.astype({i: 'float64' for i in x})
     elif date_flag == 1:
         raise NotImplementedError("Monthly data not yet supported")
     elif date_flag == 3:
@@ -84,21 +97,29 @@ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
 
 
 def write_idx_native(df: pd.DataFrame, filepath, type="None", units="None") -> None:
-    """Writer for .IDX and corresponding .OUT binary files written in native
-    Currently only supports daily data (date flag 0), as with the reader
+    """Writer for .IDX and corresponding .OUT binary files written in native
+    Python. Currently only supports daily data (date flag 0), as with the reader
+    :func:`read_idx`.
 
-    Assumes that data are homogeneous in units and type e.g. Precipitation & mm
+    Assumes that data are homogeneous in units and type e.g. Precipitation & mm
+    resp., or Flow & ML/d.
 
-
-
-
-
-
+    Parameters
+    ----------
+    df : pd.Dataframe
+        DataFrame as per the output of :func:`read_idx`.
+    filepath
+        Path to the IDX file to be written to including .IDX extension.
+    units : str, optional
+        Units for data in df.
+    type : str, optional
+        Data specifier for data in df, e.g. Gauged Flow, Precipitation, etc.
     """
     date_flag = 0
-    # TODO: When generalising to other frequencies, we may be able to simply
-    #
-    #
+    # TODO: When generalising to other frequencies, we may be able to simply
+    # read the data type off the time delta in df.index values As is, I've
+    # essentially copied what was done in the reader to flag that this should be
+    # implemented at the "same time". Verify valid date_flag
    match date_flag:
         case 0:
             pass # valid
@@ -122,7 +143,9 @@ def write_idx_native(df: pd.DataFrame, filepath, type="None", units="None") -> N
         f.write(f"{first_date} {last_date} {date_flag}\n")
         # data
         # inline fn to ensure padded string is exactly l characters long
-
+
+        def ljust_or_truncate(s, l):
+            return s.ljust(l)[0:l]
         for idx, col_name in enumerate(col_names):
             source_entry = ljust_or_truncate(f"df_col{idx+1}", 12)
             name_entry = ljust_or_truncate(f"{col_name}", 40)