bulum 0.0.0.tar.gz → 0.2.9.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bulum-0.2.9/PKG-INFO +100 -0
- bulum-0.2.9/README.md +82 -0
- bulum-0.2.9/setup.py +32 -0
- bulum-0.2.9/src/bulum/__init__.py +3 -0
- bulum-0.2.9/src/bulum/clim/__init__.py +1 -0
- bulum-0.2.9/src/bulum/clim/clim.py +150 -0
- bulum-0.2.9/src/bulum/demo.py +6 -0
- bulum-0.2.9/src/bulum/io/__init__.py +7 -0
- bulum-0.2.9/src/bulum/io/csv_io.py +59 -0
- bulum-0.2.9/src/bulum/io/general_io.py +24 -0
- bulum-0.2.9/src/bulum/io/idx_io.py +64 -0
- bulum-0.2.9/src/bulum/io/idx_io_native.py +136 -0
- bulum-0.2.9/src/bulum/io/iqqm_out_reader.py +127 -0
- bulum-0.2.9/src/bulum/io/lqn_io.py +39 -0
- bulum-0.2.9/src/bulum/io/res_csv_io.py +152 -0
- bulum-0.2.9/src/bulum/maps/__init__.py +1 -0
- bulum-0.2.9/src/bulum/maps/station_maps.py +112 -0
- bulum-0.2.9/src/bulum/plots/__init__.py +5 -0
- bulum-0.2.9/src/bulum/plots/altair_plots.py +726 -0
- bulum-0.2.9/src/bulum/plots/ensemble_altair_plots.py +87 -0
- bulum-0.2.9/src/bulum/plots/node_diagrams.py +8 -0
- bulum-0.2.9/src/bulum/plots/plot_functions.py +90 -0
- bulum-0.2.9/src/bulum/plots/tests/__init__.py +1 -0
- bulum-0.2.9/src/bulum/plots/tests/test_plot_functions.py +224 -0
- bulum-0.2.9/src/bulum/stats/__init__.py +6 -0
- bulum-0.2.9/src/bulum/stats/aggregate_stats.py +116 -0
- bulum-0.2.9/src/bulum/stats/ensemble_stats.py +95 -0
- bulum-0.2.9/src/bulum/stats/negflo.py +647 -0
- bulum-0.2.9/src/bulum/stats/reliability_stats_class.py +225 -0
- bulum-0.2.9/src/bulum/stats/stochastic_data_check.py +409 -0
- bulum-0.2.9/src/bulum/stats/storage_level_assessment.py +228 -0
- bulum-0.2.9/src/bulum/stats/swflo2s/__init__.py +1 -0
- bulum-0.2.9/src/bulum/stats/swflo2s/swflo2s.py +163 -0
- bulum-0.2.9/src/bulum/stoch/__init__.py +1 -0
- bulum-0.2.9/src/bulum/stoch/analyse.py +0 -0
- bulum-0.2.9/src/bulum/stoch/generate.py +58 -0
- bulum-0.2.9/src/bulum/trans/__init__.py +1 -0
- bulum-0.2.9/src/bulum/trans/transformers.py +83 -0
- bulum-0.2.9/src/bulum/utils/__init__.py +4 -0
- bulum-0.2.9/src/bulum/utils/dataframe_extensions.py +212 -0
- bulum-0.2.9/src/bulum/utils/dataframe_functions.py +293 -0
- bulum-0.2.9/src/bulum/utils/datetime_functions.py +307 -0
- bulum-0.2.9/src/bulum/utils/interpolation.py +15 -0
- bulum-0.2.9/src/bulum/version.py +5 -0
- bulum-0.2.9/src/bulum.egg-info/PKG-INFO +100 -0
- bulum-0.2.9/src/bulum.egg-info/SOURCES.txt +49 -0
- bulum-0.2.9/src/bulum.egg-info/requires.txt +6 -0
- bulum-0.0.0/PKG-INFO +0 -5
- bulum-0.0.0/bulum.egg-info/PKG-INFO +0 -5
- bulum-0.0.0/bulum.egg-info/SOURCES.txt +0 -6
- bulum-0.0.0/pyproject.toml +0 -7
- {bulum-0.0.0 → bulum-0.2.9}/setup.cfg +0 -0
- /bulum-0.0.0/bulum/__init__.py → /bulum-0.2.9/src/bulum/plots/plotly_helpers.py +0 -0
- {bulum-0.0.0 → bulum-0.2.9/src}/bulum.egg-info/dependency_links.txt +0 -0
- {bulum-0.0.0 → bulum-0.2.9/src}/bulum.egg-info/top_level.txt +0 -0
bulum-0.2.9/PKG-INFO
ADDED
@@ -0,0 +1,100 @@
Metadata-Version: 2.1
Name: bulum
Version: 0.2.9
Summary: Open source python library for assessing hydrologic model results in Queensland
Home-page: https://bitbucket.org/odhydrology/bulum.git
Author: Chas Egan
Author-email: chas@odhydrology.com
Classifier: Programming Language :: Python :: 3
Classifier: Operating System :: OS Independent
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Requires-Dist: altair[all]>=5.5.0
Requires-Dist: folium>=0.14
Requires-Dist: matplotlib>=3.8.3
Requires-Dist: numpy>=1.26.4
Requires-Dist: pandas>=2.2.0
Requires-Dist: plotly>=5.18.0

# bulum

## Installation

This package can be installed with pip from Bitbucket (requires authentication), directly from PyPI (public), or from a local .tar.gz file. Examples are shown below.

```bash
pip install git+https://bitbucket.org/odhydrology/bulum.git
```

```bash
pip install bulum
```

```bash
pip install .\dist\bulum-0.0.32.tar.gz
```

## Usage

```python
import bulum

# returns the package version
bulum.__version__

# prints 'Hello world!' to the console
bulum.hello_world()
```

## Build and Upload to PyPI

First build a source distribution from an Anaconda prompt in the root of the project, then upload the dist to PyPI using Twine.

```bash
python setup.py sdist
```

```bash
twine upload dist\bulum-0.0.32.tar.gz
```

As of November 2023, PyPI uses an API token instead of a conventional password. You can still use Twine, but the username is "__token__" and the password is the API token, a very long string starting with "pypi-".

```bash
username = __token__
password = pypi-#####################################################################################
```

Where can I find the API token password? Chas has it in his emails. It is also on the network at *.\ODH working files\Professional development, reading, etc\Software\ODHSoftware\bulum\PyPi_password_and_instructions.txt*.

How do I make a new API token? Go to your PyPI account settings and click on "API tokens". Then click "Add API token" and give it a name. The token is displayed on the next screen.

## Unit Tests

WARNING: Run unit tests from an Anaconda environment with compatible dependencies!

Install the nose2 test-runner framework.

```bash
pip install nose2
```

Then, from the root project folder, run the nose2 module. You can invoke it as a Python module or directly from the Anaconda prompt (both examples are given below). This will automatically find and run tests in any modules named "test_*".

```bash
python -m nose2
```

```bash
nose2
```

You can run specific tests by specifying the module name, as in the example below.

```bash
nose2 src.bulum.stats.tests
```

## License

None.
bulum-0.2.9/README.md
ADDED
@@ -0,0 +1,82 @@
# bulum

## Installation

This package can be installed with pip from Bitbucket (requires authentication), directly from PyPI (public), or from a local .tar.gz file. Examples are shown below.

```bash
pip install git+https://bitbucket.org/odhydrology/bulum.git
```

```bash
pip install bulum
```

```bash
pip install .\dist\bulum-0.0.32.tar.gz
```

## Usage

```python
import bulum

# returns the package version
bulum.__version__

# prints 'Hello world!' to the console
bulum.hello_world()
```

## Build and Upload to PyPI

First build a source distribution from an Anaconda prompt in the root of the project, then upload the dist to PyPI using Twine.

```bash
python setup.py sdist
```

```bash
twine upload dist\bulum-0.0.32.tar.gz
```

As of November 2023, PyPI uses an API token instead of a conventional password. You can still use Twine, but the username is "__token__" and the password is the API token, a very long string starting with "pypi-".

```bash
username = __token__
password = pypi-#####################################################################################
```

Where can I find the API token password? Chas has it in his emails. It is also on the network at *.\ODH working files\Professional development, reading, etc\Software\ODHSoftware\bulum\PyPi_password_and_instructions.txt*.

How do I make a new API token? Go to your PyPI account settings and click on "API tokens". Then click "Add API token" and give it a name. The token is displayed on the next screen.

## Unit Tests

WARNING: Run unit tests from an Anaconda environment with compatible dependencies!

Install the nose2 test-runner framework.

```bash
pip install nose2
```

Then, from the root project folder, run the nose2 module. You can invoke it as a Python module or directly from the Anaconda prompt (both examples are given below). This will automatically find and run tests in any modules named "test_*".

```bash
python -m nose2
```

```bash
nose2
```

You can run specific tests by specifying the module name, as in the example below.

```bash
nose2 src.bulum.stats.tests
```

## License

None.
bulum-0.2.9/setup.py
ADDED
@@ -0,0 +1,32 @@
import setuptools

with open("README.md", "r") as fh:
    long_description = fh.read()

exec(open('src/bulum/version.py').read())

setuptools.setup(
    name="bulum",
    version=__version__,
    python_requires=">=3.9",
    author="Chas Egan",
    author_email="chas@odhydrology.com",
    description="Open source python library for assessing hydrologic model results in Queensland",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://bitbucket.org/odhydrology/bulum.git",
    package_dir={'': 'src'},
    packages=setuptools.find_packages('src'),
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    install_requires=[
        'altair[all]>=5.5.0',
        'folium>=0.14',
        'matplotlib>=3.8.3',
        'numpy>=1.26.4',
        'pandas>=2.2.0',
        'plotly>=5.18.0',
    ],
)
bulum-0.2.9/src/bulum/clim/__init__.py
ADDED
@@ -0,0 +1 @@
from .clim import *
bulum-0.2.9/src/bulum/clim/clim.py
ADDED
@@ -0,0 +1,150 @@
import numpy as np
import pandas as pd


def derive_transformation_curves(original_ts: pd.Series, augmented_ts: pd.Series, season_start_months=[1,2,3,4,5,6,7,8,9,10,11,12], epsilon=1e-3) -> dict:
    """Returns a dictionary of exceedance-based transformation curves - one for each season,
    with the season's start month as the key. These are tables that map from exceedance
    (Cunnane plotting position as a fraction) to a scaling factor. They are intended to
    effectively summarise climate-change adjustments, and allow them to be transported from
    one timeseries to another.

    Args:
        original_ts (pd.Series): _description_
        augmented_ts (pd.Series): _description_
        season_start_months (list, optional): _description_. Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].

    Returns:
        dict: _description_
    """
    df = pd.DataFrame()
    df["x"] = original_ts
    df["y"] = augmented_ts
    df = df.dropna()  # Force common period
    answer = {}
    for i in range(len(season_start_months)):
        # Get a list of the months in this season
        start_month = season_start_months[i]
        season_len = (season_start_months + [m + 12 for m in season_start_months])[i + 1] - start_month
        months_in_this_season = [1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12][start_month - 1: start_month - 1 + season_len]
        # Find the data for this season
        df_m = df[[int(d[5:7]) in months_in_this_season for d in df.index]]  # d[5:7] is the month part of the date string
        x = np.sort(df_m.x.values)
        y = np.sort(df_m.y.values)
        # The transformation factor is y/x, except when the original value x is zero (< epsilon), in which case we default to 1.0
        f = np.where(x < epsilon, 1.0, y / x)
        n = len(x)
        ii = [i + 1 for i in range(n)]  # rank index starting at 1
        p = [(i - 0.4)/(n + 0.2) for i in ii]  # Cunnane plotting positions
        answer[start_month] = [p, f]
    return answer


def apply_transformation_curves(tranformation_curves: dict, series: pd.Series) -> pd.Series:
    """Applies seasonal transformation curves to an input series.
    Refer to the function 'derive_transformation_curves(...)'.

    Args:
        tranformation_curves (dict): _description_
        series (pd.Series): _description_

    Returns:
        pd.Series: _description_
    """
    dates = series.index
    answer = series.copy()
    # Apply each transformation curve to the whole series. Splice the appropriate
    # parts (seasons) into the 'answer' series as we go.
    season_start_months = sorted(tranformation_curves.keys())
    for i in range(len(season_start_months)):
        # Identify the transformation curve for this season
        start_month = season_start_months[i]
        t = tranformation_curves[start_month]
        xp = t[0]
        fp = t[1]
        # Get a list of the months in this season
        season_len = (season_start_months + [m + 12 for m in season_start_months])[i + 1] - start_month
        months_in_this_season = [1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12][start_month - 1: start_month - 1 + season_len]
        # Find the data for this season
        m = len(series)
        season_dates = pd.Series([d for d in dates if int(d[5:7]) in months_in_this_season])  # d[5:7] is the month part of the date string
        values = answer[season_dates]
        # And get their ranks and plotting positions
        rank_starting_at_one = values.rank(ascending=True)  # This function is nice because equal values are assigned the same (averaged) rank.
        n = len(values)
        p = [(r - 0.4)/(n + 0.2) for r in rank_starting_at_one]  # plotting position
        f = np.interp(p, xp, fp)  # interpolated scaling factors
        # Calculate new values and update the answer
        new_values = pd.Series([values.iloc[i] * f[i] for i in range(n)], index=season_dates)
        answer.update(new_values)
    # Return a pd.Series so the user can easily join it back into a dataframe
    return pd.Series(answer, index=dates, name=series.name)


def derive_transformation_factors(original_ts: pd.Series, augmented_ts: pd.Series, season_start_months=[1,2,3,4,5,6,7,8,9,10,11,12], epsilon=1e-3) -> dict:
    """Returns a dictionary of transformation factors - one for each season,
    with the season's start month as the key. These scaling factors are intended to
    be used to effectively summarise climate-change adjustments, and allow them to be
    transported from one timeseries to another.

    Args:
        original_ts (pd.Series): _description_
        augmented_ts (pd.Series): _description_
        season_start_months (list, optional): _description_. Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
        epsilon: Threshold below which values are treated as zero, and the associated factor defaults to 1.

    Returns:
        dict: _description_
    """
    # Create a map of month -> season_start_month (for all months)
    month_to_season_map = {}
    key = max(season_start_months)
    for m in [1,2,3,4,5,6,7,8,9,10,11,12]:
        if m in season_start_months:
            key = m
        month_to_season_map[m] = key
    # Put the data in a dataframe, and group by season start month
    df = pd.DataFrame()
    df["x"] = original_ts
    df["y"] = augmented_ts
    df = df.dropna()  # Force common period
    df['m'] = df.index.month
    df['s'] = df['m'].map(month_to_season_map)
    df2 = df.groupby('s').agg('sum')
    df2['f'] = np.where(df2.x < epsilon, 1.0, df2.y / df2.x)
    return df2['f'].to_dict()


def apply_transformation_factors(tranformation_factors: dict, series: pd.Series) -> pd.Series:
    """Applies seasonal transformation factors to an input series.
    Refer to the function 'derive_transformation_factors(...)'.

    Args:
        tranformation_factors (dict): _description_
        series (pd.Series): _description_

    Returns:
        pd.Series: _description_
    """
    # Create a map of month -> factor (containing all months)
    season_start_months = sorted(tranformation_factors.keys())
    month_to_factor_map = {}
    key = max(season_start_months)
    for m in [1,2,3,4,5,6,7,8,9,10,11,12]:
        if m in season_start_months:
            key = m
        month_to_factor_map[m] = tranformation_factors[key]
    # Apply transformation factors to the whole series, splicing the appropriate
    # seasonal factor onto each date.
    df = pd.DataFrame()
    df['x'] = series
    df['m'] = df.index.month
    df['f'] = df['m'].map(month_to_factor_map)
    df['y'] = df['x'] * df['f']
    answer = df['y']
    answer.name = series.name
    return answer
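The curve functions above pair up: derive_transformation_curves learns a seasonal exceedance-to-factor mapping from a matched pair of series, and apply_transformation_curves applies it to another series. Below is a minimal usage sketch, not part of the package diff: the values are illustrative, and the "%Y-%m-%d" string date index is an assumption based on how the curve functions slice the month out of the index.

```python
# Illustrative sketch only: derive seasonal curves from a matched pair of daily
# series and apply them back to one of them.
import numpy as np
import pandas as pd
from bulum import clim

# String dates in "%Y-%m-%d" format, as the curve functions expect.
dates = pd.date_range("2000-01-01", "2009-12-31").strftime("%Y-%m-%d")
rng = np.random.default_rng(0)
original = pd.Series(rng.gamma(2.0, 5.0, len(dates)), index=dates, name="Flow")
augmented = original * 0.85  # e.g. a climate-adjusted counterpart of the same record

# Two six-month seasons, starting in January and July.
curves = clim.derive_transformation_curves(original, augmented, season_start_months=[1, 7])
adjusted = clim.apply_transformation_curves(curves, original)
```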
bulum-0.2.9/src/bulum/io/csv_io.py
ADDED
@@ -0,0 +1,59 @@
import numpy as np
import pandas as pd
from bulum import utils
na_values = ['', ' ', 'null', 'NULL', 'NAN', 'NaN', 'nan', 'NA', 'na', 'N/A', 'n/a', '#N/A', '#NA', '-NaN', '-nan']


def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnumeric=False, assert_date=True, **kwargs) -> utils.TimeseriesDataframe:
    """Reads a daily timeseries csv into a DataFrame, and sets the index to string dates in the "%Y-%m-%d" format.
    The method assumes the first column contains dates.

    Args:
        filename (_type_): _description_
        date_format (str, optional): Defaults to "%d/%m/%Y" as per Fors. Other common formats include "%Y-%m-%d", "%Y/%m/%d".
        df (pd.DataFrame, optional): If provided, the reader will append columns to this dataframe. Defaults to None.
        colprefix (str, optional): If provided, the reader will prepend this prefix to each column name. Defaults to None.
        allow_nonnumeric (bool, optional): If false, the method will assert that all columns are numerical. Defaults to False.
        assert_date (bool, optional): If true, the method will assert that the date index meets the "%Y-%m-%d" format. Defaults to True.

    Returns:
        pd.DataFrame: Dataframe containing the data from the csv file.
    """
    new_df = pd.read_csv(filename, na_values=na_values, **kwargs)
    # Date index
    new_df.set_index(new_df.columns[0], inplace=True)
    if assert_date:
        new_df.index = utils.standardize_datestring_format(new_df.index)
    new_df.index.name = "Date"
    # df = df.replace(r'^\s*$', np.nan, regex=True)
    # Check values
    if not allow_nonnumeric:
        for col in new_df.columns:
            if not np.issubdtype(new_df[col].dtype, np.number):
                raise Exception(f"ERROR: Column '{col}' is not numeric!")
    # Rename columns if required
    if colprefix is not None:
        for c in new_df.columns:
            new_df.rename(columns={c: f"{colprefix}{c}"}, inplace=True)
    # Join to existing dataframe if required
    if df is None:
        df = new_df
    else:
        if len(df) > 0:
            # Check that the dates overlap
            newdf_ends_before_df_starts = new_df.index[-1] < df.index[0]
            df_ends_before_newdf_starts = df.index[-1] < new_df.index[0]
            if newdf_ends_before_df_starts or df_ends_before_newdf_starts:
                raise Exception("ERROR: The dates in the new dataframe do not overlap with the existing dataframe!")
        df = df.join(new_df, how="outer")
    return utils.TimeseriesDataframe.from_dataframe(df)


def write_ts_csv(df: pd.DataFrame, filename: str):
    """_summary_

    Args:
        df (pd.DataFrame): _description_
        filename (str): _description_
    """
    df.to_csv(filename)
bulum-0.2.9/src/bulum/io/general_io.py
ADDED
@@ -0,0 +1,24 @@
import pandas as pd
import bulum.io as oio
from bulum import utils
import re


def read(filename: str, **kwargs) -> utils.TimeseriesDataframe:
    filename_lower = filename.lower()
    df = None
    if filename_lower.endswith(".res.csv"):
        df = oio.read_res_csv(filename, **kwargs)
        if df is None:
            raise ValueError("Res csv could not be read.")
    elif filename_lower.endswith(".csv"):
        df = oio.read_ts_csv(filename, **kwargs)
    elif filename_lower.endswith(".idx"):
        df = oio.read_idx(filename, **kwargs)
    elif re.search(r"\.[0-9]{2}d$", filename_lower):
        df = oio.read_iqqm_lqn_output(filename, **kwargs)
    else:
        raise ValueError(f"Unknown file extension: {filename}")
    assert isinstance(df, utils.TimeseriesDataframe), \
        "Output of `read` is not a TimeseriesDataframe."
    return df
bulum-0.2.9/src/bulum/io/idx_io.py
ADDED
@@ -0,0 +1,64 @@
import os
import pandas as pd
import uuid
import shutil
import subprocess
from bulum import utils
from .csv_io import *


def write_idx(df, filename, cleanup_tempfile=True):
    """_summary_

    Args:
        df (_type_): _description_
        filename (_type_): _description_
    """
    if shutil.which('csvidx') is None:
        raise Exception("This method relies on the external program 'csvidx.exe'. Please ensure it is in your path.")
    temp_filename = f"{uuid.uuid4().hex}.tempfile.csv"
    write_area_ts_csv(df, temp_filename)
    command = f"csvidx {temp_filename} {filename}"
    process = subprocess.Popen(command)
    process.wait()
    if cleanup_tempfile:
        os.remove(temp_filename)


def write_area_ts_csv(df, filename, units="(mm.d^-1)"):
    """_summary_

    Args:
        df (_type_): _description_
        filename (_type_): _description_
        units (str, optional): _description_. Defaults to "(mm.d^-1)".

    Raises:
        Exception: If shortened field names are going to clash in the output file.
    """
    # ensure the dataframe adheres to standards
    utils.assert_df_format_standards(df)
    # convert field names to 12 chars and check for collisions
    fields = {}
    for c in df.columns:
        c12 = f"{c[:12]:<12}"
        if c12 in fields.keys():
            raise Exception(f"Field names clash when shortened to 12 chars: {c} and {fields[c12]}")
        fields[c12] = c
    # create the header text
    header = f"{units}"
    for k in fields.keys():
        header += f',"{k}"'
    header += os.linesep
    header += "Catchment area (km^2)"
    for k in fields.keys():
        header += ", 1.00000000"
    header += os.linesep
    # open a file and write the header and the csv body
    with open(filename, "w+", newline='', encoding='utf-8') as file:
        file.write(header)
        df.to_csv(file, header=False, na_rep=' NaN')
bulum-0.2.9/src/bulum/io/idx_io_native.py
ADDED
@@ -0,0 +1,136 @@
import os
import pandas as pd
import numpy as np
from bulum import utils


def _detect_header_bytes(b_data: np.ndarray) -> bool:
    """
    Helper function for read_idx. Detects whether the OUT file was written by
    a version of IQQM built with an old compiler that prepends metadata/junk data as a header.
    Fails if (not necessarily only if) the run was undertaken with only one
    source of data, i.e. the .idx file has only one entry.

    Args:
        b_data (np.ndarray): 2d array of binary data filled with float32 data.
    """
    b_data_slice: tuple[np.float32] = b_data[0]
    first_non_zero = b_data_slice[0] != 0.0
    rest_zeroes = not np.any(list(b_data_slice)[1:])
    return first_non_zero and rest_zeroes


def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
    """_summary_

    Args:
        filename (_type_): Name of the IDX file.
        skip_header_bytes (bool | None): Whether to skip header bytes in the IDX
            file (related to the compiler used for IQQM). If set to None, attempt
            to detect the presence of header bytes automatically.

    Returns:
        utils.TimeseriesDataframe: _description_
    """
    if not os.path.exists(filename):
        raise FileNotFoundError(f"File does not exist: {filename}")
    # Read ".idx" file
    with open(filename, 'r') as f:
        # Skip line
        stmp = f.readline()
        # Start date, end date, date interval
        stmp = f.readline().split()
        date_start = utils.standardize_datestring_format([stmp[0]])[0]
        date_end = utils.standardize_datestring_format([stmp[1]])[0]
        date_flag = int(stmp[2])
        snames = []
        for n, line in enumerate(f):
            sfile = line[0:13].strip()
            sdesc = line[13:54].strip()
            sname = f"{n + 1}>{sfile}>{sdesc}"
            snames.append(sname)
    # Read ".out" file
    out_filename = filename.lower().replace('.idx', '.out')
    if not os.path.exists(out_filename):
        raise FileNotFoundError(f"File does not exist: {out_filename}")
    # 4-byte reals
    b_types = [(s, 'f4') for s in snames]
    # Read all data in, drop header bytes (first row) if necessary
    b_data = np.fromfile(out_filename, dtype=np.dtype(b_types))
    # Detection of header bytes
    if skip_header_bytes is None:
        skip_header_bytes = _detect_header_bytes(b_data)
    if skip_header_bytes:
        b_data = b_data[1:]  # skip header bytes
    # Read data
    if date_flag == 0:
        daily_date_values = utils.datetime_functions.get_dates(
            date_start, end_date=date_end, include_end_date=True)
        df = pd.DataFrame.from_records(b_data, index=daily_date_values)
        df.columns = snames
        df.index.name = "Date"
        # Check data types. If not 'float64' or 'int64', convert to 'float64'
        x = df.select_dtypes(exclude=['int64', 'float64']).columns
        if len(x) > 0:
            df = df.astype({i: 'float64' for i in x})
    elif date_flag == 1:
        raise NotImplementedError("Monthly data not yet supported")
    elif date_flag == 3:
        raise NotImplementedError("Annual data not yet supported")
    else:
        raise ValueError(f"Unsupported date interval: {date_flag}")
    utils.assert_df_format_standards(df)
    return utils.TimeseriesDataframe.from_dataframe(df)


def write_idx_native(df: pd.DataFrame, filepath, type="None", units="None") -> None:
    """Writer for .IDX and corresponding .OUT binary files, written in native Python.
    Currently only supports daily data (date flag 0), as with the reader read_idx(...).

    Assumes that data are homogeneous in units and type, e.g. Precipitation & mm, or Flow & ML/d.

    Args:
        df (pd.Dataframe): DataFrame as per the output of read_idx(...).
        filepath (str): Path to the IDX file to be written, including the .IDX extension.
        units (str, optional): Units for data in df.
        type (str, optional): Data specifier for data in df, e.g. Gauged Flow, Precipitation, etc.
    """
    date_flag = 0
    # TODO: When generalising to other frequencies, we may be able to simply read the data type off the time delta in df.index values.
    # As is, I've essentially copied what was done in the reader to flag that this should be implemented at the "same time".
    # Verify valid date_flag
    match date_flag:
        case 0:
            pass  # valid
        case 1:
            raise NotImplementedError("Monthly data not yet supported")
        case 3:
            raise NotImplementedError("Annual data not yet supported")
        case _:
            raise ValueError(f"Unsupported date interval: {date_flag}")

    utils.assert_df_format_standards(df)
    first_date = df.index[0]
    last_date = df.index[-1]
    col_names = df.columns

    # write index
    with open(filepath, 'w') as f:
        # TODO: check whether this "skipped" line has important info.
        # For now I've just copied the data from ./tests/BUR_FLWX.IDX as it's likely just metadata.
        f.write('6.36.1 06/11/2006 10:48:30.64\n')
        f.write(f"{first_date} {last_date} {date_flag}\n")
        # data
        # inline fn to ensure the padded string is exactly l characters long
        def ljust_or_truncate(s, l): return s.ljust(l)[0:l]
        for idx, col_name in enumerate(col_names):
            source_entry = ljust_or_truncate(f"df_col{idx+1}", 12)
            name_entry = ljust_or_truncate(f"{col_name}", 40)
            type_entry = ljust_or_truncate(f"{type}", 15)
            units_entry = ljust_or_truncate(f"{units}", 15)
            f.write(f"{source_entry} {name_entry}" +
                    f" {type_entry} {units_entry}\n")
    # write binary
    out_filepath = filepath.lower().replace('.idx', '.out')
    df.to_numpy().tofile(out_filepath)
    return
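A hedged round-trip sketch for the native writer and the reader above. The .out body is written with whatever dtype the frame holds, while read_idx reads 4-byte reals, so the sketch casts to float32 first; the file paths are placeholders, and the module imports assume the package layout shown in the file list.

```python
# Placeholder paths; cast to float32 so the bytes written by to_numpy().tofile()
# line up with the 4-byte reals that read_idx expects.
from bulum.io import csv_io, idx_io_native

df = csv_io.read_ts_csv("daily_flows.csv").astype("float32")
idx_io_native.write_idx_native(df, "daily_flows.idx", type="Flow", units="ML/d")
df_back = idx_io_native.read_idx("daily_flows.idx")
```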