bulum 0.0.0__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. bulum-0.2.9/PKG-INFO +100 -0
  2. bulum-0.2.9/README.md +82 -0
  3. bulum-0.2.9/setup.py +32 -0
  4. bulum-0.2.9/src/bulum/__init__.py +3 -0
  5. bulum-0.2.9/src/bulum/clim/__init__.py +1 -0
  6. bulum-0.2.9/src/bulum/clim/clim.py +150 -0
  7. bulum-0.2.9/src/bulum/demo.py +6 -0
  8. bulum-0.2.9/src/bulum/io/__init__.py +7 -0
  9. bulum-0.2.9/src/bulum/io/csv_io.py +59 -0
  10. bulum-0.2.9/src/bulum/io/general_io.py +24 -0
  11. bulum-0.2.9/src/bulum/io/idx_io.py +64 -0
  12. bulum-0.2.9/src/bulum/io/idx_io_native.py +136 -0
  13. bulum-0.2.9/src/bulum/io/iqqm_out_reader.py +127 -0
  14. bulum-0.2.9/src/bulum/io/lqn_io.py +39 -0
  15. bulum-0.2.9/src/bulum/io/res_csv_io.py +152 -0
  16. bulum-0.2.9/src/bulum/maps/__init__.py +1 -0
  17. bulum-0.2.9/src/bulum/maps/station_maps.py +112 -0
  18. bulum-0.2.9/src/bulum/plots/__init__.py +5 -0
  19. bulum-0.2.9/src/bulum/plots/altair_plots.py +726 -0
  20. bulum-0.2.9/src/bulum/plots/ensemble_altair_plots.py +87 -0
  21. bulum-0.2.9/src/bulum/plots/node_diagrams.py +8 -0
  22. bulum-0.2.9/src/bulum/plots/plot_functions.py +90 -0
  23. bulum-0.2.9/src/bulum/plots/tests/__init__.py +1 -0
  24. bulum-0.2.9/src/bulum/plots/tests/test_plot_functions.py +224 -0
  25. bulum-0.2.9/src/bulum/stats/__init__.py +6 -0
  26. bulum-0.2.9/src/bulum/stats/aggregate_stats.py +116 -0
  27. bulum-0.2.9/src/bulum/stats/ensemble_stats.py +95 -0
  28. bulum-0.2.9/src/bulum/stats/negflo.py +647 -0
  29. bulum-0.2.9/src/bulum/stats/reliability_stats_class.py +225 -0
  30. bulum-0.2.9/src/bulum/stats/stochastic_data_check.py +409 -0
  31. bulum-0.2.9/src/bulum/stats/storage_level_assessment.py +228 -0
  32. bulum-0.2.9/src/bulum/stats/swflo2s/__init__.py +1 -0
  33. bulum-0.2.9/src/bulum/stats/swflo2s/swflo2s.py +163 -0
  34. bulum-0.2.9/src/bulum/stoch/__init__.py +1 -0
  35. bulum-0.2.9/src/bulum/stoch/analyse.py +0 -0
  36. bulum-0.2.9/src/bulum/stoch/generate.py +58 -0
  37. bulum-0.2.9/src/bulum/trans/__init__.py +1 -0
  38. bulum-0.2.9/src/bulum/trans/transformers.py +83 -0
  39. bulum-0.2.9/src/bulum/utils/__init__.py +4 -0
  40. bulum-0.2.9/src/bulum/utils/dataframe_extensions.py +212 -0
  41. bulum-0.2.9/src/bulum/utils/dataframe_functions.py +293 -0
  42. bulum-0.2.9/src/bulum/utils/datetime_functions.py +307 -0
  43. bulum-0.2.9/src/bulum/utils/interpolation.py +15 -0
  44. bulum-0.2.9/src/bulum/version.py +5 -0
  45. bulum-0.2.9/src/bulum.egg-info/PKG-INFO +100 -0
  46. bulum-0.2.9/src/bulum.egg-info/SOURCES.txt +49 -0
  47. bulum-0.2.9/src/bulum.egg-info/requires.txt +6 -0
  48. bulum-0.0.0/PKG-INFO +0 -5
  49. bulum-0.0.0/bulum.egg-info/PKG-INFO +0 -5
  50. bulum-0.0.0/bulum.egg-info/SOURCES.txt +0 -6
  51. bulum-0.0.0/pyproject.toml +0 -7
  52. {bulum-0.0.0 → bulum-0.2.9}/setup.cfg +0 -0
  53. /bulum-0.0.0/bulum/__init__.py → /bulum-0.2.9/src/bulum/plots/plotly_helpers.py +0 -0
  54. {bulum-0.0.0 → bulum-0.2.9/src}/bulum.egg-info/dependency_links.txt +0 -0
  55. {bulum-0.0.0 → bulum-0.2.9/src}/bulum.egg-info/top_level.txt +0 -0
bulum-0.2.9/PKG-INFO ADDED
@@ -0,0 +1,100 @@
+ Metadata-Version: 2.1
+ Name: bulum
+ Version: 0.2.9
+ Summary: Open source python library for assessing hydrologic model results in Queensland
+ Home-page: https://bitbucket.org/odhydrology/bulum.git
+ Author: Chas Egan
+ Author-email: chas@odhydrology.com
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ Requires-Dist: altair[all]>=5.5.0
+ Requires-Dist: folium>=0.14
+ Requires-Dist: matplotlib>=3.8.3
+ Requires-Dist: numpy>=1.26.4
+ Requires-Dist: pandas>=2.2.0
+ Requires-Dist: plotly>=5.18.0
+
+ # bulum
+
+ ## Installation
+
+ This package may be installed with pip from Bitbucket (requires authentication), directly from PyPI (public), or from a local .tar.gz. Examples are shown below.
+
+ ```bash
+ pip install git+https://bitbucket.org/odhydrology/bulum.git
+ ```
+
+ ```bash
+ pip install bulum
+ ```
+
+ ```bash
+ pip install .\dist\bulum-0.0.32.tar.gz
+ ```
+
+ ## Usage
+
+ ```python
+ import bulum
+
+ # returns the package version
+ bulum.__version__
+
+ # prints 'Hello world!' to the console
+ bulum.hello_world()
+ ```
+
+ ## Build and Upload to PyPI
+
+ First build a distribution from an Anaconda prompt in the root of your project, then upload the dist to PyPI using Twine.
+
+ ```bash
+ python setup.py sdist
+ ```
+
+ ```bash
+ twine upload dist\bulum-0.0.32.tar.gz
+ ```
+
+ As of Nov 2023, PyPI uses an API token instead of a conventional password. You can still use Twine, but the username is "__token__" and the password is the API token, which is a very long string starting with "pypi-".
+
+ ```bash
+ username = __token__
+ password = pypi-#####################################################################################
+ ```
+
+ Where can I find the API token password? Chas has it in his emails. It is also on the network at *.\ODH working files\Professional development, reading, etc\Software\ODHSoftware\bulum\PyPi_password_and_instructions.txt*.
+
+ How do I make a new API token? Go to your PyPI account settings and click on "API tokens". Then click "Add API token" and give it a name. The token will be displayed on the next screen.
+
+ ## Unit Tests
+
+ WARNING: Run unit tests from an Anaconda environment with compatible dependencies!
+
+ Install the nose2 test-runner framework.
+
+ ```bash
+ pip install nose2
+ ```
+
+ Then, from the root project folder, run the nose2 module. You can do this as a Python module or directly from the Anaconda prompt (both examples are given below). nose2 will automatically find and run tests in any modules named "test_*".
+
+ ```bash
+ python -m nose2
+ ```
+
+ ```bash
+ nose2
+ ```
+
+ You can run specific tests by specifying the module name, as in the example below.
+
+ ```bash
+ nose2 src.bulum.stats.tests
+ ```
+
+ ## License
+
+ None.
bulum-0.2.9/README.md ADDED
@@ -0,0 +1,82 @@
+ # bulum
+
+ ## Installation
+
+ This package may be installed with pip from Bitbucket (requires authentication), directly from PyPI (public), or from a local .tar.gz. Examples are shown below.
+
+ ```bash
+ pip install git+https://bitbucket.org/odhydrology/bulum.git
+ ```
+
+ ```bash
+ pip install bulum
+ ```
+
+ ```bash
+ pip install .\dist\bulum-0.0.32.tar.gz
+ ```
+
+ ## Usage
+
+ ```python
+ import bulum
+
+ # returns the package version
+ bulum.__version__
+
+ # prints 'Hello world!' to the console
+ bulum.hello_world()
+ ```
+
+ ## Build and Upload to PyPI
+
+ First build a distribution from an Anaconda prompt in the root of your project, then upload the dist to PyPI using Twine.
+
+ ```bash
+ python setup.py sdist
+ ```
+
+ ```bash
+ twine upload dist\bulum-0.0.32.tar.gz
+ ```
+
+ As of Nov 2023, PyPI uses an API token instead of a conventional password. You can still use Twine, but the username is "__token__" and the password is the API token, which is a very long string starting with "pypi-".
+
+ ```bash
+ username = __token__
+ password = pypi-#####################################################################################
+ ```
+
+ Where can I find the API token password? Chas has it in his emails. It is also on the network at *.\ODH working files\Professional development, reading, etc\Software\ODHSoftware\bulum\PyPi_password_and_instructions.txt*.
+
+ How do I make a new API token? Go to your PyPI account settings and click on "API tokens". Then click "Add API token" and give it a name. The token will be displayed on the next screen.
+
+ ## Unit Tests
+
+ WARNING: Run unit tests from an Anaconda environment with compatible dependencies!
+
+ Install the nose2 test-runner framework.
+
+ ```bash
+ pip install nose2
+ ```
+
+ Then, from the root project folder, run the nose2 module. You can do this as a Python module or directly from the Anaconda prompt (both examples are given below). nose2 will automatically find and run tests in any modules named "test_*".
+
+ ```bash
+ python -m nose2
+ ```
+
+ ```bash
+ nose2
+ ```
+
+ You can run specific tests by specifying the module name, as in the example below.
+
+ ```bash
+ nose2 src.bulum.stats.tests
+ ```
+
+ ## License
+
+ None.
bulum-0.2.9/setup.py ADDED
@@ -0,0 +1,32 @@
+ import setuptools
+
+ with open("README.md", "r") as fh:
+     long_description = fh.read()
+
+ exec(open('src/bulum/version.py').read())
+
+ setuptools.setup(
+     name="bulum",
+     version=__version__,
+     python_requires=">=3.9",
+     author="Chas Egan",
+     author_email="chas@odhydrology.com",
+     description="Open source python library for assessing hydrologic model results in Queensland",
+     long_description=long_description,
+     long_description_content_type="text/markdown",
+     url="https://bitbucket.org/odhydrology/bulum.git",
+     package_dir={'': 'src'},
+     packages=setuptools.find_packages('src'),
+     classifiers=[
+         "Programming Language :: Python :: 3",
+         "Operating System :: OS Independent",
+     ],
+     install_requires=[
+         'altair[all]>=5.5.0',
+         'folium>=0.14',
+         'matplotlib>=3.8.3',
+         'numpy>=1.26.4',
+         'pandas>=2.2.0',
+         'plotly>=5.18.0',
+     ],
+ )
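The `exec(open('src/bulum/version.py').read())` line pulls `__version__` into scope without importing the package. The diff lists `src/bulum/version.py` (+5 lines) but not its contents; for that line to work it presumably boils down to something like the sketch below (hypothetical reconstruction; only the version string itself is confirmed, by PKG-INFO above).

```python
# Hypothetical sketch of src/bulum/version.py -- the real file is not shown
# in this diff; only the version string is confirmed by PKG-INFO.
__version__ = "0.2.9"
```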
bulum-0.2.9/src/bulum/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .version import __version__
+ from .demo import *
+ #from .stoch import *
bulum-0.2.9/src/bulum/clim/__init__.py ADDED
@@ -0,0 +1 @@
+ from .clim import *
bulum-0.2.9/src/bulum/clim/clim.py ADDED
@@ -0,0 +1,150 @@
+ import numpy as np
+ import pandas as pd
+
+
+ def derive_transformation_curves(original_ts: pd.Series, augmented_ts: pd.Series, season_start_months=[1,2,3,4,5,6,7,8,9,10,11,12], epsilon=1e-3) -> dict:
+     """Returns a dictionary of exceedance-based transformation curves - one for
+     each season, keyed by the season's start month. These are tables that map
+     from exceedance (Cunnane plotting position as a fraction) to a scaling
+     factor. They are intended to summarise climate-change adjustments and allow
+     them to be transported from one timeseries to another.
+
+     Args:
+         original_ts (pd.Series): _description_
+         augmented_ts (pd.Series): _description_
+         season_start_months (list, optional): _description_. Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
+         epsilon: Threshold below which original values are treated as zero, in which case the factor defaults to 1.
+
+     Returns:
+         dict: _description_
+     """
+     df = pd.DataFrame()
+     df["x"] = original_ts
+     df["y"] = augmented_ts
+     df = df.dropna()  # Force common period
+     answer = {}
+     for i in range(len(season_start_months)):
+         # Get a list of the months in this season
+         start_month = season_start_months[i]
+         season_len = (season_start_months + [m + 12 for m in season_start_months])[i + 1] - start_month
+         months_in_this_season = [1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12][start_month - 1: start_month - 1 + season_len]
+         # Find the data for this season
+         df_m = df[[int(d[5:7]) in months_in_this_season for d in df.index]]  # d[5:7] is the month part of the date string
+         x = np.sort(df_m.x.values)
+         y = np.sort(df_m.y.values)
+         # The transformation factor is y/x except when the original value x is zero (< epsilon), in which case we default to 1.0
+         f = np.where(x < epsilon, 1.0, y / x)
+         n = len(x)
+         ii = [i + 1 for i in range(n)]  # rank starting at 1
+         p = [(i - 0.4)/(n + 0.2) for i in ii]  # Cunnane plotting position
+         answer[start_month] = [p, f]
+     return answer
+
+
+ def apply_transformation_curves(tranformation_curves: dict, series: pd.Series) -> pd.Series:
+     """Applies seasonal transformation curves to an input series.
+     Refer to the function 'derive_transformation_curves(...)'.
+
+     Args:
+         tranformation_curves (dict): _description_
+         series (pd.Series): _description_
+
+     Returns:
+         pd.Series: _description_
+     """
+     dates = series.index
+     answer = series.copy()
+     # Apply each transformation curve to the whole series. Splice the appropriate
+     # parts (seasons) into the 'answer' series as we go.
+     season_start_months = sorted(tranformation_curves.keys())
+     for i in range(len(season_start_months)):
+         # Identify the transform curve for this season
+         start_month = season_start_months[i]
+         t = tranformation_curves[start_month]
+         xp = t[0]
+         fp = t[1]
+         # Get a list of the months in this season
+         season_len = (season_start_months + [m + 12 for m in season_start_months])[i + 1] - start_month
+         months_in_this_season = [1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6,7,8,9,10,11,12][start_month - 1: start_month - 1 + season_len]
+         # Find the data for this season
+         season_dates = pd.Series([d for d in dates if int(d[5:7]) in months_in_this_season])  # d[5:7] is the month part of the date string
+         values = answer[season_dates]
+         # And get their ranks and plotting positions
+         rank_starting_at_one = values.rank(ascending=True)  # Nice because equal values are assigned the same (averaged) rank.
+         n = len(values)
+         p = [(r - 0.4)/(n + 0.2) for r in rank_starting_at_one]  # plotting position
+         f = np.interp(p, xp, fp)  # interpolated scaling factors
+         # Calculate new values and update the answer
+         new_values = pd.Series([values.iloc[i] * f[i] for i in range(n)], index=season_dates)
+         answer.update(new_values)
+     # Return a pd.Series so the user can easily join it back into a dataframe
+     return pd.Series(answer, index=dates, name=series.name)
+
+
+ def derive_transformation_factors(original_ts: pd.Series, augmented_ts: pd.Series, season_start_months=[1,2,3,4,5,6,7,8,9,10,11,12], epsilon=1e-3) -> dict:
+     """Returns a dictionary of transformation factors - one for each season,
+     keyed by the season's start month. These scaling factors are intended to
+     summarise climate-change adjustments and allow them to be transported from
+     one timeseries to another.
+
+     Args:
+         original_ts (pd.Series): _description_
+         augmented_ts (pd.Series): _description_
+         season_start_months (list, optional): _description_. Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
+         epsilon: Threshold below which values are treated as zero, and the associated factor defaults to 1.
+
+     Returns:
+         dict: _description_
+     """
+     # Create a map of month -> season_start_month (for all months)
+     month_to_season_map = {}
+     key = max(season_start_months)
+     for m in [1,2,3,4,5,6,7,8,9,10,11,12]:
+         if m in season_start_months:
+             key = m
+         month_to_season_map[m] = key
+     # Put the data in a dataframe, and group by season start month
+     df = pd.DataFrame()
+     df["x"] = original_ts
+     df["y"] = augmented_ts
+     df = df.dropna()  # Force common period
+     df['m'] = df.index.month
+     df['s'] = df['m'].map(month_to_season_map)
+     df2 = df.groupby('s').agg('sum')
+     df2['f'] = np.where(df2.x < epsilon, 1.0, df2.y / df2.x)
+     return df2['f'].to_dict()
+
+
+ def apply_transformation_factors(tranformation_factors: dict, series: pd.Series) -> pd.Series:
+     """Applies seasonal transformation factors to an input series.
+     Refer to the function 'derive_transformation_factors(...)'.
+
+     Args:
+         tranformation_factors (dict): _description_
+         series (pd.Series): _description_
+
+     Returns:
+         pd.Series: _description_
+     """
+     # Create a map of month -> factor (containing all months)
+     season_start_months = sorted(tranformation_factors.keys())
+     month_to_factor_map = {}
+     key = max(season_start_months)
+     for m in [1,2,3,4,5,6,7,8,9,10,11,12]:
+         if m in season_start_months:
+             key = m
+         month_to_factor_map[m] = tranformation_factors[key]
+     # Apply the transformation factors to the whole series
+     df = pd.DataFrame()
+     df['x'] = series
+     df['m'] = df.index.month
+     df['f'] = df['m'].map(month_to_factor_map)
+     df['y'] = df['x'] * df['f']
+     answer = df['y']
+     answer.name = series.name
+     return answer
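A minimal usage sketch for the four functions above, assuming daily series indexed by "%Y-%m-%d" date strings as elsewhere in the library (file and column names invented):

```python
import pandas as pd
from bulum import clim, io

# Hypothetical inputs: a historical series and a climate-adjusted version of it.
obs = io.read_ts_csv("historical.csv")["Gauge1"]
adj = io.read_ts_csv("climate_adjusted.csv")["Gauge1"]

# One exceedance-to-scaling-factor curve per calendar month (the default
# season_start_months); transport the adjustment onto a neighbouring series.
curves = clim.derive_transformation_curves(obs, adj)
other = io.read_ts_csv("neighbour_site.csv")["Gauge2"]
other_adj = clim.apply_transformation_curves(curves, other)

# The coarser alternative: one volumetric factor per season. Note these two
# functions read df.index.month, so they need a DatetimeIndex rather than the
# date strings used by the curve functions above.
obs_dt = obs.copy(); obs_dt.index = pd.to_datetime(obs_dt.index)
adj_dt = adj.copy(); adj_dt.index = pd.to_datetime(adj_dt.index)
factors = clim.derive_transformation_factors(obs_dt, adj_dt, season_start_months=[12, 3, 6, 9])
```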
bulum-0.2.9/src/bulum/demo.py ADDED
@@ -0,0 +1,6 @@
+ def hello_world():
+     print("Hello world!")
+
+ if __name__ == "__main__":
+     print(type(hello_world))
+     hello_world()
bulum-0.2.9/src/bulum/io/__init__.py ADDED
@@ -0,0 +1,7 @@
+ from .csv_io import *
+ from .res_csv_io import *
+ from .idx_io import *
+ from .idx_io_native import *
+ from .iqqm_out_reader import *
+ from .lqn_io import *
+ from .general_io import *
bulum-0.2.9/src/bulum/io/csv_io.py ADDED
@@ -0,0 +1,59 @@
+ import numpy as np
+ import pandas as pd
+ from bulum import utils
+
+ na_values = ['', ' ', 'null', 'NULL', 'NAN', 'NaN', 'nan', 'NA', 'na', 'N/A', 'n/a', '#N/A', '#NA', '-NaN', '-nan']
+
+
+ def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnumeric=False, assert_date=True, **kwargs) -> utils.TimeseriesDataframe:
+     """Reads a daily timeseries csv into a DataFrame, and sets the index to string dates in the "%Y-%m-%d" format.
+     The method assumes the first column contains the dates.
+
+     Args:
+         filename (_type_): _description_
+         date_format (str, optional): Defaults to "%d/%m/%Y" as per Fors. Other common formats include "%Y-%m-%d" and "%Y/%m/%d".
+         df (pd.DataFrame, optional): If provided, the reader will append columns to this dataframe. Defaults to None.
+         colprefix (str, optional): If provided, the reader will prepend this prefix to each column name. Defaults to None.
+         allow_nonnumeric (bool, optional): If false, the method will assert that all columns are numeric. Defaults to False.
+         assert_date (bool, optional): If true, the method will assert that the date index meets the "%Y-%m-%d" format. Defaults to True.
+
+     Returns:
+         pd.DataFrame: Dataframe containing the data from the csv file.
+     """
+     new_df = pd.read_csv(filename, na_values=na_values, **kwargs)
+     # Date index
+     new_df.set_index(new_df.columns[0], inplace=True)
+     if assert_date:
+         new_df.index = utils.standardize_datestring_format(new_df.index)
+     new_df.index.name = "Date"
+     # df = df.replace(r'^\s*$', np.nan, regex=True)
+     # Check values
+     if not allow_nonnumeric:
+         for col in new_df.columns:
+             if not np.issubdtype(new_df[col].dtype, np.number):
+                 raise Exception(f"ERROR: Column '{col}' is not numeric!")
+     # Rename columns if required
+     if colprefix is not None:
+         for c in new_df.columns:
+             new_df.rename(columns={c: f"{colprefix}{c}"}, inplace=True)
+     # Join to existing dataframe if required
+     if df is None:
+         df = new_df
+     else:
+         if len(df) > 0:
+             # Check that the date ranges overlap
+             newdf_ends_before_df_starts = new_df.index[-1] < df.index[0]
+             df_ends_before_newdf_starts = df.index[-1] < new_df.index[0]
+             if newdf_ends_before_df_starts or df_ends_before_newdf_starts:
+                 raise Exception("ERROR: The dates in the new dataframe do not overlap with the existing dataframe!")
+         df = df.join(new_df, how="outer")
+     return utils.TimeseriesDataframe.from_dataframe(df)
+
+
+ def write_ts_csv(df: pd.DataFrame, filename: str):
+     """Writes a timeseries dataframe to csv.
+
+     Args:
+         df (pd.DataFrame): _description_
+         filename (str): _description_
+     """
+     df.to_csv(filename)
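The `df` and `colprefix` arguments are what let several files be stacked into one dataframe. A minimal sketch (file names and prefixes invented):

```python
from bulum import io

# Read one file, then append the columns of a second file whose dates overlap.
# The prefixes keep the column names distinct across the outer join.
df = io.read_ts_csv("site_a.csv", colprefix="A_")
df = io.read_ts_csv("site_b.csv", df=df, colprefix="B_")
io.write_ts_csv(df, "combined.csv")
```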
bulum-0.2.9/src/bulum/io/general_io.py ADDED
@@ -0,0 +1,24 @@
+ import pandas as pd
+ import bulum.io as oio
+ from bulum import utils
+ import re
+
+
+ def read(filename: str, **kwargs) -> utils.TimeseriesDataframe:
+     """Reads a timeseries file, dispatching to the appropriate reader based on the file extension."""
+     filename_lower = filename.lower()
+     df = None
+     if filename_lower.endswith(".res.csv"):
+         df = oio.read_res_csv(filename, **kwargs)
+         if df is None:
+             raise ValueError("Res csv could not be read.")
+     elif filename_lower.endswith(".csv"):
+         df = oio.read_ts_csv(filename, **kwargs)
+     elif filename_lower.endswith(".idx"):
+         df = oio.read_idx(filename, **kwargs)
+     elif re.search(r"\.[0-9]{2}d$", filename_lower):
+         df = oio.read_iqqm_lqn_output(filename, **kwargs)
+     else:
+         raise ValueError(f"Unknown file extension: {filename}")
+     assert isinstance(df, utils.TimeseriesDataframe), \
+         "Output of `read` is not a TimeseriesDataframe."
+     return df
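Because the dispatcher keys off the filename alone, one `read` call covers every supported format. A short usage sketch (paths invented for illustration):

```python
import bulum.io as oio

df1 = oio.read("model_run.res.csv")  # .res.csv results file
df2 = oio.read("rainfall.csv")       # plain daily timeseries csv
df3 = oio.read("bur_flwx.idx")       # IQQM .idx/.out pair
df4 = oio.read("run01.01d")          # IQQM/LQN output matching the .NNd pattern
```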
bulum-0.2.9/src/bulum/io/idx_io.py ADDED
@@ -0,0 +1,64 @@
+ import os
+ import pandas as pd
+ import uuid
+ import shutil
+ import subprocess
+ from bulum import utils
+ from .csv_io import *
+
+
+ def write_idx(df, filename, cleanup_tempfile=True):
+     """Writes the dataframe to an IQQM .idx/.out file pair via the external program 'csvidx'.
+
+     Args:
+         df (_type_): _description_
+         filename (_type_): _description_
+         cleanup_tempfile (bool, optional): If true, deletes the intermediate csv. Defaults to True.
+     """
+     if shutil.which('csvidx') is None:
+         raise Exception("This method relies on the external program 'csvidx.exe'. Please ensure it is in your path.")
+     temp_filename = f"{uuid.uuid4().hex}.tempfile.csv"
+     write_area_ts_csv(df, temp_filename)
+     command = f"csvidx {temp_filename} {filename}"
+     process = subprocess.Popen(command)
+     process.wait()
+     if cleanup_tempfile:
+         os.remove(temp_filename)
+
+
+ def write_area_ts_csv(df, filename, units="(mm.d^-1)"):
+     """Writes the dataframe to a csv with the two-line header expected by 'csvidx'.
+
+     Args:
+         df (_type_): _description_
+         filename (_type_): _description_
+         units (str, optional): _description_. Defaults to "(mm.d^-1)".
+
+     Raises:
+         Exception: If shortened field names are going to clash in the output file.
+     """
+     # Ensure the dataframe adheres to standards
+     utils.assert_df_format_standards(df)
+     # Convert field names to 12 chars and check for collisions
+     fields = {}
+     for c in df.columns:
+         c12 = f"{c[:12]:<12}"
+         if c12 in fields.keys():
+             raise Exception(f"Field names clash when shortened to 12 chars: {c} and {fields[c12]}")
+         fields[c12] = c
+     # Create the header text
+     header = f"{units}"
+     for k in fields.keys():
+         header += f',"{k}"'
+     header += os.linesep
+     header += "Catchment area (km^2)"
+     for k in fields.keys():
+         header += ", 1.00000000"
+     header += os.linesep
+     # Open a file and write the header and the csv body
+     with open(filename, "w+", newline='', encoding='utf-8') as file:
+         file.write(header)
+         df.to_csv(file, header=False, na_rep=' NaN')
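For reference, a sketch of the two-line header that `write_area_ts_csv` produces for a hypothetical two-column dataframe (column names invented; assumes the frame passes `utils.assert_df_format_standards`): names are padded to 12 characters and every catchment area is hard-coded to 1.0.

```python
import pandas as pd
from bulum import io as oio

# Hypothetical two-column daily dataframe with the library's standard string-date index.
df = pd.DataFrame({"Gauge1": [1.0, 2.0], "Gauge2": [3.0, 4.0]},
                  index=pd.Index(["1990-01-01", "1990-01-02"], name="Date"))
oio.write_area_ts_csv(df, "catchments.csv")
# The file starts with the two header lines, then the unlabelled csv body:
#   (mm.d^-1),"Gauge1      ","Gauge2      "
#   Catchment area (km^2), 1.00000000, 1.00000000
#   1990-01-01,1.0,3.0
#   ...
```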
bulum-0.2.9/src/bulum/io/idx_io_native.py ADDED
@@ -0,0 +1,136 @@
+ import os
+ import pandas as pd
+ import numpy as np
+ from bulum import utils
+
+
+ def _detect_header_bytes(b_data: np.ndarray) -> bool:
+     """
+     Helper function for read_idx. Detects whether the OUT file was written by
+     a version of IQQM built with an old compiler that prepends metadata/junk
+     bytes as a header. Fails if (though not necessarily only if) the run was
+     undertaken with only one source of data, i.e. the .idx file has only one entry.
+
+     Args:
+         b_data (np.ndarray): 2d array of binary data filled with float32 data.
+     """
+     b_data_slice = b_data[0]
+     first_non_zero = b_data_slice[0] != 0.0
+     rest_zeroes = not np.any(list(b_data_slice)[1:])
+     return first_non_zero and rest_zeroes
+
+
+ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
+     """Reads an IQQM .idx index file and its companion .out binary file into a TimeseriesDataframe.
+
+     Args:
+         filename (_type_): Name of the IDX file.
+         skip_header_bytes (bool | None): Whether to skip header bytes in the IDX
+             file (related to the compiler used for IQQM). If set to None, attempt
+             to detect the presence of header bytes automatically.
+
+     Returns:
+         utils.TimeseriesDataframe: _description_
+     """
+     if not os.path.exists(filename):
+         raise FileNotFoundError(f"File does not exist: {filename}")
+     # Read ".idx" file
+     with open(filename, 'r') as f:
+         # Skip the version/metadata line
+         stmp = f.readline()
+         # Start date, end date, date interval
+         stmp = f.readline().split()
+         date_start = utils.standardize_datestring_format([stmp[0]])[0]
+         date_end = utils.standardize_datestring_format([stmp[1]])[0]
+         date_flag = int(stmp[2])
+         snames = []
+         for n, line in enumerate(f):
+             sfile = line[0:13].strip()
+             sdesc = line[13:54].strip()
+             sname = f"{n + 1}>{sfile}>{sdesc}"
+             snames.append(sname)
+     # Read ".out" file
+     out_filename = filename.lower().replace('.idx', '.out')
+     if not os.path.exists(out_filename):
+         raise FileNotFoundError(f"File does not exist: {out_filename}")
+     # 4-byte reals
+     b_types = [(s, 'f4') for s in snames]
+     # Read all data in, drop header bytes (first record) if necessary
+     b_data = np.fromfile(out_filename, dtype=np.dtype(b_types))
+     # Detection of header bytes
+     if skip_header_bytes is None:
+         skip_header_bytes = _detect_header_bytes(b_data)
+     if skip_header_bytes:
+         b_data = b_data[1:]  # skip header bytes
+     # Read data
+     if date_flag == 0:
+         daily_date_values = utils.datetime_functions.get_dates(
+             date_start, end_date=date_end, include_end_date=True)
+         df = pd.DataFrame.from_records(b_data, index=daily_date_values)
+         df.columns = snames
+         df.index.name = "Date"
+         # Check data types. If not 'float64' or 'int64', convert to 'float64'
+         x = df.select_dtypes(exclude=['int64', 'float64']).columns
+         if len(x) > 0:
+             df = df.astype({i: 'float64' for i in x})
+     elif date_flag == 1:
+         raise NotImplementedError("Monthly data not yet supported")
+     elif date_flag == 3:
+         raise NotImplementedError("Annual data not yet supported")
+     else:
+         raise ValueError(f"Unsupported date interval: {date_flag}")
+     utils.assert_df_format_standards(df)
+     return utils.TimeseriesDataframe.from_dataframe(df)
+
+
+ def write_idx_native(df: pd.DataFrame, filepath, type="None", units="None") -> None:
+     """Writer for .IDX and corresponding .OUT binary files, written in native Python.
+     Currently only supports daily data (date flag 0), as with the reader read_idx(...).
+
+     Assumes that the data are homogeneous in units and type, e.g. Precipitation & mm, or Flow & ML/d.
+
+     Args:
+         df (pd.DataFrame): DataFrame as per the output of read_idx(...).
+         filepath (str): Path to the IDX file to be written to, including the .IDX extension.
+         units (str, optional): Units for data in df.
+         type (str, optional): Data specifier for data in df, e.g. Gauged Flow, Precipitation, etc.
+     """
+     date_flag = 0
+     # TODO: When generalising to other frequencies, we may be able to simply read the
+     # frequency off the time delta in df.index values. As is, this essentially copies
+     # what was done in the reader, to flag that both should be implemented at the same time.
+     # Verify valid date_flag
+     match date_flag:
+         case 0:
+             pass  # valid
+         case 1:
+             raise NotImplementedError("Monthly data not yet supported")
+         case 3:
+             raise NotImplementedError("Annual data not yet supported")
+         case _:
+             raise ValueError(f"Unsupported date interval: {date_flag}")
+
+     utils.assert_df_format_standards(df)
+     first_date = df.index[0]
+     last_date = df.index[-1]
+     col_names = df.columns
+
+     # Write the index file
+     with open(filepath, 'w') as f:
+         # TODO: check whether this "skipped" line has important info. For now it just
+         # copies the data from ./tests/BUR_FLWX.IDX, as it's likely just metadata.
+         f.write('6.36.1 06/11/2006 10:48:30.64\n')
+         f.write(f"{first_date} {last_date} {date_flag}\n")
+         # Inline fn to ensure the padded string is exactly l characters long
+         def ljust_or_truncate(s, l): return s.ljust(l)[0:l]
+         for idx, col_name in enumerate(col_names):
+             source_entry = ljust_or_truncate(f"df_col{idx+1}", 12)
+             name_entry = ljust_or_truncate(f"{col_name}", 40)
+             type_entry = ljust_or_truncate(f"{type}", 15)
+             units_entry = ljust_or_truncate(f"{units}", 15)
+             f.write(f"{source_entry} {name_entry}" +
+                     f" {type_entry} {units_entry}\n")
+     # Write the binary .out file as 4-byte reals to match the 'f4' records expected by read_idx
+     out_filepath = filepath.lower().replace('.idx', '.out')
+     df.to_numpy(dtype=np.float32).tofile(out_filepath)
+     return
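A round-trip sketch combining the reader and the native writer (file names invented; the writer stamps a fixed metadata line and lower-cases the .out path, so round trips preserve values rather than bytes):

```python
from bulum import io as oio

df = oio.read_idx("bur_flwx.idx")         # reads the .idx/.out pair as 4-byte reals
oio.write_idx_native(df, "copy.idx",      # hypothetical output path
                     type="Gauged Flow",  # 15-char type field in the index
                     units="ML/d")        # 15-char units field
df2 = oio.read_idx("copy.idx")            # the reader pairs copy.idx with copy.out
```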