bulum 0.2.10__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {bulum-0.2.10/src/bulum.egg-info → bulum-0.3.0}/PKG-INFO +7 -5
  2. {bulum-0.2.10 → bulum-0.3.0}/README.md +6 -4
  3. {bulum-0.2.10 → bulum-0.3.0}/setup.py +1 -1
  4. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/clim/clim.py +39 -41
  5. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/__init__.py +1 -1
  6. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/csv_io.py +31 -19
  7. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/general_io.py +11 -2
  8. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/idx_io.py +22 -21
  9. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/idx_io_native.py +56 -33
  10. bulum-0.3.0/src/bulum/io/iqqm_out_reader.py +337 -0
  11. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/lqn_io.py +24 -9
  12. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/io/res_csv_io.py +25 -13
  13. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/maps/station_maps.py +51 -27
  14. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/altair_plots.py +274 -150
  15. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/ensemble_altair_plots.py +50 -35
  16. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/node_diagrams.py +3 -1
  17. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/plot_functions.py +37 -21
  18. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/tests/test_plot_functions.py +1 -1
  19. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/__init__.py +2 -1
  20. bulum-0.3.0/src/bulum/stats/aggregate_stats.py +196 -0
  21. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/ensemble_stats.py +75 -30
  22. bulum-0.3.0/src/bulum/stats/negflo.py +573 -0
  23. bulum-0.3.0/src/bulum/stats/negflo_helpers.py +154 -0
  24. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/reliability_stats_class.py +82 -45
  25. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/stochastic_data_check.py +9 -4
  26. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/storage_level_assessment.py +51 -25
  27. bulum-0.3.0/src/bulum/stoch/generate.py +91 -0
  28. bulum-0.3.0/src/bulum/trans/transformers.py +100 -0
  29. bulum-0.3.0/src/bulum/utils/__init__.py +16 -0
  30. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/utils/dataframe_extensions.py +54 -71
  31. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/utils/dataframe_functions.py +102 -111
  32. bulum-0.3.0/src/bulum/utils/datetime_functions.py +327 -0
  33. bulum-0.3.0/src/bulum/utils/interpolation.py +15 -0
  34. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/version.py +1 -1
  35. {bulum-0.2.10 → bulum-0.3.0/src/bulum.egg-info}/PKG-INFO +7 -5
  36. {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/SOURCES.txt +1 -1
  37. bulum-0.2.10/src/bulum/io/iqqm_out_reader.py +0 -127
  38. bulum-0.2.10/src/bulum/stats/aggregate_stats.py +0 -116
  39. bulum-0.2.10/src/bulum/stats/negflo.py +0 -652
  40. bulum-0.2.10/src/bulum/stats/negflo_scratch.gitignore.py +0 -8
  41. bulum-0.2.10/src/bulum/stoch/generate.py +0 -58
  42. bulum-0.2.10/src/bulum/trans/transformers.py +0 -83
  43. bulum-0.2.10/src/bulum/utils/__init__.py +0 -4
  44. bulum-0.2.10/src/bulum/utils/datetime_functions.py +0 -307
  45. bulum-0.2.10/src/bulum/utils/interpolation.py +0 -15
  46. {bulum-0.2.10 → bulum-0.3.0}/LICENSE.md +0 -0
  47. {bulum-0.2.10 → bulum-0.3.0}/setup.cfg +0 -0
  48. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/__init__.py +0 -0
  49. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/clim/__init__.py +0 -0
  50. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/demo.py +0 -0
  51. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/maps/__init__.py +0 -0
  52. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/__init__.py +0 -0
  53. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/plotly_helpers.py +0 -0
  54. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/plots/tests/__init__.py +0 -0
  55. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/swflo2s/__init__.py +0 -0
  56. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stats/swflo2s/swflo2s.py +0 -0
  57. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stoch/__init__.py +0 -0
  58. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/stoch/analyse.py +0 -0
  59. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/test_circular_import.py +0 -0
  60. {bulum-0.2.10 → bulum-0.3.0}/src/bulum/trans/__init__.py +0 -0
  61. {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/dependency_links.txt +0 -0
  62. {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/requires.txt +0 -0
  63. {bulum-0.2.10 → bulum-0.3.0}/src/bulum.egg-info/top_level.txt +0 -0
@@ -1,8 +1,8 @@
 Metadata-Version: 2.2
 Name: bulum
-Version: 0.2.10
+Version: 0.3.0
 Summary: Open source python library for assessing hydrologic model results in Queensland
-Home-page: https://bitbucket.org/odhydrology/bulum.git
+Home-page: https://github.com/odhydrology/bulum
 Author: Chas Egan
 Author-email: chas@odhydrology.com
 Classifier: Programming Language :: Python :: 3
@@ -56,6 +56,8 @@ bulum.__version__
 bulum.hello_world()
 ```
 
+API documentation is available at [odhydrology.github.io/bulum](https://odhydrology.github.io/bulum/).
+
 ## Build and Upload to PyPi
 
 First build a distribution from an anaconda prompt in the root of your project, and then upload the dist to PyPi using Twine.
@@ -68,7 +70,7 @@ python setup.py sdist
 twine upload dist\bulum-0.0.32.tar.gz
 ```
 
-As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "__token__", and password is the API token which is very long string starting with "pypi-".
+As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "\_\_token__", and password is the API token which is very long string starting with "pypi-".
 
 ``` bash
 username = __token__
@@ -81,7 +83,7 @@ How do I make a new API token? Go to your PyPi account settings, and click on "A
 
 ## Unit Tests
 
-WARNING: Run unit tests from an anaconda environment with compatable dependencies!
+WARNING: Run unit tests from an anaconda environment with compatible dependencies!
 
 Install the nose2 test-runner framework.
 
@@ -89,7 +91,7 @@ Install the nose2 test-runner framework.
 pip install nose2
 ```
 
-Then from the root project folder run the nose2 module. You can do this as a python modules, or just direcly from the anaconda prompt (both examples given below). This will automatically find and run tests in any modules named "test_*".
+Then from the root project folder run the nose2 module. You can do this as a python modules, or just directly from the anaconda prompt (both examples given below). This will automatically find and run tests in any modules named "test_*".
 
 ```bash
 python -m nose2
@@ -28,6 +28,8 @@ bulum.__version__
 bulum.hello_world()
 ```
 
+API documentation is available at [odhydrology.github.io/bulum](https://odhydrology.github.io/bulum/).
+
 ## Build and Upload to PyPi
 
 First build a distribution from an anaconda prompt in the root of your project, and then upload the dist to PyPi using Twine.
@@ -40,7 +42,7 @@ python setup.py sdist
 twine upload dist\bulum-0.0.32.tar.gz
 ```
 
-As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "__token__", and password is the API token which is very long string starting with "pypi-".
+As of Nov 2023, PyPi uses an API token instead of a conventional password. You can still use Twine, but the username is "\_\_token__", and password is the API token which is very long string starting with "pypi-".
 
 ``` bash
 username = __token__
@@ -53,7 +55,7 @@ How do I make a new API token? Go to your PyPi account settings, and click on "A
 
 ## Unit Tests
 
-WARNING: Run unit tests from an anaconda environment with compatable dependencies!
+WARNING: Run unit tests from an anaconda environment with compatible dependencies!
 
 Install the nose2 test-runner framework.
 
@@ -61,7 +63,7 @@ Install the nose2 test-runner framework.
 pip install nose2
 ```
 
-Then from the root project folder run the nose2 module. You can do this as a python modules, or just direcly from the anaconda prompt (both examples given below). This will automatically find and run tests in any modules named "test_*".
+Then from the root project folder run the nose2 module. You can do this as a python modules, or just directly from the anaconda prompt (both examples given below). This will automatically find and run tests in any modules named "test_*".
 
 ```bash
 python -m nose2
@@ -79,4 +81,4 @@ nose2 src.bulum.stats.tests
 
 ## License
 
-Refer to LICENCE.md
+Refer to LICENCE.md
@@ -14,7 +14,7 @@ setuptools.setup(
     description="Open source python library for assessing hydrologic model results in Queensland",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url="https://bitbucket.org/odhydrology/bulum.git",
+    url="https://github.com/odhydrology/bulum",
     package_dir={'': 'src'},
    packages=setuptools.find_packages('src'),
    classifiers=[
@@ -3,20 +3,20 @@ import pandas as pd
 
 
 def derive_transformation_curves(original_ts: pd.Series, augmented_ts: pd.Series, season_start_months=[1,2,3,4,5,6,7,8,9,10,11,12], epsilon=1e-3) -> dict:
-    """Returns a dictionary of exceedence-based transformation curves - one for each season
-    with the season's start month as the key. These are tables that map from exceedance
-    (cunnane plotting position as a fraction) to a scaling factor. These are intended to
-    be used to
-    effectively summarise climate-change adjustments, and allow them to be transported from
-    one timeseries to another.
+    """Returns a dictionary of exceedence-based transformation curves - one for
+    each season with the season's start month as the key. These are tables that
+    map from exceedance (cunnane plotting position as a fraction) to a scaling
+    factor. These are intended to be used to effectively summarise
+    climate-change adjustments, and allow them to be transported from one
+    timeseries to another.
 
-    Args:
-        original_ts (pd.Series): _description_
-        augmented_ts (pd.Series): _description_
-        season_start_months (list, optional): _description_. Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
-
-    Returns:
-        dict: _description_
+    Parameters
+    ----------
+    original_ts : pd.Series
+    augmented_ts : pd.Series
+    season_start_months : list, optional
+        Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
+
     """
     df = pd.DataFrame()
     df["x"] = original_ts
@@ -43,17 +43,19 @@ def derive_transformation_curves(original_ts: pd.Series, augmented_ts: pd.Series
 
 def apply_transformation_curves(tranformation_curves: dict, series: pd.Series) -> pd.Series:
     """Applies seasonal transformation curves to an input series.
-    Refer to the function 'derive_transformation_curves(...)'.
+    Refer to the function `derive_transformation_curves`.
 
-    Args:
-        tranformation_curves (dict): _description_
-        series (pd.Series): _description_
+    Parameters
+    ----------
+    tranformation_curves : dict
+    series : pd.Series
 
-    Returns:
-        pd.Series: _description_
+    Returns
+    -------
+    pd.Series
     """
     dates = series.index
-    answer = series.copy()
+    answer = series.copy()
     # Apply each transformation curves to the whole series. Splice the appropriate
     # parts (seasons) into the 'answer' series as we go.
     season_start_months = sorted(tranformation_curves.keys())
@@ -73,7 +75,7 @@ def apply_transformation_curves(tranformation_curves: dict, series: pd.Series) -
         # And get their ranks and plotting positions
         rank_starting_at_one = values.rank(ascending=True) # This function is nice because equal values are assigned the same (averaged) rank.
         n = len(values)
-        p = [(r - 0.4)/(n + 0.2) for r in rank_starting_at_one] # plotting position
+        p = [(r - 0.4)/(n + 0.2) for r in rank_starting_at_one] # Cunnane plotting position
         f = np.interp(p, xp, fp) # interpolated scaling factors
         # Calcualte new values and update the answer
         new_values = pd.Series([values.iloc[i] * f[i] for i in range(n)], index=season_dates)
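The comment change pins down which plotting position is used: Cunnane's, p = (r - 0.4) / (n + 0.2) for rank r among n values. A self-contained sketch of just that step, on toy numbers, mirroring the list comprehension in the hunk:

```python
import pandas as pd

# Toy seasonal values; ties receive the same averaged rank, matching
# the default behaviour of pandas' rank() used above.
values = pd.Series([3.0, 7.0, 7.0, 1.0, 12.0])
ranks = values.rank(ascending=True)          # 2.0, 3.5, 3.5, 1.0, 5.0
n = len(values)
p = [(r - 0.4) / (n + 0.2) for r in ranks]   # Cunnane plotting positions
# p ≈ [0.308, 0.596, 0.596, 0.115, 0.885]
```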
@@ -88,14 +90,15 @@ def derive_transformation_factors(original_ts: pd.Series, augmented_ts: pd.Serie
     be used to effectively summarise climate-change adjustments, and allow them to be
     transported from one timeseries to another.
 
-    Args:
-        original_ts (pd.Series): _description_
-        augmented_ts (pd.Series): _description_
-        season_start_months (list, optional): _description_. Defaults to [1,2,3,4,5,6,7,8,9,10,11,12].
-        epsilon: Threshold below which values are treated as zero, and the associated factor defaults to 1.
+    Parameters
+    ----------
+    original_ts : pd.Series
+    augmented_ts : pd.Series
+    season_start_months : list, optional
+        [1,2,3,4,5,6,7,8,9,10,11,12].
+    epsilon : float
+        Threshold below which values are treated as zero, and the associated factor defaults to 1.
 
-    Returns:
-        dict: _description_
     """
     # Create a map of month -> season_start_month (for all months)
     month_to_season_map = {}
@@ -116,25 +119,24 @@ def derive_transformation_factors(original_ts: pd.Series, augmented_ts: pd.Serie
     return df2['f'].to_dict()
 
 
-def apply_transformation_factors(tranformation_factors: dict, series: pd.Series) -> pd.Series:
+def apply_transformation_factors(transformation_factors: dict, series: pd.Series) -> pd.Series:
     """Applies seasonal transformation factors to an input series.
-    Refer to the function 'derive_transformation_factors(...)'.
+    Refer to the function `derive_transformation_curves`.
 
-    Args:
-        tranformation_curves (dict): _description_
-        series (pd.Series): _description_
+    Parameters
+    ----------
+    transformation_curves : dict
+    series : pd.Series
 
-    Returns:
-        pd.Series: _description_
     """
     # Create a map of month -> factor (containing all months)
-    season_start_months = sorted(tranformation_factors.keys())
+    season_start_months = sorted(transformation_factors.keys())
     month_to_factor_map = {}
     key = max(season_start_months)
     for m in [1,2,3,4,5,6,7,8,9,10,11,12]:
         if m in season_start_months:
             key = m
-        month_to_factor_map[m] = tranformation_factors[key]
+        month_to_factor_map[m] = transformation_factors[key]
     # Apply transformation factors to the whole series. Splice the appropriate
     df = pd.DataFrame()
     df['x'] = series
@@ -144,7 +146,3 @@ def apply_transformation_factors(tranformation_factors: dict, series: pd.Series)
     answer = df['y']
     answer.name = series.name
     return answer
-
-
-
-
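Taken together, the derive/apply pair above is meant to transport a climate adjustment from one series onto another. A minimal usage sketch with synthetic data; the import path follows the file list (`src/bulum/clim/clim.py`), and the two-season split and the uniform 10% "adjustment" are illustrative only:

```python
import numpy as np
import pandas as pd

from bulum.clim.clim import (apply_transformation_curves,
                             derive_transformation_curves)

# Synthetic daily record plus a hypothetical climate-adjusted version
# (here just a uniform 10% increase so the sketch is self-contained).
dates = pd.date_range("1990-01-01", "1999-12-31", freq="D").strftime("%Y-%m-%d")
rng = np.random.default_rng(42)
original = pd.Series(rng.gamma(2.0, 50.0, len(dates)), index=dates, name="flow")
augmented = original * 1.10

# One curve per season; keys are the season start months (Nov-Apr, May-Oct).
curves = derive_transformation_curves(original, augmented,
                                      season_start_months=[11, 5])

# Transport the adjustment onto a series (here the original itself).
adjusted = apply_transformation_curves(curves, original)
```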
@@ -4,4 +4,4 @@ from .idx_io import *
 from .idx_io_native import *
 from .iqqm_out_reader import *
 from .lqn_io import *
-from .general_io import *
+from .general_io import *
@@ -1,20 +1,36 @@
+"""
+Functions for reading CSVs, particularly time-series CSVs.
+"""
+
 import numpy as np
 import pandas as pd
 from bulum import utils
+import os
+
 na_values = ['', ' ', 'null', 'NULL', 'NAN', 'NaN', 'nan', 'NA', 'na', 'N/A' 'n/a', '#N/A', '#NA', '-NaN', '-nan']
 
 
-def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnumeric=False, assert_date=True, **kwargs) -> utils.TimeseriesDataframe:
-    """Reads a daily timeseries csv into a DataFrame, and sets the index to string dates in the "%Y-%m-%d" format.
-    The method assumes the first column are dates.
+def read_ts_csv(filename: str | os.PathLike, date_format=None,
+                df=None, colprefix=None, allow_nonnumeric=False,
+                assert_date=True, **kwargs) -> utils.TimeseriesDataframe:
+    """
+    Reads a daily timeseries csv into a DataFrame, and sets the index to string
+    dates in the "%Y-%m-%d" format. The method assumes the first column are
+    dates.
 
-    Args:
-        filename (_type_): _description_
-        date_format (str, optional): defaults to "%d/%m/%Y" as per Fors. Other common formats include "%Y-%m-%d", "%Y/%m/%d".
-        df (pd.DataFrame, optional): If provided, the reader will append columns to this dataframe. Defaults to None.
-        colprefix (str, optional): If provided, the reader will append this prefix to the start of each column name. Defaults to None.
-        allow_nonnumeric (bool, optional): If false, the method will assert that all columns are numerical. Defaults to False.
-        assert_date (bool, optional): If true, the method will assert that date index meets "%Y-%m-%d" format. Defaults to True.
+    Parameters
+    ----------
+    filename : str | PathLike
+    date_format : str, optional
+        defaults to "%d/%m/%Y" as per Fors. Other common formats include "%Y-%m-%d", "%Y/%m/%d".
+    df : pd.DataFrame, optional
+        If provided, the reader will append columns to this dataframe. Defaults to None.
+    colprefix : str, optional
+        If provided, the reader will append this prefix to the start of each column name. Defaults to None.
+    allow_nonnumeric : bool, optional
+        If false, the method will assert that all columns are numerical. Defaults to False.
+    assert_date : bool, optional
+        If true, the method will assert that date index meets "%Y-%m-%d" format. Defaults to True.
 
     Returns:
         pd.DataFrame: Dataframe containing the data from the csv file.
@@ -34,7 +50,7 @@ def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnu
     # Rename columns if required
     if colprefix is not None:
         for c in new_df.columns:
-            new_df.rename(columns = {c:f"{colprefix}{c}"}, inplace = True)
+            new_df.rename(columns={c: f"{colprefix}{c}"}, inplace=True)
     # Join to existing dataframe if required
     if df is None:
         df = new_df
@@ -49,11 +65,7 @@ def read_ts_csv(filename, date_format=None, df=None, colprefix=None, allow_nonnu
     return utils.TimeseriesDataframe.from_dataframe(df)
 
 
-def write_ts_csv(df: pd.DataFrame, filename: str):
-    """_summary_
-
-    Args:
-        df (pd.DataFrame): _description_
-        filename (str): _description_
-    """
-    df.to_csv(filename)
+def write_ts_csv(df: pd.DataFrame, filename: str,
+                 *args, **kwargs):
+    """Wrapper around ``pandas.DataFrame.to_csv()``."""
+    df.to_csv(filename, *args, **kwargs)
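A usage sketch of the reader/writer pair after this change; the CSV paths and the column prefix are hypothetical, and keyword meanings are as documented in the hunk above:

```python
import bulum.io as bio

# Hypothetical daily CSV whose first column holds "%d/%m/%Y" dates
# (the documented default); column names get an optional prefix.
df = bio.read_ts_csv("flows.csv", date_format="%d/%m/%Y", colprefix="gauge_")

# write_ts_csv is now a thin wrapper around DataFrame.to_csv(), so
# any to_csv keyword arguments pass straight through.
bio.write_ts_csv(df, "flows_out.csv", float_format="%.3f")
```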
@@ -1,10 +1,19 @@
-import pandas as pd
+"""
+General use IO functions.
+"""
+import re
+
 import bulum.io as bio
 from bulum import utils
-import re
 
 
 def read(filename: str, **kwargs) -> utils.TimeseriesDataframe:
+    """
+    Read the input file.
+
+    It will attempt to determine the filetype and dispatch to the appropriate
+    function in `bulum.io`.
+    """
     filename_lower = filename.lower()
     df = None
     if filename_lower.endswith(".res.csv"):
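The dispatcher lets callers stay extension-agnostic. A sketch (the file name is hypothetical; the `.res.csv` branch is visible above, while the mapping of other suffixes to readers is assumed from the module list):

```python
import bulum.io as bio

# Dispatch is on the lower-cased file name, e.g. the ".res.csv"
# branch shown in the hunk; other suffixes route to their own readers.
df = bio.read("results.res.csv")

# Whatever the branch, the reader returns a utils.TimeseriesDataframe.
print(type(df).__name__)
```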
@@ -1,20 +1,20 @@
+"""
+IO functions for IDX files.
+
+See also :py:mod:`bulum.op.idx_io_native`.
+"""
 import os
-import pandas as pd
-import uuid
 import shutil
 import subprocess
-from bulum import utils
-from .csv_io import *
+import uuid
 
+from bulum import utils
 
+from .csv_io import *
 
-def write_idx(df, filename, cleanup_tempfile=True):
-    """_summary_
 
-    Args:
-        df (_type_): _description_
-        filename (_type_): _description_
-    """
+def write_idx(df: pd.DataFrame, filename, cleanup_tempfile=True):
+    """Write IDX file from dataframe, requires csvidx.exe."""
     if shutil.which('csvidx') is None:
         raise Exception("This method relies on the external program 'csvidx.exe'. Please ensure it is in your path.")
     temp_filename = f"{uuid.uuid4().hex}.tempfile.csv"
@@ -26,17 +26,20 @@ def write_idx(df, filename, cleanup_tempfile=True):
         os.remove(temp_filename)
 
 
-
-def write_area_ts_csv(df, filename, units = "(mm.d^-1)"):
+def write_area_ts_csv(df, filename, units="(mm.d^-1)"):
     """_summary_
 
-    Args:
-        df (_type_): _description_
-        filename (_type_): _description_
-        units (str, optional): _description_. Defaults to "(mm.d^-1)".
+    Parameters
+    ----------
+    df : DataFrame
+    filename
+    units : str, optional
+        Defaults to "(mm.d^-1)".
 
-    Raises:
-        Exception: If shortenned field names are going to clash in output file.
+    Raises
+    ------
+    Exception
+        If shortened field names are going to clash in output file.
     """
     # ensures dataframe adheres to standards
     utils.assert_df_format_standards(df)
@@ -45,7 +48,7 @@ def write_area_ts_csv(df, filename, units = "(mm.d^-1)"):
     for c in df.columns:
         c12 = f"{c[:12]:<12}"
         if c12 in fields.keys():
-            raise Exception(f"Field names clash when shortenned to 12 chars: {c} and {fields[c12]}")
+            raise Exception(f"Field names clash when shortened to 12 chars: {c} and {fields[c12]}")
         fields[c12] = c
     # create the header text
     header = f"{units}"
@@ -60,5 +63,3 @@ def write_area_ts_csv(df, filename, units = "(mm.d^-1)"):
     with open(filename, "w+", newline='', encoding='utf-8') as file:
         file.write(header)
         df.to_csv(file, header=False, na_rep=' NaN')
-
-
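Since `write_idx` shells out to an external `csvidx.exe`, callers can guard for it the same way the function does internally. A minimal sketch with a toy dataframe in the package's standard shape (string "%Y-%m-%d" dates in the index, numeric columns); the output path is illustrative:

```python
import shutil

import pandas as pd

import bulum.io as bio

# Toy daily dataframe in the standard shape.
df = pd.DataFrame({"Flow": [1.0, 2.0, 3.0]},
                  index=["2000-01-01", "2000-01-02", "2000-01-03"])
df.index.name = "Date"

# write_idx raises if 'csvidx' is not on PATH, so check first.
if shutil.which("csvidx") is not None:
    bio.write_idx(df, "example.idx")
else:
    print("csvidx.exe not found; skipping IDX export")
```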
@@ -1,18 +1,26 @@
+"""
+IO functions for IDX format (binary) written in native Python.
+"""
 import os
-import pandas as pd
+from typing import Optional
+
 import numpy as np
+import pandas as pd
+
 from bulum import utils
 
 
 def _detect_header_bytes(b_data: np.ndarray) -> bool:
     """
-    Helper function for read_idx. Detects whether the OUT file was written with
-    a version of IQQM with an old compiler with metadata/junk data as a header.
-    Fails if (not necessarily only if) the run was undertaken with only one
-    source of data, i.e. the .idx file has only one entry.
+    Helper function for :func:`read_idx`. Detects whether the .OUT file was
+    written with a version of IQQM with an old compiler with metadata/junk data
+    as a header. Fails if the run was undertaken with only one source of data,
+    i.e. the .idx file has only one entry.
 
-    Args:
-        b_data (np.ndarray): 2d array of binary data filled with float32 data.
+    Parameters
+    ----------
+    b_data : np.ndarray
+        2d array of binary data filled with float32 data
     """
     b_data_slice: tuple[np.float32] = b_data[0]
     first_non_zero = b_data_slice[0] != 0.0
@@ -20,17 +28,22 @@ def _detect_header_bytes(b_data: np.ndarray) -> bool:
     return first_non_zero and rest_zeroes
 
 
-def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
-    """_summary_
+def read_idx(filename, skip_header_bytes: Optional[bool] = None) -> utils.TimeseriesDataframe:
+    """
+    Read IDX file.
 
-    Args:
-        filename (_type_): Name of the IDX file.
-        skip_header_bytes (bool | None): Whether to skip header bytes in the IDX
-        file (related to the compiler used for IQQM). If set to None, attempt
-        to detect the presence of header bytes automatically.
+    Parameters
+    ----------
+    filename
+        Name of the IDX file.
+    skip_header_bytes : bool, optional
+        Whether to skip header bytes in the corresponding OUTs file (related to
+        the compiler used for IQQM). If set to None, attempt to detect the
+        presence of header bytes automatically.
 
-    Returns:
-        utils.TimeseriesDataframe: _description_
+    Returns
+    -------
+    utils.TimeseriesDataframe
     """
     if not os.path.exists(filename):
         raise FileNotFoundError(f"File does not exist: {filename}")
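The automatic detection mentioned here is the `_detect_header_bytes` heuristic from the previous hunk: the first float32 record is treated as a header when its first value is non-zero and the rest of that record is all zeros. A standalone mirror of that test on synthetic rows (the function name here is local to the sketch):

```python
import numpy as np

def looks_like_header(b_data: np.ndarray) -> bool:
    # Mirrors _detect_header_bytes: first value non-zero, rest of row zero.
    row = b_data[0]
    return bool(row[0] != 0.0 and not row[1:].any())

with_header = np.array([[1234.0, 0.0, 0.0], [1.5, 2.5, 3.5]], dtype=np.float32)
no_header = np.array([[1.5, 2.5, 3.5], [4.5, 5.5, 6.5]], dtype=np.float32)
print(looks_like_header(with_header))  # True
print(looks_like_header(no_header))    # False
```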
@@ -41,7 +54,7 @@ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
         # Start date, end date, date interval
         stmp = f.readline().split()
         date_start = utils.standardize_datestring_format([stmp[0]])[0]
-        date_end   = utils.standardize_datestring_format([stmp[1]])[0]
+        date_end = utils.standardize_datestring_format([stmp[1]])[0]
         date_flag = int(stmp[2])
         snames = []
         for n, line in enumerate(f):
@@ -65,14 +78,14 @@ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
     # Read data
     if date_flag == 0:
         daily_date_values = utils.datetime_functions.get_dates(
-                date_start, end_date=date_end, include_end_date=True)
+            date_start, end_date=date_end, include_end_date=True)
         df = pd.DataFrame.from_records(b_data, index=daily_date_values)
         df.columns = snames
         df.index.name = "Date"
         # Check data types. If not 'float64' or 'int64', convert to 'float64'
-        x = df.select_dtypes(exclude=['int64','float64']).columns
-        if x.__len__()>0:
-            df=df.astype({i: 'float64' for i in x})
+        x = df.select_dtypes(exclude=['int64', 'float64']).columns
+        if x.__len__() > 0:
+            df = df.astype({i: 'float64' for i in x})
     elif date_flag == 1:
         raise NotImplementedError("Monthly data not yet supported")
     elif date_flag == 3:
@@ -84,21 +97,29 @@ def read_idx(filename, skip_header_bytes=None) -> utils.TimeseriesDataframe:
 
 
 def write_idx_native(df: pd.DataFrame, filepath, type="None", units="None") -> None:
-    """Writer for .IDX and corresponding .OUT binary files written in native Python.
-    Currently only supports daily data (date flag 0), as with the reader read_idx(...).
+    """Writer for .IDX and corresponding .OUT binary files written in native
+    Python. Currently only supports daily data (date flag 0), as with the reader
+    :func:`read_idx`.
 
-    Assumes that data are homogeneous in units and type e.g. Precipitation & mm resp., or Flow & ML/d.
+    Assumes that data are homogeneous in units and type e.g. Precipitation & mm
+    resp., or Flow & ML/d.
 
-    Args:
-        df (pd.Dataframe): DataFrame as per the output of read_idx(...).
-        filepath (str) : Path to the IDX file to be written to including .IDX extension.
-        units (str, optional) : Units for data in df.
-        type (str, optional) : Data specifier for data in df, e.g. Gauged Flow, Precipitation, etc.
+    Parameters
+    ----------
+    df : pd.Dataframe
+        DataFrame as per the output of :func:`read_idx`.
+    filepath
+        Path to the IDX file to be written to including .IDX extension.
+    units : str, optional
+        Units for data in df.
+    type : str, optional
+        Data specifier for data in df, e.g. Gauged Flow, Precipitation, etc.
     """
     date_flag = 0
-    # TODO: When generalising to other frequencies, we may be able to simply read the data type off the time delta in df.index values
-    # As is, I've essentially copied what was done in the reader to flag that this should be implemented at the "same time".
-    # Verify valid date_flag
+    # TODO: When generalising to other frequencies, we may be able to simply
+    # read the data type off the time delta in df.index values As is, I've
+    # essentially copied what was done in the reader to flag that this should be
+    # implemented at the "same time". Verify valid date_flag
     match date_flag:
         case 0:
             pass # valid
@@ -122,7 +143,9 @@ def write_idx_native(df: pd.DataFrame, filepath, type="None", units="None") -> N
         f.write(f"{first_date} {last_date} {date_flag}\n")
         # data
         # inline fn to ensure padded string is exactly l characters long
-        def ljust_or_truncate(s, l): return s.ljust(l)[0:l]
+
+        def ljust_or_truncate(s, l):
+            return s.ljust(l)[0:l]
         for idx, col_name in enumerate(col_names):
             source_entry = ljust_or_truncate(f"df_col{idx+1}", 12)
             name_entry = ljust_or_truncate(f"{col_name}", 40)
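A round-trip sketch for the native writer and reader; the column name, `type`, and `units` values are illustrative, and only daily data (date flag 0) is supported, as the docstring notes:

```python
import pandas as pd

import bulum.io as bio

# Daily dataframe in the shape read_idx() produces: "%Y-%m-%d" string
# dates in the index and float columns.
df = pd.DataFrame({"Gauge A": [1.0, 2.0, 3.0]},
                  index=["2000-01-01", "2000-01-02", "2000-01-03"])
df.index.name = "Date"

# Writes example.idx plus the corresponding binary .OUT file.
bio.write_idx_native(df, "example.idx", type="Flow", units="ML/d")

# Read it back as a TimeseriesDataframe.
df2 = bio.read_idx("example.idx")
```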