pthelma 1.0.0__cp310-cp310-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- enhydris_api_client/__init__.py +252 -0
- enhydris_cache/__init__.py +5 -0
- enhydris_cache/cli.py +150 -0
- enhydris_cache/enhydris_cache.py +69 -0
- evaporation/__init__.py +4 -0
- evaporation/cli.py +729 -0
- evaporation/evaporation.py +437 -0
- haggregate/__init__.py +5 -0
- haggregate/cli.py +91 -0
- haggregate/haggregate.py +155 -0
- haggregate/regularize.cp310-win_amd64.pyd +0 -0
- haggregate/regularize.pyx +193 -0
- hspatial/__init__.py +4 -0
- hspatial/cli.py +310 -0
- hspatial/hspatial.py +425 -0
- hspatial/test.py +27 -0
- htimeseries/__init__.py +2 -0
- htimeseries/htimeseries.py +574 -0
- htimeseries/timezone_utils.py +44 -0
- pthelma/__init__.py +0 -0
- pthelma/_version.py +16 -0
- pthelma-1.0.0.dist-info/LICENSE.rst +34 -0
- pthelma-1.0.0.dist-info/METADATA +55 -0
- pthelma-1.0.0.dist-info/RECORD +27 -0
- pthelma-1.0.0.dist-info/WHEEL +5 -0
- pthelma-1.0.0.dist-info/entry_points.txt +5 -0
- pthelma-1.0.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,193 @@
|
|
1
|
+
# cython: language_level=3, linetrace=True
|
2
|
+
# distutils: define_macros=CYTHON_TRACE=1
|
3
|
+
|
4
|
+
import datetime as dt
|
5
|
+
cimport numpy as np
|
6
|
+
import numpy as np
|
7
|
+
import pandas as pd
|
8
|
+
from libc.math cimport isnan
|
9
|
+
from pandas.tseries.frequencies import to_offset
|
10
|
+
|
11
|
+
from htimeseries import HTimeseries
|
12
|
+
|
13
|
+
from .haggregate import RegularizationMode as RM
|
14
|
+
|
15
|
+
|
16
|
+
class RegularizeError(Exception):
    """Raised when a time series cannot be regularized (e.g. missing or
    unsupported time step)."""
|
18
|
+
|
19
|
+
|
20
|
+
def regularize(ts, new_date_flag="DATEINSERT", mode=RM.INTERVAL):
    """Return a new HTimeseries whose records are aligned on the time step
    of ``ts``.

    Records whose timestamp had to be moved get ``new_date_flag`` appended
    to their flags.  ``mode`` selects the regularization mode (see
    RegularizationMode in haggregate).  Raises RegularizeError if ``ts``
    has no usable time step.
    """
    # The source must carry a time step expressed in minutes, hours or days.
    if not hasattr(ts, "time_step"):
        raise RegularizeError("The source time series does not specify a time step")
    try:
        pd.to_timedelta(to_offset(ts.time_step))
    except ValueError:
        raise RegularizeError(
            "The time step is malformed or is specified in months. Only time steps "
            "specified in minutes, hours or days are supported."
        )

    # Copy metadata over to the result.
    result = HTimeseries()
    for attr in (
        "unit",
        "timezone",
        "time_step",
        "interval_type",
        "variable",
        "precision",
        "location",
    ):
        setattr(result, attr, getattr(ts, attr, None))
    if hasattr(ts, "title"):
        result.title = "Regularized " + ts.title
    if hasattr(ts, "comment"):
        result.comment = (
            "Created by regularizing step of timeseries that had this comment:\n\n"
            + ts.comment
        )

    # Nothing more to do for an empty source.
    if len(ts.data) == 0:
        return result

    # The output spans the source endpoints rounded to the time step.
    step = pd.Timedelta(ts.time_step)
    first_stamp = ts.data.index[0].round(step)
    last_stamp = ts.data.index[-1].round(step)

    # Convert everything to plain numpy arrays, which are much faster and
    # are usable from Cython.
    max_flags_length = max(ts.data["flags"].str.len()) + 1 + len(new_date_flag)
    flags_dtype = "U" + str(max_flags_length)
    src_index = ts.data.index.values.astype(long)
    src_values = ts.data["value"].values
    src_flags = ts.data["flags"].values.astype(flags_dtype)
    # NOTE(review): src_index holds epoch integers; the *1000 below scales
    # the step to the same unit — confirm against the index resolution.
    step_int = np.timedelta64(step).astype(int) * 1000
    new_index = pd.date_range(
        first_stamp, last_stamp, freq=ts.time_step
    ).values
    new_values = np.full(len(new_index), np.nan, dtype=object)
    new_flags = np.full(len(new_index), "", dtype=flags_dtype)

    # Fill in the result arrays record by record.
    _perform_regularization(
        new_index,
        new_values,
        new_flags,
        src_index,
        src_values,
        src_flags,
        step_int,
        new_date_flag,
        mode.value,
    )

    result.data = pd.DataFrame(
        index=new_index,
        columns=["value", "flags"],
        data=np.vstack((new_values, new_flags)).transpose(),
    ).tz_localize(dt.timezone.utc).tz_convert(first_stamp.tz)
    return result
|
95
|
+
|
96
|
+
|
97
|
+
def _perform_regularization(
    np.ndarray result_index,
    np.ndarray result_values,
    np.ndarray result_flags,
    np.ndarray ts_index,
    np.ndarray ts_values,
    np.ndarray ts_flags,
    long result_step,
    str new_date_flag,
    int mode,
):
    """Fill ``result_values`` and ``result_flags`` in place, one record per
    timestamp of ``result_index``, taking records from the source arrays.

    ``previous_pos`` remembers how far into the source arrays the previous
    lookup got, so successive lookups don't rescan from the start.
    """
    cdef int i, previous_pos
    cdef long t

    previous_pos = 0
    for i in range(result_index.size):
        t = result_index[i]
        result_values[i], result_flags[i], previous_pos = _get_record(
            ts_index, ts_values, ts_flags,
            t, result_step, new_date_flag, previous_pos, mode,
        )
|
124
|
+
|
125
|
+
|
126
|
+
def _get_record(
    np.ndarray ts_index,
    np.ndarray ts_values,
    np.ndarray ts_flags,
    long t,
    long result_step,
    str new_date_flag,
    int previous_pos,
    int mode,
):
    """Return ``(value, flags, next_pos)`` for result timestamp ``t``.

    Searches the source arrays starting at ``previous_pos``.  A record at
    exactly ``t`` wins outright; failing that, a record within half a step
    of ``t`` may be used, in which case ``new_date_flag`` is appended to
    its flags.  ``next_pos`` is where the next lookup should resume.
    """
    cdef int i, found, count
    cdef int nearest_i = -1
    cdef int INTERVAL = RM.INTERVAL.value
    cdef int INSTANTANEOUS = RM.INSTANTANEOUS.value

    # Return the source record if it already exists at exactly t.  In
    # INSTANTANEOUS mode a NaN value does not count as existing.
    found = False
    for i in range(previous_pos, ts_index.size):
        if ts_index[i] == t and (mode == INTERVAL or not isnan(ts_values[i])):
            found = True
            break
        if ts_index[i] > t:
            # Source is past t; no exact hit is possible any more.
            break
    if found:
        return ts_values[i], ts_flags[i], i

    # Otherwise get the nearby record, if it exists: scan the half-open
    # window [t - step/2, t + step/2) counting candidates and tracking the
    # nearest one.
    start = t - result_step / 2
    end = t + result_step / 2
    count = 0
    for i in range(previous_pos, ts_index.size):
        ti = ts_index[i]
        if ti >= start and ti < end and (mode == INTERVAL or not isnan(ts_values[i])):
            count += 1
            nearest_i = _get_nearest(nearest_i, i, ts_index, ts_values, t, mode)
        if ts_index[i] >= end:
            # Step back so next_pos points at the last in-window position.
            i -= 1
            break
    # No candidate, or ambiguous (more than one) in INTERVAL mode: emit an
    # empty record.
    if count < 1 or (count > 1 and mode == INTERVAL):
        return np.nan, "", i
    value = ts_values[nearest_i]
    flags = ts_flags[nearest_i]
    # Mark the record as moved by appending new_date_flag (space-separated
    # if flags already exist).
    if flags:
        flags += " "
    flags += new_date_flag
    return value, flags, i + 1
|
172
|
+
|
173
|
+
|
174
|
+
def _get_nearest(
    int previous_nearest_i,
    int current_i,
    np.ndarray ts_index,
    np.ndarray ts_values,
    long t,
    int mode,
):
    """Return whichever of the two candidate indices lies closer to ``t``."""
    if mode == RM.INTERVAL.value:
        # In that case it doesn't really matter which is the nearest, so long
        # as it's only one (which is checked elsewhere), so we return
        # immediately.
        return current_i
    if previous_nearest_i < 0:
        # There is no previous candidate yet.
        return current_i
    dist_current = abs(t - ts_index[current_i])
    dist_previous = abs(t - ts_index[previous_nearest_i])
    return current_i if dist_current < dist_previous else previous_nearest_i
|
hspatial/__init__.py
ADDED
hspatial/cli.py
ADDED
@@ -0,0 +1,310 @@
|
|
1
|
+
import configparser
|
2
|
+
import datetime as dt
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import sys
|
6
|
+
import traceback
|
7
|
+
from glob import glob
|
8
|
+
from io import StringIO
|
9
|
+
|
10
|
+
import click
|
11
|
+
import iso8601
|
12
|
+
from osgeo import gdal, ogr, osr
|
13
|
+
from simpletail import ropen
|
14
|
+
|
15
|
+
from hspatial import create_ogr_layer_from_timeseries, h_integrate, idw
|
16
|
+
from htimeseries import HTimeseries, TzinfoFromString
|
17
|
+
from pthelma._version import __version__
|
18
|
+
|
19
|
+
gdal.UseExceptions()
|
20
|
+
|
21
|
+
|
22
|
+
class WrongValueError(configparser.Error):
    """Raised when a configuration option has an unacceptable value."""
|
24
|
+
|
25
|
+
|
26
|
+
class App:
    """Implements the ``spatialize`` command.

    Reads the configuration file, sets up logging, and runs spatial
    integration (h_integrate) of the configured point time series for the
    most recent dates, writing one tif file per date.
    """

    def __init__(self, configfilename):
        self.configfilename = configfilename

    def run(self):
        """Entry point: parse configuration, set up logging, execute."""
        self.config = AppConfig(self.configfilename)
        self.config.read()
        self._setup_logger()
        self._execute_with_error_handling()

    def _execute_with_error_handling(self):
        # Log start/finish and convert any failure into a ClickException so
        # the CLI exits with a clean message rather than a traceback.
        self.logger.info("Starting spatialize, " + dt.datetime.today().isoformat())
        try:
            self._execute()
        except Exception as e:
            self.logger.error(str(e))
            self.logger.debug(traceback.format_exc())
            self.logger.info(
                "spatialize terminated with error, " + dt.datetime.today().isoformat()
            )
            raise click.ClickException(str(e))
        else:
            self.logger.info("Finished spatialize, " + dt.datetime.today().isoformat())

    def _setup_logger(self):
        self.logger = logging.getLogger("spatialize")
        self._set_logger_handler()
        self.logger.setLevel(self.config.loglevel.upper())

    def _set_logger_handler(self):
        # Log to the configured file if one is given, otherwise to stderr.
        if getattr(self.config, "logfile", None):
            self.logger.addHandler(logging.FileHandler(self.config.logfile))
        else:
            self.logger.addHandler(logging.StreamHandler())

    def _get_last_dates(self, filename, n):
        """
        Assuming specified file contains a time series, scan it from the bottom
        and return the list of the n last dates (may be less than n if the time
        series is too small). 'filename' is used in error messages.
        """
        # Get the time zone from the file header.
        line = ""  # Fix: avoids NameError below when the file is empty
        with open(filename) as fp:
            for line in fp:
                if line.startswith("Timezone") or (line and line[0] in "0123456789"):
                    break
        if not line.startswith("Timezone"):
            raise click.ClickException("{} does not contain Timezone".format(filename))
        zonestr = line.partition("=")[2].strip()
        timezone = TzinfoFromString(zonestr)

        result = []
        previous_line_was_empty = False
        with ropen(filename) as fp:
            # ropen iterates the file from the last line backwards.
            for i, line in enumerate(fp):
                if i >= n:
                    break
                line = line.strip()

                # Ignore empty lines
                if not line:
                    previous_line_was_empty = True
                    continue

                # Is the line in the form of an ini file configuration line?
                items = line.split("=")
                if len(items) and ("," not in items[0]) and previous_line_was_empty:
                    break  # Yes; we reached the start of the file

                previous_line_was_empty = False

                datestring = line.split(",")[0]
                try:
                    # Insert at the front so the result ends up oldest-first.
                    result.insert(
                        0, iso8601.parse_date(datestring, default_timezone=timezone)
                    )
                except iso8601.ParseError as e:
                    raise iso8601.ParseError(
                        str(e)
                        + " (file {}, {} lines from the end)".format(filename, i + 1)
                    )
        return result

    @property
    def _dates_to_calculate(self):
        """
        Generator that yields the dates for which h_integrate should be run;
        this is the latest list of dates such that:
        * At least one of the time series has data
        * The length of the list is the 'number_of_output_files' configuration
          option (maybe less if the time series don't have enough data yet).
        """
        n = self.config.number_of_output_files
        dates = set()
        for filename in self.config.files:
            dates |= set(self._get_last_dates(filename, n))
        for d in sorted(dates)[-n:]:
            yield d

    @property
    def _time_step(self):
        """
        Return time step of all time series. If time step is not the same
        for all time series, raises exception.
        """
        time_step = None
        for filename in self.config.files:
            # start_date far in the past so only headers are actually parsed.
            with open(filename, newline="\n") as f:
                t = HTimeseries(f, start_date="0001-01-01 00:00")
            item_time_step = t.time_step
            if time_step and (item_time_step != time_step):
                raise click.ClickException("Not all time series have the same step")
            time_step = item_time_step
        return time_step

    @property
    def _date_fmt(self):
        """
        Determine date_fmt based on time series time step.
        """
        if self._time_step.endswith("min") or self._time_step.endswith("H"):
            return "%Y-%m-%d %H:%M%z"
        elif self._time_step.endswith("D"):
            return "%Y-%m-%d"
        elif self._time_step.endswith("M"):
            return "%Y-%m"
        elif self._time_step.endswith("Y"):
            return "%Y"
        raise click.ClickException("Can't use time step " + str(self._time_step))

    def _delete_obsolete_files(self):
        """
        Delete all tif files produced in the past except the last N,
        where N is the 'number_of_output_files' configuration option.
        """
        pattern = os.path.join(
            self.config.output_dir, "{}-*.tif".format(self.config.filename_prefix)
        )
        files = glob(pattern)
        files.sort()
        for filename in files[: -self.config.number_of_output_files]:
            os.remove(filename)

    def _execute(self):
        # Create stations layer
        stations = ogr.GetDriverByName("memory").CreateDataSource("stations")
        stations_layer = create_ogr_layer_from_timeseries(
            self.config.files, self.config.epsg, stations
        )

        # Get mask
        mask = gdal.Open(self.config.mask)

        # Setup integration method
        if self.config.method == "idw":
            funct = idw
            kwargs = {"alpha": self.config.alpha}
        else:
            # AppConfig._check_method guarantees method == "idw".
            assert False

        for date in self._dates_to_calculate:
            self.logger.info("Processing date " + date.isoformat())
            h_integrate(
                mask,
                stations_layer,
                date,
                os.path.join(self.config.output_dir, self.config.filename_prefix),
                self._date_fmt,
                funct,
                kwargs,
            )
        self._delete_obsolete_files()
|
201
|
+
|
202
|
+
|
203
|
+
class AppConfig:
    """Reads and validates the spatialize configuration file.

    After ``read()`` succeeds, every option is available as an attribute
    (already converted to its proper type where applicable).
    """

    # Known options; a "fallback" entry makes the option optional.
    config_file_options = {
        "logfile": {"fallback": ""},
        "loglevel": {"fallback": "warning"},
        "mask": {},
        "epsg": {},
        "output_dir": {},
        "filename_prefix": {},
        "number_of_output_files": {},
        "method": {},
        "alpha": {"fallback": "1"},
        "files": {},
    }

    def __init__(self, configfilename):
        self.configfilename = configfilename

    def read(self):
        """Parse the configuration; raise ClickException on any problem."""
        try:
            self._parse_config()
        except (OSError, configparser.Error) as e:
            sys.stderr.write(str(e))
            raise click.ClickException(str(e))

    def _parse_config(self):
        self._read_config_file()
        self._get_config_options()
        self._parse_config_options()

    def _read_config_file(self):
        self.config = configparser.ConfigParser(interpolation=None)
        try:
            self._read_config_file_assuming_it_has_section_headers()
        except configparser.MissingSectionHeaderError:
            self._read_config_file_without_sections()

    def _read_config_file_assuming_it_has_section_headers(self):
        with open(self.configfilename) as f:
            self.config.read_file(f)

    def _read_config_file_without_sections(self):
        # Prepend a [General] header so configparser accepts a headerless file.
        with open(self.configfilename) as f:
            contents = "[General]\n" + f.read()
        self.config.read_file(StringIO(contents))

    def _get_config_options(self):
        # Pull every known option from [General] and expose it as an attribute.
        self.options = {
            name: self.config.get("General", name, **extra)
            for name, extra in self.config_file_options.items()
        }
        for name, value in self.options.items():
            setattr(self, name, value)

    def _parse_config_options(self):
        self._parse_log_level()
        self._parse_files()
        self._check_method()
        self._parse_epsg()
        self._parse_number_of_output_files()

    def _parse_log_level(self):
        valid = ("ERROR", "WARNING", "INFO", "DEBUG")
        self.loglevel = self.loglevel.upper()
        if self.loglevel not in valid:
            raise WrongValueError("loglevel must be one of " + ", ".join(valid))

    def _parse_files(self):
        # "files" is a newline-separated list of time series file names.
        self.files = self.files.split("\n")

    def _check_method(self):
        # Check method
        if self.method != "idw":
            raise WrongValueError('Option "method" can currently only be idw')
        # Check alpha
        try:
            self.alpha = float(self.alpha)
        except ValueError:
            raise WrongValueError('Option "alpha" must be a number')

    def _parse_epsg(self):
        try:
            self.epsg = int(self.epsg)
        except ValueError:
            raise WrongValueError('Option "epsg" must be an integer')
        # A nonzero result from ImportFromEPSG means the code is unusable.
        srs = osr.SpatialReference()
        if srs.ImportFromEPSG(self.epsg):
            raise WrongValueError(
                "An error occurred when trying to use epsg={}".format(self.epsg)
            )

    def _parse_number_of_output_files(self):
        try:
            self.number_of_output_files = int(self.number_of_output_files)
        except ValueError:
            raise WrongValueError('Option "number_of_output_files" must be an integer')
|
299
|
+
|
300
|
+
|
301
|
+
@click.command()
@click.argument("configfile")
@click.version_option(
    version=__version__, message="%(prog)s from pthelma v.%(version)s"
)
def main(configfile):
    """Spatial integration"""

    # Delegate all the work to App.
    App(configfile).run()
|