pthelma 1.0.0__cp310-cp310-win_amd64.whl
Sign up to get free protection for your applications and to get access to all the features.
- enhydris_api_client/__init__.py +252 -0
- enhydris_cache/__init__.py +5 -0
- enhydris_cache/cli.py +150 -0
- enhydris_cache/enhydris_cache.py +69 -0
- evaporation/__init__.py +4 -0
- evaporation/cli.py +729 -0
- evaporation/evaporation.py +437 -0
- haggregate/__init__.py +5 -0
- haggregate/cli.py +91 -0
- haggregate/haggregate.py +155 -0
- haggregate/regularize.cp310-win_amd64.pyd +0 -0
- haggregate/regularize.pyx +193 -0
- hspatial/__init__.py +4 -0
- hspatial/cli.py +310 -0
- hspatial/hspatial.py +425 -0
- hspatial/test.py +27 -0
- htimeseries/__init__.py +2 -0
- htimeseries/htimeseries.py +574 -0
- htimeseries/timezone_utils.py +44 -0
- pthelma/__init__.py +0 -0
- pthelma/_version.py +16 -0
- pthelma-1.0.0.dist-info/LICENSE.rst +34 -0
- pthelma-1.0.0.dist-info/METADATA +55 -0
- pthelma-1.0.0.dist-info/RECORD +27 -0
- pthelma-1.0.0.dist-info/WHEEL +5 -0
- pthelma-1.0.0.dist-info/entry_points.txt +5 -0
- pthelma-1.0.0.dist-info/top_level.txt +7 -0
@@ -0,0 +1,437 @@
|
|
1
|
+
import datetime as dt
|
2
|
+
import math
|
3
|
+
import warnings
|
4
|
+
from math import cos, pi, sin, tan
|
5
|
+
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
# Note about RuntimeWarning
|
9
|
+
#
|
10
|
+
# When numpy makes calculations with masked arrays, it sometimes emits spurious
|
11
|
+
# RuntimeWarnings. This is because it occasionally does use the masked part of
|
12
|
+
# the array during the calculations (but masks the result). This is a known
|
13
|
+
# numpy bug (e.g. https://github.com/numpy/numpy/issues/4269). The numpy
|
14
|
+
# documentation, section "Operations on masked arrays", also has a related
|
15
|
+
# warning there.
|
16
|
+
#
|
17
|
+
# In order to avoid these spurious warnings, we have used, at various places in
|
18
|
+
# the code, "with warnings.catch_warnings()". We have attempted to unit test
|
19
|
+
# it, but sometimes it's hard to make the bug appear. A large array in
|
20
|
+
# production may cause the bug, but a small array in the unit test might not
|
21
|
+
# cause it, despite same python and numpy version. So the locations in which
|
22
|
+
# a fix was needed were largely located in production.
|
23
|
+
|
24
|
+
|
25
|
+
class PenmanMonteith(object):
    """Reference evapotranspiration with the Penman-Monteith method.

    Equation and page numbers quoted in the comments refer to
    Allen et al. (1998).  The arithmetic mixes ``math`` and ``numpy``
    functions, so inputs are presumably either scalars or numpy (possibly
    masked) arrays -- TODO confirm against callers.
    """

    # Stefan-Boltzmann constant (Allen et al., 1998, p. 52)
    sigma = 4.903e-9

    def __init__(
        self,
        albedo,
        elevation,
        latitude,
        time_step,
        longitude=None,
        nighttime_solar_radiation_ratio=None,
        unit_converters=None,
    ):
        """Store the parameters of the calculation.

        albedo: a single value, or a tuple/list that is indexed with
            ``month - 1`` (i.e. one value per month).
        elevation: used in the standard pressure (eq. 7) and clear-sky
            radiation (eq. 37) formulas.
        latitude: in degrees (it is converted to radians before use).
        time_step: "H" for hourly or "D" for daily.
        longitude: only used by the hourly calculation.
        nighttime_solar_radiation_ratio: value used instead of Rs/Rso
            when the clear-sky radiation is not above 0.05.
        unit_converters: mapping of variable name to a callable that is
            applied to the respective input; a name ending in ``_max`` or
            ``_min`` uses the converter of the name without the suffix.
            ``None`` (the default) means no conversions.
        """
        self.albedo = albedo
        self.nighttime_solar_radiation_ratio = nighttime_solar_radiation_ratio
        self.elevation = elevation
        self.latitude = latitude
        self.longitude = longitude
        self.time_step = time_step
        # A fresh dict per instance; a ``{}`` default in the signature would
        # be shared (and mutable) across all instances.
        self.unit_converters = {} if unit_converters is None else unit_converters

    def calculate(self, **kwargs):
        """Dispatch to calculate_hourly() or calculate_daily().

        Raises NotImplementedError if the time step is neither "H" nor "D".
        """
        if self.time_step == "H":
            return self.calculate_hourly(**kwargs)
        elif self.time_step == "D":
            return self.calculate_daily(**kwargs)
        else:
            raise NotImplementedError(
                "Evaporation for time steps other than hourly and daily "
                "has not been implemented."
            )

    def _get_standard_pressure(self):
        """Return the standard atmospheric pressure for self.elevation."""
        # Eq. 7 p. 31
        return 101.3 * ((293 - 0.0065 * self.elevation) / 293) ** 5.26

    def _get_albedo(self, adatetime):
        """Return the albedo that applies to the month of adatetime."""
        if isinstance(self.albedo, (tuple, list)):
            return self.albedo[adatetime.month - 1]
        return self.albedo

    def calculate_daily(
        self,
        temperature_max,
        temperature_min,
        humidity_max,
        humidity_min,
        wind_speed,
        adatetime,
        sunshine_duration=None,
        pressure=None,
        solar_radiation=None,
    ):
        """Return the daily reference evapotranspiration.

        The unit converters are applied only to values supplied by the
        caller.  If pressure is not supplied it is estimated from the
        elevation (eq. 7); if solar_radiation is not supplied it is
        estimated from sunshine_duration (eq. 35).  These estimates are
        already in the units the formulas expect, so they are not converted.

        Raises TypeError if neither solar_radiation nor sunshine_duration
        is specified.
        """
        supplied = {
            "temperature_max": temperature_max,
            "temperature_min": temperature_min,
            "humidity_max": humidity_max,
            "humidity_min": humidity_min,
            "wind_speed": wind_speed,
        }
        optional = {
            "sunshine_duration": sunshine_duration,
            "pressure": pressure,
            "solar_radiation": solar_radiation,
        }
        supplied.update({k: v for k, v in optional.items() if v is not None})
        variables = self.convert_units(**supplied)
        if pressure is None:
            variables["pressure"] = self._get_standard_pressure()

        # Radiation
        r_a, N = self.get_extraterrestrial_radiation(adatetime)
        if solar_radiation is None:
            if sunshine_duration is None:
                raise TypeError(
                    "Either sunshine_duration or solar_radiation must be specified"
                )
            variables["solar_radiation"] = (
                0.25 + 0.50 * variables["sunshine_duration"] / N
            ) * r_a  # Eq.35 p. 50
        r_so = r_a * (0.75 + 2e-5 * self.elevation)  # Eq. 37, p. 51

        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            temperature_mean = (
                variables["temperature_max"] + variables["temperature_min"]
            ) / 2
        variables["temperature_mean"] = temperature_mean
        # Use the converted pressure, like calculate_hourly() does
        gamma = self.get_psychrometric_constant(
            temperature_mean, variables["pressure"]
        )
        return self.penman_monteith_daily(
            incoming_solar_radiation=variables["solar_radiation"],
            clear_sky_solar_radiation=r_so,
            psychrometric_constant=gamma,
            mean_wind_speed=variables["wind_speed"],
            temperature_max=variables["temperature_max"],
            temperature_min=variables["temperature_min"],
            temperature_mean=variables["temperature_mean"],
            humidity_max=variables["humidity_max"],
            humidity_min=variables["humidity_min"],
            adate=adatetime,
        )

    def calculate_hourly(
        self,
        temperature,
        humidity,
        wind_speed,
        solar_radiation,
        adatetime,
        pressure=None,
    ):
        """Return the hourly reference evapotranspiration.

        adatetime must be timezone-aware (its UTC offset is used) and
        self.longitude must have been set.  The unit converters are applied
        only to values supplied by the caller; if pressure is not supplied
        it is estimated from the elevation (eq. 7) and used as is.
        """
        supplied = {
            "temperature": temperature,
            "humidity": humidity,
            "wind_speed": wind_speed,
            "solar_radiation": solar_radiation,
        }
        if pressure is not None:
            supplied["pressure"] = pressure
        variables = self.convert_units(**supplied)
        if pressure is None:
            variables["pressure"] = self._get_standard_pressure()
        gamma = self.get_psychrometric_constant(
            variables["temperature"], variables["pressure"]
        )
        r_so = self.get_extraterrestrial_radiation(adatetime) * (
            0.75 + 2e-5 * self.elevation
        )  # Eq. 37, p. 51
        return self.penman_monteith_hourly(
            incoming_solar_radiation=variables["solar_radiation"],
            clear_sky_solar_radiation=r_so,
            psychrometric_constant=gamma,
            mean_wind_speed=variables["wind_speed"],
            mean_temperature=variables["temperature"],
            mean_relative_humidity=variables["humidity"],
            adatetime=adatetime,
        )

    def convert_units(self, **kwargs):
        """Return a dict with each keyword value passed through its converter.

        The converter is looked up in self.unit_converters by the keyword
        name with any "_max"/"_min" suffix removed; values without a
        converter are returned unchanged.
        """
        result = {}
        for item, value in kwargs.items():
            varname = item[:-4] if item.endswith(("_max", "_min")) else item
            converter = self.unit_converters.get(varname, lambda x: x)
            with warnings.catch_warnings():
                # See comment about RuntimeWarning on top of the file
                warnings.simplefilter("ignore", RuntimeWarning)
                result[item] = converter(value)
        return result

    def get_extraterrestrial_radiation(self, adatetime):
        """
        Calculates the solar radiation we would receive if there were no
        atmosphere. This is a function of date, time and location.

        If the time step is "D", it returns a tuple, (R_a, N), where N is
        the daylight hours; otherwise it returns only the extraterrestrial
        radiation R_a for the hour that ends at adatetime, in which case
        adatetime must be timezone-aware and self.longitude must be set.
        """
        j = adatetime.timetuple().tm_yday  # Day of year

        # Inverse relative distance Earth-Sun, eq. 23, p. 46.
        dr = 1 + 0.033 * cos(2 * pi * j / 365)

        # Solar declination, eq. 24, p. 46.
        decl = 0.409 * sin(2 * pi * j / 365 - 1.39)

        phi = self.latitude / 180.0 * pi

        if self.time_step == "D":  # Daily?
            omega_s = np.arccos(-np.tan(phi) * tan(decl))  # Eq. 25 p. 46

            r_a = (
                24
                * 60
                / pi
                * 0.0820
                * dr
                * (
                    omega_s * np.sin(phi) * sin(decl)
                    + np.cos(phi) * cos(decl) * np.sin(omega_s)
                )
            )  # Eq. 21 p. 46
            n = 24 / pi * omega_s  # Eq. 34 p. 48
            return r_a, n

        # Seasonal correction for solar time, eq. 32, p. 48.
        b = 2 * pi * (j - 81) / 364
        sc = 0.1645 * sin(2 * b) - 0.1255 * cos(b) - 0.025 * sin(b)

        # Longitude at the centre of the local time zone
        utc_offset = adatetime.utcoffset()
        utc_offset_hours = utc_offset.days * 24 + utc_offset.seconds / 3600.0
        lz = -utc_offset_hours * 15

        # Solar time angle at midpoint of the time period, eq. 31, p. 48.
        # The daily case has already returned, so the period is one hour.
        time_step_delta = dt.timedelta(hours=1)
        tm = adatetime - time_step_delta / 2
        t = tm.hour + tm.minute / 60.0
        omega = pi / 12 * ((t + 0.06667 * (lz + self.longitude) + sc) - 12)

        # Solar time angles at beginning and end of the period, eqs. 29 and 30,
        # p. 48.
        t1 = time_step_delta.seconds / 3600.0
        omega1 = omega - pi * t1 / 24
        omega2 = omega + pi * t1 / 24

        # Result: eq. 28, p. 47.
        return (
            12
            * 60
            / pi
            * 0.0820
            * dr
            * (
                (omega2 - omega1) * np.sin(phi) * sin(decl)
                + np.cos(phi) * cos(decl) * (np.sin(omega2) - np.sin(omega1))
            )
        )

    def get_psychrometric_constant(self, temperature, pressure):
        """
        Allen et al. (1998), eq. 8, p. 32.

        This is called a "constant" because, although it is a function of
        temperature and pressure, its variations are small, and therefore it
        can be assumed constant for a location assuming standard pressure at
        that elevation and 20 degrees C. However, here we actually calculate
        it, so it isn't a constant.
        """
        lambda_ = 2.501 - (2.361e-3) * temperature  # eq. 3-1, p. 223
        return 1.013e-3 * pressure / 0.622 / lambda_

    def penman_monteith_daily(
        self,
        incoming_solar_radiation,
        clear_sky_solar_radiation,
        psychrometric_constant,
        mean_wind_speed,
        temperature_max,
        temperature_min,
        temperature_mean,
        humidity_max,
        humidity_min,
        adate,
    ):
        """
        Calculates and returns the reference evapotranspiration according
        to Allen et al. (1998), eq. 6, p. 24 & 65.
        """

        # Saturation and actual vapour pressure
        svp_max = self.get_saturation_vapour_pressure(temperature_max)
        svp_min = self.get_saturation_vapour_pressure(temperature_min)
        avp1 = svp_max * humidity_min / 100
        avp2 = svp_min * humidity_max / 100
        svp = (svp_max + svp_min) / 2  # Eq. 12 p. 36
        avp = (avp1 + avp2) / 2  # Eq. 12 p. 36

        # Saturation vapour pressure curve slope
        delta = self.get_saturation_vapour_pressure_curve_slope(temperature_mean)

        # Net incoming radiation; p. 51, eq. 38
        rns = (1.0 - self._get_albedo(adate)) * incoming_solar_radiation

        # Net outgoing radiation
        rnl = self.get_net_outgoing_radiation(
            (temperature_min, temperature_max),
            incoming_solar_radiation,
            clear_sky_solar_radiation,
            avp,
        )

        # Net radiation at grass surface
        rn = rns - rnl

        # Soil heat flux
        g_day = 0  # Eq. 42 p. 54

        # Apply the formula
        numerator_term1 = 0.408 * delta * (rn - g_day)
        numerator_term2 = (
            psychrometric_constant
            * 900
            / (temperature_mean + 273.16)
            * mean_wind_speed
            * (svp - avp)
        )
        denominator = delta + psychrometric_constant * (1 + 0.34 * mean_wind_speed)

        return (numerator_term1 + numerator_term2) / denominator

    def penman_monteith_hourly(
        self,
        incoming_solar_radiation,
        clear_sky_solar_radiation,
        psychrometric_constant,
        mean_wind_speed,
        mean_temperature,
        mean_relative_humidity,
        adatetime,
    ):
        """
        Calculates and returns the reference evapotranspiration according
        to Allen et al. (1998), eq. 53, p. 74.

        As explained in Allen et al. (1998, p. 74), the function is
        modified in relation to the original Penman-Monteith equation, so
        that it is suitable for hourly data.
        """

        # Saturation and actual vapour pressure
        svp = self.get_saturation_vapour_pressure(mean_temperature)
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            avp = svp * mean_relative_humidity / 100.0  # Eq. 54, p. 74

        # Net incoming radiation; p. 51, eq. 38
        rns = (1.0 - self._get_albedo(adatetime)) * incoming_solar_radiation

        # Net outgoing radiation
        rnl = self.get_net_outgoing_radiation(
            mean_temperature, incoming_solar_radiation, clear_sky_solar_radiation, avp
        )

        # Net radiation at grass surface
        rn = rns - rnl

        # Saturation vapour pressure curve slope
        delta = self.get_saturation_vapour_pressure_curve_slope(mean_temperature)

        # Soil heat flux density
        g = self.get_soil_heat_flux_density(incoming_solar_radiation, rn)

        # Apply the formula
        numerator_term1 = 0.408 * delta * (rn - g)
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            numerator_term2 = (
                psychrometric_constant
                * 37
                / (mean_temperature + 273.16)
                * mean_wind_speed
                * (svp - avp)
            )
        denominator = delta + psychrometric_constant * (1 + 0.34 * mean_wind_speed)

        return (numerator_term1 + numerator_term2) / denominator

    def get_net_outgoing_radiation(
        self,
        temperature,
        incoming_solar_radiation,
        clear_sky_solar_radiation,
        mean_actual_vapour_pressure,
    ):
        """
        Allen et al. (1998), p. 52, eq. 39. Temperature can be a tuple (a pair)
        of min and max, or a single value. If it is a single value, the
        equation is modified according to end of page 74.
        """
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            if isinstance(temperature, (tuple, list)):
                factor1 = (
                    self.sigma
                    * ((temperature[0] + 273.16) ** 4 + (temperature[1] + 273.16) ** 4)
                    / 2
                )
            else:
                factor1 = self.sigma / 24 * (temperature + 273.16) ** 4
        factor2 = 0.34 - 0.14 * (mean_actual_vapour_pressure**0.5)

        # Solar radiation ratio Rs/Rs0 (Allen et al., 1998, top of p. 75).
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            solar_radiation_ratio = np.where(
                clear_sky_solar_radiation > 0.05,
                incoming_solar_radiation / clear_sky_solar_radiation,
                self.nighttime_solar_radiation_ratio,
            )
        # Keep the ratio within [0.3, 1.0]
        solar_radiation_ratio = np.maximum(solar_radiation_ratio, 0.3)
        solar_radiation_ratio = np.minimum(solar_radiation_ratio, 1.0)

        factor3 = 1.35 * solar_radiation_ratio - 0.35

        return factor1 * factor2 * factor3

    def get_saturation_vapour_pressure(self, temperature):
        "Allen et al. (1998), p. 36, eq. 11."
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file. Only
            # RuntimeWarning is silenced, as everywhere else in this class.
            warnings.simplefilter("ignore", RuntimeWarning)
            return 0.6108 * math.e ** (17.27 * temperature / (237.3 + temperature))

    def get_soil_heat_flux_density(self, incoming_solar_radiation, rn):
        "Allen et al. (1998), p. 55, eq. 45 & 46."
        # 0.1 of the net radiation where there is incoming solar radiation
        # (above 0.05), 0.5 of it elsewhere
        coefficient = np.where(incoming_solar_radiation > 0.05, 0.1, 0.5)
        return coefficient * rn

    def get_saturation_vapour_pressure_curve_slope(self, temperature):
        "Allen et al. (1998), p. 37, eq. 13."
        numerator = 4098 * self.get_saturation_vapour_pressure(temperature)
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            denominator = (temperature + 237.3) ** 2
            return numerator / denominator
|
haggregate/__init__.py
ADDED
haggregate/cli.py
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
import configparser
|
2
|
+
import datetime as dt
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import sys
|
6
|
+
import traceback
|
7
|
+
|
8
|
+
import click
|
9
|
+
|
10
|
+
from haggregate import RegularizationMode, aggregate
|
11
|
+
from haggregate.regularize import regularize
|
12
|
+
from htimeseries import HTimeseries
|
13
|
+
|
14
|
+
|
15
|
+
@click.command()
@click.argument("configfile")
def main(configfile):
    """Create lower-step timeseries from higher-step ones"""
    # The configuration file has a [General] section with the settings that
    # apply to all time series, plus one section per time series that names
    # its source_file, target_file and aggregation method.  Any error is
    # logged and re-raised as a click.ClickException.

    # Start by logging to the console; later we will switch according to
    # config.  (logging.StreamHandler() without arguments writes to stderr.)
    logger = logging.getLogger("haggregate")
    stdout_handler = logging.StreamHandler()
    logger.addHandler(stdout_handler)

    try:
        config = configparser.ConfigParser()
        with open(configfile) as f:
            config.read_file(f)

        # Read the [General] section
        logfile = config.get("General", "logfile", fallback="")
        loglevel = config.get("General", "loglevel", fallback="warning")
        base_dir = config.get("General", "base_dir", fallback=".")
        target_step = config.get("General", "target_step")
        min_count = config.getint("General", "min_count")
        missing_flag = config.get("General", "missing_flag")
        target_timestamp_offset = config.get(
            "General", "target_timestamp_offset", fallback=None
        )

        # Remove [General] and make sure there are more sections
        config.pop("General")
        if not config.sections():
            # NoSectionError would treat the message as a section name and
            # report "No section: '...'", so use the generic configparser error
            raise configparser.Error("No time series have been specified")

        # Setup logger
        logger.setLevel(loglevel.upper())
        if logfile:
            logger.removeHandler(stdout_handler)
            logger.addHandler(logging.FileHandler(logfile))

        # Log start of execution
        logger.info("Starting haggregate, %s", dt.datetime.today().isoformat())

        # Read each section and do the work for it
        for section_name in config.sections():
            # config.get() without fallback raises a descriptive
            # NoOptionError if a required option is missing
            source_filename = os.path.join(
                base_dir, config.get(section_name, "source_file")
            )
            target_filename = os.path.join(
                base_dir, config.get(section_name, "target_file")
            )
            method = config.get(section_name, "method")
            with open(source_filename, newline="\n") as f:
                ts = HTimeseries(
                    f, format=HTimeseries.FILE, default_tzinfo=dt.timezone.utc
                )
            if method == "mean":
                regularization_mode = RegularizationMode.INSTANTANEOUS
            else:
                regularization_mode = RegularizationMode.INTERVAL
            regts = regularize(ts, new_date_flag="DATEINSERT", mode=regularization_mode)
            aggts = aggregate(
                regts,
                target_step,
                method,
                min_count=min_count,
                missing_flag=missing_flag,
                target_timestamp_offset=target_timestamp_offset,
            )
            with open(target_filename, "w") as f:
                aggts.write(f, format=HTimeseries.FILE)

        # Log end of execution
        logger.info("Finished haggregate, %s", dt.datetime.today().isoformat())

    except Exception as e:
        # Top-level boundary: log the message (and the traceback at debug
        # level), then let click report the error and set the exit status
        logger.error(str(e))
        logger.debug(traceback.format_exc())
        raise click.ClickException(str(e)) from e
|
88
|
+
|
89
|
+
|
90
|
+
if __name__ == "__main__":
|
91
|
+
sys.exit(main())
|
haggregate/haggregate.py
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
import re
|
2
|
+
from enum import Enum
|
3
|
+
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
|
7
|
+
from htimeseries import HTimeseries
|
8
|
+
|
9
|
+
methods = {
|
10
|
+
"sum": pd.Series.sum,
|
11
|
+
"mean": pd.Series.mean,
|
12
|
+
"max": pd.Series.max,
|
13
|
+
"min": pd.Series.min,
|
14
|
+
}
|
15
|
+
|
16
|
+
|
17
|
+
class AggregateError(Exception):
    """Raised when an aggregation cannot be carried out as requested.

    In this module it signals a source time step that comes out as None, an
    unsupported target step, or an unparsable timestamp offset.
    """
|
19
|
+
|
20
|
+
|
21
|
+
def aggregate(
    hts,
    target_step,
    method,
    min_count=1,
    missing_flag="MISS",
    target_timestamp_offset=None,
):
    """Aggregate the time series hts to target_step and return the result.

    This is a convenience wrapper that configures an Aggregation with the
    given arguments, runs it, and returns its result.

    hts: the source time series.
    target_step: the time step of the result.
    method: a key of the module-level ``methods`` dict ("sum", "mean",
        "max" or "min").
    min_count: resulting values based on fewer source values than this
        become NaN.
    missing_flag: the flag set on resulting records that are based on fewer
        source values than a complete target step holds.
    target_timestamp_offset: optional offset applied to the resulting
        timestamps.
    """
    settings = {
        "source_timeseries": hts,
        "target_step": target_step,
        "method": method,
        "min_count": min_count,
        "missing_flag": missing_flag,
        "target_timestamp_offset": target_timestamp_offset,
    }
    job = Aggregation(**settings)
    job.execute()
    return job.result
|
39
|
+
|
40
|
+
|
41
|
+
class Aggregation:
    """A single aggregation of a source time series to a larger time step.

    The keyword arguments given to the constructor are stored as attributes
    of the same name; the methods read source_timeseries, target_step,
    method, min_count, missing_flag and target_timestamp_offset.  After
    execute(), the aggregated time series is in the ``result`` attribute.
    """

    def __init__(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)
        self.source = SourceTimeseries(self.source_timeseries)
        self.result = AggregatedTimeseries()
        self.result.time_step = self.target_step

    def execute(self):
        """Fill in self.result.

        If the source cannot be normalized because of CannotInferFrequency,
        the result is left with metadata only.
        """
        self.result.set_metadata(self.source_timeseries)
        try:
            self.source.normalize(self.target_step)
        except CannotInferFrequency:
            pass
        else:
            self.do_aggregation()
            self.result.remove_leading_and_trailing_nans()
            self.result.add_timestamp_offset(self.target_timestamp_offset)

    def do_aggregation(self):
        """Resample the source and store values and flags in the result."""
        self.create_resampler()
        self.get_result_values()
        self.get_result_flags()

    def create_resampler(self):
        """Create the resampler of source values used by the other steps."""
        source_values = self.source.data["value"]
        # Intervals are closed on, and labelled by, their right edge
        self.resampler = source_values.resample(
            self.result.time_step, closed="right", label="right"
        )

    def get_result_values(self):
        """Store the aggregated values, blanking those with too few data."""
        aggregated = self.resampler.agg(methods[self.method])
        too_few = self.resampler.count() < self.min_count
        aggregated[too_few] = np.nan
        self.result.data["value"] = aggregated

    def get_result_flags(self):
        """Flag each resulting record that lacks some source values."""

        def flag_for(is_incomplete):
            return self.missing_flag if is_incomplete else ""

        # Number of source records that fit in one complete target step
        full_count = int(pd.Timedelta(self.result.time_step) / self.source.freq)
        incomplete = self.resampler.count() < full_count
        self.result.data["flags"] = incomplete.apply(flag_for)
|
81
|
+
|
82
|
+
|
83
|
+
class CannotInferFrequency(Exception):
    """Raised by SourceTimeseries.normalize() when inferring the time step of
    the source raises ValueError; Aggregation.execute() catches it and stops
    without aggregating.
    """
|
85
|
+
|
86
|
+
|
87
|
+
attrs = ("unit", "timezone", "interval_type", "variable", "precision", "location")
|
88
|
+
|
89
|
+
|
90
|
+
class SourceTimeseries(HTimeseries):
    """The time series to be aggregated.

    It copies the metadata attributes listed in the module-level ``attrs``
    and shares the ``data`` of the time series it is created from.
    """

    def __init__(self, s):
        # HTimeseries.__init__ is not called; only these attributes are set
        for attr in attrs:
            setattr(self, attr, getattr(s, attr, None))
        self.data = s.data

    def normalize(self, target_step):
        """Reindex so that it has no missing records but has NaNs instead, starting from
        one before and ending in one after.

        The inferred time step of the data is stored in self.freq.

        Raises CannotInferFrequency if inferring the time step raises
        ValueError, and AggregateError if the time step comes out as None.
        """
        current_range = self.data.index
        try:
            self.freq = pd.tseries.frequencies.to_offset(pd.infer_freq(current_range))
        except ValueError as e:
            raise CannotInferFrequency() from e
        if self.freq is None:
            raise AggregateError(
                "Can't infer time series step; maybe it's not regularized"
            )
        # Subtracting one second before flooring makes a first timestamp that
        # sits exactly on a target step boundary go to the previous boundary.
        # The keyword form avoids the deprecated "S" unit alias.
        first_timestamp = (current_range[0] - pd.Timedelta(seconds=1)).floor(
            target_step
        )
        end_timestamp = current_range[-1].ceil(target_step)
        new_range = pd.date_range(first_timestamp, end_timestamp, freq=self.freq)
        self.data = self.data.reindex(new_range)
|
113
|
+
|
114
|
+
|
115
|
+
class AggregatedTimeseries(HTimeseries):
    """The time series that results from an aggregation."""

    def set_metadata(self, source_timeseries):
        """Copy the metadata of source_timeseries, adapting title and comment.

        self.time_step must have been set beforehand.

        Raises AggregateError if self.time_step is neither "1H" nor "1D".
        """
        for attr in attrs:
            setattr(self, attr, getattr(source_timeseries, attr, None))
        if self.time_step not in ("1H", "1D"):
            raise AggregateError("The target step can currently only be 1H or 1D")
        if hasattr(source_timeseries, "title"):
            self.title = "Aggregated " + source_timeseries.title
        if hasattr(source_timeseries, "comment"):
            self.comment = (
                "Created by aggregating the time series that had this comment:\n\n"
                + source_timeseries.comment
            )

    def remove_leading_and_trailing_nans(self):
        """Drop the records before the first and after the last non-null value."""
        if len(self.data.index) == 0:
            return
        # Locate the first and last non-null value once and slice, instead of
        # dropping one record at a time (each drop copies the whole frame)
        has_value = self.data["value"].notnull().to_numpy()
        positions = np.flatnonzero(has_value)
        if positions.size == 0:
            self.data = self.data.iloc[0:0].copy()
        else:
            self.data = self.data.iloc[positions[0] : positions[-1] + 1].copy()

    def add_timestamp_offset(self, target_timestamp_offset):
        """Shift the timestamps by target_timestamp_offset, if one is given.

        An offset without a leading "-" moves the timestamps backwards by
        that amount; an offset with a leading "-" moves them forwards.
        """
        if not target_timestamp_offset:
            return
        periods = 1 if target_timestamp_offset.startswith("-") else -1
        freq = target_timestamp_offset.lstrip("-")
        self.data = self.data.shift(periods, freq=freq)
|
140
|
+
|
141
|
+
|
142
|
+
def _get_offset_in_minutes(timestamp_offset):
|
143
|
+
m = re.match(r"(-?)(\d*)(T|min)$", timestamp_offset)
|
144
|
+
if not m:
|
145
|
+
raise AggregateError(
|
146
|
+
"The target timestamp offset can currently only be a number of minutes "
|
147
|
+
"such as 1min"
|
148
|
+
)
|
149
|
+
sign = m.group(1) == "-" and -1 or 1
|
150
|
+
return sign * int(m.group(2))
|
151
|
+
|
152
|
+
|
153
|
+
class RegularizationMode(Enum):
    """The two modes that can be passed as ``mode`` when regularizing.

    The haggregate command line tool chooses INSTANTANEOUS when the
    aggregation method is "mean" and INTERVAL for any other method.
    """

    INSTANTANEOUS = 1
    INTERVAL = 2
|
Binary file
|