pthelma 1.0.0__cp310-cp310-musllinux_1_2_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,437 @@
1
+ import datetime as dt
2
+ import math
3
+ import warnings
4
+ from math import cos, pi, sin, tan
5
+
6
+ import numpy as np
7
+
8
+ # Note about RuntimeWarning
9
+ #
10
+ # When numpy makes calculations with masked arrays, it sometimes emits spurious
11
+ # RuntimeWarnings. This is because it occasionally does use the masked part of
12
+ # the array during the calculations (but masks the result). This is a known
13
+ # numpy bug (e.g. https://github.com/numpy/numpy/issues/4269). The numpy
14
+ # documentation, section "Operations on masked arrays", also has a related
15
+ # warning there.
16
+ #
17
+ # In order to avoid these spurious warnings, we have used, at various places in
18
+ # the code, "with warnings.catch_warnings()". We have attempted to unit test
19
+ # it, but sometimes it's hard to make the bug appear. A large array in
20
+ # production may cause the bug, but a small array in the unit test might not
21
+ # cause it, despite same python and numpy version. So the locations in which
22
+ # a fix was needed were largely located in production.
23
+
24
+
25
class PenmanMonteith:
    """Reference evapotranspiration with the FAO-56 Penman-Monteith method.

    Equation and page numbers in the comments refer to Allen et al. (1998).
    The arithmetic goes through numpy where needed, so inputs are presumably
    allowed to be scalars or (masked) arrays -- confirm against callers.

    Constructor arguments:
        albedo: a single albedo value, or a tuple/list indexed by
            ``month - 1`` holding one value per month.
        elevation: station elevation, used in Eq. 7 and Eq. 37.
        latitude: latitude in decimal degrees.
        time_step: ``"H"`` for hourly or ``"D"`` for daily calculations.
        longitude: longitude in decimal degrees; read only by the hourly
            calculation.
        nighttime_solar_radiation_ratio: value used for Rs/Rso wherever the
            clear-sky radiation is not above 0.05.
        unit_converters: mapping from variable name (without any ``_max`` /
            ``_min`` suffix) to a callable that converts a user-supplied
            value to the unit the formulas expect.
    """

    # Stefan-Boltzmann constant (Allen et al., 1998, p. 52)
    sigma = 4.903e-9

    def __init__(
        self,
        albedo,
        elevation,
        latitude,
        time_step,
        longitude=None,
        nighttime_solar_radiation_ratio=None,
        unit_converters=None,
    ):
        self.albedo = albedo
        self.nighttime_solar_radiation_ratio = nighttime_solar_radiation_ratio
        self.elevation = elevation
        self.latitude = latitude
        self.longitude = longitude
        self.time_step = time_step
        # A fresh dict per instance; a ``{}`` default in the signature would
        # be shared (and mutable) across every instance.
        self.unit_converters = {} if unit_converters is None else unit_converters

    def calculate(self, **kwargs):
        """Run the hourly or daily calculation, depending on ``time_step``.

        Keyword arguments are forwarded unchanged to ``calculate_hourly`` or
        ``calculate_daily``. Raises NotImplementedError for any other
        ``time_step``.
        """
        if self.time_step == "H":
            return self.calculate_hourly(**kwargs)
        if self.time_step == "D":
            return self.calculate_daily(**kwargs)
        raise NotImplementedError(
            "Evaporation for time steps other than hourly and daily "
            "has not been implemented."
        )

    def _get_standard_pressure(self):
        """Return atmospheric pressure estimated from elevation (Eq. 7 p. 31)."""
        return 101.3 * ((293 - 0.0065 * self.elevation) / 293) ** 5.26

    def _get_albedo(self, month):
        """Return the albedo for ``month`` (1-12).

        A tuple or list is treated as monthly values; anything else is used
        as is.
        """
        if isinstance(self.albedo, (tuple, list)):
            return self.albedo[month - 1]
        return self.albedo

    def calculate_daily(
        self,
        temperature_max,
        temperature_min,
        humidity_max,
        humidity_min,
        wind_speed,
        adatetime,
        sunshine_duration=None,
        pressure=None,
        solar_radiation=None,
    ):
        """Return the daily reference evapotranspiration.

        User-supplied values are first passed through ``unit_converters``.
        If ``pressure`` is omitted it is estimated from the elevation; if
        ``solar_radiation`` is omitted it is estimated from
        ``sunshine_duration``. Values estimated here are already in the units
        of the formulas, so they are not passed through the converters.

        Raises ValueError if both ``solar_radiation`` and
        ``sunshine_duration`` are missing.
        """
        variables = self.convert_units(
            temperature_max=temperature_max,
            temperature_min=temperature_min,
            humidity_max=humidity_max,
            humidity_min=humidity_min,
            wind_speed=wind_speed,
            sunshine_duration=sunshine_duration,
            pressure=pressure,
            solar_radiation=solar_radiation,
        )
        if pressure is None:
            variables["pressure"] = self._get_standard_pressure()

        # Radiation
        r_a, daylight_hours = self.get_extraterrestrial_radiation(adatetime)
        if solar_radiation is None:
            if sunshine_duration is None:
                raise ValueError(
                    "Either solar_radiation or sunshine_duration must be specified"
                )
            variables["solar_radiation"] = (
                0.25 + 0.50 * variables["sunshine_duration"] / daylight_hours
            ) * r_a  # Eq. 35 p. 50
        r_so = r_a * (0.75 + 2e-5 * self.elevation)  # Eq. 37, p. 51

        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            temperature_mean = (
                variables["temperature_max"] + variables["temperature_min"]
            ) / 2
        variables["temperature_mean"] = temperature_mean
        # Use the converted pressure, as the hourly calculation does.
        gamma = self.get_psychrometric_constant(
            temperature_mean, variables["pressure"]
        )
        return self.penman_monteith_daily(
            incoming_solar_radiation=variables["solar_radiation"],
            clear_sky_solar_radiation=r_so,
            psychrometric_constant=gamma,
            mean_wind_speed=variables["wind_speed"],
            temperature_max=variables["temperature_max"],
            temperature_min=variables["temperature_min"],
            temperature_mean=variables["temperature_mean"],
            humidity_max=variables["humidity_max"],
            humidity_min=variables["humidity_min"],
            adate=adatetime,
        )

    def calculate_hourly(
        self,
        temperature,
        humidity,
        wind_speed,
        solar_radiation,
        adatetime,
        pressure=None,
    ):
        """Return the hourly reference evapotranspiration.

        User-supplied values are first passed through ``unit_converters``.
        If ``pressure`` is omitted it is estimated from the elevation and is
        not passed through the converters. ``adatetime`` must be a
        timezone-aware datetime.
        """
        variables = self.convert_units(
            temperature=temperature,
            humidity=humidity,
            wind_speed=wind_speed,
            pressure=pressure,
            solar_radiation=solar_radiation,
        )
        if pressure is None:
            variables["pressure"] = self._get_standard_pressure()
        gamma = self.get_psychrometric_constant(
            variables["temperature"], variables["pressure"]
        )
        r_so = self.get_extraterrestrial_radiation(adatetime) * (
            0.75 + 2e-5 * self.elevation
        )  # Eq. 37, p. 51
        return self.penman_monteith_hourly(
            incoming_solar_radiation=variables["solar_radiation"],
            clear_sky_solar_radiation=r_so,
            psychrometric_constant=gamma,
            mean_wind_speed=variables["wind_speed"],
            mean_temperature=variables["temperature"],
            mean_relative_humidity=variables["humidity"],
            adatetime=adatetime,
        )

    def convert_units(self, **kwargs):
        """Return a dict with each keyword value passed through its converter.

        The converter is looked up in ``unit_converters`` under the variable
        name with any ``_max`` / ``_min`` suffix removed. Values without a
        converter, and ``None`` values (omitted optional inputs), are
        returned unchanged.
        """
        result = {}
        for name, value in kwargs.items():
            varname = name[:-4] if name.endswith(("_max", "_min")) else name
            converter = self.unit_converters.get(varname)
            if converter is None or value is None:
                result[name] = value
                continue
            with warnings.catch_warnings():
                # See comment about RuntimeWarning on top of the file
                warnings.simplefilter("ignore", RuntimeWarning)
                result[name] = converter(value)
        return result

    def get_extraterrestrial_radiation(self, adatetime):
        """
        Calculates the solar radiation we would receive if there were no
        atmosphere. This is a function of date, time and location.

        If time_step is "D", it returns a tuple, (R_a, N), where N is the
        daylight hours; only the day of year of adatetime is used. Otherwise
        it returns the extraterrestrial radiation R_a for the hour that ends
        at adatetime, which must then be a timezone-aware datetime, and
        longitude must have been set. ValueError is raised if either of these
        is missing.
        """
        j = adatetime.timetuple().tm_yday  # Day of year

        # Inverse relative distance Earth-Sun, eq. 23, p. 46.
        dr = 1 + 0.033 * cos(2 * pi * j / 365)

        # Solar declination, eq. 24, p. 46.
        decl = 0.409 * sin(2 * pi * j / 365 - 1.39)

        phi = self.latitude / 180.0 * pi

        if self.time_step == "D":
            omega_s = np.arccos(-np.tan(phi) * tan(decl))  # Eq. 25 p. 46
            r_a = (
                24
                * 60
                / pi
                * 0.0820
                * dr
                * (
                    omega_s * np.sin(phi) * sin(decl)
                    + np.cos(phi) * cos(decl) * np.sin(omega_s)
                )
            )  # Eq. 21 p. 46
            n = 24 / pi * omega_s  # Eq. 34 p. 48
            return r_a, n

        # Seasonal correction for solar time, eq. 32, p. 48.
        b = 2 * pi * (j - 81) / 364
        sc = 0.1645 * sin(2 * b) - 0.1255 * cos(b) - 0.025 * sin(b)

        # Longitude at the centre of the local time zone
        utc_offset = adatetime.utcoffset()
        if utc_offset is None:
            raise ValueError(
                "adatetime must be timezone-aware for hourly calculations"
            )
        if self.longitude is None:
            raise ValueError("longitude must be specified for hourly calculations")
        lz = -(utc_offset.total_seconds() / 3600.0) * 15

        # Solar time angle at midpoint of the time period, eq. 31, p. 48.
        # The daily case has already returned, so the period is one hour.
        time_step_delta = dt.timedelta(hours=1)
        tm = adatetime - time_step_delta / 2
        t = tm.hour + tm.minute / 60.0
        omega = pi / 12 * ((t + 0.06667 * (lz + self.longitude) + sc) - 12)

        # Solar time angles at beginning and end of the period, eqs. 29 and 30,
        # p. 48.
        t1 = time_step_delta.total_seconds() / 3600.0
        omega1 = omega - pi * t1 / 24
        omega2 = omega + pi * t1 / 24

        # Result: eq. 28, p. 47.
        return (
            12
            * 60
            / pi
            * 0.0820
            * dr
            * (
                (omega2 - omega1) * np.sin(phi) * sin(decl)
                + np.cos(phi) * cos(decl) * (np.sin(omega2) - np.sin(omega1))
            )
        )

    def get_psychrometric_constant(self, temperature, pressure):
        """
        Allen et al. (1998), eq. 8, p. 32.

        This is called a "constant" because, although it is a function of
        temperature and pressure, its variations are small, and therefore it
        can be assumed constant for a location assuming standard pressure at
        that elevation and 20 degrees C. However, here we actually calculate
        it, so it isn't a constant.
        """
        lambda_ = 2.501 - (2.361e-3) * temperature  # eq. 3-1, p. 223
        return 1.013e-3 * pressure / 0.622 / lambda_

    def penman_monteith_daily(
        self,
        incoming_solar_radiation,
        clear_sky_solar_radiation,
        psychrometric_constant,
        mean_wind_speed,
        temperature_max,
        temperature_min,
        temperature_mean,
        humidity_max,
        humidity_min,
        adate,
    ):
        """
        Calculates and returns the reference evapotranspiration according
        to Allen et al. (1998), eq. 6, p. 24 & 65.

        All arguments must already be in the units of the formulas; adate is
        only used to select the month when albedo is given per month.
        """

        # Saturation and actual vapour pressure
        svp_max = self.get_saturation_vapour_pressure(temperature_max)
        svp_min = self.get_saturation_vapour_pressure(temperature_min)
        avp1 = svp_max * humidity_min / 100
        avp2 = svp_min * humidity_max / 100
        svp = (svp_max + svp_min) / 2  # Eq. 12 p. 36
        avp = (avp1 + avp2) / 2  # Eq. 12 p. 36

        # Saturation vapour pressure curve slope
        delta = self.get_saturation_vapour_pressure_curve_slope(temperature_mean)

        # Net incoming radiation; p. 51, eq. 38
        rns = (1.0 - self._get_albedo(adate.month)) * incoming_solar_radiation

        # Net outgoing radiation
        rnl = self.get_net_outgoing_radiation(
            (temperature_min, temperature_max),
            incoming_solar_radiation,
            clear_sky_solar_radiation,
            avp,
        )

        # Net radiation at grass surface
        rn = rns - rnl

        # Soil heat flux
        g_day = 0  # Eq. 42 p. 54

        # Apply the formula
        numerator_term1 = 0.408 * delta * (rn - g_day)
        numerator_term2 = (
            psychrometric_constant
            * 900
            / (temperature_mean + 273.16)
            * mean_wind_speed
            * (svp - avp)
        )
        denominator = delta + psychrometric_constant * (1 + 0.34 * mean_wind_speed)

        return (numerator_term1 + numerator_term2) / denominator

    def penman_monteith_hourly(
        self,
        incoming_solar_radiation,
        clear_sky_solar_radiation,
        psychrometric_constant,
        mean_wind_speed,
        mean_temperature,
        mean_relative_humidity,
        adatetime,
    ):
        """
        Calculates and returns the reference evapotranspiration according
        to Allen et al. (1998), eq. 53, p. 74.

        As explained in Allen et al. (1998, p. 74), the function is
        modified in relation to the original Penman-Monteith equation, so
        that it is suitable for hourly data.
        """

        # Saturation and actual vapour pressure
        svp = self.get_saturation_vapour_pressure(mean_temperature)
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            avp = svp * mean_relative_humidity / 100.0  # Eq. 54, p. 74

        # Net incoming radiation; p. 51, eq. 38
        rns = (1.0 - self._get_albedo(adatetime.month)) * incoming_solar_radiation

        # Net outgoing radiation
        rnl = self.get_net_outgoing_radiation(
            mean_temperature, incoming_solar_radiation, clear_sky_solar_radiation, avp
        )

        # Net radiation at grass surface
        rn = rns - rnl

        # Saturation vapour pressure curve slope
        delta = self.get_saturation_vapour_pressure_curve_slope(mean_temperature)

        # Soil heat flux density
        g = self.get_soil_heat_flux_density(incoming_solar_radiation, rn)

        # Apply the formula
        numerator_term1 = 0.408 * delta * (rn - g)
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            numerator_term2 = (
                psychrometric_constant
                * 37
                / (mean_temperature + 273.16)
                * mean_wind_speed
                * (svp - avp)
            )
        denominator = delta + psychrometric_constant * (1 + 0.34 * mean_wind_speed)

        return (numerator_term1 + numerator_term2) / denominator

    def get_net_outgoing_radiation(
        self,
        temperature,
        incoming_solar_radiation,
        clear_sky_solar_radiation,
        mean_actual_vapour_pressure,
    ):
        """
        Allen et al. (1998), p. 52, eq. 39. Temperature can be a tuple (a pair)
        of min and max, or a single value. If it is a single value, the
        equation is modified according to end of page 74.

        Wherever the clear-sky radiation is not above 0.05, the ratio Rs/Rso
        is replaced by nighttime_solar_radiation_ratio; if that has not been
        set, the result is NaN at those points.
        """
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            if isinstance(temperature, (tuple, list)):
                factor1 = (
                    self.sigma
                    * ((temperature[0] + 273.16) ** 4 + (temperature[1] + 273.16) ** 4)
                    / 2
                )
            else:
                factor1 = self.sigma / 24 * (temperature + 273.16) ** 4
            factor2 = 0.34 - 0.14 * (mean_actual_vapour_pressure**0.5)

        # Passing None to np.where would turn the whole result into an
        # object-dtype array (and fail in np.maximum at night), so an unset
        # nighttime ratio is represented by NaN instead.
        nighttime_ratio = self.nighttime_solar_radiation_ratio
        if nighttime_ratio is None:
            nighttime_ratio = np.nan

        # Solar radiation ratio Rs/Rs0 (Allen et al., 1998, top of p. 75).
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            solar_radiation_ratio = np.where(
                clear_sky_solar_radiation > 0.05,
                incoming_solar_radiation / clear_sky_solar_radiation,
                nighttime_ratio,
            )
            solar_radiation_ratio = np.maximum(solar_radiation_ratio, 0.3)
            solar_radiation_ratio = np.minimum(solar_radiation_ratio, 1.0)

        factor3 = 1.35 * solar_radiation_ratio - 0.35

        return factor1 * factor2 * factor3

    def get_saturation_vapour_pressure(self, temperature):
        "Allen et al. (1998), p. 36, eq. 11."
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file. Only
            # RuntimeWarning is silenced, as everywhere else in this class.
            warnings.simplefilter("ignore", RuntimeWarning)
            return 0.6108 * math.e ** (17.27 * temperature / (237.3 + temperature))

    def get_soil_heat_flux_density(self, incoming_solar_radiation, rn):
        "Allen et al. (1998), p. 55, eq. 45 & 46."
        # 0.1 where incoming radiation exceeds 0.05, otherwise 0.5
        coefficient = np.where(incoming_solar_radiation > 0.05, 0.1, 0.5)
        return coefficient * rn

    def get_saturation_vapour_pressure_curve_slope(self, temperature):
        "Allen et al. (1998), p. 37, eq. 13."
        numerator = 4098 * self.get_saturation_vapour_pressure(temperature)
        with warnings.catch_warnings():
            # See comment about RuntimeWarning on top of the file
            warnings.simplefilter("ignore", RuntimeWarning)
            denominator = (temperature + 237.3) ** 2
            return numerator / denominator
haggregate/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .haggregate import * # NOQA
2
+ from .regularize import * # NOQA
3
+
4
+ __author__ = """Antonis Christofides"""
5
+ __email__ = "antonis@antonischristofides.com"
haggregate/cli.py ADDED
@@ -0,0 +1,91 @@
1
+ import configparser
2
+ import datetime as dt
3
+ import logging
4
+ import os
5
+ import sys
6
+ import traceback
7
+
8
+ import click
9
+
10
+ from haggregate import RegularizationMode, aggregate
11
+ from haggregate.regularize import regularize
12
+ from htimeseries import HTimeseries
13
+
14
+
15
def _get_required_option(section, option):
    """Return ``section[option]``; raise NoOptionError if it is not set."""
    value = section.get(option)
    if value is None:
        raise configparser.NoOptionError(option, section.name)
    return value


@click.command()
@click.argument("configfile")
def main(configfile):
    """Create lower-step timeseries from higher-step ones"""
    # The docstring above is the command's help text, so it is kept short.
    #
    # The configuration file has a [General] section (logfile, loglevel,
    # base_dir, target_step, min_count, missing_flag and
    # target_timestamp_offset) plus one section per time series, each with
    # source_file, target_file and method. Any error is logged and reported
    # as a click.ClickException.

    # Start by logging to the default stream of logging.StreamHandler (which
    # is stderr); later we will switch it according to config
    logger = logging.getLogger("haggregate")
    stream_handler = logging.StreamHandler()
    logger.addHandler(stream_handler)

    try:
        config = configparser.ConfigParser()
        with open(configfile) as f:
            config.read_file(f)

        # Read the [General] section
        logfile = config.get("General", "logfile", fallback="")
        loglevel = config.get("General", "loglevel", fallback="warning")
        base_dir = config.get("General", "base_dir", fallback=".")
        target_step = config.get("General", "target_step")
        min_count = config.getint("General", "min_count")
        missing_flag = config.get("General", "missing_flag")
        target_timestamp_offset = config.get(
            "General", "target_timestamp_offset", fallback=None
        )

        # Remove [General] and make sure there are more sections
        config.pop("General")
        if not config.sections():
            # NoSectionError would format this text as a section name
            # ("No section: '...'"), so the base configparser error is used.
            raise configparser.Error("No time series have been specified")

        # Setup logger
        logger.setLevel(loglevel.upper())
        if logfile:
            logger.removeHandler(stream_handler)
            logger.addHandler(logging.FileHandler(logfile))

        # Log start of execution
        logger.info("Starting haggregate, %s", dt.datetime.today().isoformat())

        # Read each section and do the work for it
        for section_name in config.sections():
            section = config[section_name]
            # Fail with the name of the missing option rather than with a
            # TypeError from os.path.join(base_dir, None).
            source_filename = os.path.join(
                base_dir, _get_required_option(section, "source_file")
            )
            target_filename = os.path.join(
                base_dir, _get_required_option(section, "target_file")
            )
            method = section.get("method")
            with open(source_filename, newline="\n") as f:
                ts = HTimeseries(
                    f, format=HTimeseries.FILE, default_tzinfo=dt.timezone.utc
                )
            if method == "mean":
                regularization_mode = RegularizationMode.INSTANTANEOUS
            else:
                regularization_mode = RegularizationMode.INTERVAL
            regts = regularize(ts, new_date_flag="DATEINSERT", mode=regularization_mode)
            aggts = aggregate(
                regts,
                target_step,
                method,
                min_count=min_count,
                missing_flag=missing_flag,
                target_timestamp_offset=target_timestamp_offset,
            )
            with open(target_filename, "w") as f:
                aggts.write(f, format=HTimeseries.FILE)

        # Log end of execution
        logger.info("Finished haggregate, %s", dt.datetime.today().isoformat())

    except Exception as e:
        # Top-level boundary: log, then report through click, keeping the
        # original exception as the cause.
        logger.error(str(e))
        logger.debug(traceback.format_exc())
        raise click.ClickException(str(e)) from e
88
+
89
+
90
# Allow running this module directly as a script; the exit status is whatever
# main() returns.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,155 @@
1
+ import re
2
+ from enum import Enum
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from htimeseries import HTimeseries
8
+
9
# Maps each supported aggregation method name to the pandas Series reduction
# that Aggregation.get_result_values() applies to every resampled group.
methods = {
    "sum": pd.Series.sum,
    "mean": pd.Series.mean,
    "max": pd.Series.max,
    "min": pd.Series.min,
}
15
+
16
+
17
class AggregateError(Exception):
    """Raised when a time series cannot be aggregated as requested.

    In this module it is raised when the source time step cannot be inferred,
    when the target step is not supported, and when a timestamp offset cannot
    be parsed.
    """

    pass
19
+
20
+
21
def aggregate(
    hts,
    target_step,
    method,
    min_count=1,
    missing_flag="MISS",
    target_timestamp_offset=None,
):
    """Aggregate the time series ``hts`` to ``target_step`` and return the result.

    This is a convenience wrapper: it builds an ``Aggregation`` from the
    arguments, executes it, and returns its ``result``.

    ``method`` is a key of ``methods``. ``min_count``, ``missing_flag`` and
    ``target_timestamp_offset`` are handed to the ``Aggregation`` unchanged.
    """
    options = {
        "source_timeseries": hts,
        "target_step": target_step,
        "method": method,
        "min_count": min_count,
        "missing_flag": missing_flag,
        "target_timestamp_offset": target_timestamp_offset,
    }
    job = Aggregation(**options)
    job.execute()
    return job.result
39
+
40
+
41
class Aggregation:
    """One aggregation run: a source time series plus the requested options.

    Every keyword argument given to the constructor becomes an attribute. The
    methods below read ``source_timeseries``, ``target_step``, ``method``,
    ``min_count``, ``missing_flag`` and ``target_timestamp_offset``. After
    ``execute()`` the aggregated series is available as ``result``.
    """

    def __init__(self, **kwargs):
        for name in kwargs:
            setattr(self, name, kwargs[name])
        self.source = SourceTimeseries(self.source_timeseries)
        self.result = AggregatedTimeseries()
        self.result.time_step = self.target_step

    def execute(self):
        """Fill ``self.result``.

        If the source frequency cannot be inferred (CannotInferFrequency),
        only the metadata is set and no values are produced.
        """
        self.result.set_metadata(self.source_timeseries)
        try:
            self.source.normalize(self.target_step)
        except CannotInferFrequency:
            return
        self.do_aggregation()
        self.result.remove_leading_and_trailing_nans()
        self.result.add_timestamp_offset(self.target_timestamp_offset)

    def do_aggregation(self):
        """Resample the source and compute the result values and flags."""
        self.create_resampler()
        self.get_result_values()
        self.get_result_flags()

    def create_resampler(self):
        """Group the source values into target-step bins.

        Bins are closed on, and labelled with, their right edge.
        """
        source_values = self.source.data["value"]
        self.resampler = source_values.resample(
            self.result.time_step, closed="right", label="right"
        )

    def get_result_values(self):
        """Aggregate each bin; bins with fewer than ``min_count`` values get NaN."""
        aggregated = self.resampler.agg(methods[self.method])
        too_few_values = self.resampler.count() < self.min_count
        aggregated[too_few_values] = np.nan
        self.result.data["value"] = aggregated

    def get_result_flags(self):
        """Flag each bin that has fewer values than a full bin would hold."""
        # Number of source records in one complete target step
        full_count = int(pd.Timedelta(self.result.time_step) / self.source.freq)
        is_incomplete = self.resampler.count() < full_count

        def flag_for(incomplete):
            return self.missing_flag if incomplete else ""

        self.result.data["flags"] = is_incomplete.apply(flag_for)
81
+
82
+
83
class CannotInferFrequency(Exception):
    """Raised by SourceTimeseries.normalize() when pandas.infer_freq() fails.

    Aggregation.execute() catches it and returns without aggregating.
    """

    pass
85
+
86
+
87
# Names of the metadata attributes that SourceTimeseries and
# AggregatedTimeseries copy from the time series they are built from.
attrs = ("unit", "timezone", "interval_type", "variable", "precision", "location")
88
+
89
+
90
class SourceTimeseries(HTimeseries):
    """The time series to be aggregated, together with its inferred step."""

    def __init__(self, s):
        # HTimeseries.__init__ is not called; the metadata listed in ``attrs``
        # (None where missing) and the data are taken over from ``s``.
        for attr in attrs:
            setattr(self, attr, getattr(s, attr, None))
        self.data = s.data

    def normalize(self, target_step):
        """Reindex so that it has no missing records but has NaNs instead, starting from
        one before and ending in one after.

        The inferred step is stored in ``self.freq``. Raises
        CannotInferFrequency if pandas raises ValueError while inferring the
        step, and AggregateError if no step can be inferred.
        """
        current_range = self.data.index
        try:
            self.freq = pd.tseries.frequencies.to_offset(pd.infer_freq(current_range))
        except ValueError as e:
            raise CannotInferFrequency() from e
        if self.freq is None:
            raise AggregateError(
                "Can't infer time series step; maybe it's not regularized"
            )
        # Stepping back one second makes a first record that lies exactly on
        # a target-step boundary fall into the step that ends there. The
        # keyword form avoids the "S" unit alias, deprecated in recent pandas.
        one_second = pd.Timedelta(seconds=1)
        first_timestamp = (current_range[0] - one_second).floor(target_step)
        end_timestamp = current_range[-1].ceil(target_step)
        new_range = pd.date_range(first_timestamp, end_timestamp, freq=self.freq)
        self.data = self.data.reindex(new_range)
113
+
114
+
115
class AggregatedTimeseries(HTimeseries):
    """The result of an aggregation.

    ``time_step`` must be assigned before ``set_metadata()`` is called.
    """

    def set_metadata(self, source_timeseries):
        """Copy metadata from ``source_timeseries`` and derive title and comment.

        Raises AggregateError if ``self.time_step`` is not "1H" or "1D".
        """
        for attr in attrs:
            setattr(self, attr, getattr(source_timeseries, attr, None))
        if self.time_step not in ("1H", "1D"):
            raise AggregateError("The target step can currently only be 1H or 1D")
        if hasattr(source_timeseries, "title"):
            self.title = "Aggregated " + source_timeseries.title
        if hasattr(source_timeseries, "comment"):
            self.comment = (
                "Created by aggregating the time series that had this comment:\n\n"
                + source_timeseries.comment
            )

    def remove_leading_and_trailing_nans(self):
        """Drop the records before the first and after the last non-null value.

        NaNs between valid values are kept. If there is no valid value at
        all, the data becomes empty.
        """
        # A single positional slice instead of dropping one row per
        # iteration, which was quadratic and relied on unique index labels.
        valid_positions = np.flatnonzero(pd.notnull(self.data["value"]).to_numpy())
        if valid_positions.size == 0:
            self.data = self.data.iloc[0:0].copy()
            return
        start = valid_positions[0]
        stop = valid_positions[-1] + 1
        self.data = self.data.iloc[start:stop].copy()

    def add_timestamp_offset(self, target_timestamp_offset):
        """Shift the index by ``target_timestamp_offset``, if one is given.

        An offset without a leading "-" moves the timestamps backwards by
        that amount; an offset with a leading "-" moves them forwards.
        """
        if not target_timestamp_offset:
            return
        periods = 1 if target_timestamp_offset.startswith("-") else -1
        freq = target_timestamp_offset.lstrip("-")
        self.data = self.data.shift(periods, freq=freq)
140
+
141
+
142
def _get_offset_in_minutes(timestamp_offset):
    """Return the signed number of minutes in an offset such as "1min" or "-5T".

    Raises AggregateError if the string is not an optional "-", one or more
    digits, and then "T" or "min".
    """
    # \d+ rather than \d*: a string without digits (e.g. "min") must be
    # rejected here instead of reaching int("") and raising ValueError.
    m = re.match(r"(-?)(\d+)(T|min)$", timestamp_offset)
    if not m:
        raise AggregateError(
            "The target timestamp offset can currently only be a number of minutes "
            "such as 1min"
        )
    sign = -1 if m.group(1) == "-" else 1
    return sign * int(m.group(2))
151
+
152
+
153
class RegularizationMode(Enum):
    """Modes that can be passed as ``mode`` to ``regularize()``.

    The command-line tool uses INSTANTANEOUS when the aggregation method is
    "mean" and INTERVAL for every other method.
    """

    INSTANTANEOUS = 1
    INTERVAL = 2