anemoi-datasets 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. anemoi/datasets/__main__.py +7 -51
  2. anemoi/datasets/_version.py +2 -2
  3. anemoi/datasets/commands/__init__.py +5 -59
  4. anemoi/datasets/commands/copy.py +141 -83
  5. anemoi/datasets/commands/create.py +14 -3
  6. anemoi/datasets/commands/inspect/__init__.py +1 -5
  7. anemoi/datasets/compute/{perturbations.py → recentre.py} +24 -23
  8. anemoi/datasets/create/__init__.py +3 -0
  9. anemoi/datasets/create/config.py +7 -1
  10. anemoi/datasets/create/functions/sources/accumulations.py +7 -3
  11. anemoi/datasets/create/functions/sources/hindcasts.py +437 -0
  12. anemoi/datasets/create/functions/sources/mars.py +13 -7
  13. anemoi/datasets/create/functions/sources/{perturbations.py → recentre.py} +5 -5
  14. anemoi/datasets/create/input.py +0 -5
  15. anemoi/datasets/create/loaders.py +36 -0
  16. anemoi/datasets/create/persistent.py +1 -3
  17. anemoi/datasets/create/statistics/__init__.py +7 -17
  18. anemoi/datasets/create/statistics/summary.py +1 -4
  19. anemoi/datasets/create/writer.py +4 -3
  20. anemoi/datasets/data/indexing.py +1 -3
  21. anemoi/datasets/data/stores.py +2 -6
  22. anemoi/datasets/data/unchecked.py +1 -6
  23. anemoi/datasets/grids.py +2 -2
  24. {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/METADATA +30 -21
  25. {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/RECORD +29 -28
  26. {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/LICENSE +0 -0
  27. {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/WHEEL +0 -0
  28. {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/entry_points.txt +0 -0
  29. {anemoi_datasets-0.2.0.dist-info → anemoi_datasets-0.3.0.dist-info}/top_level.txt +0 -0
@@ -154,10 +154,16 @@ class LoadersConfig(Config):
154
154
  self.setdefault("build", Config())
155
155
  self.build.setdefault("group_by", "monthly")
156
156
  self.build.setdefault("use_grib_paramid", False)
157
+ self.build.setdefault("variable_naming", "default")
158
+ variable_naming = dict(
159
+ param="{param}",
160
+ param_levelist="{param}_{levelist}",
161
+ default="{param}_{levelist}",
162
+ ).get(self.build.variable_naming, self.build.variable_naming)
157
163
 
158
164
  self.setdefault("output", Config())
159
165
  self.output.setdefault("order_by", ["valid_datetime", "param_level", "number"])
160
- self.output.setdefault("remapping", Config(param_level="{param}_{levelist}"))
166
+ self.output.setdefault("remapping", Config(param_level=variable_naming))
161
167
  self.output.setdefault("statistics", "param_level")
162
168
  self.output.setdefault("chunking", Config(dates=1, ensembles=1))
163
169
  self.output.setdefault("dtype", "float32")
@@ -7,6 +7,7 @@
7
7
  # nor does it submit to any jurisdiction.
8
8
  #
9
9
  import datetime
10
+ import logging
10
11
  import warnings
11
12
  from copy import deepcopy
12
13
 
@@ -18,7 +19,7 @@ from climetlab.utils.availability import Availability
18
19
 
19
20
  from anemoi.datasets.create.utils import to_datetime_list
20
21
 
21
- DEBUG = True
22
+ LOG = logging.getLogger(__name__)
22
23
 
23
24
 
24
25
  def member(field):
@@ -73,7 +74,10 @@ class Accumulation:
73
74
  def write(self, template):
74
75
 
75
76
  assert self.startStep != self.endStep, (self.startStep, self.endStep)
76
- assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
77
+ if np.all(self.values < 0):
78
+ LOG.warning(
79
+ f"Negative values when computing accumutation for {self.param} ({self.date} {self.time}): min={np.amin(self.values)} max={np.amax(self.values)}"
80
+ )
77
81
 
78
82
  self.out.write(
79
83
  self.values,
@@ -395,6 +399,7 @@ def accumulations(context, dates, **request):
395
399
 
396
400
  KWARGS = {
397
401
  ("od", "oper"): dict(patch=scda),
402
+ ("od", "elda"): dict(base_times=(6, 18)),
398
403
  ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
399
404
  ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
400
405
  }
@@ -431,6 +436,5 @@ if __name__ == "__main__":
431
436
  dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
432
437
  dates = to_datetime_list(dates)
433
438
 
434
- DEBUG = True
435
439
  for f in accumulations(None, dates, **config):
436
440
  print(f, f.to_numpy().mean())
@@ -0,0 +1,437 @@
1
+ # (C) Copyright 2024 ECMWF.
2
+ #
3
+ # This software is licensed under the terms of the Apache Licence Version 2.0
4
+ # which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
5
+ # In applying this licence, ECMWF does not waive the privileges and immunities
6
+ # granted to it by virtue of its status as an intergovernmental organisation
7
+ # nor does it submit to any jurisdiction.
8
+ #
9
+ import datetime
10
+ import warnings
11
+ from copy import deepcopy
12
+
13
+ import climetlab as cml
14
+ import numpy as np
15
+ from climetlab.core.temporary import temp_file
16
+ from climetlab.readers.grib.output import new_grib_output
17
+ from climetlab.utils.availability import Availability
18
+
19
+ from anemoi.datasets.create.functions.sources.mars import mars
20
+
21
+ DEBUG = True
22
+
23
+
24
+ def member(field):
25
+ # Bug in eccodes has number=0 randomly
26
+ number = field.metadata("number")
27
+ if number is None:
28
+ number = 0
29
+ return number
30
+
31
+
32
+ class Accumulation:
33
+ def __init__(self, out, /, param, date, time, number, step, frequency, **kwargs):
34
+ self.out = out
35
+ self.param = param
36
+ self.date = date
37
+ self.time = time
38
+ self.steps = step
39
+ self.number = number
40
+ self.values = None
41
+ self.seen = set()
42
+ self.startStep = None
43
+ self.endStep = None
44
+ self.done = False
45
+ self.frequency = frequency
46
+ self._check = None
47
+
48
+ @property
49
+ def key(self):
50
+ return (self.param, self.date, self.time, self.steps, self.number)
51
+
52
+ def check(self, field):
53
+ if self._check is None:
54
+ self._check = field.as_mars()
55
+
56
+ assert self.param == field.metadata("param"), (self.param, field.metadata("param"))
57
+ assert self.date == field.metadata("date"), (self.date, field.metadata("date"))
58
+ assert self.time == field.metadata("time"), (self.time, field.metadata("time"))
59
+ assert self.number == member(field), (self.number, member(field))
60
+
61
+ return
62
+
63
+ mars = field.as_mars()
64
+ keys1 = sorted(self._check.keys())
65
+ keys2 = sorted(mars.keys())
66
+
67
+ assert keys1 == keys2, (keys1, keys2)
68
+
69
+ for k in keys1:
70
+ if k not in ("step",):
71
+ assert self._check[k] == mars[k], (k, self._check[k], mars[k])
72
+
73
+ def write(self, template):
74
+
75
+ assert self.startStep != self.endStep, (self.startStep, self.endStep)
76
+ assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
77
+
78
+ self.out.write(
79
+ self.values,
80
+ template=template,
81
+ stepType="accum",
82
+ startStep=self.startStep,
83
+ endStep=self.endStep,
84
+ )
85
+ self.values = None
86
+ self.done = True
87
+
88
+ def add(self, field, values):
89
+
90
+ self.check(field)
91
+
92
+ step = field.metadata("step")
93
+ if step not in self.steps:
94
+ return
95
+
96
+ if not np.all(values >= 0):
97
+ warnings.warn(f"Negative values for {field}: {np.amin(values)} {np.amax(values)}")
98
+
99
+ assert not self.done, (self.key, step)
100
+ assert step not in self.seen, (self.key, step)
101
+
102
+ startStep = field.metadata("startStep")
103
+ endStep = field.metadata("endStep")
104
+
105
+ if self.buggy_steps and startStep == endStep:
106
+ startStep = 0
107
+
108
+ assert step == endStep, (startStep, endStep, step)
109
+
110
+ self.compute(values, startStep, endStep)
111
+
112
+ self.seen.add(step)
113
+
114
+ if len(self.seen) == len(self.steps):
115
+ self.write(template=field)
116
+
117
+ @classmethod
118
+ def mars_date_time_steps(cls, dates, step1, step2, frequency, base_times, adjust_step):
119
+
120
+ # assert step1 > 0, (step1, step2, frequency)
121
+
122
+ for valid_date in dates:
123
+ base_date = valid_date - datetime.timedelta(hours=step2)
124
+ add_step = 0
125
+ if base_date.hour not in base_times:
126
+ if not adjust_step:
127
+ raise ValueError(
128
+ f"Cannot find a base time in {base_times} that validates on {valid_date.isoformat()} for step={step2}"
129
+ )
130
+
131
+ while base_date.hour not in base_times:
132
+ # print(f'{base_date=}, {base_times=}, {add_step=} {frequency=}')
133
+ base_date -= datetime.timedelta(hours=1)
134
+ add_step += 1
135
+
136
+ yield cls._mars_date_time_step(base_date, step1, step2, add_step, frequency)
137
+
138
+ def __repr__(self) -> str:
139
+ return f"{self.__class__.__name__}({self.key})"
140
+
141
+
142
+ class AccumulationFromStart(Accumulation):
143
+ buggy_steps = True
144
+
145
+ def compute(self, values, startStep, endStep):
146
+
147
+ assert startStep == 0, startStep
148
+
149
+ if self.values is None:
150
+
151
+ self.values = np.copy(values)
152
+ self.startStep = 0
153
+ self.endStep = endStep
154
+
155
+ else:
156
+ assert endStep != self.endStep, (self.endStep, endStep)
157
+
158
+ if endStep > self.endStep:
159
+ # assert endStep - self.endStep == self.stepping, (self.endStep, endStep, self.stepping)
160
+ self.values = values - self.values
161
+ self.startStep = self.endStep
162
+ self.endStep = endStep
163
+ else:
164
+ # assert self.endStep - endStep == self.stepping, (self.endStep, endStep, self.stepping)
165
+ self.values = self.values - values
166
+ self.startStep = endStep
167
+
168
+ if not np.all(self.values >= 0):
169
+ warnings.warn(f"Negative values for {self.param}: {np.amin(self.values)} {np.amax(self.values)}")
170
+ self.values = np.maximum(self.values, 0)
171
+
172
+ @classmethod
173
+ def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
174
+ assert not frequency, frequency
175
+
176
+ steps = (step1 + add_step, step2 + add_step)
177
+ if steps[0] == 0:
178
+ steps = (steps[1],)
179
+
180
+ return (
181
+ base_date.year * 10000 + base_date.month * 100 + base_date.day,
182
+ base_date.hour * 100 + base_date.minute,
183
+ steps,
184
+ )
185
+
186
+
187
+ class AccumulationFromLastStep(Accumulation):
188
+ buggy_steps = False
189
+
190
+ def compute(self, values, startStep, endStep):
191
+
192
+ assert endStep - startStep == self.frequency, (startStep, endStep, self.frequency)
193
+
194
+ if self.startStep is None:
195
+ self.startStep = startStep
196
+ else:
197
+ self.startStep = min(self.startStep, startStep)
198
+
199
+ if self.endStep is None:
200
+ self.endStep = endStep
201
+ else:
202
+ self.endStep = max(self.endStep, endStep)
203
+
204
+ if self.values is None:
205
+ self.values = np.zeros_like(values)
206
+
207
+ self.values += values
208
+
209
+ @classmethod
210
+ def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
211
+ assert frequency > 0, frequency
212
+ # assert step1 > 0, (step1, step2, frequency, add_step, base_date)
213
+
214
+ steps = []
215
+ for step in range(step1 + frequency, step2 + frequency, frequency):
216
+ steps.append(step + add_step)
217
+ return (
218
+ base_date.year * 10000 + base_date.month * 100 + base_date.day,
219
+ base_date.hour * 100 + base_date.minute,
220
+ tuple(steps),
221
+ )
222
+
223
+
224
+ def identity(x):
225
+ return x
226
+
227
+
228
+ def compute_accumulations(
229
+ dates,
230
+ request,
231
+ user_accumulation_period=6,
232
+ data_accumulation_period=None,
233
+ patch=identity,
234
+ base_times=None,
235
+ ):
236
+ adjust_step = isinstance(user_accumulation_period, int)
237
+
238
+ if not isinstance(user_accumulation_period, (list, tuple)):
239
+ user_accumulation_period = (0, user_accumulation_period)
240
+
241
+ assert len(user_accumulation_period) == 2, user_accumulation_period
242
+ step1, step2 = user_accumulation_period
243
+ assert step1 < step2, user_accumulation_period
244
+
245
+ if base_times is None:
246
+ base_times = [0, 6, 12, 18]
247
+
248
+ base_times = [t // 100 if t > 100 else t for t in base_times]
249
+
250
+ AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep
251
+
252
+ mars_date_time_steps = AccumulationClass.mars_date_time_steps(
253
+ dates,
254
+ step1,
255
+ step2,
256
+ data_accumulation_period,
257
+ base_times,
258
+ adjust_step,
259
+ )
260
+
261
+ request = deepcopy(request)
262
+
263
+ param = request["param"]
264
+ if not isinstance(param, (list, tuple)):
265
+ param = [param]
266
+
267
+ number = request.get("number", [0])
268
+ assert isinstance(number, (list, tuple))
269
+
270
+ frequency = data_accumulation_period
271
+
272
+ type_ = request.get("type", "an")
273
+ if type_ == "an":
274
+ type_ = "fc"
275
+
276
+ request.update({"type": type_, "levtype": "sfc"})
277
+
278
+ tmp = temp_file()
279
+ path = tmp.path
280
+ out = new_grib_output(path)
281
+
282
+ requests = []
283
+
284
+ accumulations = {}
285
+
286
+ for date, time, steps in mars_date_time_steps:
287
+ for p in param:
288
+ for n in number:
289
+ requests.append(
290
+ patch(
291
+ {
292
+ "param": p,
293
+ "date": date,
294
+ "time": time,
295
+ "step": sorted(steps),
296
+ "number": n,
297
+ }
298
+ )
299
+ )
300
+
301
+ compressed = Availability(requests)
302
+ ds = cml.load_source("empty")
303
+ for r in compressed.iterate():
304
+ request.update(r)
305
+ print("🌧️", request)
306
+ ds = ds + cml.load_source("mars", **request)
307
+
308
+ accumulations = {}
309
+ for a in [AccumulationClass(out, frequency=frequency, **r) for r in requests]:
310
+ for s in a.steps:
311
+ key = (a.param, a.date, a.time, s, a.number)
312
+ accumulations.setdefault(key, []).append(a)
313
+
314
+ for field in ds:
315
+ key = (
316
+ field.metadata("param"),
317
+ field.metadata("date"),
318
+ field.metadata("time"),
319
+ field.metadata("step"),
320
+ member(field),
321
+ )
322
+ values = field.values # optimisation
323
+ assert accumulations[key], key
324
+ for a in accumulations[key]:
325
+ a.add(field, values)
326
+
327
+ for acc in accumulations.values():
328
+ for a in acc:
329
+ assert a.done, (a.key, a.seen, a.steps)
330
+
331
+ out.close()
332
+
333
+ ds = cml.load_source("file", path)
334
+
335
+ assert len(ds) / len(param) / len(number) == len(dates), (
336
+ len(ds),
337
+ len(param),
338
+ len(dates),
339
+ )
340
+ ds._tmp = tmp
341
+
342
+ return ds
343
+
344
+
345
+ def to_list(x):
346
+ if isinstance(x, (list, tuple)):
347
+ return x
348
+ return [x]
349
+
350
+
351
+ def normalise_time_to_hours(r):
352
+ r = deepcopy(r)
353
+ if "time" not in r:
354
+ return r
355
+
356
+ times = []
357
+ for t in to_list(r["time"]):
358
+ assert len(t) == 4, r
359
+ assert t.endswith("00"), r
360
+ times.append(int(t) // 100)
361
+ r["time"] = tuple(times)
362
+ return r
363
+
364
+
365
+ def normalise_number(r):
366
+ if "number" not in r:
367
+ return r
368
+ number = r["number"]
369
+ number = to_list(number)
370
+
371
+ if len(number) > 4 and (number[1] == "to" and number[3] == "by"):
372
+ return list(range(int(number[0]), int(number[2]) + 1, int(number[4])))
373
+
374
+ if len(number) > 2 and number[1] == "to":
375
+ return list(range(int(number[0]), int(number[2]) + 1))
376
+
377
+ r["number"] = number
378
+ return r
379
+
380
+
381
+ class HindcastCompute:
382
+ def __init__(self, base_times, available_steps, request):
383
+ self.base_times = base_times
384
+ self.available_steps = available_steps
385
+ self.request = request
386
+
387
+ def compute_hindcast(self, date):
388
+ for step in self.available_steps:
389
+ start_date = date - datetime.timedelta(hours=step)
390
+ hours = start_date.hour
391
+ if hours in self.base_times:
392
+ r = deepcopy(self.request)
393
+ r["date"] = start_date
394
+ r["time"] = f"{start_date.hour:02d}00"
395
+ r["step"] = step
396
+ return r
397
+ raise ValueError(
398
+ f"Cannot find data for {self.request} for {date} (base_times={self.base_times}, available_steps={self.available_steps})"
399
+ )
400
+
401
+
402
+ def use_reference_year(reference_year, request):
403
+ request = deepcopy(request)
404
+ hdate = request.pop("date")
405
+ date = datetime.datetime(reference_year, hdate.month, hdate.day)
406
+ request.update(date=date.strftime("%Y-%m-%d"), hdate=hdate.strftime("%Y-%m-%d"))
407
+ return request
408
+
409
+
410
+ def hindcasts(context, dates, **request):
411
+ request["param"] = to_list(request["param"])
412
+ request["step"] = to_list(request["step"])
413
+ request["step"] = [int(_) for _ in request["step"]]
414
+
415
+ if request.get("stream") == "enfh" and "base_times" not in request:
416
+ request["base_times"] = [0]
417
+
418
+ available_steps = request.pop("step")
419
+ available_steps = to_list(available_steps)
420
+
421
+ base_times = request.pop("base_times")
422
+
423
+ reference_year = request.pop("reference_year")
424
+
425
+ context.trace("H️", f"hindcast {request} {base_times} {available_steps} {reference_year}")
426
+
427
+ c = HindcastCompute(base_times, available_steps, request)
428
+ requests = []
429
+ for d in dates:
430
+ req = c.compute_hindcast(d)
431
+ req = use_reference_year(reference_year, req)
432
+
433
+ requests.append(req)
434
+ return mars(context, dates, *requests, date_key="hdate")
435
+
436
+
437
+ execute = hindcasts
@@ -42,15 +42,21 @@ def normalise_time_delta(t):
42
42
  return t
43
43
 
44
44
 
45
- def _expand_mars_request(request, date):
45
+ def _expand_mars_request(request, date, date_key="date"):
46
46
  requests = []
47
47
  step = to_list(request.get("step", [0]))
48
48
  for s in step:
49
49
  r = deepcopy(request)
50
- base = date - datetime.timedelta(hours=int(s))
50
+
51
+ if isinstance(s, str) and "-" in s:
52
+ assert s.count("-") == 1, s
53
+ # this takes care of the cases where the step is a period such as 0-24 or 12-24
54
+ hours = int(str(s).split("-")[-1])
55
+
56
+ base = date - datetime.timedelta(hours=hours)
51
57
  r.update(
52
58
  {
53
- "date": base.strftime("%Y%m%d"),
59
+ date_key: base.strftime("%Y%m%d"),
54
60
  "time": base.strftime("%H%M"),
55
61
  "step": s,
56
62
  }
@@ -66,13 +72,13 @@ def _expand_mars_request(request, date):
66
72
  return requests
67
73
 
68
74
 
69
- def factorise_requests(dates, *requests):
75
+ def factorise_requests(dates, *requests, date_key="date"):
70
76
  updates = []
71
77
  for req in requests:
72
78
  # req = normalise_request(req)
73
79
 
74
80
  for d in dates:
75
- updates += _expand_mars_request(req, date=d)
81
+ updates += _expand_mars_request(req, date=d, date_key=date_key)
76
82
 
77
83
  compressed = Availability(updates)
78
84
  for r in compressed.iterate():
@@ -96,11 +102,11 @@ def use_grib_paramid(r):
96
102
  return r
97
103
 
98
104
 
99
- def mars(context, dates, *requests, **kwargs):
105
+ def mars(context, dates, *requests, date_key="date", **kwargs):
100
106
  if not requests:
101
107
  requests = [kwargs]
102
108
 
103
- requests = factorise_requests(dates, *requests)
109
+ requests = factorise_requests(dates, *requests, date_key=date_key)
104
110
  ds = load_source("empty")
105
111
  for r in requests:
106
112
  r = {k: v for k, v in r.items() if v != ("-",)}
@@ -8,7 +8,7 @@
8
8
  #
9
9
  from copy import deepcopy
10
10
 
11
- from anemoi.datasets.compute.perturbations import perturbations as compute_perturbations
11
+ from anemoi.datasets.compute.recentre import recentre as _recentre
12
12
 
13
13
  from .mars import mars
14
14
 
@@ -50,10 +50,10 @@ def load_if_needed(context, dates, dict_or_dataset):
50
50
  return dict_or_dataset
51
51
 
52
52
 
53
- def perturbations(context, dates, members, center, remapping={}, patches={}):
53
+ def recentre(context, dates, members, centre, alpha=1.0, remapping={}, patches={}):
54
54
  members = load_if_needed(context, dates, members)
55
- center = load_if_needed(context, dates, center)
56
- return compute_perturbations(members, center)
55
+ centre = load_if_needed(context, dates, centre)
56
+ return _recentre(members=members, centre=centre, alpha=alpha)
57
57
 
58
58
 
59
- execute = perturbations
59
+ execute = recentre
@@ -353,11 +353,6 @@ class Result(HasCoordsMixin):
353
353
  """Returns a dictionary with the parameters needed to retrieve the data."""
354
354
  return _data_request(self.datasource)
355
355
 
356
- @property
357
- def variables_with_nans(self):
358
- print("❌❌HERE")
359
- return
360
-
361
356
  def get_cube(self):
362
357
  trace("🧊", f"getting cube from {self.__class__.__name__}")
363
358
  ds = self.datasource
@@ -25,6 +25,7 @@ from anemoi.datasets.dates.groups import Groups
25
25
  from .check import DatasetName
26
26
  from .check import check_data_values
27
27
  from .chunks import ChunkFilter
28
+ from .config import DictObj
28
29
  from .config import build_output
29
30
  from .config import loader_config
30
31
  from .input import build_input
@@ -55,6 +56,8 @@ class GenericDatasetHandler:
55
56
  self.path = path
56
57
  self.kwargs = kwargs
57
58
  self.print = print
59
+ if "test" in kwargs:
60
+ self.test = kwargs["test"]
58
61
 
59
62
  @classmethod
60
63
  def from_config(cls, *, config, path, print=print, **kwargs):
@@ -157,7 +160,35 @@ class InitialiserLoader(Loader):
157
160
 
158
161
  self.tmp_statistics.delete()
159
162
 
163
+ if self.test:
164
+
165
+ def test_dates(cfg, n=4):
166
+ LOG.warn("Running in test mode. Changing the list of dates to use only 4.")
167
+ groups = Groups(**cfg)
168
+ dates = groups.dates
169
+ return dict(start=dates[0], end=dates[n - 1], frequency=dates.frequency, group_by=n)
170
+
171
+ self.main_config.dates = test_dates(self.main_config.dates)
172
+
173
+ def set_to_test_mode(obj):
174
+ if isinstance(obj, (list, tuple)):
175
+ for v in obj:
176
+ set_to_test_mode(v)
177
+ return
178
+ if isinstance(obj, (dict, DictObj)):
179
+ if "grid" in obj:
180
+ obj["grid"] = "20./20."
181
+ LOG.warn(f"Running in test mode. Setting grid to {obj['grid']}")
182
+ if "number" in obj:
183
+ obj["number"] = obj["number"][0:3]
184
+ LOG.warn(f"Running in test mode. Setting number to {obj['number']}")
185
+ for k, v in obj.items():
186
+ set_to_test_mode(v)
187
+
188
+ set_to_test_mode(self.main_config)
189
+
160
190
  LOG.info(self.main_config.dates)
191
+
161
192
  self.groups = Groups(**self.main_config.dates)
162
193
 
163
194
  self.output = build_output(self.main_config.output, parent=self)
@@ -615,6 +646,11 @@ class GenericAdditions(GenericDatasetHandler):
615
646
  assert len(found) + len(missing) == len(self.dates), (len(found), len(missing), len(self.dates))
616
647
  assert found.union(missing) == set(self.dates), (found, missing, set(self.dates))
617
648
 
649
+ if len(ifound) < 2:
650
+ LOG.warn(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.")
651
+ self.tmp_storage.delete()
652
+ return
653
+
618
654
  mask = sorted(list(ifound))
619
655
  for k in ["minimum", "maximum", "sums", "squares", "count", "has_nans"]:
620
656
  agg[k] = agg[k][mask, ...]
@@ -28,9 +28,7 @@ class PersistentDict:
28
28
  # Used in parrallel, during data loading,
29
29
  # to write data in pickle files.
30
30
  def __init__(self, directory, create=True):
31
- """dirname: str
32
- The directory where the data will be stored.
33
- """
31
+ """dirname: str The directory where the data will be stored."""
34
32
  self.dirname = directory
35
33
  self.name, self.ext = os.path.splitext(os.path.basename(self.dirname))
36
34
  if create: