anemoi-datasets 0.3.10__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (61)
  1. anemoi/datasets/_version.py +2 -2
  2. anemoi/datasets/commands/compare.py +59 -0
  3. anemoi/datasets/commands/create.py +84 -3
  4. anemoi/datasets/commands/inspect.py +9 -9
  5. anemoi/datasets/commands/scan.py +4 -4
  6. anemoi/datasets/compute/recentre.py +14 -9
  7. anemoi/datasets/create/__init__.py +44 -17
  8. anemoi/datasets/create/check.py +6 -5
  9. anemoi/datasets/create/chunks.py +1 -1
  10. anemoi/datasets/create/config.py +6 -27
  11. anemoi/datasets/create/functions/__init__.py +3 -3
  12. anemoi/datasets/create/functions/filters/empty.py +4 -4
  13. anemoi/datasets/create/functions/filters/rename.py +14 -6
  14. anemoi/datasets/create/functions/filters/rotate_winds.py +16 -60
  15. anemoi/datasets/create/functions/filters/unrotate_winds.py +14 -64
  16. anemoi/datasets/create/functions/sources/__init__.py +39 -0
  17. anemoi/datasets/create/functions/sources/accumulations.py +38 -56
  18. anemoi/datasets/create/functions/sources/constants.py +11 -4
  19. anemoi/datasets/create/functions/sources/empty.py +2 -2
  20. anemoi/datasets/create/functions/sources/forcings.py +3 -3
  21. anemoi/datasets/create/functions/sources/grib.py +8 -4
  22. anemoi/datasets/create/functions/sources/hindcasts.py +32 -364
  23. anemoi/datasets/create/functions/sources/mars.py +57 -26
  24. anemoi/datasets/create/functions/sources/netcdf.py +2 -60
  25. anemoi/datasets/create/functions/sources/opendap.py +3 -2
  26. anemoi/datasets/create/functions/sources/source.py +3 -3
  27. anemoi/datasets/create/functions/sources/tendencies.py +7 -7
  28. anemoi/datasets/create/functions/sources/xarray/__init__.py +73 -0
  29. anemoi/datasets/create/functions/sources/xarray/coordinates.py +234 -0
  30. anemoi/datasets/create/functions/sources/xarray/field.py +109 -0
  31. anemoi/datasets/create/functions/sources/xarray/fieldlist.py +171 -0
  32. anemoi/datasets/create/functions/sources/xarray/flavour.py +330 -0
  33. anemoi/datasets/create/functions/sources/xarray/grid.py +46 -0
  34. anemoi/datasets/create/functions/sources/xarray/metadata.py +161 -0
  35. anemoi/datasets/create/functions/sources/xarray/time.py +98 -0
  36. anemoi/datasets/create/functions/sources/xarray/variable.py +198 -0
  37. anemoi/datasets/create/functions/sources/xarray_kerchunk.py +42 -0
  38. anemoi/datasets/create/functions/sources/xarray_zarr.py +15 -0
  39. anemoi/datasets/create/functions/sources/zenodo.py +40 -0
  40. anemoi/datasets/create/input.py +309 -191
  41. anemoi/datasets/create/loaders.py +155 -77
  42. anemoi/datasets/create/patch.py +17 -14
  43. anemoi/datasets/create/persistent.py +1 -1
  44. anemoi/datasets/create/size.py +4 -5
  45. anemoi/datasets/create/statistics/__init__.py +51 -17
  46. anemoi/datasets/create/template.py +11 -61
  47. anemoi/datasets/create/trace.py +91 -0
  48. anemoi/datasets/create/utils.py +5 -52
  49. anemoi/datasets/create/zarr.py +24 -10
  50. anemoi/datasets/data/dataset.py +4 -4
  51. anemoi/datasets/data/misc.py +9 -37
  52. anemoi/datasets/data/stores.py +37 -14
  53. anemoi/datasets/dates/__init__.py +7 -1
  54. anemoi/datasets/dates/groups.py +3 -0
  55. {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/METADATA +24 -8
  56. anemoi_datasets-0.4.2.dist-info/RECORD +86 -0
  57. {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/WHEEL +1 -1
  58. anemoi_datasets-0.3.10.dist-info/RECORD +0 -73
  59. {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/LICENSE +0 -0
  60. {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/entry_points.txt +0 -0
  61. {anemoi_datasets-0.3.10.dist-info → anemoi_datasets-0.4.2.dist-info}/top_level.txt +0 -0
anemoi/datasets/create/functions/sources/hindcasts.py
@@ -7,21 +7,13 @@
 # nor does it submit to any jurisdiction.
 #
 import datetime
-import warnings
-from copy import deepcopy
-
-import climetlab as cml
-import numpy as np
-from climetlab.core.temporary import temp_file
-from climetlab.readers.grib.output import new_grib_output
-from climetlab.utils.availability import Availability
 
 from anemoi.datasets.create.functions.sources.mars import mars
 
 DEBUG = True
 
 
-def member(field):
+def _member(field):
     # Bug in eccodes has number=0 randomly
     number = field.metadata("number")
     if number is None:
@@ -29,355 +21,12 @@ def member(field):
     return number
 
 
-class Accumulation:
-    def __init__(self, out, /, param, date, time, number, step, frequency, **kwargs):
-        self.out = out
-        self.param = param
-        self.date = date
-        self.time = time
-        self.steps = step
-        self.number = number
-        self.values = None
-        self.seen = set()
-        self.startStep = None
-        self.endStep = None
-        self.done = False
-        self.frequency = frequency
-        self._check = None
-
-    @property
-    def key(self):
-        return (self.param, self.date, self.time, self.steps, self.number)
-
-    def check(self, field):
-        if self._check is None:
-            self._check = field.as_mars()
-
-        assert self.param == field.metadata("param"), (self.param, field.metadata("param"))
-        assert self.date == field.metadata("date"), (self.date, field.metadata("date"))
-        assert self.time == field.metadata("time"), (self.time, field.metadata("time"))
-        assert self.number == member(field), (self.number, member(field))
-
-        return
-
-        mars = field.as_mars()
-        keys1 = sorted(self._check.keys())
-        keys2 = sorted(mars.keys())
-
-        assert keys1 == keys2, (keys1, keys2)
-
-        for k in keys1:
-            if k not in ("step",):
-                assert self._check[k] == mars[k], (k, self._check[k], mars[k])
-
-    def write(self, template):
-
-        assert self.startStep != self.endStep, (self.startStep, self.endStep)
-        assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
-
-        self.out.write(
-            self.values,
-            template=template,
-            stepType="accum",
-            startStep=self.startStep,
-            endStep=self.endStep,
-        )
-        self.values = None
-        self.done = True
-
-    def add(self, field, values):
-
-        self.check(field)
-
-        step = field.metadata("step")
-        if step not in self.steps:
-            return
-
-        if not np.all(values >= 0):
-            warnings.warn(f"Negative values for {field}: {np.amin(values)} {np.amax(values)}")
-
-        assert not self.done, (self.key, step)
-        assert step not in self.seen, (self.key, step)
-
-        startStep = field.metadata("startStep")
-        endStep = field.metadata("endStep")
-
-        if self.buggy_steps and startStep == endStep:
-            startStep = 0
-
-        assert step == endStep, (startStep, endStep, step)
-
-        self.compute(values, startStep, endStep)
-
-        self.seen.add(step)
-
-        if len(self.seen) == len(self.steps):
-            self.write(template=field)
-
-    @classmethod
-    def mars_date_time_steps(cls, dates, step1, step2, frequency, base_times, adjust_step):
-
-        # assert step1 > 0, (step1, step2, frequency)
-
-        for valid_date in dates:
-            base_date = valid_date - datetime.timedelta(hours=step2)
-            add_step = 0
-            if base_date.hour not in base_times:
-                if not adjust_step:
-                    raise ValueError(
-                        f"Cannot find a base time in {base_times} that validates on {valid_date.isoformat()} for step={step2}"
-                    )
-
-                while base_date.hour not in base_times:
-                    # print(f'{base_date=}, {base_times=}, {add_step=} {frequency=}')
-                    base_date -= datetime.timedelta(hours=1)
-                    add_step += 1
-
-            yield cls._mars_date_time_step(base_date, step1, step2, add_step, frequency)
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self.key})"
-
-
-class AccumulationFromStart(Accumulation):
-    buggy_steps = True
-
-    def compute(self, values, startStep, endStep):
-
-        assert startStep == 0, startStep
-
-        if self.values is None:
-
-            self.values = np.copy(values)
-            self.startStep = 0
-            self.endStep = endStep
-
-        else:
-            assert endStep != self.endStep, (self.endStep, endStep)
-
-            if endStep > self.endStep:
-                # assert endStep - self.endStep == self.stepping, (self.endStep, endStep, self.stepping)
-                self.values = values - self.values
-                self.startStep = self.endStep
-                self.endStep = endStep
-            else:
-                # assert self.endStep - endStep == self.stepping, (self.endStep, endStep, self.stepping)
-                self.values = self.values - values
-                self.startStep = endStep
-
-            if not np.all(self.values >= 0):
-                warnings.warn(f"Negative values for {self.param}: {np.amin(self.values)} {np.amax(self.values)}")
-                self.values = np.maximum(self.values, 0)
-
-    @classmethod
-    def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
-        assert not frequency, frequency
-
-        steps = (step1 + add_step, step2 + add_step)
-        if steps[0] == 0:
-            steps = (steps[1],)
-
-        return (
-            base_date.year * 10000 + base_date.month * 100 + base_date.day,
-            base_date.hour * 100 + base_date.minute,
-            steps,
-        )
-
-
-class AccumulationFromLastStep(Accumulation):
-    buggy_steps = False
-
-    def compute(self, values, startStep, endStep):
-
-        assert endStep - startStep == self.frequency, (startStep, endStep, self.frequency)
-
-        if self.startStep is None:
-            self.startStep = startStep
-        else:
-            self.startStep = min(self.startStep, startStep)
-
-        if self.endStep is None:
-            self.endStep = endStep
-        else:
-            self.endStep = max(self.endStep, endStep)
-
-        if self.values is None:
-            self.values = np.zeros_like(values)
-
-        self.values += values
-
-    @classmethod
-    def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency):
-        assert frequency > 0, frequency
-        # assert step1 > 0, (step1, step2, frequency, add_step, base_date)
-
-        steps = []
-        for step in range(step1 + frequency, step2 + frequency, frequency):
-            steps.append(step + add_step)
-        return (
-            base_date.year * 10000 + base_date.month * 100 + base_date.day,
-            base_date.hour * 100 + base_date.minute,
-            tuple(steps),
-        )
-
-
-def identity(x):
-    return x
-
-
-def compute_accumulations(
-    dates,
-    request,
-    user_accumulation_period=6,
-    data_accumulation_period=None,
-    patch=identity,
-    base_times=None,
-):
-    adjust_step = isinstance(user_accumulation_period, int)
-
-    if not isinstance(user_accumulation_period, (list, tuple)):
-        user_accumulation_period = (0, user_accumulation_period)
-
-    assert len(user_accumulation_period) == 2, user_accumulation_period
-    step1, step2 = user_accumulation_period
-    assert step1 < step2, user_accumulation_period
-
-    if base_times is None:
-        base_times = [0, 6, 12, 18]
-
-    base_times = [t // 100 if t > 100 else t for t in base_times]
-
-    AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep
-
-    mars_date_time_steps = AccumulationClass.mars_date_time_steps(
-        dates,
-        step1,
-        step2,
-        data_accumulation_period,
-        base_times,
-        adjust_step,
-    )
-
-    request = deepcopy(request)
-
-    param = request["param"]
-    if not isinstance(param, (list, tuple)):
-        param = [param]
-
-    number = request.get("number", [0])
-    assert isinstance(number, (list, tuple))
-
-    frequency = data_accumulation_period
-
-    type_ = request.get("type", "an")
-    if type_ == "an":
-        type_ = "fc"
-
-    request.update({"type": type_, "levtype": "sfc"})
-
-    tmp = temp_file()
-    path = tmp.path
-    out = new_grib_output(path)
-
-    requests = []
-
-    accumulations = {}
-
-    for date, time, steps in mars_date_time_steps:
-        for p in param:
-            for n in number:
-                requests.append(
-                    patch(
-                        {
-                            "param": p,
-                            "date": date,
-                            "time": time,
-                            "step": sorted(steps),
-                            "number": n,
-                        }
-                    )
-                )
-
-    compressed = Availability(requests)
-    ds = cml.load_source("empty")
-    for r in compressed.iterate():
-        request.update(r)
-        print("🌧️", request)
-        ds = ds + cml.load_source("mars", **request)
-
-    accumulations = {}
-    for a in [AccumulationClass(out, frequency=frequency, **r) for r in requests]:
-        for s in a.steps:
-            key = (a.param, a.date, a.time, s, a.number)
-            accumulations.setdefault(key, []).append(a)
-
-    for field in ds:
-        key = (
-            field.metadata("param"),
-            field.metadata("date"),
-            field.metadata("time"),
-            field.metadata("step"),
-            member(field),
-        )
-        values = field.values  # optimisation
-        assert accumulations[key], key
-        for a in accumulations[key]:
-            a.add(field, values)
-
-    for acc in accumulations.values():
-        for a in acc:
-            assert a.done, (a.key, a.seen, a.steps)
-
-    out.close()
-
-    ds = cml.load_source("file", path)
-
-    assert len(ds) / len(param) / len(number) == len(dates), (
-        len(ds),
-        len(param),
-        len(dates),
-    )
-    ds._tmp = tmp
-
-    return ds
-
-
-def to_list(x):
+def _to_list(x):
     if isinstance(x, (list, tuple)):
         return x
     return [x]
 
 
-def normalise_time_to_hours(r):
-    r = deepcopy(r)
-    if "time" not in r:
-        return r
-
-    times = []
-    for t in to_list(r["time"]):
-        assert len(t) == 4, r
-        assert t.endswith("00"), r
-        times.append(int(t) // 100)
-    r["time"] = tuple(times)
-    return r
-
-
-def normalise_number(r):
-    if "number" not in r:
-        return r
-    number = r["number"]
-    number = to_list(number)
-
-    if len(number) > 4 and (number[1] == "to" and number[3] == "by"):
-        return list(range(int(number[0]), int(number[2]) + 1, int(number[4])))
-
-    if len(number) > 2 and number[1] == "to":
-        return list(range(int(number[0]), int(number[2]) + 1))
-
-    r["number"] = number
-    return r
-
-
 class HindcastCompute:
     def __init__(self, base_times, available_steps, request):
         self.base_times = base_times
@@ -385,22 +34,34 @@ class HindcastCompute:
         self.request = request
 
     def compute_hindcast(self, date):
-        for step in self.available_steps:
+        result = []
+        for step in sorted(self.available_steps):  # Use the shortest step
             start_date = date - datetime.timedelta(hours=step)
             hours = start_date.hour
             if hours in self.base_times:
-                r = deepcopy(self.request)
+                r = self.request.copy()
                 r["date"] = start_date
                 r["time"] = f"{start_date.hour:02d}00"
                 r["step"] = step
-                return r
-        raise ValueError(
-            f"Cannot find data for {self.request} for {date} (base_times={self.base_times}, available_steps={self.available_steps})"
-        )
+                result.append(r)
+
+        if not result:
+            raise ValueError(
+                f"Cannot find data for {self.request} for {date} (base_times={self.base_times}, "
+                f"available_steps={self.available_steps})"
+            )
+
+        if len(result) > 1:
+            raise ValueError(
+                f"Multiple requests for {self.request} for {date} (base_times={self.base_times}, "
+                f"available_steps={self.available_steps})"
+            )
+
+        return result[0]
 
 
 def use_reference_year(reference_year, request):
-    request = deepcopy(request)
+    request = request.copy()
     hdate = request.pop("date")
     date = datetime.datetime(reference_year, hdate.month, hdate.day)
     request.update(date=date.strftime("%Y-%m-%d"), hdate=hdate.strftime("%Y-%m-%d"))
@@ -408,15 +69,15 @@ def use_reference_year(reference_year, request):
 
 
 def hindcasts(context, dates, **request):
-    request["param"] = to_list(request["param"])
-    request["step"] = to_list(request["step"])
+    request["param"] = _to_list(request["param"])
+    request["step"] = _to_list(request["step"])
     request["step"] = [int(_) for _ in request["step"]]
 
     if request.get("stream") == "enfh" and "base_times" not in request:
         request["base_times"] = [0]
 
     available_steps = request.pop("step")
-    available_steps = to_list(available_steps)
+    available_steps = _to_list(available_steps)
 
     base_times = request.pop("base_times")
 
@@ -431,7 +92,14 @@ def hindcasts(context, dates, **request):
         req = use_reference_year(reference_year, req)
 
         requests.append(req)
-    return mars(context, dates, *requests, date_key="hdate")
+
+    return mars(
+        context,
+        dates,
+        *requests,
+        date_key="hdate",
+        request_already_using_valid_datetime=True,
+    )
 
 
 execute = hindcasts
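
Note: the rewritten compute_hindcast() now collects every (base time, step) combination that can produce the requested valid date and insists on exactly one match, instead of returning the first hit. A minimal sketch of how a base time is resolved from a valid date and a forecast step; the dates, steps and base_times below are hypothetical, not taken from the package:

    import datetime

    base_times = [0]            # default used for stream=enfh in hindcasts()
    available_steps = [6, 12]   # hypothetical
    valid = datetime.datetime(2020, 1, 1, 6)

    for step in sorted(available_steps):  # shortest step first, as in the new code
        start = valid - datetime.timedelta(hours=step)
        if start.hour in base_times:
            # prints: step=6, date=20200101, time=0000
            print(f"step={step}, date={start:%Y%m%d}, time={start.hour:02d}00")
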
anemoi/datasets/create/functions/sources/mars.py
@@ -7,11 +7,10 @@
 # nor does it submit to any jurisdiction.
 #
 import datetime
-from copy import deepcopy
 
 from anemoi.utils.humanize import did_you_mean
-from climetlab import load_source
-from climetlab.utils.availability import Availability
+from earthkit.data import from_source
+from earthkit.data.utils.availability import Availability
 
 from anemoi.datasets.create.utils import to_datetime_list
 
@@ -43,25 +42,27 @@ def normalise_time_delta(t):
     return t
 
 
-def _expand_mars_request(request, date, date_key="date"):
+def _expand_mars_request(request, date, request_already_using_valid_datetime=False, date_key="date"):
     requests = []
     step = to_list(request.get("step", [0]))
     for s in step:
-        r = deepcopy(request)
-
-        if isinstance(s, str) and "-" in s:
-            assert s.count("-") == 1, s
-            # this takes care of the cases where the step is a period such as 0-24 or 12-24
-            hours = int(str(s).split("-")[-1])
-
-            base = date - datetime.timedelta(hours=hours)
-            r.update(
-                {
-                    date_key: base.strftime("%Y%m%d"),
-                    "time": base.strftime("%H%M"),
-                    "step": s,
-                }
-            )
+        r = request.copy()
+
+        if not request_already_using_valid_datetime:
+
+            if isinstance(s, str) and "-" in s:
+                assert s.count("-") == 1, s
+                # this takes care of the cases where the step is a period such as 0-24 or 12-24
+                hours = int(str(s).split("-")[-1])
+
+                base = date - datetime.timedelta(hours=hours)
+                r.update(
+                    {
+                        date_key: base.strftime("%Y%m%d"),
+                        "time": base.strftime("%H%M"),
+                        "step": s,
+                    }
+                )
 
         for pproc in ("grid", "rotation", "frame", "area", "bitmap", "resol"):
             if pproc in r:
@@ -73,13 +74,18 @@ def _expand_mars_request(request, date, date_key="date"):
     return requests
 
 
-def factorise_requests(dates, *requests, date_key="date"):
+def factorise_requests(dates, *requests, request_already_using_valid_datetime=False, date_key="date"):
     updates = []
     for req in requests:
         # req = normalise_request(req)
 
         for d in dates:
-            updates += _expand_mars_request(req, date=d, date_key=date_key)
+            updates += _expand_mars_request(
+                req,
+                date=d,
+                request_already_using_valid_datetime=request_already_using_valid_datetime,
+                date_key=date_key,
+            )
 
     compressed = Availability(updates)
     for r in compressed.iterate():
@@ -171,12 +177,33 @@ MARS_KEYS = [
 ]
 
 
-def mars(context, dates, *requests, date_key="date", **kwargs):
+def mars(context, dates, *requests, request_already_using_valid_datetime=False, date_key="date", **kwargs):
     if not requests:
         requests = [kwargs]
 
-    requests = factorise_requests(dates, *requests, date_key=date_key)
-    ds = load_source("empty")
+    for r in requests:
+        # check for "Norway bug" where yaml transforms 'no' into False, etc.
+        for p in r.get("param", []):
+            if p is False:
+                raise ValueError(
+                    "'param' cannot be 'False'. If you wrote 'param: no' or 'param: off' in yaml, you may want to use quotes?"
+                )
+            if p is None:
+                raise ValueError(
+                    "'param' cannot be 'None'. If you wrote 'param: no' in yaml, you may want to use quotes?"
+                )
+            if p is True:
+                raise ValueError(
+                    "'param' cannot be 'True'. If you wrote 'param: on' in yaml, you may want to use quotes?"
+                )
+
+    requests = factorise_requests(
+        dates,
+        *requests,
+        request_already_using_valid_datetime=request_already_using_valid_datetime,
+        date_key=date_key,
+    )
+    ds = from_source("empty")
     for r in requests:
         r = {k: v for k, v in r.items() if v != ("-",)}
 
@@ -184,14 +211,18 @@ def mars(context, dates, *requests, date_key="date", **kwargs):
             r = use_grib_paramid(r)
 
         if DEBUG:
-            context.trace("✅", f"load_source(mars, {r}")
+            context.trace("✅", f"from_source(mars, {r}")
 
         for k, v in r.items():
             if k not in MARS_KEYS:
                 raise ValueError(
                     f"⚠️ Unknown key {k}={v} in MARS request. Did you mean '{did_you_mean(k, MARS_KEYS)}' ?"
                 )
-        ds = ds + load_source("mars", **r)
+        try:
+            ds = ds + from_source("mars", **r)
+        except Exception as e:
+            if "File is empty:" not in str(e):
+                raise
     return ds
 
 
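
The new "Norway bug" guard in mars() rejects param values that YAML has silently turned into booleans or None. A small illustration of why unquoted values need quoting; this sketch assumes PyYAML, which is not necessarily the parser the package itself uses:

    import yaml

    # YAML 1.1 resolves unquoted no/off/on/yes to booleans
    print(yaml.safe_load("param: no"))    # {'param': False}  -> rejected by mars()
    print(yaml.safe_load("param: 'no'"))  # {'param': 'no'}   -> accepted
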
anemoi/datasets/create/functions/sources/netcdf.py
@@ -7,66 +7,8 @@
 # nor does it submit to any jurisdiction.
 #
 
-import glob
-
-from climetlab import load_source
-from climetlab.utils.patterns import Pattern
-
-
-def _expand(paths):
-    for path in paths:
-        if path.startswith("file://"):
-            path = path[7:]
-
-        if path.startswith("http://"):
-            yield path
-            continue
-
-        if path.startswith("https://"):
-            yield path
-            continue
-
-        for p in glob.glob(path):
-            yield p
-
-
-def check(what, ds, paths, **kwargs):
-    count = 1
-    for k, v in kwargs.items():
-        if isinstance(v, (tuple, list)):
-            count *= len(v)
-
-    if len(ds) != count:
-        raise ValueError(f"Expected {count} fields, got {len(ds)} (kwargs={kwargs}, {what}s={paths})")
-
-
-def load_netcdfs(emoji, what, context, dates, path, *args, **kwargs):
-    given_paths = path if isinstance(path, list) else [path]
-
-    dates = [d.isoformat() for d in dates]
-    ds = load_source("empty")
-
-    for path in given_paths:
-        paths = Pattern(path, ignore_missing_keys=True).substitute(*args, date=dates, **kwargs)
-
-        levels = kwargs.get("level", kwargs.get("levelist"))
-
-        for path in _expand(paths):
-            context.trace(emoji, what.upper(), path)
-            s = load_source("opendap", path)
-            s = s.sel(
-                valid_datetime=dates,
-                param=kwargs["param"],
-                step=kwargs.get("step", 0),
-            )
-            if levels:
-                s = s.sel(levelist=levels)
-            ds = ds + s
-
-    check(what, ds, given_paths, valid_datetime=dates, **kwargs)
-
-    return ds
+from .xarray import load_many
 
 
 def execute(context, dates, path, *args, **kwargs):
-    return load_netcdfs("📁", "path", context, dates, path, *args, **kwargs)
+    return load_many("📁", context, dates, path, *args, **kwargs)
anemoi/datasets/create/functions/sources/opendap.py
@@ -7,8 +7,9 @@
 # nor does it submit to any jurisdiction.
 #
 
-from .netcdf import load_netcdfs
+
+from .xarray import load_many
 
 
 def execute(context, dates, url, *args, **kwargs):
-    return load_netcdfs("🌐", "url", context, dates, url, *args, **kwargs)
+    return load_many("🌐", context, dates, url, *args, **kwargs)
anemoi/datasets/create/functions/sources/source.py
@@ -6,7 +6,7 @@
 # granted to it by virtue of its status as an intergovernmental organisation
 # nor does it submit to any jurisdiction.
 #
-from climetlab import load_source
+from earthkit.data import from_source
 
 from anemoi.datasets.create.utils import to_datetime_list
 
@@ -15,12 +15,12 @@ DEBUG = True
 
 def source(context, dates, **kwargs):
     name = kwargs.pop("name")
-    context.trace("✅", f"load_source({name}, {dates}, {kwargs}")
+    context.trace("✅", f"from_source({name}, {dates}, {kwargs}")
     if kwargs["date"] == "$from_dates":
         kwargs["date"] = list({d.strftime("%Y%m%d") for d in dates})
     if kwargs["time"] == "$from_dates":
         kwargs["time"] = list({d.strftime("%H%M") for d in dates})
-    return load_source(name, **kwargs)
+    return from_source(name, **kwargs)
 
 
 execute = source
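
The common thread across these hunks is the move from climetlab.load_source to earthkit.data.from_source. A minimal sketch of the new call pattern, with a hypothetical MARS request (running it requires MARS access; the request values are illustrative only):

    from earthkit.data import from_source

    # hypothetical request, not taken from the package
    request = {"param": "2t", "levtype": "sfc", "date": "20200101", "time": "0000"}

    ds = from_source("empty")                 # start from an empty fieldlist, as mars() does
    ds = ds + from_source("mars", **request)  # concatenate retrieved fields
    for field in ds:
        print(field.metadata("param"), field.metadata("date"))
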