cecil 0.0.28__tar.gz → 0.0.31__tar.gz

This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cecil
-Version: 0.0.28
+Version: 0.0.31
 Summary: Python SDK for Cecil Earth
 License-Expression: MIT
 License-File: LICENSE.txt
@@ -1,16 +1,15 @@
 import os
+from typing import Dict, List, Optional
+from warnings import warn
 
 import pandas as pd
 import requests
 import snowflake.connector
-import xarray
-
+from cryptography.hazmat.primitives import serialization
 from pydantic import BaseModel
 from requests import auth
-from cryptography.hazmat.primitives import serialization
-from typing import Dict, List, Optional
-from warnings import warn
 
+import xarray
 from .errors import (
     Error,
     _handle_bad_request,
@@ -35,11 +34,15 @@ from .models import (
     TransformationCreate,
     User,
     UserCreate,
-    DataRequestMetadata,
-    DataRequestParquetFiles,
+    SubscriptionMetadata,
+    SubscriptionParquetFiles,
+    SubscriptionListFiles,
+    Subscription,
+    SubscriptionCreate,
 )
 from .version import __version__
 from .xarray import load_xarray
+from .xarray import load_xarray_v2
 
 
 class Client:
@@ -69,6 +72,11 @@ class Client:
     def create_data_request(
         self, aoi_id: str, dataset_id: str, external_ref: Optional[str] = None
     ) -> DataRequest:
+        warn(
+            "create_data_request() is deprecated, use create_subscription() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         res = self._post(
             url="/v0/data-requests",
             model=DataRequestCreate(
@@ -78,22 +86,120 @@
         return DataRequest(**res)
 
     def get_data_request(self, id: str) -> DataRequest:
+        warn(
+            "get_data_request() is deprecated, use get_subscription() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         res = self._get(url=f"/v0/data-requests/{id}")
         return DataRequest(**res)
 
     def list_data_requests(self) -> List[DataRequest]:
+        warn(
+            "list_data_requests() is deprecated, use list_subscriptions() instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         res = self._get(url="/v0/data-requests")
         return [DataRequest(**record) for record in res["records"]]
 
-    def load_xarray(self, data_request_id: str) -> xarray.Dataset:
-        res = self._get(url=f"/v0/data-requests/{data_request_id}/metadata")
-        metadata = DataRequestMetadata(**res)
-        return load_xarray(metadata)
+    def list_subscriptions(self) -> List[Subscription]:
+        res = self._get(url="/v0/data-requests")
+        return [Subscription(**record) for record in res["records"]]
+
+    def create_subscription(
+        self, aoi_id: str, dataset_id: str, external_ref: Optional[str] = None
+    ) -> Subscription:
+        res = self._post(
+            url="/v0/data-requests",
+            model=SubscriptionCreate(
+                aoi_id=aoi_id, dataset_id=dataset_id, external_ref=external_ref
+            ),
+        )
+
+        return Subscription(**res)
+
+    def get_subscription(self, id: str) -> Subscription:
+        res = self._get(url=f"/v0/data-requests/{id}")
+        return Subscription(**res)
+
+    def load_xarray(
+        self,
+        subscription_id: Optional[str] = None,
+        data_request_id: Optional[str] = None,
+    ) -> xarray.Dataset:
+        if subscription_id is None and data_request_id is None:
+            raise TypeError("load_xarray() missing argument: 'subscription_id'")
+
+        if subscription_id is not None and data_request_id is not None:
+            raise ValueError(
+                "load_xarray() only accepts one argument but two were provided"
+            )
+
+        if data_request_id:
+            warn(
+                "data_request_id is deprecated, use subscription_id instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            subscription_id = data_request_id
+
+        res = SubscriptionMetadata(
+            **self._get(url=f"/v0/data-requests/{subscription_id}/metadata")
+        )
+        return load_xarray(res)
+
+    def _load_xarray_v2(
+        self,
+        subscription_id: Optional[str] = None,
+        data_request_id: Optional[str] = None,
+    ) -> xarray.Dataset:
+        if subscription_id is None and data_request_id is None:
+            raise TypeError("load_xarray_v2() missing argument: 'subscription_id'")
+
+        if subscription_id is not None and data_request_id is not None:
+            raise ValueError(
+                "load_xarray_v2() only accepts one argument but two were provided"
+            )
+
+        if data_request_id:
+            warn(
+                "data_request_id is deprecated, use subscription_id instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            subscription_id = data_request_id
+
+        res = SubscriptionListFiles(
+            **self._get(url=f"/v0/data-requests/{subscription_id}/files/tiff")
+        )
+        return load_xarray_v2(res)
 
-    def load_dataframe(self, data_request_id: str) -> pd.DataFrame:
-        res = self._get(url=f"/v0/data-requests/{data_request_id}/parquet-files")
-        metadata = DataRequestParquetFiles(**res)
-        df = pd.concat((pd.read_parquet(f) for f in metadata.files))
+    def load_dataframe(
+        self,
+        subscription_id: Optional[str] = None,
+        data_request_id: Optional[str] = None,
+    ) -> pd.DataFrame:
+        if subscription_id is None and data_request_id is None:
+            raise TypeError("load_dataframe missing argument: 'subscription_id'")
+
+        if subscription_id is not None and data_request_id is not None:
+            raise ValueError(
+                "load_dataframe only accepts one argument but two were provided"
+            )
+
+        if data_request_id:
+            warn(
+                "data_request_id is deprecated, use subscription_id instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            subscription_id = data_request_id
+
+        res = SubscriptionParquetFiles(
+            **self._get(url=f"/v0/data-requests/{subscription_id}/parquet-files")
+        )
+        df = pd.concat((pd.read_parquet(f) for f in res.files))
         return df[
             [col for col in df.columns if col not in ("organisation_id", "created_at")]
         ]
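
A minimal migration sketch implied by the deprecations above, assuming an already-configured Client (constructor arguments are not part of this hunk and are assumed here):

    from cecil import Client  # assumed import path

    client = Client()  # assumes credentials are resolved from the environment

    # Pre-0.0.31 style: still works, but now emits a DeprecationWarning
    dr = client.create_data_request(aoi_id="aoi-123", dataset_id="ds-456")

    # 0.0.31 style: same /v0/data-requests endpoint, new Subscription models
    sub = client.create_subscription(aoi_id="aoi-123", dataset_id="ds-456")
    ds = client.load_xarray(subscription_id=sub.id)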
@@ -126,7 +126,7 @@ class File(BaseModel):
     bands: List[Band]
 
 
-class DataRequestMetadata(BaseModel):
+class SubscriptionMetadata(BaseModel):
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
     provider_name: str
     dataset_id: str
@@ -137,6 +137,55 @@ class DataRequestMetadata(BaseModel):
     files: List[File]
 
 
-class DataRequestParquetFiles(BaseModel):
+class Bucket(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    name: str
+    prefix: str
+
+
+class BucketCredentials(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    access_key_id: str
+    secret_access_key: str
+    session_token: str
+    expiration: datetime.datetime
+
+
+class FileMapping(BaseModel):
+    type: str
+    bands: List
+
+
+class SubscriptionListFiles(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    provider_name: str
+    dataset_id: str
+    dataset_name: str
+    aoi_id: str
+    data_request_id: str
+    bucket: Bucket
+    credentials: BucketCredentials
+    allowed_actions: List
+    file_mapping: Dict[str, FileMapping]
+
+
+class SubscriptionParquetFiles(BaseModel):
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
     files: List[str]
+
+
+class Subscription(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    id: str
+    aoi_id: str
+    dataset_id: str
+    external_ref: Optional[str]
+    created_at: datetime.datetime
+    created_by: str
+
+
+class SubscriptionCreate(BaseModel):
+    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
+    aoi_id: str
+    dataset_id: str
+    external_ref: Optional[str]
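
All of the new models opt into camelCase wire names through to_camel while still accepting snake_case, which is what populate_by_name=True buys. A standalone sketch of that behaviour (plain pydantic v2 semantics, independent of this package):

    from pydantic import BaseModel, ConfigDict
    from pydantic.alias_generators import to_camel

    class Subscription(BaseModel):
        model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
        aoi_id: str
        dataset_id: str

    # Parses the API's camelCase payload...
    s = Subscription(**{"aoiId": "a1", "datasetId": "d1"})
    # ...and, thanks to populate_by_name=True, snake_case keyword args too.
    s2 = Subscription(aoi_id="a1", dataset_id="d1")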
@@ -0,0 +1 @@
+__version__ = "0.0.31"
@@ -0,0 +1,415 @@
+import re
+import time
+from datetime import datetime
+
+import boto3
+import dask
+import rasterio
+import rasterio.session
+import rioxarray
+import xarray
+
+from .errors import Error
+from .models import SubscriptionMetadata, SubscriptionListFiles
+
+# v1
+
+
+def load_xarray(metadata: SubscriptionMetadata) -> xarray.Dataset:
+    data_vars = {}
+
+    for f in metadata.files:
+        try:
+            dataset = _retry_with_exponential_backoff(_load_file, 5, 1, 2, f.url)
+        except Exception as e:
+            raise ValueError(f"failed to load file: {e}")
+
+        for b in f.bands:
+            band = dataset.sel(band=b.number, drop=True)
+
+            if b.time and b.time_pattern:
+                t = datetime.strptime(b.time, b.time_pattern)
+                band = band.expand_dims("time")
+                band = band.assign_coords(time=[t])
+
+            band.name = b.variable_name
+
+            if b.variable_name not in data_vars:
+                data_vars[b.variable_name] = []
+
+            data_vars[b.variable_name].append(band)
+
+    for variable_name, time_series in data_vars.items():
+        if "time" in time_series[0].dims:
+            data_vars[variable_name] = xarray.concat(
+                time_series, dim="time", join="exact"
+            )
+        else:
+            data_vars[variable_name] = time_series[0]
+
+    return xarray.Dataset(
+        data_vars=data_vars,
+        attrs={
+            "provider_name": metadata.provider_name,
+            "dataset_name": metadata.dataset_name,
+            "dataset_id": metadata.dataset_id,
+            "aoi_id": metadata.aoi_id,
+            "subscription_id": metadata.data_request_id,
+        },
+    )
+
+
+def _retry_with_exponential_backoff(
+    func, retries, start_delay, multiplier, *args, **kwargs
+):
+    delay = start_delay
+    for attempt in range(1, retries + 1):
+        try:
+            return func(*args, **kwargs)
+        except Exception as e:
+            if attempt == retries:
+                raise e
+            time.sleep(delay)
+            delay *= multiplier
+    return None
+
+
+def _load_file(url: str):
+    return rioxarray.open_rasterio(
+        url,
+        chunks={"x": 2000, "y": 2000},
+    )
+
+
+# v2
+
+
+def load_xarray_v2(res: SubscriptionListFiles) -> xarray.Dataset:
+    session = boto3.session.Session(
+        aws_access_key_id=res.credentials.access_key_id,
+        aws_secret_access_key=res.credentials.secret_access_key,
+        aws_session_token=res.credentials.session_token,
+    )
+
+    keys = _list_keys_v2(session, res.bucket.name, res.bucket.prefix)
+
+    if not keys:
+        return xarray.Dataset()
+
+    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
+    data_vars = {}
+
+    for key in keys:
+        try:
+            file_da = _retry_with_exponential_backoff(
+                _load_file_v2,
+                5,
+                1,
+                2,
+                session,
+                f"s3://{res.bucket.name}/{key}",
+            )
+        except Exception as e:
+            raise ValueError(f"failed to load file: {e}")
+
+        filename = key.split("/")[-1]
+
+        file_info = res.file_mapping.get(filename)
+        if not file_info:
+            continue
+
+        timestamp_str = timestamp_pattern.search(key).group()
+
+        for band_num, var_name in enumerate(file_info.bands, start=1):
+            band_da = file_da.sel(band=band_num, drop=True)
+            band_da.name = var_name
+
+            # Dataset with time dimension
+            if timestamp_str != "0000/00/00/00/00/00":
+                t = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
+                band_da = band_da.expand_dims("time")
+                band_da = band_da.assign_coords(time=[t])
+
+            if var_name not in data_vars:
+                data_vars[var_name] = []
+
+            data_vars[var_name].append(band_da)
+
+    for var_name, time_series in data_vars.items():
+        if "time" in time_series[0].dims:
+            data_vars[var_name] = xarray.concat(time_series, dim="time", join="exact")
+        else:
+            data_vars[var_name] = time_series[0]
+
+    return xarray.Dataset(
+        data_vars=data_vars,
+        attrs={
+            "provider_name": res.provider_name,
+            "dataset_name": res.dataset_name,
+            "dataset_id": res.dataset_id,
+            "aoi_id": res.aoi_id,
+            "subscription_id": res.data_request_id,
+        },
+    )
+
+
+def _list_keys_v2(session: boto3.session.Session, bucket_name, prefix) -> list[str]:
+    s3_client = session.client("s3")
+    paginator = s3_client.get_paginator("list_objects_v2")
+    page_iterator = paginator.paginate(
+        Bucket=bucket_name,
+        Prefix=prefix,
+    )
+
+    keys = []
+    for page in page_iterator:
+        for obj in page.get("Contents", []):
+            keys.append(obj["Key"])
+
+    return keys
+
+
+def _load_file_v2(aws_session: boto3.session.Session, url: str):
+    with rasterio.env.Env(
+        session=rasterio.session.AWSSession(aws_session),
+        GDAL_DISABLE_READDIR_ON_OPEN=True,
+    ):
+        return rioxarray.open_rasterio(
+            url,
+            chunks={"x": 2000, "y": 2000},
+        )
+
+
+# v3
+
+
+def load_xarray_v3(res: SubscriptionListFiles) -> xarray.Dataset:
+    session = boto3.session.Session(
+        aws_access_key_id=res.credentials.access_key_id,
+        aws_secret_access_key=res.credentials.secret_access_key,
+        aws_session_token=res.credentials.session_token,
+    )
+
+    keys = _list_keys_v3(session, res.bucket.name, res.bucket.prefix)
+
+    if not keys:
+        return xarray.Dataset()
+
+    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
+    data_vars = {}
+
+    with rasterio.env.Env(
+        session=rasterio.session.AWSSession(session),
+        GDAL_DISABLE_READDIR_ON_OPEN=True,
+    ):
+        first_file = rioxarray.open_rasterio(
+            f"s3://{res.bucket.name}/{keys[0]}", chunks="auto"
+        )
+
+    for key in keys:
+        filename = key.split("/")[-1]
+
+        file_info = res.file_mapping.get(filename)
+        if not file_info:
+            continue
+
+        lazy_array = dask.array.from_delayed(
+            dask.delayed(_load_file_v3)(session, f"s3://{res.bucket.name}/{key}"),
+            shape=first_file.shape,
+            dtype=file_info.type,
+        )
+        lazy_da = xarray.DataArray(
+            lazy_array,
+            dims=first_file.dims,
+            coords=dict(first_file.coords),
+            # attrs=first_file.attrs.copy()  # TODO: not the same for all files
+        )
+        # lazy_da.encoding = first_file.encoding.copy()
+        # lazy_da.rio.write_crs(first_file.rio.crs, inplace=True)
+        # lazy_da.rio.write_transform(first_file.rio.transform(), inplace=True)
+
+        timestamp_str = timestamp_pattern.search(key).group()
+
+        for band_num, var_name in enumerate(file_info.bands, start=1):
+            band_da = lazy_da.sel(band=band_num, drop=True)
+            band_da.name = var_name
+
+            # Dataset with time dimension
+            if timestamp_str != "0000/00/00/00/00/00":
+                t = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
+                band_da = band_da.expand_dims("time")
+                band_da = band_da.assign_coords(time=[t])
+
+            if var_name not in data_vars:
+                data_vars[var_name] = []
+
+            data_vars[var_name].append(band_da)
+
+    for var_name, time_series in data_vars.items():
+        if "time" in time_series[0].dims:
+            data_vars[var_name] = xarray.concat(time_series, dim="time", join="exact")
+        else:
+            data_vars[var_name] = time_series[0]
+
+    return xarray.Dataset(
+        data_vars=data_vars,
+        attrs={
+            "provider_name": res.provider_name,
+            "dataset_name": res.dataset_name,
+            "dataset_id": res.dataset_id,
+            "aoi_id": res.aoi_id,
+            "subscription_id": res.data_request_id,
+        },
+    )
+
+
+def _load_file_v3(aws_session: boto3.session.Session, url: str):
+    with rasterio.env.Env(
+        session=rasterio.session.AWSSession(aws_session),
+        GDAL_DISABLE_READDIR_ON_OPEN=True,
+    ):
+        return rioxarray.open_rasterio(
+            url,
+            chunks="auto",
+        ).values
+        # ).sel(band=num_band, drop=True)
+        # ).sel(band=num_band, drop=True).values
+        # ).isel(band=num_band-1).values
+
+
+def _list_keys_v3(session: boto3.session.Session, bucket_name, prefix) -> list[str]:
+    s3_client = session.client("s3")
+    paginator = s3_client.get_paginator("list_objects_v2")
+    page_iterator = paginator.paginate(
+        Bucket=bucket_name,
+        Prefix=prefix,
+    )
+
+    keys = []
+    for page in page_iterator:
+        for obj in page.get("Contents", []):
+            keys.append(obj["Key"])
+
+    return keys
+
+
+# v4
+
+
+def load_xarray_v4(res: SubscriptionListFiles) -> xarray.Dataset:
+
+    session = boto3.session.Session(
+        aws_access_key_id=res.credentials.access_key_id,
+        aws_secret_access_key=res.credentials.secret_access_key,
+        aws_session_token=res.credentials.session_token,
+    )
+
+    keys = _list_keys_v2(session, res.bucket.name, res.bucket.prefix)
+
+    if not keys:
+        return xarray.Dataset()
+
+    first_file_metadata = _get_file_metadata_v4(session, res.bucket.name, keys[0])
+
+    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
+
+    data_vars = {}
+    for key in keys:
+        filename = key.split("/")[-1].rsplit(".", 1)[0]
+
+        file_info = res.file_mapping.get(filename)
+        if not file_info:
+            continue
+
+        timestamp_str = timestamp_pattern.search(key).group()
+
+        for band_num, band_name in enumerate(file_info.bands, start=1):
+            array = _create_dask_array_v4(
+                session,
+                f"s3://{res.bucket.name}/{key}",
+                band_num,
+                first_file_metadata["height"],
+                first_file_metadata["width"],
+                file_info.type,
+            )
+            da = xarray.DataArray(
+                array,
+                dims=("y", "x"),
+            )
+            da.name = band_name
+
+            # Dataset with time dimension
+            if timestamp_str != "0000/00/00/00/00/00":
+                time = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
+                da = da.expand_dims("time")
+                da = da.assign_coords(time=[time])
+
+            if band_name not in data_vars:
+                data_vars[band_name] = []
+
+            data_vars[band_name].append(da)
+
+    for variable_name, time_series in data_vars.items():
+        if "time" in time_series[0].dims:
+            data_vars[variable_name] = xarray.concat(
+                time_series,
+                dim="time",
+                join="exact",
+            )
+        else:
+            data_vars[variable_name] = time_series[0]
+
+    ds = xarray.Dataset(
+        data_vars=data_vars,
+        coords={
+            "y": first_file_metadata["y"],
+            "x": first_file_metadata["x"],
+        },
+        attrs={
+            "provider_name": res.provider_name,
+            "dataset_name": res.dataset_name,
+            "dataset_id": res.dataset_id,
+            "aoi_id": res.aoi_id,
+            "subscription_id": res.data_request_id,
+        },
+    )
+    ds = ds.rio.write_crs(first_file_metadata["crs"])
+
+    return ds
+
+
+def _get_file_metadata_v4(session, bucket: str, path: str):
+    with rasterio.env.Env(
+        rasterio.session.AWSSession(session), GDAL_DISABLE_READDIR_ON_OPEN=True
+    ):
+        da = xarray.open_dataarray(f"s3://{bucket}/{path}", engine="rasterio")
+
+    return {
+        "crs": da.rio.crs,
+        "height": da.rio.height,
+        "width": da.rio.width,
+        "x": da.x.values,
+        "y": da.y.values,
+    }
+
+
+def _create_dask_array_v4(
+    session: boto3.session.Session,
+    file_path: str,
+    band_num: int,
+    height: int,
+    width: int,
+    dtype: str,
+):
+    rasterio_session = rasterio.session.AWSSession(session)
+
+    def read_chunk():
+        with rasterio.env.Env(
+            session=rasterio_session, GDAL_DISABLE_READDIR_ON_OPEN=True
+        ):
+            with rasterio.open(file_path) as src:
+                return src.read(band_num)
+
+    return dask.array.from_delayed(
+        dask.delayed(read_chunk)(), shape=(height, width), dtype=dtype
+    )
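
The v3/v4 loaders defer all raster I/O: each band becomes a dask array whose single chunk is produced by a delayed rasterio read, so nothing is fetched from S3 until the graph is computed. A stripped-down sketch of that pattern (local path, shape, and dtype are placeholders):

    import dask
    import dask.array
    import rasterio
    import xarray

    def lazy_band(path: str, band: int, height: int, width: int, dtype: str):
        def read():
            # Runs only when the dask graph is computed
            with rasterio.open(path) as src:
                return src.read(band)

        return dask.array.from_delayed(
            dask.delayed(read)(), shape=(height, width), dtype=dtype
        )

    da = xarray.DataArray(
        lazy_band("example.tif", 1, 512, 512, "float32"), dims=("y", "x")
    )
    # No I/O yet; da.compute() (or any eager operation) triggers the read.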
@@ -1 +0,0 @@
-__version__ = "0.0.28"
@@ -1,74 +0,0 @@
-import os
-import rioxarray
-import xarray
-
-from datetime import datetime
-
-from .errors import Error
-from .models import DataRequestMetadata
-
-os.environ["GDAL_NUM_THREADS"] = "1"
-os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "FALSE"
-
-
-def align_pixel_grids(time_series):
-    # Use the first timestep as reference
-    reference_da = time_series[0]
-    aligned_series = [reference_da]
-
-    # Align all other timesteps to the reference grid
-    for i, da in enumerate(time_series[1:], 1):
-        try:
-            aligned_da = da.rio.reproject_match(reference_da)
-            aligned_series.append(aligned_da)
-        except Exception as e:
-            raise Error
-            continue
-
-    return aligned_series
-
-
-def load_xarray(metadata: DataRequestMetadata) -> xarray.Dataset:
-    data_vars = {}
-
-    for f in metadata.files:
-        dataset = rioxarray.open_rasterio(
-            f.url,
-            chunks={"x": 2000, "y": 2000},
-        )
-
-        for b in f.bands:
-            band = dataset.sel(band=b.number, drop=True)
-
-            if b.time and b.time_pattern:
-                time = datetime.strptime(b.time, b.time_pattern)
-                band = band.expand_dims("time")
-                band = band.assign_coords(time=[time])
-
-            band.name = b.variable_name
-
-            if b.variable_name not in data_vars:
-                data_vars[b.variable_name] = []
-
-            data_vars[b.variable_name].append(band)
-
-    for variable_name, time_series in data_vars.items():
-        if "time" in time_series[0].dims:
-            # time_series = align_pixel_grids(time_series)
-            data_vars[variable_name] = xarray.concat(
-                time_series, dim="time", join="exact"
-            )
-        else:
-            data_vars[variable_name] = time_series[0]
-
-    return xarray.Dataset(
-        data_vars=data_vars,
-        attrs={
-            "provider_name": metadata.provider_name,
-            "dataset_id": metadata.dataset_id,
-            "dataset_name": metadata.dataset_name,
-            "dataset_crs": metadata.dataset_crs,
-            "aoi_id": metadata.aoi_id,
-            "data_request_id": metadata.data_request_id,
-        },
-    )
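
One design shift between the removed module and its replacement: the old loader mutated process-wide GDAL settings through os.environ at import time, whereas the new loaders scope the same options to a rasterio.env.Env context. A minimal sketch of the scoped form (placeholder path):

    import rasterio
    import rasterio.env

    # The GDAL option applies only inside the context manager,
    # instead of leaking into the whole process via os.environ.
    with rasterio.env.Env(GDAL_DISABLE_READDIR_ON_OPEN=True):
        with rasterio.open("example.tif") as src:
            data = src.read(1)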