cecil 0.0.31__tar.gz → 0.0.35__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cecil-0.0.31 → cecil-0.0.35}/.gitignore +1 -0
- cecil-0.0.35/CONTRIBUTING.md +23 -0
- cecil-0.0.35/PKG-INFO +24 -0
- cecil-0.0.35/README.md +5 -0
- {cecil-0.0.31 → cecil-0.0.35}/pyproject.toml +2 -2
- {cecil-0.0.31 → cecil-0.0.35}/src/cecil/client.py +8 -8
- {cecil-0.0.31 → cecil-0.0.35}/src/cecil/models.py +4 -2
- cecil-0.0.35/src/cecil/version.py +1 -0
- cecil-0.0.35/src/cecil/xarray.py +193 -0
- cecil-0.0.31/CONTRIBUTING.md +0 -21
- cecil-0.0.31/PKG-INFO +0 -122
- cecil-0.0.31/README.md +0 -103
- cecil-0.0.31/src/cecil/version.py +0 -1
- cecil-0.0.31/src/cecil/xarray.py +0 -415
- {cecil-0.0.31 → cecil-0.0.35}/.editorconfig +0 -0
- {cecil-0.0.31 → cecil-0.0.35}/LICENSE.txt +0 -0
- {cecil-0.0.31 → cecil-0.0.35}/Makefile +0 -0
- {cecil-0.0.31 → cecil-0.0.35}/src/cecil/__init__.py +0 -0
- {cecil-0.0.31 → cecil-0.0.35}/src/cecil/errors.py +0 -0
- {cecil-0.0.31 → cecil-0.0.35}/tests/__init__.py +0 -0
- {cecil-0.0.31 → cecil-0.0.35}/tests/test_client.py +0 -0
cecil-0.0.35/CONTRIBUTING.md
ADDED
@@ -0,0 +1,23 @@
+## Development installation
+
+Install packaging/distribution tools and linter:
+
+```shell
+pip install hatch twine black
+```
+
+From top-level repo directory, install the package in editable mode:
+
+```shell
+pip install -e .
+```
+
+Local edits to the package will immediately take effect.
+
+Get the PyPI Test API Key from 1Password and add it to `~/.pypirc`:
+
+```bash
+[testpypi]
+username = __token__
+password = <PyPI Test API Key>
+```
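
With that entry in place, the usual flow is `hatch build` followed by `twine upload --repository testpypi dist/*`, where the `--repository` name matches the `[testpypi]` section header (standard hatch/twine usage; the file itself stops at the config).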

cecil-0.0.35/PKG-INFO
ADDED
@@ -0,0 +1,24 @@
+Metadata-Version: 2.4
+Name: cecil
+Version: 0.0.35
+Summary: Python SDK for Cecil Earth
+License-Expression: MIT
+License-File: LICENSE.txt
+Classifier: Development Status :: 4 - Beta
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.10
+Requires-Dist: dask==2025.11.0
+Requires-Dist: pydantic<3.0.0,>=2.11.9
+Requires-Dist: requests<3.0.0,>=2.32.5
+Requires-Dist: rioxarray==0.19.0
+Requires-Dist: snowflake-connector-python[pandas]<4.0.0,>=3.17.4
+Requires-Dist: xarray==2025.11.0
+Description-Content-Type: text/markdown
+
+# Cecil SDK
+
+Please refer to the Cecil documentation:
+
+https://docs.cecil.earth
cecil-0.0.35/README.md
ADDED
@@ -0,0 +1,5 @@
+# Cecil SDK
+
+Please refer to the Cecil documentation:
+
+https://docs.cecil.earth

{cecil-0.0.31 → cecil-0.0.35}/pyproject.toml
@@ -16,12 +16,12 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "dask==2025.9.1",
+    "dask==2025.11.0",
     "pydantic>=2.11.9,<3.0.0",
     "requests>=2.32.5,<3.0.0",
     "rioxarray==0.19.0",
     "snowflake-connector-python[pandas]>=3.17.4,<4.0.0",
-    "xarray==2025.6.1"
+    "xarray==2025.11.0"
 ]

 [tool.hatch.version]
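
Both bumped dependencies are exact CalVer pins, so the change keeps `dask` and `xarray` on the same 2025.11.0 release. A quick environment check after installing, a minimal sketch assuming the pins resolved as written:

```python
# Expected versions come straight from the pyproject.toml pins above.
import dask
import rioxarray
import xarray

assert dask.__version__ == "2025.11.0"
assert xarray.__version__ == "2025.11.0"
assert rioxarray.__version__ == "0.19.0"
```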

{cecil-0.0.31 → cecil-0.0.35}/src/cecil/client.py
@@ -104,14 +104,14 @@ class Client:
         return [DataRequest(**record) for record in res["records"]]

     def list_subscriptions(self) -> List[Subscription]:
-        res = self._get(url="/v0/
+        res = self._get(url="/v0/subscriptions")
         return [Subscription(**record) for record in res["records"]]

     def create_subscription(
         self, aoi_id: str, dataset_id: str, external_ref: Optional[str] = None
     ) -> Subscription:
         res = self._post(
-            url="/v0/
+            url="/v0/subscriptions",
             model=SubscriptionCreate(
                 aoi_id=aoi_id, dataset_id=dataset_id, external_ref=external_ref
             ),
@@ -120,7 +120,7 @@
         return Subscription(**res)

     def get_subscription(self, id: str) -> Subscription:
-        res = self._get(url=f"/v0/
+        res = self._get(url=f"/v0/subscriptions/{id}")
         return Subscription(**res)

     def load_xarray(
@@ -145,7 +145,7 @@
             subscription_id = data_request_id

         res = SubscriptionMetadata(
-            **self._get(url=f"/v0/
+            **self._get(url=f"/v0/subscriptions/{subscription_id}/metadata")
         )
         return load_xarray(res)
@@ -171,7 +171,7 @@
             subscription_id = data_request_id

         res = SubscriptionListFiles(
-            **self._get(url=f"/v0/
+            **self._get(url=f"/v0/subscriptions/{subscription_id}/files/tiff")
         )
         return load_xarray_v2(res)
@@ -197,7 +197,7 @@
             subscription_id = data_request_id

         res = SubscriptionParquetFiles(
-            **self._get(url=f"/v0/
+            **self._get(url=f"/v0/subscriptions/{subscription_id}/parquet-files")
         )
         df = pd.concat((pd.read_parquet(f) for f in res.files))
         return df[
@@ -308,12 +308,12 @@
     def update_organisation_settings(
         self,
         *,
-
+        monthly_subscription_limit,
     ) -> OrganisationSettings:
         res = self._post(
             url="/v0/organisation/settings",
             model=OrganisationSettings(
-
+                monthly_subscription_limit=monthly_subscription_limit,
             ),
         )
         return OrganisationSettings(**res)
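
Everything in the client.py hunks is the same rename: the subscription methods now call `/v0/subscriptions...` paths (the old paths are truncated in this diff). A usage sketch of the renamed surface; `cecil.Client()` comes from the package README, the IDs are placeholders, and the `load_xarray` keyword is an assumption inferred from the `subscription_id`/`data_request_id` handling visible in the hunks:

```python
import cecil

client = cecil.Client()  # per the README, authenticates via the CECIL_API_KEY env var

# Placeholder IDs, not real resources; dataset ID copied from the README example.
sub = client.create_subscription(
    aoi_id="00000000-0000-0000-0000-000000000000",
    dataset_id="c2dd4f55-56f6-4d05-aae3-ba7c1dcd812f",
    external_ref="my-ref",  # optional, per the signature in the hunk
)

print(client.get_subscription(sub.id))
print(client.list_subscriptions())

# Keyword assumed from the diff; the full load_xarray signature is not shown.
ds = client.load_xarray(subscription_id=sub.id)
```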

{cecil-0.0.31 → cecil-0.0.35}/src/cecil/models.py
@@ -1,7 +1,7 @@
 import datetime
 from typing import Dict, Optional, List

-from pydantic import BaseModel, ConfigDict, SecretStr
+from pydantic import BaseModel, ConfigDict, Field, SecretStr
 from pydantic.alias_generators import to_camel


@@ -49,7 +49,9 @@ class DataRequestCreate(BaseModel):

 class OrganisationSettings(BaseModel):
     model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
-
+    monthly_subscription_limit: Optional[int] = Field(
+        alias="monthlyDataRequestLimit",
+    )


 class RecoverAPIKey(BaseModel):
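
The new `Field` import exists to support the alias on `OrganisationSettings`: the attribute gets the new `monthly_subscription_limit` name while the wire format keeps the old camelCase key, and `populate_by_name=True` accepts either. A standalone sketch of that behaviour (pydantic v2 semantics; the model is re-declared here just for illustration):

```python
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field
from pydantic.alias_generators import to_camel


class OrganisationSettings(BaseModel):
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
    # Explicit alias overrides the to_camel generator, preserving the old wire name.
    monthly_subscription_limit: Optional[int] = Field(alias="monthlyDataRequestLimit")


a = OrganisationSettings(monthlyDataRequestLimit=10)     # old wire name
b = OrganisationSettings(monthly_subscription_limit=10)  # new field name
assert a == b
assert a.model_dump(by_alias=True) == {"monthlyDataRequestLimit": 10}
```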

cecil-0.0.35/src/cecil/version.py
ADDED
@@ -0,0 +1 @@
+__version__ = "0.0.35"

cecil-0.0.35/src/cecil/xarray.py
ADDED
@@ -0,0 +1,193 @@
+import re
+import time
+from datetime import datetime
+
+import boto3
+import dask
+import rasterio
+import rasterio.session
+import rioxarray
+import xarray
+
+from .models import SubscriptionMetadata, SubscriptionListFiles
+
+# v1
+
+
+def load_xarray(metadata: SubscriptionMetadata) -> xarray.Dataset:
+    data_vars = {}
+
+    for f in metadata.files:
+        try:
+            dataset = _retry_with_exponential_backoff(_load_file, 5, 1, 2, f.url)
+        except Exception as e:
+            raise ValueError(f"failed to load file: {e}")
+
+        for b in f.bands:
+            band = dataset.sel(band=b.number, drop=True)
+
+            if b.time and b.time_pattern:
+                t = datetime.strptime(b.time, b.time_pattern)
+                band = band.expand_dims("time")
+                band = band.assign_coords(time=[t])
+
+            band.name = b.variable_name
+
+            if b.variable_name not in data_vars:
+                data_vars[b.variable_name] = []
+
+            data_vars[b.variable_name].append(band)
+
+    for variable_name, time_series in data_vars.items():
+        if "time" in time_series[0].dims:
+            data_vars[variable_name] = xarray.concat(
+                time_series, dim="time", join="exact"
+            )
+        else:
+            data_vars[variable_name] = time_series[0]
+
+    return xarray.Dataset(
+        data_vars=data_vars,
+        attrs={
+            "provider_name": metadata.provider_name,
+            "dataset_name": metadata.dataset_name,
+            "dataset_id": metadata.dataset_id,
+            "aoi_id": metadata.aoi_id,
+            "subscription_id": metadata.data_request_id,
+        },
+    )
+
+
+def _retry_with_exponential_backoff(
+    func, retries, start_delay, multiplier, *args, **kwargs
+):
+    delay = start_delay
+    for attempt in range(1, retries + 1):
+        try:
+            return func(*args, **kwargs)
+        except Exception as e:
+            if attempt == retries:
+                raise e
+            time.sleep(delay)
+            delay *= multiplier
+    return None
+
+
+def _load_file(url: str):
+    return rioxarray.open_rasterio(
+        url,
+        chunks={"x": 2000, "y": 2000},
+    )
+
+
+# v2
+
+
+def load_xarray_v2(res: SubscriptionListFiles) -> xarray.Dataset:
+    session = boto3.session.Session(
+        aws_access_key_id=res.credentials.access_key_id,
+        aws_secret_access_key=res.credentials.secret_access_key,
+        aws_session_token=res.credentials.session_token,
+    )
+
+    keys = _list_keys_v2(session, res.bucket.name, res.bucket.prefix)
+
+    if not keys:
+        return xarray.Dataset()
+
+    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
+    data_vars = {}
+
+    with rasterio.env.Env(
+        session=rasterio.session.AWSSession(session),
+    ):
+        first_file = rioxarray.open_rasterio(
+            f"s3://{res.bucket.name}/{keys[0]}", chunks="auto"
+        )
+
+    for key in keys:
+        filename = key.split("/")[-1]
+
+        file_info = res.file_mapping.get(filename)
+        if not file_info:
+            continue
+
+        timestamp_str = timestamp_pattern.search(key).group()
+
+        for band_num, var_name in enumerate(file_info.bands, start=1):
+            lazy_array = dask.array.from_delayed(
+                dask.delayed(_load_file_v2)(
+                    session, f"s3://{res.bucket.name}/{key}", band_num
+                ),
+                shape=(
+                    first_file.rio.height,
+                    first_file.rio.width,
+                ),
+                dtype=file_info.type,
+            )
+            band_da = xarray.DataArray(
+                lazy_array,
+                dims=("y", "x"),
+                coords={
+                    "y": first_file.y.values,
+                    "x": first_file.x.values,
+                },
+                # attrs=first_file.attrs.copy()  # TODO: is it the same for all files?
+            )
+            # band_da.encoding = first_file.encoding.copy()  # TODO: is it the same for all files?
+            band_da.rio.write_crs(first_file.rio.crs, inplace=True)
+            band_da.rio.write_transform(first_file.rio.transform(), inplace=True)
+
+            band_da.name = var_name
+
+            # Dataset with time dimension
+            if timestamp_str != "0000/00/00/00/00/00":
+                t = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
+                band_da = band_da.expand_dims("time")
+                band_da = band_da.assign_coords(time=[t])
+
+            if var_name not in data_vars:
+                data_vars[var_name] = []
+
+            data_vars[var_name].append(band_da)
+
+    for var_name, time_series in data_vars.items():
+        if "time" in time_series[0].dims:
+            data_vars[var_name] = xarray.concat(time_series, dim="time", join="exact")
+        else:
+            data_vars[var_name] = time_series[0]
+
+    return xarray.Dataset(
+        data_vars=data_vars,
+        attrs={
+            "provider_name": res.provider_name,
+            "dataset_name": res.dataset_name,
+            "dataset_id": res.dataset_id,
+            "aoi_id": res.aoi_id,
+            "subscription_id": res.data_request_id,
+        },
+    )
+
+
+def _load_file_v2(aws_session: boto3.session.Session, url: str, band_num: int):
+    with rasterio.env.Env(
+        session=rasterio.session.AWSSession(aws_session),
+    ):
+        with rasterio.open(url) as src:
+            return src.read(band_num)
+
+
+def _list_keys_v2(session: boto3.session.Session, bucket_name, prefix) -> list[str]:
+    s3_client = session.client("s3")
+    paginator = s3_client.get_paginator("list_objects_v2")
+    page_iterator = paginator.paginate(
+        Bucket=bucket_name,
+        Prefix=prefix,
+    )
+
+    keys = []
+    for page in page_iterator:
+        for obj in page.get("Contents", []):
+            keys.append(obj["Key"])
+
+    return keys
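
The rewritten `load_xarray_v2` replaces the old eager per-file `rioxarray.open_rasterio` call with per-band `dask.delayed` reads assembled via `dask.array.from_delayed`, taking shape and dtype from the first file's metadata so nothing is fetched until compute time. A self-contained sketch of that pattern, with a stubbed reader standing in for the S3 `rasterio` call:

```python
import dask
import dask.array
import numpy as np


def read_band() -> np.ndarray:
    # Stand-in for _load_file_v2(session, url, band_num), i.e. src.read(band_num).
    return np.arange(6, dtype="float32").reshape(2, 3)


# Shape/dtype must be known up front (the real code takes them from the first file).
lazy = dask.array.from_delayed(dask.delayed(read_band)(), shape=(2, 3), dtype="float32")
print(lazy)            # a lazy dask array; read_band has not run yet
print(lazy.compute())  # triggers the (stubbed) read
```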
cecil-0.0.31/CONTRIBUTING.md
DELETED
@@ -1,21 +0,0 @@
-## Development installation
-
-Install packaging/distribution tools:
-
-```shell
-pip install hatch twine
-```
-
-Install linter
-
-```shell
-pip install black
-```
-
-From top-level repo directory, install the package in editable mode:
-
-```shell
-pip install -e .
-```
-
-Local edits to the package will immediately take effect.
cecil-0.0.31/PKG-INFO
DELETED
@@ -1,122 +0,0 @@
-Metadata-Version: 2.4
-Name: cecil
-Version: 0.0.31
-Summary: Python SDK for Cecil Earth
-License-Expression: MIT
-License-File: LICENSE.txt
-Classifier: Development Status :: 4 - Beta
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 3
-Requires-Python: >=3.10
-Requires-Dist: dask==2025.9.1
-Requires-Dist: pydantic<3.0.0,>=2.11.9
-Requires-Dist: requests<3.0.0,>=2.32.5
-Requires-Dist: rioxarray==0.19.0
-Requires-Dist: snowflake-connector-python[pandas]<4.0.0,>=3.17.4
-Requires-Dist: xarray==2025.6.1
-Description-Content-Type: text/markdown
-
-# Cecil SDK
-
-[](https://pypi.org/project/cecil-sdk)
-[](https://pypi.org/project/cecil-sdk)
-
------
-
-## Table of Contents
-
-- [Installation](#installation)
-- [Authentication](#authentication)
-- [License](#license)
-- [Examples](#examples)
-
-## Installation
-
-```shell
-pip install cecil
-```
-
-## Authentication
-
-Set `CECIL_API_KEY` environment variable to your Cecil API key.
-
-## Examples
-
-### Create an AOI and data request using the Cecil client
-
-```python
-import cecil
-
-client = cecil.Client()
-
-my_aoi = client.create_aoi(
-    name="My AOI",
-    geometry={
-        "type": "Polygon",
-        "coordinates": [
-            [
-                [145.410408835, -42.004083838],
-                [145.410408835, -42.004203978],
-                [145.410623191, -42.004203978],
-                [145.410623191, -42.004083838],
-                [145.410408835, -42.004083838],
-            ]
-        ],
-    },
-)
-
-# Get dataset ID from docs.cecil.earth -> Datasets
-planet_forest_carbon_diligence_id = "c2dd4f55-56f6-4d05-aae3-ba7c1dcd812f"
-
-my_data_request = client.create_data_request(
-    aoi_id=my_aoi.id,
-    dataset_id=planet_forest_carbon_diligence_id,
-)
-
-print(client.get_data_request(my_data_request.id))
-```
-
-### Create a transformation using the Cecil client
-
-```python
-my_transformation = client.create_transformation(
-    data_request_id=my_data_request.id,
-    crs="EPSG:4326",
-    spatial_resolution=0.005,
-)
-
-print(client.get_transformation(my_transformation.id))
-```
-
-### Query data (once transformation is completed)
-
-```python
-df = client.query(f'''
-    SELECT *
-    FROM
-        planet.forest_carbon_diligence
-    WHERE
-        transformation_id = '{my_transformation.id}'
-''')
-```
-
-### Other client methods:
-
-```python
-client.list_aois()
-
-client.get_aoi(my_aoi.id)
-
-client.list_data_requests()
-
-client.get_data_request(my_data_request.id)
-
-client.list_transformations()
-
-client.get_transformation(my_transformation.id)
-```
-
-## License
-
-`cecil` is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
cecil-0.0.31/README.md
DELETED
@@ -1,103 +0,0 @@
-# Cecil SDK
-
-[](https://pypi.org/project/cecil-sdk)
-[](https://pypi.org/project/cecil-sdk)
-
------
-
-## Table of Contents
-
-- [Installation](#installation)
-- [Authentication](#authentication)
-- [License](#license)
-- [Examples](#examples)
-
-## Installation
-
-```shell
-pip install cecil
-```
-
-## Authentication
-
-Set `CECIL_API_KEY` environment variable to your Cecil API key.
-
-## Examples
-
-### Create an AOI and data request using the Cecil client
-
-```python
-import cecil
-
-client = cecil.Client()
-
-my_aoi = client.create_aoi(
-    name="My AOI",
-    geometry={
-        "type": "Polygon",
-        "coordinates": [
-            [
-                [145.410408835, -42.004083838],
-                [145.410408835, -42.004203978],
-                [145.410623191, -42.004203978],
-                [145.410623191, -42.004083838],
-                [145.410408835, -42.004083838],
-            ]
-        ],
-    },
-)
-
-# Get dataset ID from docs.cecil.earth -> Datasets
-planet_forest_carbon_diligence_id = "c2dd4f55-56f6-4d05-aae3-ba7c1dcd812f"
-
-my_data_request = client.create_data_request(
-    aoi_id=my_aoi.id,
-    dataset_id=planet_forest_carbon_diligence_id,
-)
-
-print(client.get_data_request(my_data_request.id))
-```
-
-### Create a transformation using the Cecil client
-
-```python
-my_transformation = client.create_transformation(
-    data_request_id=my_data_request.id,
-    crs="EPSG:4326",
-    spatial_resolution=0.005,
-)
-
-print(client.get_transformation(my_transformation.id))
-```
-
-### Query data (once transformation is completed)
-
-```python
-df = client.query(f'''
-    SELECT *
-    FROM
-        planet.forest_carbon_diligence
-    WHERE
-        transformation_id = '{my_transformation.id}'
-''')
-```
-
-### Other client methods:
-
-```python
-client.list_aois()
-
-client.get_aoi(my_aoi.id)
-
-client.list_data_requests()
-
-client.get_data_request(my_data_request.id)
-
-client.list_transformations()
-
-client.get_transformation(my_transformation.id)
-```
-
-## License
-
-`cecil` is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.

cecil-0.0.31/src/cecil/version.py
DELETED
@@ -1 +0,0 @@
-__version__ = "0.0.31"
cecil-0.0.31/src/cecil/xarray.py
DELETED
@@ -1,415 +0,0 @@
-import re
-import time
-from datetime import datetime
-
-import boto3
-import dask
-import rasterio
-import rasterio.session
-import rioxarray
-import xarray
-
-from .errors import Error
-from .models import SubscriptionMetadata, SubscriptionListFiles
-
-# v1
-
-
-def load_xarray(metadata: SubscriptionMetadata) -> xarray.Dataset:
-    data_vars = {}
-
-    for f in metadata.files:
-        try:
-            dataset = _retry_with_exponential_backoff(_load_file, 5, 1, 2, f.url)
-        except Exception as e:
-            raise ValueError(f"failed to load file: {e}")
-
-        for b in f.bands:
-            band = dataset.sel(band=b.number, drop=True)
-
-            if b.time and b.time_pattern:
-                t = datetime.strptime(b.time, b.time_pattern)
-                band = band.expand_dims("time")
-                band = band.assign_coords(time=[t])
-
-            band.name = b.variable_name
-
-            if b.variable_name not in data_vars:
-                data_vars[b.variable_name] = []
-
-            data_vars[b.variable_name].append(band)
-
-    for variable_name, time_series in data_vars.items():
-        if "time" in time_series[0].dims:
-            data_vars[variable_name] = xarray.concat(
-                time_series, dim="time", join="exact"
-            )
-        else:
-            data_vars[variable_name] = time_series[0]
-
-    return xarray.Dataset(
-        data_vars=data_vars,
-        attrs={
-            "provider_name": metadata.provider_name,
-            "dataset_name": metadata.dataset_name,
-            "dataset_id": metadata.dataset_id,
-            "aoi_id": metadata.aoi_id,
-            "subscription_id": metadata.data_request_id,
-        },
-    )
-
-
-def _retry_with_exponential_backoff(
-    func, retries, start_delay, multiplier, *args, **kwargs
-):
-    delay = start_delay
-    for attempt in range(1, retries + 1):
-        try:
-            return func(*args, **kwargs)
-        except Exception as e:
-            if attempt == retries:
-                raise e
-            time.sleep(delay)
-            delay *= multiplier
-    return None
-
-
-def _load_file(url: str):
-    return rioxarray.open_rasterio(
-        url,
-        chunks={"x": 2000, "y": 2000},
-    )
-
-
-# v2
-
-
-def load_xarray_v2(res: SubscriptionListFiles) -> xarray.Dataset:
-    session = boto3.session.Session(
-        aws_access_key_id=res.credentials.access_key_id,
-        aws_secret_access_key=res.credentials.secret_access_key,
-        aws_session_token=res.credentials.session_token,
-    )
-
-    keys = _list_keys_v2(session, res.bucket.name, res.bucket.prefix)
-
-    if not keys:
-        return xarray.Dataset()
-
-    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
-    data_vars = {}
-
-    for key in keys:
-        try:
-            file_da = _retry_with_exponential_backoff(
-                _load_file_v2,
-                5,
-                1,
-                2,
-                session,
-                f"s3://{res.bucket.name}/{key}",
-            )
-        except Exception as e:
-            raise ValueError(f"failed to load file: {e}")
-
-        filename = key.split("/")[-1]
-
-        file_info = res.file_mapping.get(filename)
-        if not file_info:
-            continue
-
-        timestamp_str = timestamp_pattern.search(key).group()
-
-        for band_num, var_name in enumerate(file_info.bands, start=1):
-            band_da = file_da.sel(band=band_num, drop=True)
-            band_da.name = var_name
-
-            # Dataset with time dimension
-            if timestamp_str != "0000/00/00/00/00/00":
-                t = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
-                band_da = band_da.expand_dims("time")
-                band_da = band_da.assign_coords(time=[t])
-
-            if var_name not in data_vars:
-                data_vars[var_name] = []
-
-            data_vars[var_name].append(band_da)
-
-    for var_name, time_series in data_vars.items():
-        if "time" in time_series[0].dims:
-            data_vars[var_name] = xarray.concat(time_series, dim="time", join="exact")
-        else:
-            data_vars[var_name] = time_series[0]
-
-    return xarray.Dataset(
-        data_vars=data_vars,
-        attrs={
-            "provider_name": res.provider_name,
-            "dataset_name": res.dataset_name,
-            "dataset_id": res.dataset_id,
-            "aoi_id": res.aoi_id,
-            "subscription_id": res.data_request_id,
-        },
-    )
-
-
-def _list_keys_v2(session: boto3.session.Session, bucket_name, prefix) -> list[str]:
-    s3_client = session.client("s3")
-    paginator = s3_client.get_paginator("list_objects_v2")
-    page_iterator = paginator.paginate(
-        Bucket=bucket_name,
-        Prefix=prefix,
-    )
-
-    keys = []
-    for page in page_iterator:
-        for obj in page.get("Contents", []):
-            keys.append(obj["Key"])
-
-    return keys
-
-
-def _load_file_v2(aws_session: boto3.session.Session, url: str):
-    with rasterio.env.Env(
-        session=rasterio.session.AWSSession(aws_session),
-        GDAL_DISABLE_READDIR_ON_OPEN=True,
-    ):
-        return rioxarray.open_rasterio(
-            url,
-            chunks={"x": 2000, "y": 2000},
-        )
-
-
-# v3
-
-
-def load_xarray_v3(res: SubscriptionListFiles) -> xarray.Dataset:
-    session = boto3.session.Session(
-        aws_access_key_id=res.credentials.access_key_id,
-        aws_secret_access_key=res.credentials.secret_access_key,
-        aws_session_token=res.credentials.session_token,
-    )
-
-    keys = _list_keys_v3(session, res.bucket.name, res.bucket.prefix)
-
-    if not keys:
-        return xarray.Dataset()
-
-    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
-    data_vars = {}
-
-    with rasterio.env.Env(
-        session=rasterio.session.AWSSession(session),
-        GDAL_DISABLE_READDIR_ON_OPEN=True,
-    ):
-        first_file = rioxarray.open_rasterio(
-            f"s3://{res.bucket.name}/{keys[0]}", chunks="auto"
-        )
-
-    for key in keys:
-        filename = key.split("/")[-1]
-
-        file_info = res.file_mapping.get(filename)
-        if not file_info:
-            continue
-
-        lazy_array = dask.array.from_delayed(
-            dask.delayed(_load_file_v3)(session, f"s3://{res.bucket.name}/{key}"),
-            shape=first_file.shape,
-            dtype=file_info.type,
-        )
-        lazy_da = xarray.DataArray(
-            lazy_array,
-            dims=first_file.dims,
-            coords=dict(first_file.coords),
-            # attrs=first_file.attrs.copy()  # TODO: not the same for all files
-        )
-        # lazy_da.encoding = first_file.encoding.copy()
-        # lazy_da.rio.write_crs(first_file.rio.crs, inplace=True)
-        # lazy_da.rio.write_transform(first_file.rio.transform(), inplace=True)
-
-        timestamp_str = timestamp_pattern.search(key).group()
-
-        for band_num, var_name in enumerate(file_info.bands, start=1):
-            band_da = lazy_da.sel(band=band_num, drop=True)
-            band_da.name = var_name
-
-            # Dataset with time dimension
-            if timestamp_str != "0000/00/00/00/00/00":
-                t = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
-                band_da = band_da.expand_dims("time")
-                band_da = band_da.assign_coords(time=[t])
-
-            if var_name not in data_vars:
-                data_vars[var_name] = []
-
-            data_vars[var_name].append(band_da)
-
-    for var_name, time_series in data_vars.items():
-        if "time" in time_series[0].dims:
-            data_vars[var_name] = xarray.concat(time_series, dim="time", join="exact")
-        else:
-            data_vars[var_name] = time_series[0]
-
-    return xarray.Dataset(
-        data_vars=data_vars,
-        attrs={
-            "provider_name": res.provider_name,
-            "dataset_name": res.dataset_name,
-            "dataset_id": res.dataset_id,
-            "aoi_id": res.aoi_id,
-            "subscription_id": res.data_request_id,
-        },
-    )
-
-
-def _load_file_v3(aws_session: boto3.session.Session, url: str):
-    with rasterio.env.Env(
-        session=rasterio.session.AWSSession(aws_session),
-        GDAL_DISABLE_READDIR_ON_OPEN=True,
-    ):
-        return rioxarray.open_rasterio(
-            url,
-            chunks="auto",
-        ).values
-        # ).sel(band=num_band, drop=True)
-        # ).sel(band=num_band, drop=True).values
-        # ).isel(band=num_band-1).values
-
-
-def _list_keys_v3(session: boto3.session.Session, bucket_name, prefix) -> list[str]:
-    s3_client = session.client("s3")
-    paginator = s3_client.get_paginator("list_objects_v2")
-    page_iterator = paginator.paginate(
-        Bucket=bucket_name,
-        Prefix=prefix,
-    )
-
-    keys = []
-    for page in page_iterator:
-        for obj in page.get("Contents", []):
-            keys.append(obj["Key"])
-
-    return keys
-
-
-# v4
-
-
-def load_xarray_v4(res: SubscriptionListFiles) -> xarray.Dataset:
-
-    session = boto3.session.Session(
-        aws_access_key_id=res.credentials.access_key_id,
-        aws_secret_access_key=res.credentials.secret_access_key,
-        aws_session_token=res.credentials.session_token,
-    )
-
-    keys = _list_keys_v2(session, res.bucket.name, res.bucket.prefix)
-
-    if not keys:
-        return xarray.Dataset()
-
-    first_file_metadata = _get_file_metadata_v4(session, res.bucket.name, keys[0])
-
-    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
-
-    data_vars = {}
-    for key in keys:
-        filename = key.split("/")[-1].rsplit(".", 1)[0]
-
-        file_info = res.file_mapping.get(filename)
-        if not file_info:
-            continue
-
-        timestamp_str = timestamp_pattern.search(key).group()
-
-        for band_num, band_name in enumerate(file_info.bands, start=1):
-            array = _create_dask_array_v4(
-                session,
-                f"s3://{res.bucket.name}/{key}",
-                band_num,
-                first_file_metadata["height"],
-                first_file_metadata["width"],
-                file_info.type,
-            )
-            da = xarray.DataArray(
-                array,
-                dims=("y", "x"),
-            )
-            da.name = band_name
-
-            # Dataset with time dimension
-            if timestamp_str != "0000/00/00/00/00/00":
-                time = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
-                da = da.expand_dims("time")
-                da = da.assign_coords(time=[time])
-
-            if band_name not in data_vars:
-                data_vars[band_name] = []
-
-            data_vars[band_name].append(da)
-
-    for variable_name, time_series in data_vars.items():
-        if "time" in time_series[0].dims:
-            data_vars[variable_name] = xarray.concat(
-                time_series,
-                dim="time",
-                join="exact",
-            )
-        else:
-            data_vars[variable_name] = time_series[0]
-
-    ds = xarray.Dataset(
-        data_vars=data_vars,
-        coords={
-            "y": first_file_metadata["y"],
-            "x": first_file_metadata["x"],
-        },
-        attrs={
-            "provider_name": res.provider_name,
-            "dataset_name": res.dataset_name,
-            "dataset_id": res.dataset_id,
-            "aoi_id": res.aoi_id,
-            "subscription_id": res.data_request_id,
-        },
-    )
-    ds = ds.rio.write_crs(first_file_metadata["crs"])
-
-    return ds
-
-
-def _get_file_metadata_v4(session, bucket: str, path: str):
-    with rasterio.env.Env(
-        rasterio.session.AWSSession(session), GDAL_DISABLE_READDIR_ON_OPEN=True
-    ):
-        da = xarray.open_dataarray(f"s3://{bucket}/{path}", engine="rasterio")
-
-    return {
-        "crs": da.rio.crs,
-        "height": da.rio.height,
-        "width": da.rio.width,
-        "x": da.x.values,
-        "y": da.y.values,
-    }
-
-
-def _create_dask_array_v4(
-    session: boto3.session.Session,
-    file_path: str,
-    band_num: int,
-    height: int,
-    width: int,
-    dtype: str,
-):
-    rasterio_session = rasterio.session.AWSSession(session)
-
-    def read_chunk():
-        with rasterio.env.Env(
-            session=rasterio_session, GDAL_DISABLE_READDIR_ON_OPEN=True
-        ):
-            with rasterio.open(file_path) as src:
-                return src.read(band_num)
-
-    return dask.array.from_delayed(
-        dask.delayed(read_chunk)(), shape=(height, width), dtype=dtype
    )