cecil 0.0.24__py3-none-any.whl → 0.0.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cecil might be problematic. Click here for more details.

cecil/client.py CHANGED
@@ -9,10 +9,12 @@ from pydantic import BaseModel
9
9
  from requests import auth
10
10
  from cryptography.hazmat.primitives import serialization
11
11
  from typing import Dict, List, Optional
12
+ from warnings import warn
12
13
 
13
14
  from .errors import (
14
15
  Error,
15
16
  _handle_bad_request,
17
+ _handle_method_not_allowed,
16
18
  _handle_not_found,
17
19
  _handle_too_many_requests,
18
20
  _handle_unprocessable_entity,
@@ -34,9 +36,12 @@ from .models import (
34
36
  User,
35
37
  UserCreate,
36
38
  DataRequestMetadata,
39
+ DataRequestParquetFiles,
40
+ DataRequestLoadXarray,
37
41
  )
38
42
  from .version import __version__
39
43
  from .xarray import load_xarray
44
+ from .xarray import load_xarray_v2
40
45
 
41
46
 
42
47
  class Client:
@@ -87,9 +92,27 @@ class Client:
87
92
  metadata = DataRequestMetadata(**res)
88
93
  return load_xarray(metadata)
89
94
 
95
def load_xarray_v2(self, data_request_id: str) -> xarray.Dataset:
    """Fetch the load-xarray description of a data request and build its dataset.

    Args:
        data_request_id: Identifier interpolated into the request path.

    Returns:
        The dataset produced by the module-level ``load_xarray_v2`` helper
        from the parsed response.
    """
    endpoint = f"/v0/data-requests/{data_request_id}/load-xarray"
    payload = self._get(url=endpoint)
    # Inside a method body this bare name resolves to the function imported
    # from .xarray at module level, not to this method.
    return load_xarray_v2(DataRequestLoadXarray(**payload))
99
+
100
def load_dataframe(self, data_request_id: str) -> pd.DataFrame:
    """Load every parquet file of a data request into a single DataFrame.

    Args:
        data_request_id: Identifier interpolated into the request path.

    Returns:
        The row-wise concatenation of all listed parquet files, without the
        ``organisation_id`` and ``created_at`` columns. An empty DataFrame
        when the response lists no files.
    """
    res = self._get(url=f"/v0/data-requests/{data_request_id}/parquet-files")
    metadata = DataRequestParquetFiles(**res)

    # pd.concat raises "No objects to concatenate" on an empty iterable, so
    # an empty file list is answered with an empty frame instead.
    if not metadata.files:
        return pd.DataFrame()

    df = pd.concat(pd.read_parquet(f) for f in metadata.files)
    return df[
        [col for col in df.columns if col not in ("organisation_id", "created_at")]
    ]
107
+
90
108
  def create_transformation(
91
109
  self, data_request_id: str, crs: str, spatial_resolution: float
92
110
  ) -> Transformation:
111
+ warn(
112
+ "create_transformation() is deprecated, refer to https://github.com/cecilearth/examples",
113
+ DeprecationWarning,
114
+ stacklevel=2,
115
+ )
93
116
  res = self._post(
94
117
  url="/v0/transformations",
95
118
  model=TransformationCreate(
@@ -101,14 +124,32 @@ class Client:
101
124
  return Transformation(**res)
102
125
 
103
126
  def get_transformation(self, id: str) -> Transformation:
127
+ warn(
128
+ "get_transformation() is deprecated.",
129
+ DeprecationWarning,
130
+ stacklevel=2,
131
+ )
104
132
  res = self._get(url=f"/v0/transformations/{id}")
105
133
  return Transformation(**res)
106
134
 
107
135
  def list_transformations(self) -> List[Transformation]:
136
+ warn(
137
+ "list_transformations() is deprecated.",
138
+ DeprecationWarning,
139
+ stacklevel=2,
140
+ )
108
141
  res = self._get(url="/v0/transformations")
109
142
  return [Transformation(**record) for record in res["records"]]
110
143
 
111
144
  def query(self, sql: str) -> pd.DataFrame:
145
+ warn(
146
+ "query() is deprecated, use load_xarray() or load_dataframe() instead.",
147
+ DeprecationWarning,
148
+ stacklevel=2,
149
+ )
150
+ return self._query(sql)
151
+
152
+ def _query(self, sql: str) -> pd.DataFrame:
112
153
  if self._snowflake_user_creds is None:
113
154
  res = self._get(url="/v0/snowflake-user-credentials")
114
155
  self._snowflake_user_creds = SnowflakeUserCredentials(**res)
@@ -212,6 +253,8 @@ class Client:
212
253
  raise Error("unauthorised")
213
254
  case 404:
214
255
  _handle_not_found(err.response)
256
+ case 405:
257
+ _handle_method_not_allowed(err.response)
215
258
  case 422:
216
259
  _handle_unprocessable_entity(err.response)
217
260
  case 429:
cecil/errors.py CHANGED
@@ -36,6 +36,17 @@ def _handle_bad_request(response):
36
36
  raise Error("bad request", details)
37
37
 
38
38
 
39
def _handle_method_not_allowed(response):
    """Raise an ``Error`` for a "method not allowed" response.

    When the response text is JSON, every top-level entry is passed along as
    error details, with its key run through ``_format_json_key``. Otherwise
    the error carries the message only.

    Args:
        response: Response object exposing ``text`` and ``json()``.

    Raises:
        Error: Always.
    """
    if _is_json(response.text):
        details = {
            _format_json_key(key): value for key, value in response.json().items()
        }
        raise Error("method not allowed", details)

    raise Error("method not allowed")
48
+
49
+
39
50
  def _handle_not_found(response):
40
51
  if not _is_json(response.text):
41
52
  raise Error("resource not found")
cecil/models.py CHANGED
@@ -135,3 +135,29 @@ class DataRequestMetadata(BaseModel):
135
135
  aoi_id: str
136
136
  data_request_id: str
137
137
  files: List[File]
138
+
139
+
140
# Bucket location together with the credential fields used to read from it.
# In the visible code, name/prefix drive the object listing and the three
# credential fields are exported as AWS_* environment variables; expiration
# is parsed but not read there.
class Bucket(BaseModel):
    # Aliases are generated with to_camel (presumably pydantic's camelCase
    # generator - confirm); populate_by_name also accepts the field names.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    name: str
    prefix: str
    access_key_id: str
    secret_access_key: str
    session_token: str
    expiration: datetime.datetime
148
+
149
+
150
# Parsed response of the /v0/data-requests/{id}/load-xarray endpoint: the
# descriptive fields end up as dataset attributes, and bucket says where the
# raster objects live and how to access them.
class DataRequestLoadXarray(BaseModel):
    # Aliases are generated with to_camel (presumably pydantic's camelCase
    # generator - confirm); populate_by_name also accepts the field names.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    provider_name: str
    dataset_id: str
    dataset_name: str
    dataset_crs: str
    aoi_id: str
    data_request_id: str
    bucket: Bucket
159
+
160
+
161
# Parsed response of the /v0/data-requests/{id}/parquet-files endpoint.
class DataRequestParquetFiles(BaseModel):
    # Aliases are generated with to_camel (presumably pydantic's camelCase
    # generator - confirm); populate_by_name also accepts the field names.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

    # Each entry is handed to pandas.read_parquet by the client as-is.
    files: List[str]
cecil/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.0.24"
1
+ __version__ = "0.0.30"
cecil/xarray.py CHANGED
@@ -1,14 +1,17 @@
1
1
  import os
2
+ import re
3
+ import time
4
+
5
+ import boto3
2
6
  import rioxarray
3
7
  import xarray
4
8
 
5
9
  from datetime import datetime
6
10
 
7
11
  from .errors import Error
8
- from .models import DataRequestMetadata
12
+ from .models import DataRequestMetadata, DataRequestLoadXarray, Bucket
9
13
 
10
- os.environ["GDAL_NUM_THREADS"] = "1"
11
- os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "FALSE"
14
+ os.environ["GDAL_DISABLE_READDIR_ON_OPEN"] = "TRUE"
12
15
 
13
16
 
14
17
  def align_pixel_grids(time_series):
@@ -21,29 +24,50 @@ def align_pixel_grids(time_series):
21
24
  try:
22
25
  aligned_da = da.rio.reproject_match(reference_da)
23
26
  aligned_series.append(aligned_da)
24
- except Exception as e:
27
+ except Exception:
25
28
  raise Error
26
- continue
27
29
 
28
30
  return aligned_series
29
31
 
30
32
 
33
def retry_with_exponential_backoff(
    func, retries, start_delay, multiplier, *args, **kwargs
):
    """Call ``func`` until it succeeds, sleeping longer after each failure.

    Args:
        func: Callable to invoke as ``func(*args, **kwargs)``.
        retries: Maximum number of attempts; must be at least 1.
        start_delay: Seconds to sleep after the first failed attempt.
        multiplier: Factor applied to the delay after every sleep.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The return value of the first successful call.

    Raises:
        ValueError: If ``retries`` is smaller than 1; otherwise ``func`` would
            never run and the caller would silently receive ``None``.
        Exception: Whatever ``func`` raised on the final attempt.
    """
    if retries < 1:
        raise ValueError("retries must be at least 1")

    delay = start_delay
    for attempt in range(1, retries + 1):
        try:
            return func(*args, **kwargs)
        except Exception:
            if attempt == retries:
                # Bare raise keeps the original traceback untouched.
                raise
            time.sleep(delay)
            delay *= multiplier
46
+
47
+
48
def load_file(url: str):
    """Open the raster at ``url`` with rioxarray, chunked 2000 x 2000 in x/y.

    Args:
        url: Location handed to ``rioxarray.open_rasterio`` unchanged.

    Returns:
        The object returned by ``rioxarray.open_rasterio``.
    """
    chunk_sizes = {"x": 2000, "y": 2000}
    return rioxarray.open_rasterio(url, chunks=chunk_sizes)
53
+
54
+
31
55
  def load_xarray(metadata: DataRequestMetadata) -> xarray.Dataset:
32
56
  data_vars = {}
33
57
 
34
58
  for f in metadata.files:
35
- dataset = rioxarray.open_rasterio(
36
- f.url,
37
- chunks={"x": 2000, "y": 2000},
38
- )
59
+ try:
60
+ dataset = retry_with_exponential_backoff(load_file, 5, 1, 2, f.url)
61
+ except Exception as e:
62
+ raise ValueError(f"failed to load file: {e}")
39
63
 
40
64
  for b in f.bands:
41
65
  band = dataset.sel(band=b.number, drop=True)
42
66
 
43
67
  if b.time and b.time_pattern:
44
- time = datetime.strptime(b.time, b.time_pattern)
68
+ t = datetime.strptime(b.time, b.time_pattern)
45
69
  band = band.expand_dims("time")
46
- band = band.assign_coords(time=[time])
70
+ band = band.assign_coords(time=[t])
47
71
 
48
72
  band.name = b.variable_name
49
73
 
@@ -72,3 +96,70 @@ def load_xarray(metadata: DataRequestMetadata) -> xarray.Dataset:
72
96
  "data_request_id": metadata.data_request_id,
73
97
  },
74
98
  )
99
+
100
+
101
def load_xarray_v2(load_xarray_info: DataRequestLoadXarray) -> xarray.Dataset:
    """Build a Dataset from the raster objects stored under a bucket prefix.

    Every object key listed for the bucket is opened as a raster and its
    band 1 becomes one array of the variable named by the key. Arrays of the
    same variable that carry a timestamp are concatenated along ``time``.

    Args:
        load_xarray_info: Parsed load-xarray response; its ``bucket`` says
            where the objects live and the remaining fields become dataset
            attributes.

    Returns:
        A Dataset with one data variable per variable name found in the keys.

    Raises:
        ValueError: If an object key contains no ``YYYY/MM/DD/HH/MM/SS``
            timestamp or is too short to contain a variable name.
    """
    # Compiled once: the pattern is identical for every key.
    timestamp_pattern = re.compile(r"\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}")
    bucket = load_xarray_info.bucket
    bands_by_variable = {}

    for key in _get_xarray_keys(bucket):
        match = timestamp_pattern.search(key)
        if match is None:
            raise ValueError(f"no timestamp found in object key: {key}")
        timestamp_str = match.group()

        # The variable name is taken from a fixed position of the key path.
        segments = key.split("/")
        if len(segments) <= 14:
            raise ValueError(f"no variable name found in object key: {key}")
        variable_name = segments[14]

        filename = f"s3://{bucket.name}/{key}"
        dataset = rioxarray.open_rasterio(filename, chunks={"x": 2000, "y": 2000})
        band = dataset.sel(band=1, drop=True)
        band.name = variable_name

        # An all-zero timestamp marks a dataset without time information.
        if timestamp_str != "0000/00/00/00/00/00":
            # Named ``t`` so the imported ``time`` module is not shadowed.
            t = datetime.strptime(timestamp_str, "%Y/%m/%d/%H/%M/%S")
            band = band.expand_dims("time")
            band = band.assign_coords(time=[t])

        bands_by_variable.setdefault(variable_name, []).append(band)

    data_vars = {}
    for variable_name, time_series in bands_by_variable.items():
        if "time" in time_series[0].dims:
            data_vars[variable_name] = xarray.concat(
                time_series, dim="time", join="exact"
            )
        else:
            data_vars[variable_name] = time_series[0]

    return xarray.Dataset(
        data_vars=data_vars,
        attrs={
            "provider_name": load_xarray_info.provider_name,
            "dataset_id": load_xarray_info.dataset_id,
            "dataset_name": load_xarray_info.dataset_name,
            "dataset_crs": load_xarray_info.dataset_crs,
            "aoi_id": load_xarray_info.aoi_id,
            "data_request_id": load_xarray_info.data_request_id,
        },
    )
145
+
146
+
147
def _get_xarray_keys(bucket: Bucket) -> list[str]:
    """List every object key stored under the bucket's prefix.

    Side effect: the bucket's credentials are written to the process-wide
    ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY`` and ``AWS_SESSION_TOKEN``
    environment variables and stay set after the call.

    Args:
        bucket: Bucket name, prefix and credentials.

    Returns:
        The ``Key`` of every object across all listing pages, in page order.
    """
    credentials = {
        "AWS_ACCESS_KEY_ID": bucket.access_key_id,
        "AWS_SECRET_ACCESS_KEY": bucket.secret_access_key,
        "AWS_SESSION_TOKEN": bucket.session_token,
    }
    for variable, value in credentials.items():
        os.environ[variable] = value

    # The client is created without explicit credentials, so it presumably
    # picks up the variables exported above.
    # NOTE(review): the s3:// URLs opened by the caller likely depend on these
    # variables as well - confirm before scoping them more narrowly.
    pages = (
        boto3.client("s3")
        .get_paginator("list_objects_v2")
        .paginate(Bucket=bucket.name, Prefix=bucket.prefix)
    )

    # Pages without a "Contents" entry contribute nothing.
    return [obj["Key"] for page in pages for obj in page.get("Contents", [])]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cecil
3
- Version: 0.0.24
3
+ Version: 0.0.30
4
4
  Summary: Python SDK for Cecil Earth
5
5
  License-Expression: MIT
6
6
  License-File: LICENSE.txt
@@ -8,10 +8,13 @@ Classifier: Development Status :: 4 - Beta
8
8
  Classifier: License :: OSI Approved :: MIT License
9
9
  Classifier: Operating System :: OS Independent
10
10
  Classifier: Programming Language :: Python :: 3
11
- Requires-Python: >=3.8
12
- Requires-Dist: pydantic
13
- Requires-Dist: requests
14
- Requires-Dist: snowflake-connector-python[pandas]
11
+ Requires-Python: >=3.10
12
+ Requires-Dist: dask==2025.9.1
13
+ Requires-Dist: pydantic<3.0.0,>=2.11.9
14
+ Requires-Dist: requests<3.0.0,>=2.32.5
15
+ Requires-Dist: rioxarray==0.19.0
16
+ Requires-Dist: snowflake-connector-python[pandas]<4.0.0,>=3.17.4
17
+ Requires-Dist: xarray==2025.6.1
15
18
  Description-Content-Type: text/markdown
16
19
 
17
20
  # Cecil SDK
@@ -0,0 +1,10 @@
1
+ cecil/__init__.py,sha256=AEcRl73BDSAQe6W0d1PDD87IEcumARtREl7dCVa_YQY,86
2
+ cecil/client.py,sha256=mhe7l133-uy7hpRWjcb2s4DD4wsnxPqJLoEuBKMut5I,9501
3
+ cecil/errors.py,sha256=EnyYvFfU_JWYTTRax58bdwOndri2f-HzbqyzxtoV8uo,2100
4
+ cecil/models.py,sha256=lI4UulUv-J0Qh4zrm1UBuqS96CDewyL6sGWRP4AQEQs,4163
5
+ cecil/version.py,sha256=8ZeepqkW4DvpVeNm92mx0tIzgvVevS4NKWkTXXHuXNY,23
6
+ cecil/xarray.py,sha256=K3IRfTkdWAJVUK0LgZLjztpeqhC4QptkrdNg9WYoIVk,5024
7
+ cecil-0.0.30.dist-info/METADATA,sha256=-G1QZ40hNHT_Nz8p5G9PFpqZTTnZoYeW6eEslNdlUGw,2800
8
+ cecil-0.0.30.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ cecil-0.0.30.dist-info/licenses/LICENSE.txt,sha256=mUexcmfYx3bG1VIzAdQTOf_NzStYw6-QkKVdUY_d4i4,1066
10
+ cecil-0.0.30.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- cecil/__init__.py,sha256=AEcRl73BDSAQe6W0d1PDD87IEcumARtREl7dCVa_YQY,86
2
- cecil/client.py,sha256=KyxrMSZpo1R46r6LIy8vSv5YxONsr8xiM4B0W4AGC_o,7923
3
- cecil/errors.py,sha256=ZNiSTYH2MgNZ7tNIgV07-Ge3KtmdncfzWiBi9yjURGs,1818
4
- cecil/models.py,sha256=3W892XywxLIZL6TDCHM8_PVRHzR48bJXT5kTV7UU_bY,3509
5
- cecil/version.py,sha256=ZQhXtFuXIGDwpCAUqfQWo3IQMf-8Lz9t5nYhJnOYBdI,23
6
- cecil/xarray.py,sha256=CvqfJ7NBMLLA4jf73ek2ZtV3sj8xOII__5S0_l3KXYI,2161
7
- cecil-0.0.24.dist-info/METADATA,sha256=rWLY1mb0eD52cUBkiofgC2ojdVVtjJ9twSo8KoQWqx4,2659
8
- cecil-0.0.24.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
- cecil-0.0.24.dist-info/licenses/LICENSE.txt,sha256=mUexcmfYx3bG1VIzAdQTOf_NzStYw6-QkKVdUY_d4i4,1066
10
- cecil-0.0.24.dist-info/RECORD,,
File without changes