cubexpress 0.1.6__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cubexpress has been flagged by the registry.
- {cubexpress-0.1.6 → cubexpress-0.1.8}/PKG-INFO +1 -1
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/cloud_utils.py +61 -93
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/downloader.py +4 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/request.py +28 -23
- {cubexpress-0.1.6 → cubexpress-0.1.8}/pyproject.toml +1 -1
- {cubexpress-0.1.6 → cubexpress-0.1.8}/LICENSE +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/README.md +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/__init__.py +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/cache.py +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/conversion.py +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/cube.py +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/geospatial.py +0 -0
- {cubexpress-0.1.6 → cubexpress-0.1.8}/cubexpress/geotyping.py +0 -0
cubexpress/cloud_utils.py

@@ -13,10 +13,6 @@ Both return a ``pandas.DataFrame`` with the columns **day**, **cloudPct** and
 from __future__ import annotations
 
 import datetime as dt
-import json
-import pathlib
-from typing import List, Optional
-
 import ee
 import pandas as pd
 
@@ -28,10 +24,8 @@ def _cloud_table_single_range(
     lon: float,
     lat: float,
     edge_size: int,
-    scale: int,
     start: str,
-    end: str,
-    collection: str = "COPERNICUS/S2_HARMONIZED",
+    end: str
 ) -> pd.DataFrame:
     """Return raw cloud-table rows for a single *start–end* interval.
 
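After this change the helper takes only lon, lat, edge_size, start and end; the scale is fixed at 10 m and the collection is hardcoded to COPERNICUS/S2_HARMONIZED. A minimal sketch of the new call shape (the coordinates are hypothetical, and the function is module-private, shown only for illustration):

    # hypothetical call against the slimmed 0.1.8 signature
    df = _cloud_table_single_range(
        lon=-3.70, lat=40.42, edge_size=2048,
        start="2023-01-01", end="2023-06-30",
    )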
@@ -53,76 +47,64 @@ def _cloud_table_single_range(
     Columns: **day** (str), **cloudPct** (float), **images** (str
         concatenation of asset IDs separated by ``-``). No filtering applied.
     """
-    roi = _square_roi(lon, lat, edge_size, scale)
-    s2 = ee.ImageCollection(collection)
-
-    if collection in (
-        "COPERNICUS/S2_HARMONIZED",
-        "COPERNICUS/S2_SR_HARMONIZED",
-    ):
-        qa_band = "cs_cdf"
-        csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
-    else:
-        qa_band, csp = None, None
-
-    def _add_props(img):
-        day = ee.Date(img.get("system:time_start")).format("YYYY-MM-dd")
-        imgid = img.get("system:index")
-
-        if qa_band:
-            score = (
-                img.linkCollection(csp, [qa_band])
-                .select([qa_band])
-                .reduceRegion(ee.Reducer.mean(), roi, scale)
-                .get(qa_band)
-            )
-            # If score is null assume completely clear (score=1 → cloudPct=0)
-            score_safe = ee.Algorithms.If(score, score, -1)
-            cloud_pct = (
-                ee.Number(1)
-                .subtract(ee.Number(score_safe))
-                .multiply(10000)
-                .round()
-                .divide(100)
-            )
-        else:
-            cloud_pct = ee.Number(-1)
-
-        return ee.Feature(
-            None,
-            {
-                "day": day,
-                "cloudPct": cloud_pct,
-                "images": imgid,
-            },
-        )
 
-
-
+    center = ee.Geometry.Point([lon, lat])
+    roi = _square_roi(lon, lat, edge_size, 10)
+
+    s2 = (
+        ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
         .filterBounds(roi)
-        .
-
-
-
+        .filterDate(start, end)
+    )
+
+    csp = ee.ImageCollection("GOOGLE/CLOUD_SCORE_PLUS/V1/S2_HARMONIZED")
+
+    ic = (
+        s2
+        .linkCollection(csp, ["cs_cdf"])
+        .select(["cs_cdf"])
+    )
+    ids = ic.aggregate_array("system:index").getInfo()
+    df_ids = pd.DataFrame({"id": ids})
+
+    region_scale = edge_size * 10 / 2
+
+    try:
+        raw = ic.getRegion(geometry=center, scale=region_scale).getInfo()
+    except ee.ee_exception.EEException as e:
+        if "No bands in collection" in str(e):
+            return pd.DataFrame(
+                columns=["id", "cs_cdf", "date", "high_null_flag"]
+            )
+        raise
+
+    df_raw = pd.DataFrame(raw[1:], columns=raw[0])
+
+    df = (
+        df_ids
+        .merge(df_raw, on="id", how="left")
+        .assign(
+            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d"),
+            high_null_flag=lambda d: d["cs_cdf"].isna().astype(int),
+        )
+        .drop(columns=["longitude", "latitude", "time"])
     )
 
-    df =
-
-    df["images"] = df["images"].astype(str)
+    df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
+
     return df
 
 
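The rewritten helper replaces the per-image reduceRegion scoring with a single getRegion call whose tabular output is merged back onto the full ID list, so scenes that return no cs_cdf value are kept and flagged rather than dropped. A self-contained sketch of that pandas pattern, with synthetic rows standing in for the getRegion payload (the asset IDs and scores below are made up):

    import pandas as pd

    # Synthetic stand-in for getRegion output: header row, then one record per scene.
    raw = [
        ["id", "longitude", "latitude", "time", "cs_cdf"],
        ["20240101T103321_20240101T103317_T30TWM", -3.7, 40.4, 1704105201000, 0.92],
        ["20240111T103309_20240111T103305_T30TWM", -3.7, 40.4, 1704969189000, None],
    ]
    ids = [row[0] for row in raw[1:]]  # stand-in for aggregate_array("system:index")

    df_ids = pd.DataFrame({"id": ids})
    df_raw = pd.DataFrame(raw[1:], columns=raw[0])

    df = (
        df_ids.merge(df_raw, on="id", how="left")  # keep every scene, scored or not
        .assign(
            date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d"),
            high_null_flag=lambda d: d["cs_cdf"].isna().astype(int),
        )
        .drop(columns=["longitude", "latitude", "time"])
    )

    # Missing scores fall back to the per-date mean (and stay NaN if the whole date is null).
    df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
    print(df)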
-def cloud_table(
+def s2_cloud_table(
     lon: float,
     lat: float,
     edge_size: int = 2048,
-    scale: int = 10,
     start: str = "2017-01-01",
     end: str = "2024-12-31",
-
-    bands: Optional[List[str]] = None,
-    collection: str = "COPERNICUS/S2_HARMONIZED",
-    output_path: str | pathlib.Path | None = None,
+    cscore: float = 0.5,
     cache: bool = True,
     verbose: bool = True,
 ) -> pd.DataFrame:
@@ -161,23 +143,10 @@ def cloud_table(
     pandas.DataFrame
         Filtered cloud table with ``.attrs`` containing the call parameters.
     """
-    if bands is None:
-        bands = [
-            "B1",
-            "B2",
-            "B3",
-            "B4",
-            "B5",
-            "B6",
-            "B7",
-            "B8",
-            "B8A",
-            "B9",
-            "B10",
-            "B11",
-            "B12",
-        ]
 
+    bands = ["B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B9", "B10", "B11", "B12"]
+    collection = "COPERNICUS/S2_HARMONIZED"
+    scale = 10
     cache_file = _cache_key(lon, lat, edge_size, scale, collection)
 
     # ─── 1. Load cached data if present ────────────────────────────────────
@@ -185,7 +154,7 @@ def cloud_table(
         if verbose:
             print("📂 Loading cached table …")
         df_cached = pd.read_parquet(cache_file)
-        have_idx = pd.to_datetime(df_cached["
+        have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()
 
         cached_start = have_idx.min().date()
         cached_end = have_idx.max().date()
@@ -204,39 +173,40 @@ def cloud_table(
             a1, b1 = start, cached_start.isoformat()
             df_new_parts.append(
                 _cloud_table_single_range(
-                    lon, lat, edge_size,
+                    lon, lat, edge_size, a1, b1
                 )
             )
         if dt.date.fromisoformat(end) > cached_end:
             a2, b2 = cached_end.isoformat(), end
             df_new_parts.append(
                 _cloud_table_single_range(
-                    lon, lat, edge_size,
+                    lon, lat, edge_size, a2, b2
                 )
             )
         df_new = pd.concat(df_new_parts, ignore_index=True)
         df_full = (
             pd.concat([df_cached, df_new], ignore_index=True)
-            .
-            .sort_values("day", kind="mergesort")
+            .sort_values("date", kind="mergesort")
         )
     else:
-
+
         if verbose:
             msg = "Generating table (no cache found)…" if cache else "Generating table…"
             print("⏳", msg)
         df_full = _cloud_table_single_range(
-            lon, lat, edge_size,
+            lon, lat, edge_size, start, end
         )
+
 
     # ─── 2. Save cache ─────────────────────────────────────────────────────
     if cache:
         df_full.to_parquet(cache_file, compression="zstd")
 
     # ─── 3. Filter by cloud cover and requested date window ────────────────
+
     result = (
-        df_full.query("@start <=
-        .query("
+        df_full.query("@start <= date <= @end")
+        .query("cs_cdf > @cscore")
         .reset_index(drop=True)
     )
 
@@ -248,9 +218,7 @@ def cloud_table(
             "edge_size": edge_size,
             "scale": scale,
             "bands": bands,
-            "collection": collection,
-            "cloud_max": cloud_max,
-            "output_path": str(output_path) if output_path else "",
+            "collection": collection
         }
     )
     return result
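Taken together, these hunks rename cloud_table to s2_cloud_table and replace the bands/collection/scale/output_path knobs with a single cscore threshold applied to the Cloud Score+ cs_cdf column. A hedged usage sketch (the import path is assumed from the file layout; the coordinates are hypothetical):

    from cubexpress.cloud_utils import s2_cloud_table

    table = s2_cloud_table(
        lon=-3.70,
        lat=40.42,
        edge_size=2048,      # pixels at the fixed 10 m scale
        start="2023-01-01",
        end="2023-12-31",
        cscore=0.6,          # keep rows where cs_cdf > 0.6
        cache=True,
        verbose=True,
    )
    print(table.head())
    print(table.attrs)       # lon, lat, edge_size, scale, bands, collection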
cubexpress/downloader.py

@@ -20,7 +20,11 @@ from typing import Any, Dict, List
 import ee
 import rasterio as rio
 from rasterio.io import MemoryFile
+import logging
+import os
 
+os.environ['CPL_LOG_ERRORS'] = 'OFF'
+logging.getLogger('rasterio._env').setLevel(logging.ERROR)
 
 def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None:
     """Download *ulist* and save it as *full_outname*.
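The four added lines quiet GDAL and rasterio environment chatter at module load. A sketch of the same idea in user code; the ordering requirement is an assumption, namely that the environment variable should be set before rasterio first initializes GDAL:

    import logging
    import os

    os.environ["CPL_LOG_ERRORS"] = "OFF"                        # GDAL config, set pre-import
    logging.getLogger("rasterio._env").setLevel(logging.ERROR)  # drop rasterio env warnings

    import rasterio  # imported only after the environment is configured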
cubexpress/request.py

@@ -29,49 +29,54 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
     If *df* is empty after filtering.
 
     """
-
+
+
+    df_ = df.copy()
+
+    if df_.empty:
         raise ValueError("cloud_table returned no rows; nothing to request.")
 
     rt = lonlat2rt(
-        lon=
-        lat=
-        edge_size=
-        scale=
+        lon=df_.attrs["lon"],
+        lat=df_.attrs["lat"],
+        edge_size=df_.attrs["edge_size"],
+        scale=df_.attrs["scale"],
     )
-    centre_hash = pgh.encode(
-    reqs:
+    centre_hash = pgh.encode(df_.attrs["lat"], df_.attrs["lon"], precision=5)
+    reqs: list[Request] = []
 
     if mosaic:
         # group all asset IDs per day
         grouped = (
-
-
-            .groupby("day")["img"]
-            .apply(list)
+            df_.groupby("date")["id"]  # Series of id lists per day
+            .apply(list)
         )
 
         for day, img_ids in grouped.items():
             ee_img = ee.ImageCollection(
-                [ee.Image(f"{
+                [ee.Image(f"{df_.attrs['collection']}/{img}") for img in img_ids]
             ).mosaic()
+
             reqs.append(
                 Request(
-                    id=f"{day}_{centre_hash}
+                    id=f"{day}_{centre_hash}",
                     raster_transform=rt,
                     image=ee_img,
-                    bands=
+                    bands=df_.attrs["bands"],
                 )
             )
     else:  # one request per asset
-        for _, row in
-
-
-
-
-
-
-
+        for _, row in df_.iterrows():
+            img_id = row["id"]
+            day = row["date"]
+
+            reqs.append(
+                Request(
+                    id=f"{day}_{centre_hash}_{img_id}",
+                    raster_transform=rt,
+                    image=f"{df_.attrs['collection']}/{img_id}",
+                    bands=df_.attrs["bands"],
                 )
+            )
 
     return RequestSet(requestset=reqs)
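With the table's .attrs carrying lon, lat, edge_size, scale, bands and collection, the request builder can now be driven entirely from the DataFrame. A hedged end-to-end sketch (import paths assumed from the file layout; coordinates hypothetical):

    from cubexpress.cloud_utils import s2_cloud_table
    from cubexpress.request import table_to_requestset

    df = s2_cloud_table(lon=-3.70, lat=40.42, start="2023-06-01", end="2023-08-31")

    requests = table_to_requestset(df, mosaic=True)     # one mosaicked Request per date
    # requests = table_to_requestset(df, mosaic=False)  # or one Request per asset ID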