cubexpress 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cubexpress/__init__.py +2 -2
- cubexpress/cloud_utils.py +44 -28
- cubexpress/cube.py +44 -9
- cubexpress/downloader.py +47 -7
- cubexpress/geotyping.py +6 -16
- cubexpress/request.py +56 -31
- {cubexpress-0.1.8.dist-info → cubexpress-0.1.10.dist-info}/METADATA +2 -1
- cubexpress-0.1.10.dist-info/RECORD +13 -0
- cubexpress-0.1.8.dist-info/RECORD +0 -13
- {cubexpress-0.1.8.dist-info → cubexpress-0.1.10.dist-info}/LICENSE +0 -0
- {cubexpress-0.1.8.dist-info → cubexpress-0.1.10.dist-info}/WHEEL +0 -0
cubexpress/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from cubexpress.conversion import lonlat2rt, geo2utm
|
|
2
2
|
from cubexpress.geotyping import RasterTransform, Request, RequestSet
|
|
3
|
-
from cubexpress.cloud_utils import
|
|
3
|
+
from cubexpress.cloud_utils import s2_cloud_table
|
|
4
4
|
from cubexpress.cube import get_cube
|
|
5
5
|
from cubexpress.request import table_to_requestset
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ __all__ = [
|
|
|
15
15
|
"RequestSet",
|
|
16
16
|
"geo2utm",
|
|
17
17
|
"get_cube",
|
|
18
|
-
"
|
|
18
|
+
"s2_cloud_table",
|
|
19
19
|
"table_to_requestset"
|
|
20
20
|
]
|
|
21
21
|
|
cubexpress/cloud_utils.py
CHANGED
|
@@ -27,25 +27,30 @@ def _cloud_table_single_range(
|
|
|
27
27
|
start: str,
|
|
28
28
|
end: str
|
|
29
29
|
) -> pd.DataFrame:
|
|
30
|
-
"""
|
|
30
|
+
"""
|
|
31
|
+
Build a daily cloud-score table for a square Sentinel-2 footprint.
|
|
31
32
|
|
|
32
33
|
Parameters
|
|
33
34
|
----------
|
|
34
|
-
lon, lat
|
|
35
|
-
|
|
36
|
-
edge_size
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
ISO-dates (``YYYY-MM-DD``) delimiting the query.
|
|
41
|
-
collection
|
|
42
|
-
Sentinel-2 collection name to query.
|
|
35
|
+
lon, lat : float
|
|
36
|
+
Point at the centre of the requested region (°).
|
|
37
|
+
edge_size : int
|
|
38
|
+
Side length of the square region in Sentinel-2 pixels (10 m each).
|
|
39
|
+
start, end : str
|
|
40
|
+
ISO-8601 dates delimiting the period, e.g. ``"2024-06-01"``.
|
|
43
41
|
|
|
44
42
|
Returns
|
|
45
43
|
-------
|
|
46
44
|
pandas.DataFrame
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
One row per image with columns:
|
|
46
|
+
* ``id`` – Sentinel-2 ID
|
|
47
|
+
* ``cs_cdf`` – Cloud Score Plus CDF (0–1)
|
|
48
|
+
* ``date`` – acquisition date (YYYY-MM-DD)
|
|
49
|
+
* ``null_flag`` – 1 if cloud score missing
|
|
50
|
+
|
|
51
|
+
Notes
|
|
52
|
+
-----
|
|
53
|
+
Missing ``cs_cdf`` values are filled with the mean of the same day.
|
|
49
54
|
"""
|
|
50
55
|
|
|
51
56
|
center = ee.Geometry.Point([lon, lat])
|
|
@@ -64,6 +69,8 @@ def _cloud_table_single_range(
|
|
|
64
69
|
.linkCollection(csp, ["cs_cdf"])
|
|
65
70
|
.select(["cs_cdf"])
|
|
66
71
|
)
|
|
72
|
+
|
|
73
|
+
# image IDs for every expected date
|
|
67
74
|
ids = ic.aggregate_array("system:index").getInfo()
|
|
68
75
|
df_ids = pd.DataFrame({"id": ids})
|
|
69
76
|
|
|
@@ -76,7 +83,7 @@ def _cloud_table_single_range(
|
|
|
76
83
|
except ee.ee_exception.EEException as e:
|
|
77
84
|
if "No bands in collection" in str(e):
|
|
78
85
|
return pd.DataFrame(
|
|
79
|
-
columns=["id", "cs_cdf", "date", "
|
|
86
|
+
columns=["id", "cs_cdf", "date", "null_flag"]
|
|
80
87
|
)
|
|
81
88
|
raise
|
|
82
89
|
|
|
@@ -88,11 +95,12 @@ def _cloud_table_single_range(
|
|
|
88
95
|
.merge(df_raw, on="id", how="left")
|
|
89
96
|
.assign(
|
|
90
97
|
date=lambda d: pd.to_datetime(d["id"].str[:8], format="%Y%m%d").dt.strftime("%Y-%m-%d"),
|
|
91
|
-
|
|
98
|
+
null_flag=lambda d: d["cs_cdf"].isna().astype(int),
|
|
92
99
|
)
|
|
93
100
|
.drop(columns=["longitude", "latitude", "time"])
|
|
94
101
|
)
|
|
95
102
|
|
|
103
|
+
# fill missing scores with daily mean
|
|
96
104
|
df["cs_cdf"] = df["cs_cdf"].fillna(df.groupby("date")["cs_cdf"].transform("mean"))
|
|
97
105
|
|
|
98
106
|
return df
|
|
@@ -101,11 +109,12 @@ def _cloud_table_single_range(
|
|
|
101
109
|
def s2_cloud_table(
|
|
102
110
|
lon: float,
|
|
103
111
|
lat: float,
|
|
104
|
-
edge_size: int
|
|
105
|
-
start: str
|
|
106
|
-
end: str
|
|
107
|
-
|
|
108
|
-
|
|
112
|
+
edge_size: int,
|
|
113
|
+
start: str,
|
|
114
|
+
end: str,
|
|
115
|
+
max_cscore: float = 1.0,
|
|
116
|
+
min_cscore: float = 0.0,
|
|
117
|
+
cache: bool = False,
|
|
109
118
|
verbose: bool = True,
|
|
110
119
|
) -> pd.DataFrame:
|
|
111
120
|
"""Build (and cache) a per-day cloud-table for the requested ROI.
|
|
@@ -152,7 +161,7 @@ def s2_cloud_table(
|
|
|
152
161
|
# ─── 1. Load cached data if present ────────────────────────────────────
|
|
153
162
|
if cache and cache_file.exists():
|
|
154
163
|
if verbose:
|
|
155
|
-
print("📂 Loading cached
|
|
164
|
+
print("📂 Loading cached metadata …")
|
|
156
165
|
df_cached = pd.read_parquet(cache_file)
|
|
157
166
|
have_idx = pd.to_datetime(df_cached["date"], errors="coerce").dropna()
|
|
158
167
|
|
|
@@ -164,7 +173,7 @@ def s2_cloud_table(
|
|
|
164
173
|
and dt.date.fromisoformat(end) <= cached_end
|
|
165
174
|
):
|
|
166
175
|
if verbose:
|
|
167
|
-
print("✅ Served entirely from
|
|
176
|
+
print("✅ Served entirely from metadata.")
|
|
168
177
|
df_full = df_cached
|
|
169
178
|
else:
|
|
170
179
|
# Identify missing segments and fetch only those.
|
|
@@ -183,15 +192,21 @@ def s2_cloud_table(
|
|
|
183
192
|
lon, lat, edge_size, a2, b2
|
|
184
193
|
)
|
|
185
194
|
)
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
195
|
+
df_new_parts = [df for df in df_new_parts if not df.empty]
|
|
196
|
+
|
|
197
|
+
if df_new_parts:
|
|
198
|
+
|
|
199
|
+
df_new = pd.concat(df_new_parts, ignore_index=True)
|
|
200
|
+
df_full = (
|
|
201
|
+
pd.concat([df_cached, df_new], ignore_index=True)
|
|
202
|
+
.sort_values("date", kind="mergesort")
|
|
203
|
+
)
|
|
204
|
+
else:
|
|
205
|
+
df_full = df_cached
|
|
191
206
|
else:
|
|
192
207
|
|
|
193
208
|
if verbose:
|
|
194
|
-
msg = "Generating
|
|
209
|
+
msg = "Generating metadata (no cache found)…" if cache else "Generating metadata…"
|
|
195
210
|
print("⏳", msg)
|
|
196
211
|
df_full = _cloud_table_single_range(
|
|
197
212
|
lon, lat, edge_size, start, end
|
|
@@ -206,7 +221,7 @@ def s2_cloud_table(
|
|
|
206
221
|
|
|
207
222
|
result = (
|
|
208
223
|
df_full.query("@start <= date <= @end")
|
|
209
|
-
.query("cs_cdf
|
|
224
|
+
.query("@min_cscore <= cs_cdf <= @max_cscore")
|
|
210
225
|
.reset_index(drop=True)
|
|
211
226
|
)
|
|
212
227
|
|
|
@@ -222,3 +237,4 @@ def s2_cloud_table(
|
|
|
222
237
|
}
|
|
223
238
|
)
|
|
224
239
|
return result
|
|
240
|
+
|
cubexpress/cube.py
CHANGED
|
@@ -16,17 +16,21 @@ from __future__ import annotations
|
|
|
16
16
|
import pathlib
|
|
17
17
|
import concurrent.futures
|
|
18
18
|
from typing import Dict, Any
|
|
19
|
-
|
|
20
19
|
import ee
|
|
20
|
+
|
|
21
|
+
|
|
21
22
|
from cubexpress.downloader import download_manifest, download_manifests
|
|
22
23
|
from cubexpress.geospatial import quadsplit_manifest, calculate_cell_size
|
|
23
|
-
from cubexpress.
|
|
24
|
+
from cubexpress.request import table_to_requestset
|
|
25
|
+
import pandas as pd
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
def get_geotiff(
|
|
27
29
|
manifest: Dict[str, Any],
|
|
28
30
|
full_outname: pathlib.Path | str,
|
|
31
|
+
join: bool = True,
|
|
29
32
|
nworks: int = 4,
|
|
33
|
+
verbose: bool = True,
|
|
30
34
|
) -> None:
|
|
31
35
|
"""Download *manifest* to *full_outname*, retrying with tiled requests.
|
|
32
36
|
|
|
@@ -39,20 +43,28 @@ def get_geotiff(
|
|
|
39
43
|
nworks
|
|
40
44
|
Maximum worker threads when the image must be split; default **4**.
|
|
41
45
|
"""
|
|
46
|
+
full_outname = pathlib.Path(full_outname)
|
|
42
47
|
try:
|
|
43
|
-
download_manifest(manifest,
|
|
48
|
+
download_manifest(manifest, full_outname)
|
|
44
49
|
except ee.ee_exception.EEException as err:
|
|
45
|
-
|
|
50
|
+
|
|
46
51
|
size = manifest["grid"]["dimensions"]["width"] # square images assumed
|
|
47
52
|
cell_w, cell_h, power = calculate_cell_size(str(err), size)
|
|
48
53
|
tiled = quadsplit_manifest(manifest, cell_w, cell_h, power)
|
|
49
|
-
download_manifests(tiled,
|
|
54
|
+
download_manifests(tiled, full_outname, join, nworks)
|
|
55
|
+
|
|
56
|
+
if verbose:
|
|
57
|
+
print(f"Downloaded {full_outname}")
|
|
50
58
|
|
|
51
59
|
|
|
52
60
|
def get_cube(
|
|
53
|
-
|
|
61
|
+
table: pd.DataFrame,
|
|
54
62
|
outfolder: pathlib.Path | str,
|
|
63
|
+
mosaic: bool = True,
|
|
64
|
+
join: bool = True,
|
|
55
65
|
nworks: int = 4,
|
|
66
|
+
verbose: bool = True,
|
|
67
|
+
cache: bool = True
|
|
56
68
|
) -> None:
|
|
57
69
|
"""Download every request in *requests* to *outfolder* using a thread pool.
|
|
58
70
|
|
|
@@ -68,17 +80,40 @@ def get_cube(
|
|
|
68
80
|
nworks
|
|
69
81
|
Pool size for concurrent downloads; default **4**.
|
|
70
82
|
"""
|
|
71
|
-
|
|
83
|
+
|
|
84
|
+
requests = table_to_requestset(
|
|
85
|
+
table=table,
|
|
86
|
+
mosaic=mosaic
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
outfolder = pathlib.Path(outfolder).expanduser().resolve()
|
|
72
90
|
|
|
73
91
|
with concurrent.futures.ThreadPoolExecutor(max_workers=nworks) as pool:
|
|
74
92
|
futures = []
|
|
75
93
|
for _, row in requests._dataframe.iterrows():
|
|
76
|
-
outname =
|
|
94
|
+
outname = pathlib.Path(outfolder) / f"{row.id}.tif"
|
|
95
|
+
if outname.exists() and cache:
|
|
96
|
+
continue
|
|
77
97
|
outname.parent.mkdir(parents=True, exist_ok=True)
|
|
78
|
-
futures.append(
|
|
98
|
+
futures.append(
|
|
99
|
+
pool.submit(
|
|
100
|
+
get_geotiff,
|
|
101
|
+
row.manifest,
|
|
102
|
+
outname,
|
|
103
|
+
join,
|
|
104
|
+
nworks,
|
|
105
|
+
verbose
|
|
106
|
+
)
|
|
107
|
+
)
|
|
79
108
|
|
|
80
109
|
for fut in concurrent.futures.as_completed(futures):
|
|
81
110
|
try:
|
|
82
111
|
fut.result()
|
|
83
112
|
except Exception as exc: # noqa: BLE001 – log and keep going
|
|
84
113
|
print(f"Download error: {exc}")
|
|
114
|
+
|
|
115
|
+
download_df = requests._dataframe[["outname", "cs_cdf", "date"]].copy()
|
|
116
|
+
download_df["outname"] = outfolder / requests._dataframe["outname"]
|
|
117
|
+
download_df.rename(columns={"outname": "full_outname"}, inplace=True)
|
|
118
|
+
|
|
119
|
+
return download_df
|
cubexpress/downloader.py
CHANGED
|
@@ -21,7 +21,11 @@ import ee
|
|
|
21
21
|
import rasterio as rio
|
|
22
22
|
from rasterio.io import MemoryFile
|
|
23
23
|
import logging
|
|
24
|
+
from rasterio.merge import merge
|
|
25
|
+
from rasterio.enums import Resampling
|
|
24
26
|
import os
|
|
27
|
+
import shutil
|
|
28
|
+
import tempfile
|
|
25
29
|
|
|
26
30
|
os.environ['CPL_LOG_ERRORS'] = 'OFF'
|
|
27
31
|
logging.getLogger('rasterio._env').setLevel(logging.ERROR)
|
|
@@ -50,10 +54,10 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
|
|
|
50
54
|
driver="GTiff",
|
|
51
55
|
tiled=True,
|
|
52
56
|
interleave="band",
|
|
53
|
-
blockxsize=256,
|
|
57
|
+
blockxsize=256, # TODO: Creo que es 128 (por de la superresolucion)
|
|
54
58
|
blockysize=256,
|
|
55
59
|
compress="ZSTD",
|
|
56
|
-
zstd_level=13,
|
|
60
|
+
# zstd_level=13,
|
|
57
61
|
predictor=2,
|
|
58
62
|
num_threads=20,
|
|
59
63
|
nodata=65535,
|
|
@@ -65,13 +69,11 @@ def download_manifest(ulist: Dict[str, Any], full_outname: pathlib.Path) -> None
|
|
|
65
69
|
with rio.open(full_outname, "w", **profile) as dst:
|
|
66
70
|
dst.write(src.read())
|
|
67
71
|
|
|
68
|
-
print(f"{full_outname} downloaded successfully.") # noqa: T201
|
|
69
|
-
|
|
70
|
-
|
|
71
72
|
def download_manifests(
|
|
72
|
-
manifests:
|
|
73
|
-
max_workers: int,
|
|
73
|
+
manifests: list[Dict[str, Any]],
|
|
74
74
|
full_outname: pathlib.Path,
|
|
75
|
+
join: bool = True,
|
|
76
|
+
max_workers: int = 4,
|
|
75
77
|
) -> None:
|
|
76
78
|
"""Download every manifest in *manifests* concurrently.
|
|
77
79
|
|
|
@@ -79,6 +81,12 @@ def download_manifests(
|
|
|
79
81
|
``full_outname.parent/full_outname.stem`` with names ``000000.tif``,
|
|
80
82
|
``000001.tif`` … according to the list order.
|
|
81
83
|
"""
|
|
84
|
+
# full_outname = pathlib.Path("/home/contreras/Documents/GitHub/cubexpress/cubexpress_test/2017-08-19_6mfrw_18LVN.tif")
|
|
85
|
+
original_dir = full_outname.parent
|
|
86
|
+
if join:
|
|
87
|
+
tmp_dir = pathlib.Path(tempfile.mkdtemp(prefix="s2tmp_"))
|
|
88
|
+
full_outname = tmp_dir / full_outname.name
|
|
89
|
+
|
|
82
90
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
83
91
|
futures = []
|
|
84
92
|
|
|
@@ -93,3 +101,35 @@ def download_manifests(
|
|
|
93
101
|
fut.result()
|
|
94
102
|
except Exception as exc: # noqa: BLE001
|
|
95
103
|
print(f"Error en una de las descargas: {exc}") # noqa: T201
|
|
104
|
+
|
|
105
|
+
dir_path = full_outname.parent / full_outname.stem
|
|
106
|
+
input_files = sorted(dir_path.glob("*.tif"))
|
|
107
|
+
|
|
108
|
+
if dir_path.exists() and len(input_files) > 1:
|
|
109
|
+
|
|
110
|
+
with rio.Env(GDAL_NUM_THREADS="8", NUM_THREADS="8"):
|
|
111
|
+
srcs = [rio.open(fp) for fp in input_files]
|
|
112
|
+
mosaic, out_transform = merge(
|
|
113
|
+
srcs,
|
|
114
|
+
nodata=65535,
|
|
115
|
+
resampling=Resampling.nearest
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
meta = srcs[0].profile.copy()
|
|
119
|
+
meta["transform"] = out_transform
|
|
120
|
+
meta.update(
|
|
121
|
+
height=mosaic.shape[1],
|
|
122
|
+
width=mosaic.shape[2]
|
|
123
|
+
)
|
|
124
|
+
outname = original_dir / full_outname.name
|
|
125
|
+
outname.parent.mkdir(parents=True, exist_ok=True)
|
|
126
|
+
with rio.open(outname, "w", **meta) as dst:
|
|
127
|
+
dst.write(mosaic)
|
|
128
|
+
|
|
129
|
+
for src in srcs:
|
|
130
|
+
src.close()
|
|
131
|
+
|
|
132
|
+
# Delete a folder with pathlib
|
|
133
|
+
shutil.rmtree(dir_path)
|
|
134
|
+
else:
|
|
135
|
+
return outname
|
cubexpress/geotyping.py
CHANGED
|
@@ -306,13 +306,17 @@ class RequestSet(BaseModel):
|
|
|
306
306
|
"crsCode": meta.raster_transform.crs,
|
|
307
307
|
},
|
|
308
308
|
},
|
|
309
|
+
"cs_cdf": int(meta.id.split("_")[-1]) / 100,
|
|
310
|
+
"date": meta.id.split("_")[0],
|
|
309
311
|
"outname": f"{meta.id}.tif",
|
|
310
312
|
}
|
|
313
|
+
|
|
311
314
|
for index, meta in enumerate(self.requestset)
|
|
312
315
|
]
|
|
313
316
|
)
|
|
314
317
|
|
|
315
318
|
|
|
319
|
+
|
|
316
320
|
def _validate_dataframe_schema(self) -> None:
|
|
317
321
|
"""
|
|
318
322
|
Checks that the `_dataframe` contains the required columns and that each column
|
|
@@ -367,21 +371,7 @@ class RequestSet(BaseModel):
|
|
|
367
371
|
f"Column '{col_name}' has an invalid type in row {i}. "
|
|
368
372
|
f"Expected {expected_type}, got {type(value)}"
|
|
369
373
|
)
|
|
370
|
-
|
|
371
|
-
# B) Validation of the `manifest` column structure
|
|
372
|
-
# - Must contain at least 'assetId' or 'expression'
|
|
373
|
-
# - Must contain 'grid' with the minimum required sub-keys
|
|
374
|
-
# - Example:
|
|
375
|
-
# {
|
|
376
|
-
# "fileFormat": "GEO_TIFF",
|
|
377
|
-
# "bandIds": [...],
|
|
378
|
-
# "grid": {
|
|
379
|
-
# "dimensions": {"width": ..., "height": ...},
|
|
380
|
-
# "affineTransform": {...},
|
|
381
|
-
# "crsCode": ...
|
|
382
|
-
# },
|
|
383
|
-
# // Either "assetId" or "expression" must be here
|
|
384
|
-
# }
|
|
374
|
+
|
|
385
375
|
for i, row in self._dataframe.iterrows():
|
|
386
376
|
manifest = row["manifest"]
|
|
387
377
|
|
|
@@ -482,7 +472,7 @@ class RequestSet(BaseModel):
|
|
|
482
472
|
str: A string representation of the entire RasterTransformSet.
|
|
483
473
|
"""
|
|
484
474
|
num_entries = len(self.requestset)
|
|
485
|
-
return f"
|
|
475
|
+
return f"RequestSet({num_entries} entries)"
|
|
486
476
|
|
|
487
477
|
def __str__(self):
|
|
488
478
|
return super().__repr__()
|
cubexpress/request.py
CHANGED
|
@@ -5,13 +5,15 @@ from __future__ import annotations
|
|
|
5
5
|
import ee
|
|
6
6
|
import pandas as pd
|
|
7
7
|
import pygeohash as pgh
|
|
8
|
-
from typing import List
|
|
9
8
|
|
|
10
9
|
from cubexpress.geotyping import Request, RequestSet
|
|
11
10
|
from cubexpress.conversion import lonlat2rt
|
|
12
11
|
|
|
13
12
|
|
|
14
|
-
def table_to_requestset(
|
|
13
|
+
def table_to_requestset(
|
|
14
|
+
table: pd.DataFrame,
|
|
15
|
+
mosaic: bool = True
|
|
16
|
+
) -> RequestSet:
|
|
15
17
|
"""Return a :class:`RequestSet` built from *df* (cloud_table result).
|
|
16
18
|
|
|
17
19
|
Parameters
|
|
@@ -30,52 +32,75 @@ def table_to_requestset(df: pd.DataFrame, *, mosaic: bool = True) -> RequestSet:
|
|
|
30
32
|
|
|
31
33
|
"""
|
|
32
34
|
|
|
35
|
+
|
|
36
|
+
df = table.copy()
|
|
33
37
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if df_.empty:
|
|
38
|
+
if df.empty:
|
|
37
39
|
raise ValueError("cloud_table returned no rows; nothing to request.")
|
|
38
40
|
|
|
39
41
|
rt = lonlat2rt(
|
|
40
|
-
lon=
|
|
41
|
-
lat=
|
|
42
|
-
edge_size=
|
|
43
|
-
scale=
|
|
42
|
+
lon=df.attrs["lon"],
|
|
43
|
+
lat=df.attrs["lat"],
|
|
44
|
+
edge_size=df.attrs["edge_size"],
|
|
45
|
+
scale=df.attrs["scale"],
|
|
44
46
|
)
|
|
45
|
-
centre_hash = pgh.encode(
|
|
47
|
+
centre_hash = pgh.encode(df.attrs["lat"], df.attrs["lon"], precision=5)
|
|
46
48
|
reqs: list[Request] = []
|
|
47
49
|
|
|
50
|
+
|
|
51
|
+
|
|
48
52
|
if mosaic:
|
|
49
|
-
# group all asset IDs per day
|
|
50
53
|
grouped = (
|
|
51
|
-
|
|
52
|
-
.
|
|
54
|
+
df.groupby('date')
|
|
55
|
+
.agg(
|
|
56
|
+
id_list = ('id', list),
|
|
57
|
+
cs_cdf_mean = ('cs_cdf', lambda x: int(round(x.mean(), 2) * 100))
|
|
58
|
+
)
|
|
53
59
|
)
|
|
54
60
|
|
|
55
|
-
for day,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
61
|
+
for day, row in grouped.iterrows():
|
|
62
|
+
|
|
63
|
+
img_ids = row["id_list"]
|
|
64
|
+
cdf = row["cs_cdf_mean"]
|
|
65
|
+
|
|
66
|
+
if len(img_ids) > 1:
|
|
67
|
+
|
|
68
|
+
ee_img = ee.ImageCollection(
|
|
69
|
+
[ee.Image(f"{df.attrs['collection']}/{img}") for img in img_ids]
|
|
70
|
+
).mosaic()
|
|
71
|
+
|
|
72
|
+
reqs.append(
|
|
73
|
+
Request(
|
|
74
|
+
id=f"{day}_{centre_hash}_{cdf}",
|
|
75
|
+
raster_transform=rt,
|
|
76
|
+
image=ee_img,
|
|
77
|
+
bands=df.attrs["bands"],
|
|
78
|
+
)
|
|
66
79
|
)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
80
|
+
else:
|
|
81
|
+
for img_id in img_ids:
|
|
82
|
+
tile = img_id.split("_")[-1][1:]
|
|
83
|
+
reqs.append(
|
|
84
|
+
Request(
|
|
85
|
+
id=f"{day}_{centre_hash}_{tile}_{cdf}",
|
|
86
|
+
raster_transform=rt,
|
|
87
|
+
image=f"{df.attrs['collection']}/{img_id}",
|
|
88
|
+
bands=df.attrs["bands"],
|
|
89
|
+
)
|
|
90
|
+
)
|
|
91
|
+
else:
|
|
92
|
+
for _, row in df.iterrows():
|
|
70
93
|
img_id = row["id"]
|
|
71
|
-
|
|
94
|
+
tile = img_id.split("_")[-1][1:]
|
|
95
|
+
day = row["date"]
|
|
96
|
+
cdf = int(round(row["cs_cdf"], 2) * 100)
|
|
72
97
|
|
|
73
98
|
reqs.append(
|
|
74
99
|
Request(
|
|
75
|
-
id=f"{day}_{centre_hash}_{
|
|
100
|
+
id=f"{day}_{centre_hash}_{tile}_{cdf}",
|
|
76
101
|
raster_transform=rt,
|
|
77
|
-
image=f"{
|
|
78
|
-
bands=
|
|
102
|
+
image=f"{df.attrs['collection']}/{img_id}",
|
|
103
|
+
bands=df.attrs["bands"],
|
|
79
104
|
)
|
|
80
105
|
)
|
|
81
106
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cubexpress
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: Efficient processing of cubic Earth-observation (EO) data.
|
|
5
5
|
Home-page: https://github.com/andesdatacube/cubexpress
|
|
6
6
|
License: MIT
|
|
@@ -20,6 +20,7 @@ Requires-Dist: earthengine-api (>=1.5.12)
|
|
|
20
20
|
Requires-Dist: numpy (>=2.0.2)
|
|
21
21
|
Requires-Dist: pandas (>=2.2.2)
|
|
22
22
|
Requires-Dist: pyarrow (>=14.0.0)
|
|
23
|
+
Requires-Dist: pydantic (>=2.11.4)
|
|
23
24
|
Requires-Dist: pygeohash (>=1.2.0)
|
|
24
25
|
Requires-Dist: pyproj (>=3.6.0)
|
|
25
26
|
Requires-Dist: rasterio (>=1.3.9)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
cubexpress/__init__.py,sha256=RjyAqwiD0rU_Z5tCJTYNGKXZ1ggpfPB51wzhr0KwweY,570
|
|
2
|
+
cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
|
|
3
|
+
cubexpress/cloud_utils.py,sha256=Vr2A1SZDKP_2xNiLYgwmWOUX8P8I-pXQrxBETiUDq60,7441
|
|
4
|
+
cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
|
|
5
|
+
cubexpress/cube.py,sha256=tU0lqhtQUwEiz33yebYIbw-a0R4zmTAei-b_xqMIcWU,3719
|
|
6
|
+
cubexpress/downloader.py,sha256=gHVNCNTwK9qA5MPaEHB_m0wOPprw010qaTVnszwbuUk,4668
|
|
7
|
+
cubexpress/geospatial.py,sha256=ZbsPIgsYQFnNFXUuQ136rJsL4b2Bf91o0Vsswby2dFc,1812
|
|
8
|
+
cubexpress/geotyping.py,sha256=XoSXQuoq5CfzKndM2Pko5KXIP0vxGNm02LOOMbCWkrs,16692
|
|
9
|
+
cubexpress/request.py,sha256=jy5K9MQEurNlwhF0izFmoIh3o7m9bC97fsTT_7C7Gv0,3051
|
|
10
|
+
cubexpress-0.1.10.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
|
|
11
|
+
cubexpress-0.1.10.dist-info/METADATA,sha256=Tn_XBaLWbO4xbmzYJCM6vnbwRNZ1d1ABZ6uF4G4REYM,9664
|
|
12
|
+
cubexpress-0.1.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
13
|
+
cubexpress-0.1.10.dist-info/RECORD,,
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
cubexpress/__init__.py,sha256=ybNczt27OVUkT8WE8v0-A9hByKtfNsNysg-V8CnZqxE,564
|
|
2
|
-
cubexpress/cache.py,sha256=EZiR2AJfplaLpqMIVFb5piCAgFqHKF1vgLIrutfz8tA,1425
|
|
3
|
-
cubexpress/cloud_utils.py,sha256=svK4XpXj2CcyGqGwZryxmlRxrl8Z45qtvcAf2VMAaxk,7041
|
|
4
|
-
cubexpress/conversion.py,sha256=JSaMnswY-2n5E4H2zxb-oEOTJ8UPzXfMeSVCremtvTw,2520
|
|
5
|
-
cubexpress/cube.py,sha256=1GPVAt5Q0vGqA3QJ4gixAevlosV4JHNKMzN1PirhawI,2911
|
|
6
|
-
cubexpress/downloader.py,sha256=JWRegfrxWaFSrYOkobV7IeAZNi6aS6VXvm3mwj5QJSw,3255
|
|
7
|
-
cubexpress/geospatial.py,sha256=ZbsPIgsYQFnNFXUuQ136rJsL4b2Bf91o0Vsswby2dFc,1812
|
|
8
|
-
cubexpress/geotyping.py,sha256=6hjzjZhg6jRYRhLMQ_IiBygnShWlRCtpIbf6rRaQQ7s,17163
|
|
9
|
-
cubexpress/request.py,sha256=9Xhm98zQrJx73PCcy-dzbFAjVQ08O0lHDH4hXLOLuwM,2282
|
|
10
|
-
cubexpress-0.1.8.dist-info/LICENSE,sha256=XjoS-d76b7Cl-VgCWhQk83tNf2dNldKBN8SrImwGc2Q,1072
|
|
11
|
-
cubexpress-0.1.8.dist-info/METADATA,sha256=uNYE-wmqWuaGEJpOBwdFa3scYZNu9GwB4My6oumg_6U,9628
|
|
12
|
-
cubexpress-0.1.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
13
|
-
cubexpress-0.1.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|