satcube 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of satcube has been flagged as potentially problematic.
- satcube/__init__.py +6 -7
- satcube/align.py +98 -0
- satcube/cloud_detection.py +170 -110
- satcube/download.py +53 -42
- satcube/objects.py +71 -0
- satcube/utils.py +61 -2
- {satcube-0.1.13.dist-info → satcube-0.1.15.dist-info}/METADATA +6 -10
- satcube-0.1.15.dist-info/RECORD +10 -0
- satcube/cloud_detection_old.py +0 -24
- satcube/dataclass.py +0 -39
- satcube/download_old.py +0 -82
- satcube/main.py +0 -453
- satcube/utils_old.py +0 -1087
- satcube-0.1.13.dist-info/RECORD +0 -13
- {satcube-0.1.13.dist-info → satcube-0.1.15.dist-info}/LICENSE +0 -0
- {satcube-0.1.13.dist-info → satcube-0.1.15.dist-info}/WHEEL +0 -0
satcube/__init__.py
CHANGED

@@ -1,10 +1,9 @@
 from satcube.cloud_detection import cloud_masking
-from satcube.download import download_data
-
-
-
-__all__ = ["cloud_masking", "download_data"]
-
+from satcube.download import download
+from satcube.align import align
 import importlib.metadata
-
+from satcube.objects import SatCubeMetadata
+
+__all__ = ["cloud_masking", "download", "align", "SatCubeMetadata"]
+# __version__ = importlib.metadata.version("satcube")
 
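Taken together, the new exports replace the single download_data entry point with a three-step pipeline. A minimal sketch of the 0.1.15 top-level API as re-exported here; the coordinates, dates, and folder names are illustrative placeholders, not values from the package:

import satcube

# New public names in 0.1.15, per __all__ above.
meta = satcube.download(lon=-76.5, lat=-9.5, edge_size=512,
                        start="2023-01-01", end="2023-12-31")  # -> SatCubeMetadata
satcube.align(input_dir="raw", output_dir="aligned")           # co-register scenes
satcube.cloud_masking(input="aligned", output="masked")        # write masked GeoTIFFs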
satcube/align.py
ADDED

@@ -0,0 +1,98 @@
from __future__ import annotations

import pathlib
from typing import List, Tuple
import pickle
import pandas as pd
import satalign
import shutil

import numpy as np
import rasterio as rio
import xarray as xr
from affine import Affine
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm


def process_row(row: pd.Series, reference: np.ndarray, input_dir: pathlib.Path, output_dir: pathlib.Path) -> None:
    row_path = input_dir / (row["id"] + ".tif")
    output_path = output_dir / (row["id"] + ".tif")
    with rio.open(row_path) as src:
        row_image = src.read()
        profile_image = src.profile

    row_image_float = row_image.astype(np.float32) / 10000
    row_image_float = row_image_float[np.newaxis, ...]

    pcc_model = satalign.LGM(
        datacube = row_image_float,
        reference = reference
    )
    image, _ = pcc_model.run_multicore()
    image = (image * 10000).astype(np.uint16).squeeze()

    with rio.open(output_path, "w", **profile_image) as dst:
        dst.write(image)

def align(
    input_dir: str | pathlib.Path = "raw",
    output_dir: str | pathlib.Path = "aligned",
    nworks: int = 4,
    cache: bool = False
) -> None:

    input_dir = pathlib.Path(input_dir).expanduser().resolve()
    output_dir = pathlib.Path(output_dir).expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    metadata_path = input_dir / "metadata.csv"

    if not metadata_path.exists():
        raise FileNotFoundError(
            f"Metadata file not found: {metadata_path}. "
            "Please run the download step first."
        )
    else:
        metadata = pd.read_csv(metadata_path)

    if cache:
        exist_files = [file.stem for file in output_dir.glob("*.tif")]
        metadata = metadata[~metadata["id"].isin(exist_files)]

    if metadata.empty:
        return

    id_reference = metadata.sort_values(
        by=["cs_cdf", "date"],
        ascending=False,
    ).iloc[0]["id"]

    reference_path = input_dir / (id_reference + ".tif")

    with rio.open(reference_path) as ref_src:
        reference = ref_src.read()

    reference_float = reference.astype(np.float32) / 10000

    with ThreadPoolExecutor(max_workers=nworks) as executor:
        futures = {
            executor.submit(process_row, row, reference_float, input_dir, output_dir)
            for _, row in metadata.iterrows()
        }
        for future in tqdm(
            as_completed(futures),
            total=len(futures),
            desc="Aligning images",
            unit="image",
            leave=True
        ):
            try:
                future.result()
            except Exception as e:
                print(f"Error processing image: {e}")

    metadata = input_dir / "metadata.csv"
    if metadata.exists():
        metadata_dst = output_dir / "metadata.csv"
        shutil.copy(metadata, metadata_dst)
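The reference scene that everything else is registered against is simply the metadata row with the highest cs_cdf cloud score, with ties broken by the most recent date. A toy replay of that selection logic, using made-up rows (the id values are hypothetical, not a format guaranteed by the package):

import pandas as pd

# Hypothetical metadata rows; align() reads these columns from metadata.csv.
metadata = pd.DataFrame({
    "id":     ["2023-01-05_a", "2023-03-10_b", "2023-02-01_c"],
    "date":   ["2023-01-05", "2023-03-10", "2023-02-01"],
    "cs_cdf": [0.91, 0.97, 0.97],
})

# Same expression as in align(): best cloud score first, newest date breaks ties.
id_reference = metadata.sort_values(
    by=["cs_cdf", "date"], ascending=False
).iloc[0]["id"]
print(id_reference)  # -> "2023-03-10_b"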
satcube/cloud_detection.py
CHANGED

@@ -12,36 +12,148 @@ Example
 
 from __future__ import annotations
 
-import time
-from pathlib import Path
-from typing import List
+import pathlib
 
 import mlstac
 import numpy as np
 import rasterio as rio
+from rasterio.windows import Window
 import torch
+import pandas as pd
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from tqdm import tqdm
+import rasterio as rio
+from rasterio.merge import merge
+import shutil
 
-from satcube.utils import …
-import warnings
-
-
+from satcube.utils import define_iteration, DeviceManager
+import warnings
 warnings.filterwarnings(
     "ignore",
-    message=…,
+    message="The secret HF_TOKEN does not exist in your Colab secrets.",
     category=UserWarning,
-    module="huggingface_hub…,
+    module=r"huggingface_hub\.utils\._.*",
 )
 
-
-
-
+
+
+def infer_cloudmask(
+    input_path: str | pathlib.Path,
+    output_path: str | pathlib.Path,
+    cloud_model: torch.nn.Module,
     *,
-    …
-    …
+    chunk_size: int = 512,
+    overlap: int = 32,
+    device: str = "cpu",
     save_mask: bool = False,
+    prefix: str = ""
+) -> pathlib.Path:
+    """
+    Predict 'image_path' in overlapping patches of 'chunk_size' x 'chunk_size',
+    but only write the valid (inner) region to avoid seam artifacts.
+
+    This uses partial overlap logic:
+      - For interior tiles, skip overlap//2 on each side.
+      - For boundary tiles, we skip only the interior side to avoid losing data at the edges.
+
+    Parameters
+    ----------
+    image_path : Path to input image.
+    output_path : Path to output single-band mask.
+    cloud_model : PyTorch model (already loaded with weights).
+    chunk_size : Size of each tile to read from the source image (default 512).
+    overlap : Overlap in pixels between adjacent tiles (default 32).
+    device : "cpu" or "cuda:0".
+
+    Returns
+    -------
+    pathlib.Path : The path to the created output image.
+    """
+
+    input_path = pathlib.Path(input_path)
+    output_path = pathlib.Path(output_path)
+
+    with rio.open(input_path) as src:
+        meta = src.profile
+        if not meta.get("tiled", False):
+            raise ValueError("The input image is not marked as tiled in its metadata.")
+        # Ensure the internal blocksize matches chunk_size
+        if chunk_size % meta["blockxsize"] != 0 and meta["blockxsize"] <= chunk_size:
+            raise ValueError(f"Image blocks must be {chunk_size}x{chunk_size}, "
+                             f"got {meta['blockxsize']}x{meta['blockysize']}")
+        height, width = meta["height"], meta["width"]
+
+    full_mask = np.zeros((height, width), dtype=np.float32)
+
+    coords = define_iteration((height, width), chunk_size, overlap)
+
+    with rio.open(input_path) as src:
+
+        for (row_off, col_off) in coords:
+
+            window = Window(col_off, row_off, chunk_size, chunk_size)
+            patch = src.read(window=window) / 1e4
+            patch_tensor = torch.from_numpy(patch).float().unsqueeze(0).to(device)
+            result = cloud_model(patch_tensor).cpu().numpy().astype(np.uint8)
+
+            if col_off == 0:
+                offset_x = 0
+            else:
+                offset_x = col_off + overlap // 2
+            if row_off == 0:
+                offset_y = 0
+            else:
+                offset_y = row_off + overlap // 2
+            if (offset_x + chunk_size) == width:
+                length_x = chunk_size
+                sub_x_start = 0
+            else:
+                length_x = chunk_size - (overlap // 2)
+                sub_x_start = overlap // 2 if col_off != 0 else 0
+
+            if (offset_y + chunk_size) == height:
+                length_y = chunk_size
+                sub_y_start = 0
+            else:
+                length_y = chunk_size - (overlap // 2)
+                sub_y_start = overlap // 2 if row_off != 0 else 0
+
+            full_mask[
+                offset_y : offset_y + length_y,
+                offset_x : offset_x + length_x
+            ] = result[
+                sub_y_start : sub_y_start + length_y,
+                sub_x_start : sub_x_start + length_x
+            ]
+
+        if save_mask:
+            out_meta = meta.copy()
+            out_meta.update(count=1, dtype="uint8", nodata=255)
+            output_mask = output_path.parent / (output_path.stem + "_mask.tif")
+            with rio.open(output_mask, "w", **out_meta) as dst:
+                dst.write(full_mask, 1)
+
+        data = src.read()
+        img_prof = src.profile.copy()
+
+        masked = data.copy()
+        masked[:, full_mask != 0] = 65535
+        img_prof.update(dtype="uint16", nodata=65535)
+
+        with rio.open(output_path, "w", **img_prof) as dst:
+            dst.write(masked)
+
+    return output_path
+
+def cloud_masking(
+    input: str | pathlib.Path = "raw",
+    output: str | pathlib.Path = "masked",
+    model_path: str | pathlib.Path = "SEN2CloudEnsemble",
     device: str = "cpu",
-    …
-    …
+    save_mask: bool = False,
+    nworks: int = 4,
+) -> list[pathlib.Path]:
     """Write cloud-masked Sentinel-2 images.
 
     Parameters
@@ -49,7 +161,8 @@ def cloud_masking(
     input
         Path to a single ``.tif`` file **or** a directory containing them.
     output
-        Destination directory (created …
+        Destination directory (created i
+        f missing).
     tile, pad
         Tile size and padding (pixels) when tiling is required.
     save_mask
@@ -64,14 +177,12 @@ def cloud_masking(
     list[pathlib.Path]
         Paths to the generated masked images.
     """
-
-
-    src = Path(input).expanduser().resolve()
-    dst_dir = Path(output).expanduser().resolve()
+    src = pathlib.Path(input).expanduser().resolve()
+    dst_dir = pathlib.Path(output).expanduser().resolve()
     dst_dir.mkdir(parents=True, exist_ok=True)
 
     # Collect files to process -------------------------------------------------
-    tif_paths …
+    tif_paths = []
     if src.is_dir():
         tif_paths = [p for p in src.rglob("*.tif")]
     elif src.is_file() and src.suffix.lower() == ".tif":
@@ -84,95 +195,44 @@ def cloud_masking(
         print(f"[cloud_masking] No .tif files found in {src}")
         return []
 
-
-
-    if not dir.exists():
-        …
+    if not pathlib.Path(model_path).exists():
         mlstac.download(
             file = "https://huggingface.co/tacofoundation/CloudSEN12-models/resolve/main/SEN2CloudEnsemble/mlm.json",
-            output_dir = …
+            output_dir = model_path
         )
 
-    … (old lines 96-130: start of the old inference loop; not captured in this diff view)
-                tensor = torch.nn.functional.pad(tensor, (0, pad_r, 0, pad_b))
-                mask = dm.model(tensor.to(dm.device)).squeeze(0)
-                full_mask[:] = mask[..., :h, :w].cpu().numpy().astype(np.uint8)
-        else:  # tiled
-            with rio.open(tif_path) as src_img, torch.inference_mode():
-                for y0 in range(0, h, tile):
-                    for x0 in range(0, w, tile):
-                        y0r, x0r = max(0, y0 - pad), max(0, x0 - pad)
-                        y1r, x1r = min(h, y0 + tile + pad), min(w, x0 + tile + pad)
-                        win = rio.windows.Window(x0r, y0r, x1r - x0r, y1r - y0r)
-
-                        patch = src_img.read(window=win).astype(np.float32) / 1e4
-                        tensor = torch.from_numpy(patch).unsqueeze(0).to(dm.device)
-                        mask = dm.model(tensor).squeeze(0).cpu().numpy().astype(np.uint8)
-
-                        y_in0 = pad if y0r else 0
-                        x_in0 = pad if x0r else 0
-                        y_in1 = mask.shape[0] - (pad if y1r < h else 0)
-                        x_in1 = mask.shape[1] - (pad if x1r < w else 0)
-                        core = mask[y_in0:y_in1, x_in0:x_in1]
-                        full_mask[y0 : y0 + core.shape[0], x0 : x0 + core.shape[1]] = core
-
-        # ----------------------- output --------------------------------------
-        if save_mask:
-            with rio.open(mask_path, "w", **mask_prof) as dst:
-                dst.write(full_mask, 1)
-
-        with rio.open(tif_path) as src_img:
-            data = src_img.read()
-            img_prof = src_img.profile.copy()
-
-            masked = data.copy()
-            masked[:, full_mask != 0] = 65535
-            img_prof.update(dtype="uint16", nodata=65535)
-
-            with rio.open(masked_path, "w", **img_prof) as dst:
-                dst.write(masked)
-
-        masked_paths.append(masked_path)
-        dt = time.perf_counter() - t0
-        print(f"[{idx}/{len(tif_paths)}] {rel} → done in {dt:.1f}s")
-
-    if dm.device == "cuda":
-        _reset_gpu()
-
-    total_time = time.perf_counter() - t_start
-    print(f"Processed {len(masked_paths)} image(s) in {total_time:.1f}s.")
-    return masked_paths
+    model = mlstac.load(model_path)
+    cloud_model = DeviceManager(model, init_device=device).model
+    cloud_model.eval()
+
+    with ThreadPoolExecutor(max_workers=nworks) as executor:
+        futures = {
+            executor.submit(
+                infer_cloudmask,
+                input_path=p,
+                output_path=dst_dir / p.name,
+                cloud_model=cloud_model,
+                device=device,
+                save_mask=save_mask,
+                prefix=f"[{i+1}/{len(tif_paths)}] "
+            ): p for i, p in enumerate(tif_paths)
+        }
+
+        for future in tqdm(
+            as_completed(futures),
+            total=len(futures),
+            desc="Cloud Masking",
+            position=0,
+            leave=True
+        ):
+            p = futures[future]
+            try:
+                result = future.result()
+                print(f"{result} processed successfully.")
+            except Exception as e:
+                print(f"Error processing {p}: {e}")
+
+    metadata = src / "metadata.csv"
+    if metadata.exists():
+        metadata_dst = dst_dir / "metadata.csv"
+        shutil.copy(metadata, metadata_dst)
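The seam-avoidance bookkeeping in infer_cloudmask is easiest to check with concrete numbers. The sketch below replays just the per-axis offset arithmetic from the loop above; the 992-px extent and the tile origins 0 and 480 are made-up illustration values (the real tile origins come from define_iteration in satcube.utils, which this diff does not show):

def tile_write_region(off, extent, chunk, ov):
    # Same per-axis logic as infer_cloudmask: where the tile writes into the
    # full mask, how many pixels it writes, and where the kept slice starts
    # inside the tile's own prediction.
    write_off = 0 if off == 0 else off + ov // 2
    if write_off + chunk == extent:
        length, sub_start = chunk, 0
    else:
        length = chunk - ov // 2
        sub_start = ov // 2 if off != 0 else 0
    return write_off, length, sub_start

# Hypothetical 992-px axis, 512-px chunks, 32-px overlap:
print(tile_write_region(0,   992, 512, 32))   # (0, 496, 0):    first tile keeps its leading 496 px
print(tile_write_region(480, 992, 512, 32))   # (496, 496, 16): second tile skips 16 px, covers 496..992

Each tile cedes half of the 32-px overlap, so the two writes meet exactly at pixel 496 and the full axis is covered without double-writing.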
satcube/download.py
CHANGED

@@ -1,57 +1,68 @@
-import …
-import …
-import cubexpress
+import sys, time, threading, itertools
+import cubexpress as ce
 import pandas as pd
+from satcube.objects import SatCubeMetadata
+import pathlib
 
-
-def download_data(
-    *,  # keyword-only
+def download(
     lon: float,
     lat: float,
-
-    edge_size: int = 2_048,
+    edge_size: int,
     start: str,
     end: str,
-    … (old lines 15-21: rest of the signature and docstring opening; not captured in this diff view)
+    *,
+    max_cscore: float = 1,
+    min_cscore: float = 0,
+    outfolder: str = "raw",
+    nworks: int = 4
+) -> "SatCubeMetadata":
+
+
+    outfolder = pathlib.Path(outfolder).resolve()
 
-    Parameters
-    ----------
-    lon, lat        Center point in degrees.
-    cloud_max       Max cloud cover (%).
-    edge_size       Square side length (m).
-    start, end      YYYY-MM-DD date range.
-    output          Folder for GeoTIFFs.
-    scale           Pixel size (m).
-    nworks          Parallel workers.
-    mosaic          Merge scenes per date.
-    auto_init_gee   Call ee.Initialize() if needed.
-
-    Returns
-    -------
-    pandas.DataFrame
-        Scene catalogue used for the request.
-    """
-    # Filter scenes
-    df = cubexpress.cloud_table(
+    table = ce.s2_table(
         lon=lon,
         lat=lat,
         edge_size=edge_size,
-        scale=scale,
-        cloud_max=cloud_max,
         start=start,
         end=end,
+        max_cscore=max_cscore,
+        min_cscore=min_cscore
+    )
+
+    requests = ce.table_to_requestset(
+        table=table,
+        mosaic=True
+    )
+
+    ce.get_cube(
+        requests=requests,
+        outfolder=outfolder,
+        nworks=nworks
+    )
+
+    table_req = (
+        requests._dataframe.copy()
+        .drop(columns=['geotransform', 'manifest', 'outname', 'width', 'height', 'scale_x', 'scale_y'])
+    )
+
+    table_req['date'] = table_req['id'].str.split('_').str[0]
+
+    result_table = (
+        table.groupby('date')
+        .agg(
+            id=('id', lambda x: '-'.join(x)),
+            cs_cdf=('cs_cdf', 'first')
+        )
+        .reset_index()
     )
+
+    table_final = table_req.merge(
+        result_table,
+        on='date',
+        how='left'
+    ).rename(columns={'id_x': 'id', 'id_y': 'gee_ids'})
 
-
-    requests = cubexpress.table_to_requestset(df, mosaic=mosaic)
-    pathlib.Path(output).mkdir(parents=True, exist_ok=True)
+    table_final.to_csv(outfolder / "metadata.csv", index=False)
 
-
-    cubexpress.get_cube(requests, output, nworks)
-    return df
+    return SatCubeMetadata(df=table_final, raw_dir=outfolder)
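With mosaic=True, several GEE scenes can share one acquisition date; the groupby above collapses them so metadata.csv keeps one row per date, with the original scene ids joined into gee_ids after the merge. A toy replay of that aggregation, using made-up scene ids:

import pandas as pd

# Hypothetical s2_table rows: two scenes share the 2023-06-01 date.
table = pd.DataFrame({
    "date":   ["2023-06-01", "2023-06-01", "2023-07-11"],
    "id":     ["S2A_AAA", "S2B_BBB", "S2A_CCC"],
    "cs_cdf": [0.95, 0.93, 0.88],
})

result_table = (
    table.groupby("date")
    .agg(id=("id", lambda x: "-".join(x)), cs_cdf=("cs_cdf", "first"))
    .reset_index()
)
# result_table now holds one row per date:
#   2023-06-01 -> id "S2A_AAA-S2B_BBB", cs_cdf 0.95
#   2023-07-11 -> id "S2A_CCC",         cs_cdf 0.88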
satcube/objects.py
ADDED

@@ -0,0 +1,71 @@
# satcube/objects.py
from __future__ import annotations
from dataclasses import dataclass, field
import pathlib
import pandas as pd

from satcube.align import align as _align_fn
from satcube.cloud_detection import cloud_masking as _cloud_fn

@dataclass
class SatCubeMetadata:
    df: pd.DataFrame = field(repr=False)
    raw_dir: pathlib.Path = field(repr=False)

    def __repr__(self) -> str:
        return self.df.__repr__()

    __str__ = __repr__

    def _repr_html_(self) -> str:
        html = getattr(self.df, "_repr_html_", None)
        return html() if callable(html) else self.df.__repr__()

    def align(
        self,
        input_dir: str | pathlib.Path | None = None,
        output_dir: str | pathlib.Path = "aligned",
        nworks: int = 4,
        cache: bool = False
    ) -> "SatCubeMetadata":

        if input_dir is None:
            input_dir = self.raw_dir
        else:
            input_dir = pathlib.Path(input_dir).expanduser().resolve()

        _align_fn(
            input_dir=input_dir,
            output_dir=output_dir,
            nworks=nworks,
            cache=cache
        )
        self.aligned_dir = pathlib.Path(output_dir).resolve()
        return self

    def cloud_masking(
        self,
        output_dir: str | pathlib.Path = "masked",
        model_path: str | pathlib.Path = "SEN2CloudEnsemble",
        device: str = "cpu"
    ) -> "SatCubeMetadata":
        if not hasattr(self, "aligned_dir"):
            raise RuntimeError("You must run .align() first")
        _cloud_fn(
            input=self.aligned_dir,
            output=output_dir,
            model_path=model_path,
            device=device
        )
        self.masked_dir = pathlib.Path(output_dir).resolve()
        return self

    def __getattr__(self, item):
        return getattr(self.df, item)

    def __getitem__(self, key):
        return self.df.__getitem__(key)

    def __len__(self):
        return len(self.df)
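Because __getattr__, __getitem__, and __len__ all forward to the wrapped DataFrame, SatCubeMetadata behaves like its metadata table while also carrying the pipeline directories. A sketch of the intended fluent use; the coordinates and dates are placeholders, not package defaults:

from satcube import download

cube = download(lon=-76.5, lat=-9.5, edge_size=512,
                start="2023-01-01", end="2023-06-30")

# Both methods return self, so the steps chain; cloud_masking() raises a
# RuntimeError if align() has not been run first.
cube = cube.align(output_dir="aligned").cloud_masking(output_dir="masked")

# Everything else is delegated to the wrapped DataFrame:
print(len(cube))             # row count via __len__
print(cube["cs_cdf"].max())  # column access via __getitem__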