satcube 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of satcube might be problematic. Click here for more details.

@@ -1,29 +1,24 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: satcube
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: A Python package to create cloud-free monthly composites by fusing Landsat and Sentinel-2 data.
5
5
  Home-page: https://github.com/IPL-UV/satcube
6
6
  Author: Cesar Aybar
7
7
  Author-email: fcesar.aybar@uv.es
8
- Requires-Python: >=3.10,<4.0
8
+ Requires-Python: >=3.9
9
9
  Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
10
11
  Classifier: Programming Language :: Python :: 3.10
11
12
  Classifier: Programming Language :: Python :: 3.11
12
13
  Classifier: Programming Language :: Python :: 3.12
13
14
  Provides-Extra: full
14
- Requires-Dist: cubexpress (>=0.1.7)
15
- Requires-Dist: earthengine-api (>=1.5.12)
15
+ Requires-Dist: cubexpress (>=0.1.10)
16
16
  Requires-Dist: mlstac (>=0.4.0)
17
- Requires-Dist: numpy (>=1.25.0)
18
- Requires-Dist: pandas (>=2.0.0)
19
17
  Requires-Dist: phicloudmask (>=0.0.2)
20
- Requires-Dist: pydantic (>=2.8.0)
21
- Requires-Dist: rasterio (>=1.3.9)
22
18
  Requires-Dist: requests (>=2.26.0)
23
- Requires-Dist: satalign (>=0.1.11)
19
+ Requires-Dist: satalign (>=0.1.12)
24
20
  Requires-Dist: scikit-learn (>=1.2.0)
25
21
  Requires-Dist: segmentation-models-pytorch (>=0.3.0)
26
- Requires-Dist: utm (>=0.7.0)
27
22
  Requires-Dist: xarray (>=2023.7.0)
28
23
  Project-URL: Documentation, https://ipl-uv.github.io/satcube/
29
24
  Project-URL: Repository, https://github.com/IPL-UV/satcube
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "satcube"
3
- version = "0.1.13"
3
+ version = "0.1.14"
4
4
  description = "A Python package to create cloud-free monthly composites by fusing Landsat and Sentinel-2 data."
5
5
  authors = ["Cesar Aybar <fcesar.aybar@uv.es>"]
6
6
  repository = "https://github.com/IPL-UV/satcube"
@@ -9,21 +9,15 @@ readme = "README.md"
9
9
  packages = [{ include = "satcube" }]
10
10
 
11
11
  [tool.poetry.dependencies]
12
- python = ">=3.10,<4.0"
13
- cubexpress = ">=0.1.7"
12
+ python = ">=3.9"
13
+ cubexpress = ">=0.1.10"
14
14
  mlstac = ">=0.4.0"
15
- satalign = ">=0.1.11"
15
+ satalign = ">=0.1.12"
16
16
  segmentation-models-pytorch = ">=0.3.0"
17
+ phicloudmask = ">=0.0.2"
17
18
  scikit-learn = ">=1.2.0"
18
- pandas = ">=2.0.0"
19
- pydantic = ">=2.8.0"
20
- rasterio = ">=1.3.9"
21
- earthengine-api = ">=1.5.12"
22
- numpy = ">=1.25.0"
23
19
  requests = ">=2.26.0"
24
- xarray = ">=2023.7.0"
25
- utm = ">=0.7.0"
26
- phicloudmask = ">=0.0.2"
20
+ xarray = ">=2023.7.0"
27
21
 
28
22
  [tool.poetry.extras]
29
23
  full = ["torch"]
@@ -1,9 +1,9 @@
1
1
  from satcube.cloud_detection import cloud_masking
2
- from satcube.download import download_data
2
+ from satcube.download import download
3
+ from satcube.align import align
3
4
 
4
5
 
5
-
6
- __all__ = ["cloud_masking", "download_data"]
6
+ __all__ = ["cloud_masking", "download", "align"]
7
7
 
8
8
  import importlib.metadata
9
9
  __version__ = importlib.metadata.version("satcube")
@@ -0,0 +1,149 @@
1
+ from __future__ import annotations
2
+
3
+ import pathlib
4
+ from typing import List, Tuple
5
+ import pickle
6
+
7
+ import numpy as np
8
+ import rasterio
9
+ import xarray as xr
10
+ from affine import Affine
11
+
12
+ import satalign as sat
13
+
14
+
15
def align(
    input_dir: str | pathlib.Path,
    output_dir: str | pathlib.Path,
    *,
    channel: str = "mean",
    crop_center: int = 128,
    num_threads: int = 2,
    save_tiffs: bool = True,
    pickle_datacube: bool = False,
) -> Tuple[xr.DataArray, List[np.ndarray]]:
    """Align all masked Sentinel-2 tiles found in *input_dir*.

    Parameters
    ----------
    input_dir
        Directory containing masked Sentinel-2 *TIFF* tiles produced by the
        previous preprocessing stage. It is handed to
        ``satalign.utils.create_array`` to build the datacube.
    output_dir
        Directory that receives the outputs; created if it does not exist.
    channel
        Datacube band used by the *PCC* model for correlation (forwarded to
        :class:`satalign.PCC`).
    crop_center
        Size parameter of the central window used for correlation (forwarded
        to :class:`satalign.PCC`).
    num_threads
        Number of threads for the multi-core run (forwarded to
        :class:`satalign.PCC`).
    save_tiffs
        If *True* (default) the aligned datacube is exported, one file per
        time step, via :func:`save_aligned_cube_to_tiffs`.
    pickle_datacube
        If *True* the raw (unaligned) datacube is pickled to
        ``datacube.pickle`` inside *output_dir*. Defaults to *False*, which
        writes no pickle.

    Returns
    -------
    aligned_cube, warp_matrices
        The two values returned by ``satalign.PCC.run_multicore``: the aligned
        datacube and the per-time-step warp matrices.
    """
    input_path = pathlib.Path(input_dir).expanduser().resolve()
    output_path = pathlib.Path(output_dir).expanduser().resolve()
    output_path.mkdir(parents=True, exist_ok=True)

    # 1. Build the datacube from the files in the input directory.
    da = sat.utils.create_array(input_path)

    # Optional debugging artefact: the datacube exactly as it was loaded.
    if pickle_datacube:
        with open(output_path / "datacube.pickle", "wb") as fh:
            pickle.dump(da, fh, protocol=pickle.HIGHEST_PROTOCOL)

    # 2. Reference slice: the time step with the highest "cs_cdf" value,
    #    with the 0 and 65535 fill values masked out.
    ref_slice = da.sortby("cs_cdf", ascending=False).isel(time=0)
    reference = ref_slice.where((ref_slice != 0) & (ref_slice != 65535))

    # 3. Instantiate and run the PCC model.
    pcc_model = sat.PCC(
        datacube=da,
        reference=reference,
        channel=channel,
        crop_center=crop_center,
        num_threads=num_threads,
    )
    aligned_cube, warp_matrices = pcc_model.run_multicore()

    # 4. Optionally export one GeoTIFF per time step.
    if save_tiffs:
        save_aligned_cube_to_tiffs(aligned_cube, output_path)

    return aligned_cube, warp_matrices
84
+
85
+
86
def save_aligned_cube_to_tiffs(
    aligned_cube: xr.DataArray,
    out_dir: str | pathlib.Path,
    *,
    block_size: int = 128,
) -> None:
    """Write each time slice of *aligned_cube* to an individual tiled GeoTIFF.

    The filenames follow the pattern ``YYYY-MM-DD.tif`` (the first ten
    characters of the string form of each ``time`` value).

    Parameters
    ----------
    aligned_cube
        Datacube returned by :func:`align`. It must expose ``time``, ``band``,
        ``y`` and ``x`` dimensions, with at least two coordinates along ``x``
        and ``y``.
    out_dir
        Target directory; it will be created if it does not exist.
    block_size
        Internal tile size (*rasterio* ``blockxsize`` and ``blockysize``).

    Raises
    ------
    ValueError
        If the cube has fewer than two ``x`` or ``y`` coordinates, because the
        pixel size cannot be derived from a single coordinate.
    """
    out_dir_path = pathlib.Path(out_dir).expanduser().resolve()
    out_dir_path.mkdir(parents=True, exist_ok=True)

    # 1. Build the affine transform from the x/y coordinate vectors.
    x_vals = aligned_cube.x.values
    y_vals = aligned_cube.y.values
    if x_vals.size < 2 or y_vals.size < 2:
        raise ValueError(
            "Cannot derive the pixel size: the datacube needs at least two "
            f"coordinates along x and y, got x={x_vals.size}, y={y_vals.size}."
        )
    x_res = float(x_vals[1] - x_vals[0])
    y_res = float(y_vals[1] - y_vals[0])
    # Shift by half a pixel: the first coordinate is treated as a pixel centre
    # and the transform origin is the outer corner of that pixel.
    transform = (
        Affine.translation(x_vals[0] - x_res / 2.0, y_vals[0] - y_res / 2.0)
        * Affine.scale(x_res, y_res)
    )

    # 2. Retrieve the CRS from the datacube attributes (WKT first, then EPSG).
    attrs = aligned_cube.attrs
    crs: str | None = attrs.get("crs_wkt")
    if crs is None and "crs_epsg" in attrs:
        crs = f"EPSG:{int(attrs['crs_epsg'])}"

    # 3. Loop over the time steps and write one file each.
    for t in aligned_cube.time.values:
        date_str = str(t)[:10]  # YYYY-MM-DD
        da_t = aligned_cube.sel(time=t)

        # rasterio expects a (band, y, x) array.
        data = da_t.transpose("band", "y", "x").values

        profile = {
            "driver": da_t.attrs.get("driver", "GTiff"),
            "height": da_t.sizes["y"],
            "width": da_t.sizes["x"],
            "count": da_t.sizes["band"],
            "dtype": str(da_t.dtype),
            "transform": transform,
            "crs": crs,
            "nodata": int(getattr(da_t, "nodata", 0)),
            "tiled": True,
            "blockxsize": block_size,
            "blockysize": block_size,
            "interleave": "band",
        }

        outfile = out_dir_path / f"{date_str}.tif"
        with rasterio.open(outfile, "w", **profile) as dst:
            dst.write(data)
@@ -0,0 +1,242 @@
1
+ """Predict cloud masks for Sentinel-2 GeoTIFFs with the SEN2CloudEnsemble model.
2
+
3
+ The callable :pyfunc:`cloud_masking` accepts **either** a single ``.tif`` file
4
+ or a directory tree; in both cases it writes a masked copy of every image (and,
5
+ optionally, the binary mask) to *output*.
6
+
7
+ Example
8
+ -------
9
+ >>> from satcube.cloud_detection import cloud_masking
10
+ >>> cloud_masking("~/s2/input", "~/s2/output", device="cuda")
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import pathlib
16
+
17
+ import mlstac
18
+ import numpy as np
19
+ import rasterio as rio
20
+ from rasterio.windows import Window
21
+ import torch
22
+ import pandas as pd
23
+ from tqdm import tqdm
24
+ import rasterio as rio
25
+ from rasterio.merge import merge
26
+
27
+ from satcube.utils import define_iteration, DeviceManager
28
+ import warnings
29
+ warnings.filterwarnings(
30
+ "ignore",
31
+ message="The secret HF_TOKEN does not exist in your Colab secrets.",
32
+ category=UserWarning,
33
+ module=r"huggingface_hub\.utils\._.*",
34
+ )
35
+
36
+
37
+
38
+
39
def infer_cloudmask(
    input_path: str | pathlib.Path,
    output_path: str | pathlib.Path,
    cloud_model: torch.nn.Module,
    *,
    chunk_size: int = 512,
    overlap: int = 32,
    device: str = "cpu",
    save_mask: bool = False,
    prefix: str = ""
) -> pathlib.Path:
    """Predict a cloud mask for one GeoTIFF tile by tile and write the masked image.

    The image is read in ``chunk_size`` x ``chunk_size`` windows whose origins
    come from ``define_iteration``. To avoid seam artefacts, every tile that
    does not start at the top/left image border drops the first
    ``overlap // 2`` rows/columns of its prediction. Each tile is written up to
    its bottom/right edge; tiles visited later overwrite the shared strip.

    Every pixel whose predicted value is non-zero is set to 65535 in all bands
    of the output, which is written as ``uint16`` with ``nodata=65535``.

    Parameters
    ----------
    input_path : Path to the input image (must be internally tiled).
    output_path : Path of the masked image to create.
    cloud_model : PyTorch model (already loaded with weights).
    chunk_size : Size of each tile read from the source image (default 512).
    overlap : Overlap in pixels between adjacent tiles (default 32).
    device : Torch device the patches are moved to, e.g. "cpu" or "cuda:0".
    save_mask : If True, also write the single-band ``uint8`` mask
        (``nodata=255``) next to *output_path* as ``<stem>_mask.tif``.
    prefix : Text prepended to the progress-bar description.

    Returns
    -------
    pathlib.Path : The path to the created output image.

    Raises
    ------
    ValueError : If the input is not tiled or fails the block-size check.
    """
    input_path = pathlib.Path(input_path)
    output_path = pathlib.Path(output_path)
    half = overlap // 2

    with rio.open(input_path) as src:
        # 1) Validate metadata.
        meta = src.profile
        if not meta.get("tiled", False):
            raise ValueError("The input image is not marked as tiled in its metadata.")
        # NOTE(review): only blockxsize is tested and blocks larger than
        # chunk_size are accepted, although the message asks for
        # chunk_size x chunk_size blocks — confirm the intended rule.
        if chunk_size % meta["blockxsize"] != 0 and meta["blockxsize"] <= chunk_size:
            raise ValueError(f"Image blocks must be {chunk_size}x{chunk_size}, "
                             f"got {meta['blockxsize']}x{meta['blockysize']}")
        height, width = meta["height"], meta["width"]

        # 2) In-memory buffer for the whole mask; predictions are uint8.
        full_mask = np.zeros((height, width), dtype=np.uint8)

        # 3) Iterate over the windows; no autograd bookkeeping is needed.
        coords = define_iteration((height, width), chunk_size, overlap)
        with torch.inference_mode():
            for (row_off, col_off) in tqdm(
                coords,
                desc=f"{prefix} Inference on {input_path.name}",
                position=1,
                leave=False):

                window = Window(col_off, row_off, chunk_size, chunk_size)
                patch = src.read(window=window) / 1e4
                patch_tensor = torch.from_numpy(patch).float().unsqueeze(0).to(device)
                result = cloud_model(patch_tensor).cpu().numpy().astype(np.uint8)
                # Assumes the model yields one 2-D class map per patch,
                # possibly wrapped in leading singleton dims — TODO confirm.
                result = result.reshape(result.shape[-2:])

                # Tiles on the top/left border keep their full prediction;
                # all others drop the leading half-overlap strip.
                sub_y_start = half if row_off != 0 else 0
                sub_x_start = half if col_off != 0 else 0
                core = result[sub_y_start:, sub_x_start:]

                # The destination is derived from the tile origin and the
                # prediction's real shape, so the data is never shifted.
                offset_y = row_off + sub_y_start
                offset_x = col_off + sub_x_start
                full_mask[
                    offset_y : offset_y + core.shape[0],
                    offset_x : offset_x + core.shape[1]
                ] = core

        # 4) Read the full image once for masking.
        masked = src.read()
        img_prof = src.profile.copy()

    if save_mask:
        out_meta = meta.copy()
        out_meta.update(count=1, dtype="uint8", nodata=255)
        output_mask = output_path.parent / (output_path.stem + "_mask.tif")
        with rio.open(output_mask, "w", **out_meta) as dst:
            dst.write(full_mask, 1)

    # Flag every non-clear pixel with the nodata value in all bands.
    masked[:, full_mask != 0] = 65535
    img_prof.update(dtype="uint16", nodata=65535)

    with rio.open(output_path, "w", **img_prof) as dst:
        dst.write(masked)

    return output_path
158
+
159
def cloud_masking(
    input: str | pathlib.Path = "raw",
    output: str | pathlib.Path = "masked",
    model_path: str | pathlib.Path = "SEN2CloudEnsemble",
    save_mask: bool = False,
    device: str = "cpu",
    cache: bool = True
) -> pd.DataFrame | list[pathlib.Path]:
    """Write cloud-masked Sentinel-2 images.

    Parameters
    ----------
    input
        Path to a single ``.tif`` file **or** a directory containing them
        (searched recursively).
    output
        Destination directory (created if missing). Every masked image is
        written directly into it under the input file's name.
    model_path
        Directory holding the model. If it does not exist, the
        SEN2CloudEnsemble model is downloaded into it first.
    save_mask
        If *True*, store the binary mask alongside the masked image.
    device
        Torch device for inference, e.g. ``"cpu"`` or ``"cuda:0"``.
    cache
        Currently unused; kept for interface compatibility.

    Returns
    -------
    pandas.DataFrame
        One row per generated image with the columns ``path``, ``date``
        (first ``YYYY-MM-DD`` found in the path) and ``score`` (the trailing
        ``_<digits>`` of the file name divided by 100, ``NaN`` when the name
        has no such suffix), sorted by descending ``score``.
        An empty ``list`` is returned when no ``.tif`` file is found.

    Raises
    ------
    ValueError
        If *input* is neither a ``.tif`` file nor a directory.
    """
    src = pathlib.Path(input).expanduser().resolve()
    dst_dir = pathlib.Path(output).expanduser().resolve()
    dst_dir.mkdir(parents=True, exist_ok=True)

    # Collect files to process -------------------------------------------------
    if src.is_dir():
        # Sorted so the processing order does not depend on the filesystem.
        tif_paths = sorted(src.rglob("*.tif"))
    elif src.is_file() and src.suffix.lower() == ".tif":
        tif_paths = [src]
        src = src.parent
    else:
        raise ValueError(f"Input must be a .tif or directory, got: {src}")

    if not tif_paths:
        print(f"[cloud_masking] No .tif files found in {src}")
        return []

    # Load the model, downloading it on first use ------------------------------
    if not pathlib.Path(model_path).exists():
        mlstac.download(
            file="https://huggingface.co/tacofoundation/CloudSEN12-models/resolve/main/SEN2CloudEnsemble/mlm.json",
            output_dir=model_path
        )

    model = mlstac.load(model_path)
    cloud_model = DeviceManager(model, init_device=device).model
    cloud_model.eval()

    # Run inference file by file -----------------------------------------------
    total = len(tif_paths)
    outs = [
        infer_cloudmask(
            input_path=p,
            output_path=dst_dir / p.name,
            cloud_model=cloud_model,
            device=device,
            save_mask=save_mask,
            prefix=f"[{i+1}/{total}] "
        )
        for i, p in enumerate(tif_paths)
    ]

    # Summarise the outputs ----------------------------------------------------
    path_str = pd.Series([str(p) for p in outs])
    # to_numeric(errors="coerce") gives NaN for names without a numeric
    # suffix, so finished results are not lost to a conversion error.
    score = pd.to_numeric(
        path_str.str.extract(r"_(\d+)\.tif$", expand=False), errors="coerce"
    ) / 100
    df_out = pd.DataFrame(
        {
            "path": outs,
            "date": path_str.str.extract(r"(\d{4}-\d{2}-\d{2})", expand=False),
            "score": score,
        }
    ).sort_values("score", ascending=False, ignore_index=True)

    return df_out
@@ -0,0 +1,65 @@
1
+ import sys, time, threading, itertools
2
+ import cubexpress as ce
3
+ import pandas as pd
4
+
5
def download(
    lon: float,
    lat: float,
    edge_size: int,
    start: str,
    end: str,
    *,
    max_cscore: float = 1,
    min_cscore: float = 0,
    outfolder: str = "raw",
    nworks: int = 4,
    cache: bool = True,
    show_spinner: bool = True,
    verbose: bool = False
) -> pd.DataFrame:
    """Query the Sentinel-2 cloud table and download the matching cube.

    The function first calls ``cubexpress.s2_cloud_table`` and then passes the
    resulting table to ``cubexpress.get_cube``. While both calls run, a small
    console spinner is shown unless it is disabled or *verbose* is set.

    Parameters
    ----------
    lon, lat, edge_size, start, end
        Forwarded unchanged to ``cubexpress.s2_cloud_table``.
    max_cscore, min_cscore
        Forwarded unchanged to ``cubexpress.s2_cloud_table``.
    outfolder
        Output folder forwarded to ``cubexpress.get_cube``.
    nworks
        Worker count forwarded to ``cubexpress.get_cube``.
    cache
        Forwarded to both cubexpress calls.
    show_spinner
        Show the console spinner (ignored when *verbose* is *True*).
    verbose
        Forwarded to both cubexpress calls; also disables the spinner.

    Returns
    -------
    pandas.DataFrame
        The table returned by ``cubexpress.s2_cloud_table``.
    """
    finished = threading.Event()
    outcome = {"ok": False}  # set just before `finished` on success

    spinner = None
    if show_spinner and not verbose:
        def _spin():
            for ch in itertools.cycle("|/-\\"):
                if finished.is_set():
                    break
                sys.stdout.write(f"\rDownloading Sentinel-2 imagery & metadata… {ch}")
                sys.stdout.flush()
                # Waiting on the event (not sleeping) ends the spinner
                # as soon as the work is done.
                finished.wait(0.1)
            # Report success only when the download really completed.
            mark = "✅" if outcome["ok"] else "❌"
            sys.stdout.write(f"\rDownloading Sentinel-2 imagery & metadata {mark}\n")
            sys.stdout.flush()

        spinner = threading.Thread(target=_spin, daemon=True)
        spinner.start()

    try:
        table = ce.s2_cloud_table(
            lon=lon,
            lat=lat,
            edge_size=edge_size,
            start=start,
            end=end,
            max_cscore=max_cscore,
            min_cscore=min_cscore,
            cache=cache,
            verbose=verbose
        )

        ce.get_cube(
            table=table,
            outfolder=outfolder,
            nworks=nworks,
            verbose=verbose,
            cache=cache
        )
        outcome["ok"] = True
    finally:
        # Always stop the spinner, including when a call above raised.
        finished.set()
        if spinner is not None:
            spinner.join()

    return table
@@ -1,11 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import gc
4
+ import itertools
4
5
  from typing import Any, Optional
5
-
6
6
  import torch
7
7
 
8
-
9
8
  def _reset_gpu() -> None:
10
9
  """Release CUDA memory and reset allocation statistics.
11
10
 
@@ -15,6 +14,65 @@ def _reset_gpu() -> None:
15
14
  torch.cuda.reset_peak_memory_stats()
16
15
 
17
16
 
17
def define_iteration(dimension: tuple, chunk_size: int, overlap: int = 0):
    """
    Define the iteration strategy to walk through the image with an overlap.

    Args:
        dimension (tuple): Dimension of the S2 image as (height, width).
        chunk_size (int): Size of the chunks.
        overlap (int): Size of the overlap between chunks.

    Returns:
        list: Unique (row, col) chunk origins in row-major order. When
            ``chunk_size`` exceeds both image dimensions the single origin
            ``(0, 0)`` is returned.

    Raises:
        ValueError: If ``overlap`` is negative or not smaller than
            ``chunk_size`` (the walk would not advance or would leave gaps).
    """
    dimy, dimx = dimension

    if chunk_size > max(dimx, dimy):
        return [(0, 0)]

    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    # Adjust step to create overlap
    step = chunk_size - overlap

    # Generate initial chunk positions
    iterchunks = list(itertools.product(range(0, dimy, step), range(0, dimx, step)))

    # Fix chunks at the edges to stay within bounds
    iterchunks_fixed = fix_lastchunk(
        iterchunks=iterchunks, s2dim=dimension, chunk_size=chunk_size
    )

    # Clamping can map several origins onto the same tile; keep each once
    # (first occurrence, order preserved) so no tile is processed twice.
    return list(dict.fromkeys(iterchunks_fixed))


def fix_lastchunk(iterchunks, s2dim, chunk_size):
    """
    Fix the last chunk of the overlay to ensure it aligns with image boundaries.

    Args:
        iterchunks (list): List of chunks created by itertools.product.
        s2dim (tuple): Dimension of the S2 images as (height, width).
        chunk_size (int): Size of the chunks.

    Returns:
        list: List of adjusted chunk coordinates, one per input chunk.
    """
    itercontainer = []

    for index_i, index_j in iterchunks:
        # Pull a chunk that overruns the image back so it ends on the border
        # (never before 0 when the image is smaller than the chunk).
        if index_i + chunk_size > s2dim[0]:
            index_i = max(s2dim[0] - chunk_size, 0)
        if index_j + chunk_size > s2dim[1]:
            index_j = max(s2dim[1] - chunk_size, 0)

        itercontainer.append((index_i, index_j))

    return itercontainer
73
+
74
+
75
+
18
76
  class DeviceManager:
19
77
  """Hold a compiled mlstac model and move it between devices on demand."""
20
78
 
@@ -68,3 +126,4 @@ class DeviceManager:
68
126
  self.model = self._experiment.compiled_model(device=new_device, mode="max")
69
127
  self.device = new_device
70
128
  return self.model
129
+
@@ -1,178 +0,0 @@
1
- """Predict cloud masks for Sentinel-2 GeoTIFFs with the SEN2CloudEnsemble model.
2
-
3
- The callable :pyfunc:`cloud_masking` accepts **either** a single ``.tif`` file
4
- or a directory tree; in both cases it writes a masked copy of every image (and,
5
- optionally, the binary mask) to *output*.
6
-
7
- Example
8
- -------
9
- >>> from satcube.cloud_detection import cloud_masking
10
- >>> cloud_masking("~/s2/input", "~/s2/output", device="cuda")
11
- """
12
-
13
- from __future__ import annotations
14
-
15
- import time
16
- from pathlib import Path
17
- from typing import List
18
-
19
- import mlstac
20
- import numpy as np
21
- import rasterio as rio
22
- import torch
23
-
24
- from satcube.utils import DeviceManager, _reset_gpu
25
- import warnings, re
26
-
27
-
28
- warnings.filterwarnings(
29
- "ignore",
30
- message=re.escape("The secret `HF_TOKEN` does not exist in your Colab secrets."),
31
- category=UserWarning,
32
- module="huggingface_hub.utils._auth",
33
- )
34
-
35
- def cloud_masking(
36
- input: str | Path, # noqa: A002 (shadowing built-in is OK here)
37
- output: str | Path,
38
- *,
39
- tile: int = 512,
40
- pad: int = 64,
41
- save_mask: bool = False,
42
- device: str = "cpu",
43
- max_pix_cpu: float = 7.0e7
44
- ) -> List[Path]:
45
- """Write cloud-masked Sentinel-2 images.
46
-
47
- Parameters
48
- ----------
49
- input
50
- Path to a single ``.tif`` file **or** a directory containing them.
51
- output
52
- Destination directory (created if missing).
53
- tile, pad
54
- Tile size and padding (pixels) when tiling is required.
55
- save_mask
56
- If *True*, store the binary mask alongside the masked image.
57
- device
58
- Torch device for inference, e.g. ``"cpu"`` or ``"cuda:0"``.
59
- max_pix_cpu
60
- Tile images larger than this when running on CPU.
61
-
62
- Returns
63
- ------
64
- list[pathlib.Path]
65
- Paths to the generated masked images.
66
- """
67
- t_start = time.perf_counter()
68
-
69
- src = Path(input).expanduser().resolve()
70
- dst_dir = Path(output).expanduser().resolve()
71
- dst_dir.mkdir(parents=True, exist_ok=True)
72
-
73
- # Collect files to process -------------------------------------------------
74
- tif_paths: list[Path]
75
- if src.is_dir():
76
- tif_paths = [p for p in src.rglob("*.tif")]
77
- elif src.is_file() and src.suffix.lower() == ".tif":
78
- tif_paths = [src]
79
- src = src.parent # for relative-path bookkeeping below
80
- else:
81
- raise ValueError(f"Input must be a .tif or directory, got: {src}")
82
-
83
- if not tif_paths:
84
- print(f"[cloud_masking] No .tif files found in {src}")
85
- return []
86
-
87
- dir = Path("SEN2CloudEnsemble")
88
-
89
- if not dir.exists():
90
-
91
- mlstac.download(
92
- file = "https://huggingface.co/tacofoundation/CloudSEN12-models/resolve/main/SEN2CloudEnsemble/mlm.json",
93
- output_dir = "SEN2CloudEnsemble",
94
- )
95
-
96
- experiment = mlstac.load(dir.as_posix())
97
-
98
- dm = DeviceManager(experiment, init_device=device)
99
-
100
- masked_paths: list[Path] = []
101
-
102
- # -------------------------------------------------------------------------
103
- for idx, tif_path in enumerate(tif_paths, 1):
104
- rel = tif_path.relative_to(src)
105
- out_dir = dst_dir / rel.parent
106
- out_dir.mkdir(parents=True, exist_ok=True)
107
-
108
- mask_path = out_dir / f"{tif_path.stem}_cloudmask.tif"
109
- masked_path = out_dir / f"{tif_path.stem}_masked.tif"
110
-
111
- with rio.open(tif_path) as src_img:
112
- profile = src_img.profile
113
- h, w = src_img.height, src_img.width
114
-
115
- mask_prof = profile.copy()
116
- mask_prof.update(driver="GTiff", count=1, dtype="uint8", nodata=255)
117
-
118
- do_tiling = (dm.device == "cuda") or (h * w > max_pix_cpu)
119
- full_mask = np.full((h, w), 255, np.uint8)
120
-
121
- t0 = time.perf_counter()
122
-
123
- # ----------------------- inference -----------------------------------
124
- if not do_tiling: # full frame
125
- with rio.open(tif_path) as src_img, torch.inference_mode():
126
- img = src_img.read().astype(np.float32) / 1e4
127
- h32, w32 = (h + 31) // 32 * 32, (w + 31) // 32 * 32
128
- pad_b, pad_r = h32 - h, w32 - w
129
- tensor = torch.from_numpy(img).unsqueeze(0)
130
- if pad_b or pad_r:
131
- tensor = torch.nn.functional.pad(tensor, (0, pad_r, 0, pad_b))
132
- mask = dm.model(tensor.to(dm.device)).squeeze(0)
133
- full_mask[:] = mask[..., :h, :w].cpu().numpy().astype(np.uint8)
134
- else: # tiled
135
- with rio.open(tif_path) as src_img, torch.inference_mode():
136
- for y0 in range(0, h, tile):
137
- for x0 in range(0, w, tile):
138
- y0r, x0r = max(0, y0 - pad), max(0, x0 - pad)
139
- y1r, x1r = min(h, y0 + tile + pad), min(w, x0 + tile + pad)
140
- win = rio.windows.Window(x0r, y0r, x1r - x0r, y1r - y0r)
141
-
142
- patch = src_img.read(window=win).astype(np.float32) / 1e4
143
- tensor = torch.from_numpy(patch).unsqueeze(0).to(dm.device)
144
- mask = dm.model(tensor).squeeze(0).cpu().numpy().astype(np.uint8)
145
-
146
- y_in0 = pad if y0r else 0
147
- x_in0 = pad if x0r else 0
148
- y_in1 = mask.shape[0] - (pad if y1r < h else 0)
149
- x_in1 = mask.shape[1] - (pad if x1r < w else 0)
150
- core = mask[y_in0:y_in1, x_in0:x_in1]
151
- full_mask[y0 : y0 + core.shape[0], x0 : x0 + core.shape[1]] = core
152
-
153
- # ----------------------- output --------------------------------------
154
- if save_mask:
155
- with rio.open(mask_path, "w", **mask_prof) as dst:
156
- dst.write(full_mask, 1)
157
-
158
- with rio.open(tif_path) as src_img:
159
- data = src_img.read()
160
- img_prof = src_img.profile.copy()
161
-
162
- masked = data.copy()
163
- masked[:, full_mask != 0] = 65535
164
- img_prof.update(dtype="uint16", nodata=65535)
165
-
166
- with rio.open(masked_path, "w", **img_prof) as dst:
167
- dst.write(masked)
168
-
169
- masked_paths.append(masked_path)
170
- dt = time.perf_counter() - t0
171
- print(f"[{idx}/{len(tif_paths)}] {rel} → done in {dt:.1f}s")
172
-
173
- if dm.device == "cuda":
174
- _reset_gpu()
175
-
176
- total_time = time.perf_counter() - t_start
177
- print(f"Processed {len(masked_paths)} image(s) in {total_time:.1f}s.")
178
- return masked_paths
@@ -1,57 +0,0 @@
1
- import pathlib
2
- import ee
3
- import cubexpress
4
- import pandas as pd
5
-
6
-
7
- def download_data(
8
- *, # keyword-only
9
- lon: float,
10
- lat: float,
11
- cloud_max: int = 40,
12
- edge_size: int = 2_048,
13
- start: str,
14
- end: str,
15
- output: str = "raw",
16
- scale: int = 10,
17
- nworks: int = 4,
18
- mosaic: bool = True
19
- ) -> pd.DataFrame:
20
- """
21
- Download a Sentinel cube for (lon, lat) and return its metadata.
22
-
23
- Parameters
24
- ----------
25
- lon, lat Center point in degrees.
26
- cloud_max Max cloud cover (%).
27
- edge_size Square side length (m).
28
- start, end YYYY-MM-DD date range.
29
- output Folder for GeoTIFFs.
30
- scale Pixel size (m).
31
- nworks Parallel workers.
32
- mosaic Merge scenes per date.
33
- auto_init_gee Call ee.Initialize() if needed.
34
-
35
- Returns
36
- -------
37
- pandas.DataFrame
38
- Scene catalogue used for the request.
39
- """
40
- # Filter scenes
41
- df = cubexpress.cloud_table(
42
- lon=lon,
43
- lat=lat,
44
- edge_size=edge_size,
45
- scale=scale,
46
- cloud_max=cloud_max,
47
- start=start,
48
- end=end,
49
- )
50
-
51
- # Build requests + ensure dir
52
- requests = cubexpress.table_to_requestset(df, mosaic=mosaic)
53
- pathlib.Path(output).mkdir(parents=True, exist_ok=True)
54
-
55
- # Download cube
56
- cubexpress.get_cube(requests, output, nworks)
57
- return df
File without changes
File without changes
File without changes
File without changes
File without changes