satcube 0.1.17__py3-none-any.whl → 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of satcube might be problematic.
- satcube/__init__.py +2 -4
- satcube/align.py +85 -44
- satcube/archive_cloud_detection.py +23 -0
- satcube/archive_dataclass.py +39 -0
- satcube/archive_main.py +453 -0
- satcube/archive_utils.py +1087 -0
- satcube/{cloud_detection.py → cloud.py} +100 -95
- satcube/composite.py +85 -0
- satcube/download.py +2 -5
- satcube/gapfill.py +216 -0
- satcube/objects.py +208 -36
- satcube/smooth.py +46 -0
- {satcube-0.1.17.dist-info → satcube-0.1.18.dist-info}/METADATA +1 -1
- satcube-0.1.18.dist-info/RECORD +17 -0
- satcube-0.1.17.dist-info/RECORD +0 -10
- {satcube-0.1.17.dist-info → satcube-0.1.18.dist-info}/LICENSE +0 -0
- {satcube-0.1.17.dist-info → satcube-0.1.18.dist-info}/WHEEL +0 -0
satcube/{cloud_detection.py → cloud.py}
RENAMED

@@ -24,8 +24,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import rasterio as rio
 from rasterio.merge import merge
-import shutil
-
 from satcube.utils import define_iteration, DeviceManager
 import warnings
 warnings.filterwarnings(
@@ -35,8 +33,6 @@ warnings.filterwarnings(
     module=r"huggingface_hub\.utils\._.*",
 )
 
-
-
 def infer_cloudmask(
     input_path: str | pathlib.Path,
     output_path: str | pathlib.Path,
@@ -45,38 +41,16 @@ def infer_cloudmask(
     chunk_size: int = 512,
     overlap: int = 32,
     device: str = "cpu",
-    save_mask: bool =
-    prefix: str = ""
+    save_mask: bool = True
 ) -> pathlib.Path:
-
-
-
-
-    This uses partial overlap logic:
-    - For interior tiles, skip overlap//2 on each side.
-    - For boundary tiles, we skip only the interior side to avoid losing data at the edges.
-
-    Parameters
-    ----------
-    image_path : Path to input image.
-    output_path : Path to output single-band mask.
-    cloud_model : PyTorch model (already loaded with weights).
-    chunk_size : Size of each tile to read from the source image (default 512).
-    overlap : Overlap in pixels between adjacent tiles (default 32).
-    device : "cpu" or "cuda:0".
-
-    Returns
-    -------
-    pathlib.Path : The path to the created output image.
-    """
-
-    input_path = pathlib.Path(input_path)
-    output_path = pathlib.Path(output_path)
+
+    input_path = pathlib.Path(input_path).expanduser().resolve()
+    output_path = pathlib.Path(output_path).expanduser().resolve()
 
     with rio.open(input_path) as src:
         meta = src.profile
-        if not meta.get("tiled", False):
-            raise ValueError("The input image is not marked as tiled in its metadata.")
+        # if not meta.get("tiled", False):
+        #     raise ValueError("The input image is not marked as tiled in its metadata.")
         # Ensure the internal blocksize matches chunk_size
         if chunk_size % meta["blockxsize"] != 0 and meta["blockxsize"] <= chunk_size:
             raise ValueError(f"Image blocks must be {chunk_size}x{chunk_size}, "
@@ -93,9 +67,21 @@ def infer_cloudmask(
 
         window = Window(col_off, row_off, chunk_size, chunk_size)
         patch = src.read(window=window) / 1e4
-
-
-
+
+        patch_tensor = (
+            torch.from_numpy(patch)
+            .float()
+            .unsqueeze(0)
+            .to(device)
+        )
+
+        result = (
+            cloud_model(patch_tensor)
+            .cpu()
+            .numpy()
+            .astype(np.uint8)
+        )
+
         if col_off == 0:
             offset_x = 0
         else:
@@ -132,8 +118,7 @@ def infer_cloudmask(
         output_mask = output_path.parent / (output_path.stem + "_mask.tif")
         with rio.open(output_mask, "w", **out_meta) as dst:
             dst.write(full_mask, 1)
-
-
+
         data = src.read()
         img_prof = src.profile.copy()
 
@@ -144,57 +129,62 @@ def infer_cloudmask(
     with rio.open(output_path, "w", **img_prof) as dst:
         dst.write(masked)
 
-
+    flat = full_mask.astype(np.uint8).ravel()
+    counts = np.bincount(flat, minlength=4)
+    total = flat.size
+    percentages = {
+        "id": input_path.stem,
+        "clear_pct": counts[0] / total * 100.0,
+        "thin_cloud_pct": counts[1] / total * 100.0,
+        "cloud_shadow_pct": counts[2] / total * 100.0,
+        "thick_cloud_pct": counts[3] / total * 100.0,
+    }
+
+    return percentages
 
-
-
-
+
+def cloud_fn(
+    metadata: pd.DataFrame | None = None,
+    input_dir: str | pathlib.Path | None = None,
+    output_dir: str | pathlib.Path = "masked",
     model_path: str | pathlib.Path = "SEN2CloudEnsemble",
     device: str = "cpu",
-    save_mask: bool =
+    save_mask: bool = True,
+    cache: bool = False,
     nworks: int = 4,
-) ->
-    """Write cloud-masked Sentinel-2 images.
-
-    Parameters
-    ----------
-    input
-        Path to a single ``.tif`` file **or** a directory containing them.
-    output
-        Destination directory (created i
-f missing).
-    tile, pad
-        Tile size and padding (pixels) when tiling is required.
-    save_mask
-        If *True*, store the binary mask alongside the masked image.
-    device
-        Torch device for inference, e.g. ``"cpu"`` or ``"cuda:0"``.
-    max_pix_cpu
-        Tile images larger than this when running on CPU.
-
-    Returns
-    ------
-    list[pathlib.Path]
-        Paths to the generated masked images.
-    """
-    src = pathlib.Path(input).expanduser().resolve()
-    dst_dir = pathlib.Path(output).expanduser().resolve()
-    dst_dir.mkdir(parents=True, exist_ok=True)
-
-    # Collect files to process -------------------------------------------------
-    tif_paths = []
-    if src.is_dir():
-        tif_paths = [p for p in src.rglob("*.tif")]
-    elif src.is_file() and src.suffix.lower() == ".tif":
-        tif_paths = [src]
-        src = src.parent  # for relative-path bookkeeping below
-    else:
-        raise ValueError(f"Input must be a .tif or directory, got: {src}")
-
-    if not tif_paths:
-        print(f"[cloud_masking] No .tif files found in {src}")
-        return []
+) -> pd.DataFrame | None:
 
+    input_dir = pathlib.Path(input_dir).expanduser().resolve()
+    output_dir = pathlib.Path(output_dir).expanduser().resolve()
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    if metadata is None:
+        if not input_dir:
+            raise ValueError("Input directory must be specified.")
+        else:
+            if input_dir.is_dir():
+                tif_paths = [p for p in input_dir.rglob("*.tif")]
+                df = pd.DataFrame({
+                    "id": [p.stem for p in tif_paths],
+                    "path": [str(p) for p in tif_paths]
+                })
+            elif input_dir.is_file() and input_dir.suffix.lower() == ".tif":
+                tif_paths = [input_dir]
+                input_dir = input_dir.parent
+            else:
+                raise ValueError(f"Input must be a .tif or directory, got: {input_dir}")
+    else:
+        if not input_dir:
+            raise ValueError("Input directory must be specified.")
+        else:
+            df = metadata["id"].to_frame()
+            df["path"] = df["id"].apply(lambda x: str(input_dir / (x + ".tif")))
+
+
+    if cache:
+        exist_files = [file.stem for file in output_dir.glob("*.tif")]
+        df = df[~df["id"].isin(exist_files)]
+
     if not pathlib.Path(model_path).exists():
         mlstac.download(
             file = "https://huggingface.co/tacofoundation/CloudSEN12-models/resolve/main/SEN2CloudEnsemble/mlm.json",
@@ -202,22 +192,23 @@ def cloud_masking(
         )
 
     model = mlstac.load(model_path)
-    cloud_model = DeviceManager(model, init_device=device).model
-
+    cloud_model = DeviceManager(model, init_device=device).model.eval()
+
+    results_cloud = []
 
     with ThreadPoolExecutor(max_workers=nworks) as executor:
         futures = {
             executor.submit(
                 infer_cloudmask,
-                input_path=p,
-                output_path=
+                input_path=p["path"],
+                output_path=output_dir / (p["id"] + ".tif"),
                 cloud_model=cloud_model,
                 device=device,
-                save_mask=save_mask
-
-            ): p for i, p in enumerate(tif_paths)
+                save_mask=save_mask
+            ): p for i, p in df.iterrows()
         }
 
+
         for future in tqdm(
             as_completed(futures),
             total=len(futures),
@@ -228,11 +219,25 @@ def cloud_masking(
             p = futures[future]
             try:
                 result = future.result()
-
+                results_cloud.append(result)
             except Exception as e:
                 print(f"Error processing {p}: {e}")
 
-
-
-
-
+    cloud_df = pd.DataFrame(results_cloud)
+
+    if cloud_df.empty:
+        return metadata
+
+    metadata = metadata.drop(
+        columns=["clear_pct","thin_cloud_pct", "cloud_shadow_pct", "thick_cloud_pct"],
+        errors="ignore"
+    )
+
+    metadata = metadata.merge(
+        cloud_df,
+        on="id",
+        how="left",
+        suffixes=('', '')
+    )
+
+    return metadata
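The rewritten module replaces the old path-based cloud_masking() helper with a metadata-driven cloud_fn() that writes masked images and returns the metadata DataFrame with per-scene cloud statistics merged in. A minimal usage sketch, assuming cloud_fn is imported from satcube.cloud and that "raw" is a hypothetical folder of Sentinel-2 GeoTIFFs whose file names match the "id" column of the metadata:

    from satcube.cloud import cloud_fn

    # meta: DataFrame with an "id" column; each <id>.tif must exist inside "raw/"
    meta = cloud_fn(
        metadata=meta,
        input_dir="raw",
        output_dir="masked",
        device="cpu",
        save_mask=True,
        nworks=4,
    )
    # meta now carries clear_pct, thin_cloud_pct, cloud_shadow_pct and thick_cloud_pct per scene.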
satcube/composite.py
ADDED

@@ -0,0 +1,85 @@
+import pathlib
+from typing import Tuple
+import numpy as np
+import pandas as pd
+import rasterio as rio
+from concurrent.futures import ProcessPoolExecutor, as_completed
+
+
+def monthly_composites_s2(
+    metadata: pd.DataFrame | None = None,
+    input_dir: str | pathlib.Path | None = None,
+    output_dir: str | pathlib.Path = "monthly_composites",
+    date_range: Tuple[str, str] = ("2018-06-01", "2020-01-01"),
+    agg_method: str = "median",
+):
+
+    input_dir = pathlib.Path(input_dir).expanduser().resolve()
+    output_dir = pathlib.Path(output_dir).expanduser().resolve()
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    all_raw_files = [input_dir / f for f in input_dir.glob("*.tif") if f.is_file()]
+
+    with rio.open(all_raw_files[0]) as src:
+        profile = src.profile
+
+
+    all_raw_dates = pd.to_datetime(metadata["date"])
+    all_raw_date_min = pd.to_datetime(date_range[0])
+    all_raw_date_max = pd.to_datetime(date_range[1])
+    all_raw_dates_unique = pd.date_range(
+        all_raw_date_min, all_raw_date_max, freq="MS"
+    ) + pd.DateOffset(days=14)
+    all_raw_dates_unique = all_raw_dates_unique.strftime("%Y-%m-15")
+
+    # Aggregate the data considering the method and dates
+    new_table = []
+    for idx, date in enumerate(all_raw_dates_unique):
+
+        # Get the images to aggregate
+        idxs = all_raw_dates.dt.strftime("%Y-%m-15") == date
+        images = [all_raw_files[i] for i in np.where(idxs)[0]]
+
+        if len(images) == 0:
+            data = np.ones((profile["count"], profile["height"], profile["width"]))
+            data = 65535 * data
+            nodata = 1
+            profile_image = profile
+        else:
+            # Read the images
+            container = []
+            for image in images:
+                with rio.open(image) as src:
+                    data = src.read()
+                    profile_image = src.profile
+                container.append(data)
+
+            # Aggregate the data
+            if agg_method == "mean":
+                data = np.mean(container, axis=0)
+            elif agg_method == "median":
+                data = np.median(container, axis=0)
+            elif agg_method == "max":
+                data = np.max(container, axis=0)
+            elif agg_method == "min":
+                data = np.min(container, axis=0)
+            else:
+                raise ValueError("Invalid aggregation method")
+
+            nodata = 0
+
+        # Save the image
+        with rio.open(output_dir / f"{date}.tif", "w", **profile_image) as dst:
+            dst.write(data.astype(rio.uint16))
+
+
+        meta_dict = {
+            "outname": f"{date}.tif",
+            "date": date,
+            "nodata": nodata,
+        }
+
+        new_table.append(meta_dict)
+
+
+    return pd.DataFrame(new_table)
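A usage sketch for the new compositor, assuming it is imported from satcube.composite; "masked" is a hypothetical input folder and meta is a DataFrame with a "date" column aligned with the .tif files found there:

    from satcube.composite import monthly_composites_s2

    composites = monthly_composites_s2(
        metadata=meta,
        input_dir="masked",
        output_dir="monthly_composites",
        date_range=("2018-06-01", "2020-01-01"),
        agg_method="median",   # one of "mean", "median", "max", "min"
    )
    # Returns one row per month: outname, date and a nodata flag for months with no scenes.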
satcube/download.py
CHANGED

@@ -17,7 +17,6 @@ def download(
     nworks: int = 4
 ) -> "SatCubeMetadata":
 
-
     outfolder = pathlib.Path(outfolder).resolve()
 
     table = ce.s2_table(
@@ -57,12 +56,10 @@ def download(
         .reset_index()
     )
 
-    table_final = table_req.merge(
+    df = table_req.merge(
         result_table,
         on='date',
         how='left'
     ).rename(columns={'id_x': 'id', 'id_y': 'gee_ids'})
 
-
-
-    return SatCubeMetadata(df=table_final, raw_dir=outfolder)
+    return SatCubeMetadata(df=df, raw_dir=outfolder)
satcube/gapfill.py
ADDED

@@ -0,0 +1,216 @@
+# satcube/gapfill.py
+from __future__ import annotations
+
+import pathlib, shutil
+from typing import Literal, List, Tuple
+import numpy as np
+import pandas as pd
+import rasterio as rio
+from tqdm import tqdm
+
+from sklearn.linear_model import LinearRegression
+
+_GAP_METHOD = Literal["histogram_matching", "linear"]
+
+
+
+
+def linear_interpolation(
+    image1: np.ndarray, image2: np.ndarray, image3: np.ndarray
+) -> np.ndarray:
+    """Apply linear interpolation to image3 using image1 and image2 as
+    reference images.
+
+    Args:
+        image1 (np.ndarray): The first reference image.
+        image2 (np.ndarray): The second reference image.
+        image3 (np.ndarray): The image to be matched.
+
+    Returns:
+        np.ndarray: The matched image.
+    """
+
+    # remove nan values
+    image1_nonan = image1.flatten().copy()
+    image1_nonan = image1_nonan[~np.isnan(image1_nonan)]
+
+    image2_nonan = image2.flatten().copy()
+    image2_nonan = image2_nonan[~np.isnan(image2_nonan)]
+
+    # Calculate the slope and intercept
+    linreg = LinearRegression()
+    linreg.fit(image2_nonan[:, np.newaxis], image1_nonan[:, np.newaxis])
+    slope = linreg.coef_[0]
+    intercept = linreg.intercept_
+
+    # Apply the linear interpolation
+    image3_matched = slope * image3 + intercept
+
+    return image3_matched
+
+
+def tripple_histogram_matching(
+    image1: np.ndarray, image2: np.ndarray, image3: np.ndarray
+) -> np.ndarray:
+    """Apply histogram matching to image3 using image1 and image2 as reference images.
+
+    Args:
+        image1 (np.ndarray): The first reference image.
+        image2 (np.ndarray): The second reference image.
+        image3 (np.ndarray): The image to be matched.
+
+    Returns:
+        np.ndarray: The matched image.
+    """
+
+    # remove nan values
+    image1_nonan = image1.flatten().copy()
+    image1_nonan = image1_nonan[~np.isnan(image1_nonan)]
+
+    image2_nonan = image2.flatten().copy()
+    image2_nonan = image2_nonan[~np.isnan(image2_nonan)]
+
+    image3_nonan = image3.flatten().copy()
+    image3_nonan = image3_nonan[~np.isnan(image3_nonan)]
+
+    # Calculate histograms
+    hist1, bins = np.histogram(image1_nonan, 128, [0, 2])
+    hist2, bins = np.histogram(image2_nonan, 128, [0, 2])
+    hist3, bins = np.histogram(image3_nonan, 128, [0, 2])
+
+    # Calculate the cumulative distribution function (CDF) of img1
+    cdf1 = hist1.cumsum() / hist1.sum()
+
+    # Calculate the CDF of img2
+    cdf2 = hist2.cumsum() / hist2.sum()
+
+    # Create a lookup table (LUT) to map the pixel values of img1 to img2
+    lut = np.interp(cdf2, cdf1, bins[:-1])
+
+    # Perform histogram matching
+    img3_matched = np.interp(image3.ravel(), bins[:-1], lut).reshape(image3.shape)
+
+    return img3_matched
+
+
+def _fill_one(
+    img_path: pathlib.Path,
+    ref_paths: List[pathlib.Path],
+    dates: np.ndarray,
+    this_date: np.datetime64,
+    *,
+    method: _GAP_METHOD,
+    out_dir: pathlib.Path,
+    quiet: bool
+) -> float:
+    """Gap‑fill a single S2 scene; return error metric."""
+    with rio.open(img_path) as src:
+        data = src.read() / 1e4
+        prof = src.profile
+    data[data == 6.5535] = np.nan
+    cloudmask = np.isnan(data).mean(0)
+
+    if cloudmask.sum() == 0:  # clean image: copy through unprocessed
+        shutil.copy(img_path, out_dir / img_path.name)
+        return 0.0
+
+    # sort all other scenes by temporal proximity
+    idxs = np.argsort(np.abs(dates - this_date))
+    best_img, best_metric = None, np.inf
+    tries = 0
+
+    for i in idxs:
+        if tries == 5:  # at most 5 attempts
+            break
+        ref_path = ref_paths[i]
+        if ref_path == img_path:
+            continue
+
+        with rio.open(ref_path) as src:
+            ref = src.read() / 1e4
+        ref[ref == 6.5535] = np.nan
+        ref_mask = np.isnan(ref) * 1.0
+
+        # discard references whose clouds overlap the gaps
+        if np.sum((ref_mask + cloudmask) == 2) != 0:
+            continue
+
+        full_mask = ((cloudmask + ref_mask) > 0).astype(float)
+        data_masked = np.where(full_mask, np.nan, data)
+        ref_masked = np.where(full_mask, np.nan, ref)
+
+        filled = np.zeros_like(data)
+        for b in range(data.shape[0]):
+            if method == "histogram_matching":
+                filled[b] = tripple_histogram_matching(data_masked[b], ref_masked[b], ref[b])
+            else:  # "linear"
+                filled[b] = linear_interpolation(data_masked[b], ref_masked[b], ref[b])
+
+        # compute the error metric
+        a = filled[[2, 1, 0]].mean(0)
+        b = data[[2, 1, 0]].mean(0)
+        metric = np.nanmean(np.abs(a - b) / (a + b))
+
+        if metric < best_metric:
+            best_metric = metric
+            best_img = filled
+
+        tries += 1
+
+    if best_img is None:  # no suitable ref found
+        if not quiet:
+            print(f"{img_path.name}: no cloud‑free neighbour found – copied.")
+        shutil.copy(img_path, out_dir / img_path.name)
+        return np.nan  # could also return 0.0
+
+    # Ensure float dtype for isnan()
+    if best_img.dtype.kind in "iu":  # i = int, u = uint
+        best_img = best_img.astype(np.float32)
+
+    # Combine and save
+    best_img[np.isnan(best_img)] = 0
+    data[np.isnan(data)] = 0
+    final = data + best_img * full_mask
+    final[final < 0] = 0
+    final = (final * 1e4).astype(np.uint16)
+
+    with rio.open(out_dir / img_path.name, "w", **prof) as dst:
+        dst.write(final)
+
+    if not quiet:
+        print(f"{img_path.name} gap‑filled (error={best_metric:.4f})")
+
+    return float(best_metric)
+
+
+def gapfill_fn(  # wrapper in the style of align_fn
+    metadata: pd.DataFrame,
+    input_dir: str | pathlib.Path,
+    output_dir: str | pathlib.Path = "gapfilled",
+    *,
+    method: _GAP_METHOD = "histogram_matching",
+    quiet: bool = False
+) -> pd.DataFrame:
+    """Gap‑fill every image listed in *metadata*.
+
+    Returns
+    -------
+    pd.DataFrame
+        Original dataframe + column ``match_error``.
+    """
+    input_dir = pathlib.Path(input_dir).expanduser().resolve()
+    output_dir = pathlib.Path(output_dir).expanduser().resolve()
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    img_paths = [input_dir / f"{i}.tif" for i in metadata["id"]]
+    dates = pd.to_datetime(metadata["date"]).to_numpy()
+
+    errors: List[float] = []
+    for i, img in enumerate(tqdm(img_paths, desc="Gap‑filling", unit="img")):
+        err = _fill_one(img, img_paths, dates, dates[i],
+                        method=method, out_dir=output_dir, quiet=quiet)
+        errors.append(err)
+
+    metadata = metadata.drop(columns=["match_error"], errors="ignore")
+    metadata["match_error"] = errors
+    return metadata
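A usage sketch for the new gap-filling step, assuming gapfill_fn is imported from satcube.gapfill; folder names are hypothetical, and metadata must provide "id" and "date" columns so every <id>.tif can be paired with its acquisition date:

    from satcube.gapfill import gapfill_fn

    meta = gapfill_fn(
        metadata=meta,
        input_dir="masked",
        output_dir="gapfilled",
        method="histogram_matching",   # or "linear"
        quiet=True,
    )
    # Adds a "match_error" column with the per-scene reconstruction error.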