geoai-py 0.15.0__py2.py3-none-any.whl → 0.18.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoai/__init__.py +16 -1
- geoai/agents/__init__.py +4 -0
- geoai/agents/catalog_models.py +51 -0
- geoai/agents/catalog_tools.py +907 -0
- geoai/agents/geo_agents.py +934 -42
- geoai/agents/stac_models.py +67 -0
- geoai/agents/stac_tools.py +435 -0
- geoai/change_detection.py +32 -7
- geoai/download.py +5 -1
- geoai/geoai.py +3 -0
- geoai/timm_segment.py +4 -1
- geoai/tools/__init__.py +65 -0
- geoai/tools/cloudmask.py +431 -0
- geoai/tools/multiclean.py +357 -0
- geoai/train.py +694 -35
- geoai/utils.py +752 -208
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/METADATA +6 -2
- geoai_py-0.18.0.dist-info/RECORD +33 -0
- geoai_py-0.15.0.dist-info/RECORD +0 -26
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/WHEEL +0 -0
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/entry_points.txt +0 -0
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/licenses/LICENSE +0 -0
- {geoai_py-0.15.0.dist-info → geoai_py-0.18.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MultiClean integration utilities for cleaning segmentation results.
|
|
3
|
+
|
|
4
|
+
This module provides functions to use MultiClean (https://github.com/DPIRD-DMA/MultiClean)
|
|
5
|
+
for post-processing segmentation masks and classification rasters. MultiClean performs
|
|
6
|
+
morphological operations to smooth edges, remove noise islands, and fill gaps.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Optional, List, Union, Tuple
|
|
11
|
+
import numpy as np
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from multiclean import clean_array
|
|
15
|
+
|
|
16
|
+
MULTICLEAN_AVAILABLE = True
|
|
17
|
+
except ImportError:
|
|
18
|
+
MULTICLEAN_AVAILABLE = False
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
import rasterio
|
|
22
|
+
|
|
23
|
+
RASTERIO_AVAILABLE = True
|
|
24
|
+
except ImportError:
|
|
25
|
+
RASTERIO_AVAILABLE = False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def check_multiclean_available():
    """
    Verify that the optional ``multiclean`` dependency could be imported.

    The module records the outcome of its guarded import in
    ``MULTICLEAN_AVAILABLE``; this helper turns a failed import into an
    actionable error at call time.

    Raises:
        ImportError: If multiclean is not installed.
    """
    if MULTICLEAN_AVAILABLE:
        return

    install_hint = (
        "multiclean is not installed. "
        "Please install it with: pip install multiclean "
        "or: pip install geoai-py[extra]"
    )
    raise ImportError(install_hint)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def clean_segmentation_mask(
    mask: np.ndarray,
    class_values: Optional[Union[int, List[int]]] = None,
    smooth_edge_size: int = 2,
    min_island_size: int = 100,
    connectivity: int = 8,
    max_workers: Optional[int] = None,
    fill_nan: bool = False,
) -> np.ndarray:
    """
    Run MultiClean's ``clean_array`` on a 2D segmentation mask.

    The arguments are validated here and then forwarded unchanged to
    ``multiclean.clean_array``, which performs the cleaning steps:
        1. Edge smoothing - morphological opening to reduce jagged boundaries
        2. Island removal - drops small connected components (noise)
        3. Gap filling - replaces invalid pixels with the nearest valid class

    Args:
        mask (np.ndarray): 2D array of segmentation classes (int or float).
            NaN values are treated as nodata.
        class_values (int, list of int, or None): Class values to process.
            None lets MultiClean auto-detect the unique values in the mask.
            Defaults to None.
        smooth_edge_size (int): Kernel width in pixels for edge smoothing.
            Use 0 to disable smoothing. Defaults to 2.
        min_island_size (int): Minimum area (in pixels) a connected component
            must have to be kept; components strictly smaller are removed.
            Defaults to 100.
        connectivity (int): Neighbourhood used for component detection, either
            4 or 8 (8 also considers diagonal neighbours). Defaults to 8.
        max_workers (int, optional): Thread pool size for parallel processing.
            None uses MultiClean's default threading. Defaults to None.
        fill_nan (bool): Whether NaN pixels are filled with the nearest valid
            class. Defaults to False.

    Returns:
        np.ndarray: Cleaned 2D segmentation mask with the same shape as input.

    Raises:
        ImportError: If multiclean is not installed.
        ValueError: If mask is not 2D or if connectivity is not 4 or 8.

    Example:
        >>> import numpy as np
        >>> from geoai.tools.multiclean import clean_segmentation_mask
        >>> mask = np.random.randint(0, 3, (512, 512))
        >>> cleaned = clean_segmentation_mask(
        ...     mask,
        ...     class_values=[0, 1, 2],
        ...     smooth_edge_size=2,
        ...     min_island_size=50
        ... )
    """
    # Dependency check comes first so a missing install is reported before
    # any complaint about the arguments.
    check_multiclean_available()

    dimensions = mask.ndim
    if dimensions != 2:
        raise ValueError(f"Mask must be 2D, got shape {mask.shape}")

    if connectivity not in (4, 8):
        raise ValueError(f"Connectivity must be 4 or 8, got {connectivity}")

    cleaning_options = {
        "class_values": class_values,
        "smooth_edge_size": smooth_edge_size,
        "min_island_size": min_island_size,
        "connectivity": connectivity,
        "max_workers": max_workers,
        "fill_nan": fill_nan,
    }
    return clean_array(mask, **cleaning_options)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def clean_raster(
    input_path: str,
    output_path: str,
    class_values: Optional[Union[int, List[int]]] = None,
    smooth_edge_size: int = 2,
    min_island_size: int = 100,
    connectivity: int = 8,
    max_workers: Optional[int] = None,
    fill_nan: bool = False,
    band: int = 1,
    nodata: Optional[float] = None,
) -> None:
    """
    Clean a classification raster (GeoTIFF) and save the result.

    Reads one band of a GeoTIFF, applies MultiClean morphological operations
    via ``clean_segmentation_mask``, and writes a single-band, LZW-compressed
    raster that reuses the input profile (CRS, transform, nodata value).

    Nodata pixels are converted to NaN before cleaning and restored
    afterwards. The output keeps the data type of the input band whenever
    every cleaned value can be represented in it; otherwise the data type
    returned by the cleaning step is used.

    Args:
        input_path (str): Path to input GeoTIFF file.
        output_path (str): Path to save cleaned GeoTIFF file. Missing parent
            directories are created.
        class_values (int, list of int, or None): Target class values to process.
            If None, auto-detects unique values. Defaults to None.
        smooth_edge_size (int): Kernel width in pixels for edge smoothing.
            Defaults to 2.
        min_island_size (int): Minimum area (in pixels) for components.
            Defaults to 100.
        connectivity (int): Connectivity for component detection (4 or 8).
            Defaults to 8.
        max_workers (int, optional): Thread pool size. Defaults to None.
        fill_nan (bool): Whether to fill NaN/nodata pixels. Defaults to False.
        band (int): Band index to read (1-indexed). Defaults to 1.
        nodata (float, optional): Nodata value to use. If None, uses value
            from input file. Defaults to None.

    Returns:
        None: Writes cleaned raster to output_path.

    Raises:
        ImportError: If multiclean or rasterio is not installed.
        FileNotFoundError: If input_path does not exist.

    Example:
        >>> from geoai.tools.multiclean import clean_raster
        >>> clean_raster(
        ...     "segmentation_raw.tif",
        ...     "segmentation_cleaned.tif",
        ...     class_values=[0, 1, 2],
        ...     smooth_edge_size=3,
        ...     min_island_size=50
        ... )
    """
    check_multiclean_available()

    if not RASTERIO_AVAILABLE:
        raise ImportError(
            "rasterio is required for raster operations. "
            "Please install it with: pip install rasterio"
        )

    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Input file not found: {input_path}")

    # Pull everything needed out of the dataset so it can be closed early.
    with rasterio.open(input_path) as src:
        mask = src.read(band)
        profile = src.profile.copy()
        if nodata is None:
            nodata = src.nodata

    # Remember the on-disk dtype so the output does not silently become float.
    source_dtype = mask.dtype

    # Nodata is expressed as NaN for the cleaning step. Integer bands need a
    # float copy to hold NaN; floating bands keep their own precision.
    if nodata is not None:
        if not np.issubdtype(mask.dtype, np.floating):
            mask = mask.astype(np.float32)
        mask[mask == nodata] = np.nan

    cleaned = clean_segmentation_mask(
        mask,
        class_values=class_values,
        smooth_edge_size=smooth_edge_size,
        min_island_size=min_island_size,
        connectivity=connectivity,
        max_workers=max_workers,
        fill_nan=fill_nan,
    )

    # Any NaN that survived cleaning goes back to the nodata value.
    if nodata is not None and np.isnan(cleaned).any():
        cleaned = np.nan_to_num(cleaned, nan=nodata)

    # Cast back to the source dtype only when the round-trip is lossless
    # (e.g. a uint8 class raster stays uint8). A nodata value that does not
    # fit, or a remaining NaN, makes the comparison fail and keeps the float.
    if cleaned.dtype != source_dtype:
        with np.errstate(invalid="ignore"):
            restored = cleaned.astype(source_dtype)
        if np.array_equal(restored, cleaned):
            cleaned = restored

    profile.update(
        dtype=cleaned.dtype,
        count=1,
        compress="lzw",
        nodata=nodata,
    )

    # Create the destination folder unless it is the filesystem root.
    output_dir = os.path.dirname(os.path.abspath(output_path))
    if output_dir and output_dir != os.path.abspath(os.sep):
        os.makedirs(output_dir, exist_ok=True)

    with rasterio.open(output_path, "w", **profile) as dst:
        dst.write(cleaned, 1)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def clean_raster_batch(
    input_paths: List[str],
    output_dir: str,
    class_values: Optional[Union[int, List[int]]] = None,
    smooth_edge_size: int = 2,
    min_island_size: int = 100,
    connectivity: int = 8,
    max_workers: Optional[int] = None,
    fill_nan: bool = False,
    band: int = 1,
    suffix: str = "_cleaned",
    verbose: bool = True,
) -> List[str]:
    """
    Clean multiple classification rasters in batch.

    Calls ``clean_raster`` on every input with the same cleaning parameters
    and writes each result to ``output_dir`` as ``<name><suffix><ext>``.
    Processing is best-effort: a file that fails is skipped (and reported
    when ``verbose`` is True) and is left out of the returned list.

    Output names are derived from the input basename only, so inputs from
    different folders that share a filename write to the same output path.

    Args:
        input_paths (list of str): List of paths to input GeoTIFF files.
        output_dir (str): Directory to save cleaned files. Created if missing.
        class_values (int, list of int, or None): Target class values.
            Defaults to None (auto-detect).
        smooth_edge_size (int): Kernel width for edge smoothing. Defaults to 2.
        min_island_size (int): Minimum component area. Defaults to 100.
        connectivity (int): Connectivity (4 or 8). Defaults to 8.
        max_workers (int, optional): Thread pool size. Defaults to None.
        fill_nan (bool): Whether to fill NaN pixels. Defaults to False.
        band (int): Band index to read (1-indexed). Defaults to 1.
        suffix (str): Suffix to add to output filenames. Defaults to "_cleaned".
        verbose (bool): Whether to print progress. Defaults to True.

    Returns:
        list of str: Paths to the output files that were written successfully.

    Raises:
        ImportError: If multiclean or rasterio is not installed.

    Example:
        >>> from geoai.tools.multiclean import clean_raster_batch
        >>> input_files = ["mask1.tif", "mask2.tif", "mask3.tif"]
        >>> outputs = clean_raster_batch(
        ...     input_files,
        ...     output_dir="cleaned_masks",
        ...     min_island_size=50
        ... )
    """
    check_multiclean_available()

    # clean_raster raises this too, but inside the loop it would be caught by
    # the per-file handler and the batch would quietly return an empty list.
    if not RASTERIO_AVAILABLE:
        raise ImportError(
            "rasterio is required for raster operations. "
            "Please install it with: pip install rasterio"
        )

    os.makedirs(output_dir, exist_ok=True)

    total = len(input_paths)
    output_paths = []

    for position, input_path in enumerate(input_paths, start=1):
        if verbose:
            print(f"Processing {position}/{total}: {input_path}")

        # <output_dir>/<input stem><suffix><input extension>
        name, ext = os.path.splitext(os.path.basename(input_path))
        output_path = os.path.join(output_dir, f"{name}{suffix}{ext}")

        try:
            clean_raster(
                input_path,
                output_path,
                class_values=class_values,
                smooth_edge_size=smooth_edge_size,
                min_island_size=min_island_size,
                connectivity=connectivity,
                max_workers=max_workers,
                fill_nan=fill_nan,
                band=band,
            )
        except Exception as e:
            # Deliberately broad: one bad file must not abort the batch.
            if verbose:
                print(f" ✗ Failed: {e}")
            continue
        else:
            output_paths.append(output_path)
            if verbose:
                print(f" ✓ Saved to: {output_path}")

    return output_paths
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
def compare_masks(
    original: np.ndarray,
    cleaned: np.ndarray,
) -> Tuple[int, int, float]:
    """
    Compare original and cleaned masks to quantify changes.

    Pixels that are NaN in either mask are excluded from both the changed
    count and the total. Inputs are converted with ``np.asarray`` so nested
    lists are compared element-wise as well.

    Args:
        original (np.ndarray): Original segmentation mask.
        cleaned (np.ndarray): Cleaned segmentation mask, same shape as
            ``original``.

    Returns:
        tuple: (pixels_changed, total_pixels, change_percentage)
            - pixels_changed (int): Number of valid pixels that changed value
            - total_pixels (int): Total number of valid pixels
            - change_percentage (float): Percentage of valid pixels changed;
              0.0 when there are no valid pixels

    Raises:
        ValueError: If the two masks do not have the same shape.

    Example:
        >>> import numpy as np
        >>> from geoai.tools.multiclean import compare_masks
        >>> original = np.random.randint(0, 3, (512, 512))
        >>> cleaned = original.copy()
        >>> changed, total, pct = compare_masks(original, cleaned)
        >>> print(f"Changed: {pct:.2f}%")
    """
    original = np.asarray(original)
    cleaned = np.asarray(cleaned)

    # Broadcasting differently shaped masks would produce meaningless counts.
    if original.shape != cleaned.shape:
        raise ValueError(
            f"Masks must have the same shape, got {original.shape} "
            f"and {cleaned.shape}"
        )

    # A pixel is valid only when it is not NaN in either mask.
    valid_mask = ~(np.isnan(original) | np.isnan(cleaned))

    # Native Python numbers keep the result in line with the annotation
    # (and directly JSON-serialisable).
    pixels_changed = int(np.count_nonzero((original != cleaned) & valid_mask))
    total_pixels = int(np.count_nonzero(valid_mask))

    if total_pixels > 0:
        change_percentage = pixels_changed / total_pixels * 100.0
    else:
        change_percentage = 0.0

    return pixels_changed, total_pixels, change_percentage
|