geoai-py 0.18.1__py2.py3-none-any.whl → 0.19.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoai/__init__.py +23 -1
- geoai/agents/__init__.py +1 -0
- geoai/agents/geo_agents.py +74 -29
- geoai/geoai.py +2 -0
- geoai/landcover_train.py +685 -0
- geoai/landcover_utils.py +383 -0
- geoai/map_widgets.py +556 -0
- geoai/moondream.py +990 -0
- geoai/tools/__init__.py +11 -0
- geoai/tools/sr.py +194 -0
- geoai/train.py +22 -0
- geoai/utils.py +304 -1654
- {geoai_py-0.18.1.dist-info → geoai_py-0.19.0.dist-info}/METADATA +3 -1
- {geoai_py-0.18.1.dist-info → geoai_py-0.19.0.dist-info}/RECORD +18 -14
- {geoai_py-0.18.1.dist-info → geoai_py-0.19.0.dist-info}/WHEEL +0 -0
- {geoai_py-0.18.1.dist-info → geoai_py-0.19.0.dist-info}/entry_points.txt +0 -0
- {geoai_py-0.18.1.dist-info → geoai_py-0.19.0.dist-info}/licenses/LICENSE +0 -0
- {geoai_py-0.18.1.dist-info → geoai_py-0.19.0.dist-info}/top_level.txt +0 -0
geoai/landcover_utils.py
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Landcover Classification Utilities - Enhanced Tile Export Module
|
|
3
|
+
|
|
4
|
+
This module extends the base geoai functionality with specialized utilities
|
|
5
|
+
for discrete landcover classification. It provides enhanced tile generation
|
|
6
|
+
with background filtering capabilities to improve training efficiency.
|
|
7
|
+
|
|
8
|
+
Key Features:
|
|
9
|
+
- Enhanced tile filtering with configurable feature ratio thresholds
|
|
10
|
+
- Separate statistics tracking for different skip reasons
|
|
11
|
+
- Maintains full compatibility with base geoai workflow
|
|
12
|
+
- Optimized for discrete landcover classification tasks
|
|
13
|
+
|
|
14
|
+
Date: November 2025
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import os
|
|
18
|
+
import warnings
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
21
|
+
|
|
22
|
+
import geopandas as gpd
|
|
23
|
+
import numpy as np
|
|
24
|
+
import rasterio
|
|
25
|
+
from rasterio import features
|
|
26
|
+
from rasterio.windows import Window
|
|
27
|
+
from tqdm import tqdm
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# File extensions for which ``in_class_data`` is first tried as a raster mask.
_RASTER_MASK_EXTENSIONS = (".tif", ".tiff", ".img", ".jp2", ".png", ".bmp", ".gif")


def _validate_min_feature_ratio(min_feature_ratio: Any) -> Union[bool, float]:
    """Return a usable ratio threshold, or ``False`` when filtering is off.

    Invalid values are not fatal: a warning is emitted and ratio filtering
    is disabled.
    """
    if min_feature_ratio is False:
        return False

    # ``bool`` is a subclass of ``int``; without this check ``True`` would
    # silently be interpreted as a threshold of 1.0.
    if isinstance(min_feature_ratio, bool) or not isinstance(
        min_feature_ratio, (int, float)
    ):
        warnings.warn(
            f"min_feature_ratio must be a number between 0.0 and 1.0, got {type(min_feature_ratio)}. "
            "Disabling ratio filtering."
        )
        return False

    if not (0.0 <= min_feature_ratio <= 1.0):
        warnings.warn(
            f"min_feature_ratio must be between 0.0 and 1.0, got {min_feature_ratio}. "
            "Disabling ratio filtering."
        )
        return False

    return min_feature_ratio


def _rasterize_class_vector(
    gdf,
    src,
    class_value_field: str,
    buffer_radius: float,
    all_touched: bool,
    quiet: bool,
):
    """Burn a vector class layer onto the full grid of the open raster ``src``.

    The layer is reprojected to the raster CRS when the two differ and
    optionally buffered. Returns a ``uint8`` array of shape ``src.shape``
    with 0 wherever no geometry was burned.
    """
    if gdf.crs != src.crs:
        if not quiet:
            print(f"Reprojecting mask from {gdf.crs} to {src.crs}")
        gdf = gdf.to_crs(src.crs)

    if buffer_radius > 0:
        # Work on a copy so a GeoDataFrame supplied by the caller is not
        # modified in place.
        gdf = gdf.copy()
        gdf.geometry = gdf.geometry.buffer(buffer_radius)

    shapes = list(zip(gdf.geometry, gdf[class_value_field]))
    if not shapes:
        # rasterize() rejects an empty shape list; an empty layer simply
        # means "everything is background".
        return np.zeros(src.shape, dtype=np.uint8)

    return features.rasterize(
        shapes,
        out_shape=src.shape,
        transform=src.transform,
        all_touched=all_touched,
        fill=0,
        dtype=np.uint8,
    )


def export_landcover_tiles(
    in_raster: str,
    out_folder: str,
    in_class_data: Optional[Union[str, gpd.GeoDataFrame]] = None,
    tile_size: int = 256,
    stride: int = 128,
    class_value_field: str = "class",
    buffer_radius: float = 0,
    max_tiles: Optional[int] = None,
    quiet: bool = False,
    all_touched: bool = True,
    create_overview: bool = False,
    skip_empty_tiles: bool = False,
    min_feature_ratio: Union[bool, float] = False,
    metadata_format: str = "PASCAL_VOC",
) -> Dict[str, Any]:
    """
    Export GeoTIFF tiles optimized for landcover classification training.

    A sliding window of ``tile_size`` x ``tile_size`` pixels is moved over
    ``in_raster`` with step ``stride``. For every kept window an image tile
    is written to ``<out_folder>/images`` and, when class data is supplied,
    a single-band ``uint8`` label tile with the same name is written to
    ``<out_folder>/labels``. Tiles can be dropped when they contain no
    non-zero label pixels or too few of them.

    Args:
        in_raster: Path to input raster (image to tile)
        out_folder: Output directory for tiles (created if missing)
        in_class_data: Class labels; optional for image-only export. Either
            a path (``str`` or path-like) or a GeoDataFrame. Paths ending in
            .tif/.tiff/.img/.jp2/.png/.bmp/.gif are first opened as a raster
            mask; if that fails, or for any other extension, the path is
            read as a vector layer.
        tile_size: Size of output tiles in pixels (default: 256)
        stride: Stride for sliding window in pixels (default: 128)
        class_value_field: Vector attribute holding class values
            (default: "class"); ignored for raster masks
        buffer_radius: Buffer distance applied to vector geometries after
            reprojection, i.e. in the units of the raster CRS (default: 0)
        max_tiles: Maximum number of candidate tile positions to process.
            The cap is applied before filtering, so fewer tiles may be
            exported (default: None)
        quiet: Suppress progress output (default: False)
        all_touched: Include pixels touched by geometry (default: True)
        create_overview: Accepted for signature compatibility; not used by
            this function
        skip_empty_tiles: Skip tiles with no non-zero label pixels
            (default: False). Without class data every tile counts as empty.
        min_feature_ratio: Minimum ratio of non-background pixels required to keep tile
            - False: Disable ratio filtering (default)
            - 0.0-1.0: Minimum ratio threshold (e.g., 0.1 = 10% features required)
            Only applied when ``skip_empty_tiles`` is True. Invalid values
            (including ``True``) emit a warning and disable filtering.
        metadata_format: When "PASCAL_VOC", an ``annotations`` directory is
            created under ``out_folder``; no annotation files are written
            by this function.

    Returns:
        Dictionary containing:
            - tiles_exported: Number of tiles successfully exported
            - tiles_skipped_empty: Number of completely empty tiles skipped
            - tiles_skipped_ratio: Number of tiles filtered by min_feature_ratio
            - output_dirs: Dictionary with paths to images and labels directories

    Examples:
        # Original behavior (no ratio filtering)
        export_landcover_tiles(
            "input.tif",
            "output",
            "mask.shp",
            skip_empty_tiles=True
        )

        # Light filtering (keep tiles with >=5% features)
        export_landcover_tiles(
            "input.tif",
            "output",
            "mask.shp",
            skip_empty_tiles=True,
            min_feature_ratio=0.05
        )

    Note:
        This function is designed for discrete landcover classification where
        class 0 typically represents background/no data. The min_feature_ratio
        parameter counts pixels with value > 0 as "features".
    """
    min_feature_ratio = _validate_min_feature_ratio(min_feature_ratio)
    if min_feature_ratio is not False and not skip_empty_tiles:
        # Make the otherwise silent no-op visible to the caller.
        warnings.warn(
            "min_feature_ratio has no effect unless skip_empty_tiles=True."
        )

    # Create output directories
    out_folder = Path(out_folder)
    out_folder.mkdir(parents=True, exist_ok=True)

    images_dir = out_folder / "images"
    labels_dir = out_folder / "labels"
    images_dir.mkdir(exist_ok=True)
    labels_dir.mkdir(exist_ok=True)

    if metadata_format == "PASCAL_VOC":
        (out_folder / "annotations").mkdir(exist_ok=True)

    stats: Dict[str, Any] = {
        "tiles_exported": 0,
        "tiles_skipped_empty": 0,
        "tiles_skipped_ratio": 0,
        "output_dirs": {"images": str(images_dir), "labels": str(labels_dir)},
    }

    class_src = None  # open raster mask, if in_class_data is a raster
    mask_array = None  # full-size rasterized mask, if in_class_data is vector
    tile_positions: List[Tuple[int, int]] = []

    try:
        with rasterio.open(in_raster) as src:
            height, width = src.shape

            if in_class_data is not None:
                if isinstance(in_class_data, (str, os.PathLike)):
                    class_path = os.fspath(in_class_data)

                    if Path(class_path).suffix.lower() in _RASTER_MASK_EXTENSIONS:
                        try:
                            class_src = rasterio.open(class_path)
                        except Exception as e:
                            # Deliberate best-effort: any failure to open as
                            # raster falls through to the vector reader.
                            if not quiet:
                                print(f"Could not open as raster, trying vector: {e}")
                        else:
                            if class_src.crs != src.crs and not quiet:
                                print(
                                    f"Warning: CRS mismatch between image ({src.crs}) and mask ({class_src.crs})"
                                )

                    if class_src is None:
                        mask_array = _rasterize_class_vector(
                            gpd.read_file(class_path),
                            src,
                            class_value_field,
                            buffer_radius,
                            all_touched,
                            quiet,
                        )
                else:
                    # Assume a GeoDataFrame was passed directly
                    mask_array = _rasterize_class_vector(
                        in_class_data,
                        src,
                        class_value_field,
                        buffer_radius,
                        all_touched,
                        quiet,
                    )

            # Upper-left pixel of every full tile, row by row
            tile_positions = [
                (x, y)
                for y in range(0, height - tile_size + 1, stride)
                for x in range(0, width - tile_size + 1, stride)
            ]
            if max_tiles:
                tile_positions = tile_positions[:max_tiles]

            pbar = tqdm(tile_positions, desc="Exporting tiles", disable=quiet)

            for tile_idx, (x, y) in enumerate(pbar):
                window = Window(x, y, tile_size, tile_size)
                tile_transform = src.window_transform(window)

                mask_tile = None
                has_features = False

                if class_src is not None:
                    # Geographic bounds of the tile, taken from the affine
                    # coefficients: [2]/[5] are the upper-left x/y, [0]/[4]
                    # the pixel width/height.
                    minx = tile_transform[2]
                    maxy = tile_transform[5]
                    maxx = minx + tile_size * tile_transform[0]
                    miny = maxy + tile_size * tile_transform[4]

                    # Matching window in the class raster's own pixel grid
                    window_class = rasterio.windows.from_bounds(
                        minx, miny, maxx, maxy, class_src.transform
                    )

                    try:
                        mask_tile = class_src.read(
                            1,
                            window=window_class,
                            boundless=True,
                            out_shape=(tile_size, tile_size),
                        )
                    except Exception as e:
                        # A tile whose mask cannot be read is dropped, not fatal
                        if not quiet:
                            pbar.write(f"Error reading mask tile at ({x}, {y}): {e}")
                        continue

                    has_features = np.any(mask_tile > 0)

                elif mask_array is not None:
                    mask_tile = mask_array[y : y + tile_size, x : x + tile_size]
                    has_features = np.any(mask_tile > 0)

                if skip_empty_tiles:
                    if not has_features:
                        stats["tiles_skipped_empty"] += 1
                        continue

                    if min_feature_ratio is not False:
                        feature_ratio = np.sum(mask_tile > 0) / mask_tile.size
                        if feature_ratio < min_feature_ratio:
                            stats["tiles_skipped_ratio"] += 1
                            continue

                # Read pixels only for tiles that survive filtering
                image_tile = src.read(window=window)

                # The sequential candidate index keeps image/label names in sync
                tile_name = f"tile_{tile_idx:06d}.tif"

                with rasterio.open(
                    images_dir / tile_name,
                    "w",
                    driver="GTiff",
                    height=tile_size,
                    width=tile_size,
                    count=src.count,
                    dtype=src.dtypes[0],
                    crs=src.crs,
                    transform=tile_transform,
                    compress="lzw",
                ) as dst:
                    dst.write(image_tile)

                if mask_tile is not None:
                    with rasterio.open(
                        labels_dir / tile_name,
                        "w",
                        driver="GTiff",
                        height=tile_size,
                        width=tile_size,
                        count=1,
                        dtype=np.uint8,
                        crs=src.crs,
                        transform=tile_transform,
                        compress="lzw",
                    ) as dst:
                        dst.write(mask_tile, 1)

                stats["tiles_exported"] += 1

                if not quiet:
                    pbar.set_description(
                        f"Exporting tiles ({stats['tiles_exported']}/{tile_idx + 1})"
                    )
    finally:
        # Release the raster mask even if tiling failed part-way through
        if class_src is not None:
            class_src.close()

    # Print summary
    if not quiet:
        print(f"\n{'='*60}")
        print("TILE EXPORT SUMMARY")
        print(f"{'='*60}")
        print(f"Tiles exported: {stats['tiles_exported']}/{len(tile_positions)}")
        if skip_empty_tiles:
            print(f"Tiles skipped (empty): {stats['tiles_skipped_empty']}")
        if min_feature_ratio is not False:
            print(
                f"Tiles skipped (low feature ratio < {min_feature_ratio}): {stats['tiles_skipped_ratio']}"
            )
        print("\nOutput directories:")
        print(f"  Images: {stats['output_dirs']['images']}")
        print(f"  Labels: {stats['output_dirs']['labels']}")
        print(f"{'='*60}\n")

    return stats
|