geoai-py 0.13.1__tar.gz → 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {geoai_py-0.13.1 → geoai_py-0.14.0}/.gitignore +2 -0
  2. {geoai_py-0.13.1 → geoai_py-0.14.0}/PKG-INFO +9 -5
  3. {geoai_py-0.13.1 → geoai_py-0.14.0}/README.md +8 -4
  4. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/__init__.py +1 -1
  5. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/train.py +119 -9
  6. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/utils.py +192 -40
  7. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai_py.egg-info/PKG-INFO +9 -5
  8. {geoai_py-0.13.1 → geoai_py-0.14.0}/mkdocs.yml +2 -0
  9. {geoai_py-0.13.1 → geoai_py-0.14.0}/pyproject.toml +2 -2
  10. {geoai_py-0.13.1 → geoai_py-0.14.0}/.dockerignore +0 -0
  11. {geoai_py-0.13.1 → geoai_py-0.14.0}/.editorconfig +0 -0
  12. {geoai_py-0.13.1 → geoai_py-0.14.0}/.pre-commit-config.yaml +0 -0
  13. {geoai_py-0.13.1 → geoai_py-0.14.0}/Dockerfile +0 -0
  14. {geoai_py-0.13.1 → geoai_py-0.14.0}/LICENSE +0 -0
  15. {geoai_py-0.13.1 → geoai_py-0.14.0}/MANIFEST.in +0 -0
  16. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/agents/__init__.py +0 -0
  17. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/agents/geo_agents.py +0 -0
  18. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/agents/map_tools.py +0 -0
  19. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/change_detection.py +0 -0
  20. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/classify.py +0 -0
  21. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/detectron2.py +0 -0
  22. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/dinov3.py +0 -0
  23. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/download.py +0 -0
  24. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/extract.py +0 -0
  25. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/geoai.py +0 -0
  26. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/hf.py +0 -0
  27. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/map_widgets.py +0 -0
  28. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/sam.py +0 -0
  29. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/segment.py +0 -0
  30. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai/segmentation.py +0 -0
  31. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai_py.egg-info/SOURCES.txt +0 -0
  32. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai_py.egg-info/dependency_links.txt +0 -0
  33. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai_py.egg-info/entry_points.txt +0 -0
  34. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai_py.egg-info/requires.txt +0 -0
  35. {geoai_py-0.13.1 → geoai_py-0.14.0}/geoai_py.egg-info/top_level.txt +0 -0
  36. {geoai_py-0.13.1 → geoai_py-0.14.0}/pytest.ini +0 -0
  37. {geoai_py-0.13.1 → geoai_py-0.14.0}/requirements.txt +0 -0
  38. {geoai_py-0.13.1 → geoai_py-0.14.0}/requirements_docs.txt +0 -0
  39. {geoai_py-0.13.1 → geoai_py-0.14.0}/setup.cfg +0 -0
  40. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/__init__.py +0 -0
  41. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/create_test_data.py +0 -0
  42. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_classify.py +0 -0
  43. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_download.py +0 -0
  44. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_extract.py +0 -0
  45. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_fixtures.py +0 -0
  46. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_geoai.py +0 -0
  47. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_segment.py +0 -0
  48. {geoai_py-0.13.1 → geoai_py-0.14.0}/tests/test_utils.py +0 -0
@@ -5,6 +5,8 @@ private/
5
5
  *$py.class
6
6
  *.jp2
7
7
  tests/data/
8
+ CLAUDE.md
9
+ AI_AGENTS.md
8
10
 
9
11
  # C extensions
10
12
  *.so
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: geoai-py
3
- Version: 0.13.1
3
+ Version: 0.14.0
4
4
  Summary: A Python package for using Artificial Intelligence (AI) with geospatial data
5
5
  Author-email: Qiusheng Wu <giswqs@gmail.com>
6
6
  License: MIT License
@@ -159,13 +159,17 @@ Comprehensive documentation is available at [https://opengeoai.org](https://open
159
159
 
160
160
  ## 📺 Video Tutorials
161
161
 
162
- Check out this 2-hour video tutorial on using GeoAI for geospatial data analysis and visualization.
162
+ ### GeoAI Made Easy: Learn the Python Package Step-by-Step (Beginner Friendly)
163
163
 
164
- [![cover](https://github.com/user-attachments/assets/1c14e651-65b9-41ae-b42d-3ad028b3eeb8)](https://youtu.be/jdK-cleFUkc)
164
+ [![intro](https://github.com/user-attachments/assets/7e60ce05-573d-4d0d-9876-5289b87e5136)](https://youtu.be/VIl29Rca6zE?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
165
165
 
166
- To learn more about GeoAI, you can watch the following video tutorials:
166
+ ### GeoAI Workshop: Unlocking the Power of GeoAI with Python
167
167
 
168
- [![cover](https://github.com/user-attachments/assets/3cde9547-ab62-4d70-b23a-3e5ed27c7407)](https://tinyurl.com/GeoAI-Tutorials)
168
+ [![cover](https://github.com/user-attachments/assets/1c14e651-65b9-41ae-b42d-3ad028b3eeb8)](https://youtu.be/jdK-cleFUkc?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
169
+
170
+ ### GeoAI Tutorials Playlist
171
+
172
+ [![cover](https://github.com/user-attachments/assets/3cde9547-ab62-4d70-b23a-3e5ed27c7407)](https://www.youtube.com/playlist?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
169
173
 
170
174
  ## 🤝 Contributing
171
175
 
@@ -103,13 +103,17 @@ Comprehensive documentation is available at [https://opengeoai.org](https://open
103
103
 
104
104
  ## 📺 Video Tutorials
105
105
 
106
- Check out this 2-hour video tutorial on using GeoAI for geospatial data analysis and visualization.
106
+ ### GeoAI Made Easy: Learn the Python Package Step-by-Step (Beginner Friendly)
107
107
 
108
- [![cover](https://github.com/user-attachments/assets/1c14e651-65b9-41ae-b42d-3ad028b3eeb8)](https://youtu.be/jdK-cleFUkc)
108
+ [![intro](https://github.com/user-attachments/assets/7e60ce05-573d-4d0d-9876-5289b87e5136)](https://youtu.be/VIl29Rca6zE?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
109
109
 
110
- To learn more about GeoAI, you can watch the following video tutorials:
110
+ ### GeoAI Workshop: Unlocking the Power of GeoAI with Python
111
111
 
112
- [![cover](https://github.com/user-attachments/assets/3cde9547-ab62-4d70-b23a-3e5ed27c7407)](https://tinyurl.com/GeoAI-Tutorials)
112
+ [![cover](https://github.com/user-attachments/assets/1c14e651-65b9-41ae-b42d-3ad028b3eeb8)](https://youtu.be/jdK-cleFUkc?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
113
+
114
+ ### GeoAI Tutorials Playlist
115
+
116
+ [![cover](https://github.com/user-attachments/assets/3cde9547-ab62-4d70-b23a-3e5ed27c7407)](https://www.youtube.com/playlist?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
113
117
 
114
118
  ## 🤝 Contributing
115
119
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  __author__ = """Qiusheng Wu"""
4
4
  __email__ = "giswqs@gmail.com"
5
- __version__ = "0.13.1"
5
+ __version__ = "0.14.0"
6
6
 
7
7
 
8
8
  import os
@@ -433,7 +433,7 @@ def train_one_epoch(
433
433
  elapsed_time = time.time() - start_time
434
434
  if verbose:
435
435
  print(
436
- f"Epoch: {epoch}, Batch: {i}/{len(data_loader)}, Loss: {losses.item():.4f}, Time: {elapsed_time:.2f}s"
436
+ f"Epoch: {epoch + 1}, Batch: {i + 1}/{len(data_loader)}, Loss: {losses.item():.4f}, Time: {elapsed_time:.2f}s"
437
437
  )
438
438
  start_time = time.time()
439
439
 
@@ -2625,6 +2625,8 @@ def semantic_inference_on_geotiff(
2625
2625
  num_channels: int = 3,
2626
2626
  num_classes: int = 2,
2627
2627
  device: Optional[torch.device] = None,
2628
+ probability_path: Optional[str] = None,
2629
+ probability_threshold: Optional[float] = None,
2628
2630
  quiet: bool = False,
2629
2631
  **kwargs: Any,
2630
2632
  ) -> Tuple[str, float]:
@@ -2641,6 +2643,11 @@ def semantic_inference_on_geotiff(
2641
2643
  num_channels (int): Number of channels to use from the input image.
2642
2644
  num_classes (int): Number of classes in the model output.
2643
2645
  device (torch.device, optional): Device to run inference on.
2646
+ probability_path (str, optional): Path to save probability map. If provided,
2647
+ the normalized class probabilities will be saved as a multi-band raster.
2648
+ probability_threshold (float, optional): Probability threshold for binary classification.
2649
+ Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
2650
+ are classified as class 1, otherwise class 0. If None (default), uses argmax.
2644
2651
  quiet (bool): If True, suppress progress bar. Defaults to False.
2645
2652
  **kwargs: Additional arguments.
2646
2653
 
@@ -2811,10 +2818,19 @@ def semantic_inference_on_geotiff(
2811
2818
  / count_accumulator[valid_pixels]
2812
2819
  )
2813
2820
 
2814
- # Take argmax to get final class predictions
2815
- mask[valid_pixels] = np.argmax(
2816
- normalized_probs[:, valid_pixels], axis=0
2817
- ).astype(np.uint8)
2821
+ # Apply threshold for binary classification or use argmax
2822
+ if probability_threshold is not None and num_classes == 2:
2823
+ # Use threshold: classify as class 1 if probability >= threshold
2824
+ mask[valid_pixels] = (
2825
+ normalized_probs[1, valid_pixels] >= probability_threshold
2826
+ ).astype(np.uint8)
2827
+ if not quiet:
2828
+ print(f"Using probability threshold: {probability_threshold}")
2829
+ else:
2830
+ # Take argmax to get final class predictions
2831
+ mask[valid_pixels] = np.argmax(
2832
+ normalized_probs[:, valid_pixels], axis=0
2833
+ ).astype(np.uint8)
2818
2834
 
2819
2835
  # Check class distribution in predictions (summary only)
2820
2836
  unique_classes, class_counts = np.unique(
@@ -2839,6 +2855,29 @@ def semantic_inference_on_geotiff(
2839
2855
  if not quiet:
2840
2856
  print(f"Saved prediction to {output_path}")
2841
2857
 
2858
+ # Save probability map if requested
2859
+ if probability_path is not None:
2860
+ prob_dir = os.path.abspath(os.path.dirname(probability_path))
2861
+ os.makedirs(prob_dir, exist_ok=True)
2862
+
2863
+ # Prepare probability output metadata
2864
+ prob_meta = meta.copy()
2865
+ prob_meta.update({"count": num_classes, "dtype": "float32"})
2866
+
2867
+ # Save normalized probabilities
2868
+ with rasterio.open(probability_path, "w", **prob_meta) as dst:
2869
+ for class_idx in range(num_classes):
2870
+ # Normalize probabilities
2871
+ prob_band = np.zeros((height, width), dtype=np.float32)
2872
+ prob_band[valid_pixels] = (
2873
+ prob_accumulator[class_idx, valid_pixels]
2874
+ / count_accumulator[valid_pixels]
2875
+ )
2876
+ dst.write(prob_band, class_idx + 1)
2877
+
2878
+ if not quiet:
2879
+ print(f"Saved probability map to {probability_path}")
2880
+
2842
2881
  return output_path, inference_time
2843
2882
 
2844
2883
 
@@ -2853,6 +2892,8 @@ def semantic_inference_on_image(
2853
2892
  num_classes: int = 2,
2854
2893
  device: Optional[torch.device] = None,
2855
2894
  binary_output: bool = True,
2895
+ probability_path: Optional[str] = None,
2896
+ probability_threshold: Optional[float] = None,
2856
2897
  quiet: bool = False,
2857
2898
  **kwargs: Any,
2858
2899
  ) -> Tuple[str, float]:
@@ -2870,6 +2911,11 @@ def semantic_inference_on_image(
2870
2911
  num_classes (int): Number of classes in the model output.
2871
2912
  device (torch.device, optional): Device to run inference on.
2872
2913
  binary_output (bool): If True, convert multi-class output to binary (class > 0).
2914
+ probability_path (str, optional): Path to save probability map. If provided,
2915
+ the normalized class probabilities will be saved as a multi-band raster.
2916
+ probability_threshold (float, optional): Probability threshold for binary classification.
2917
+ Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
2918
+ are classified as class 1, otherwise class 0. If None (default), uses argmax.
2873
2919
  quiet (bool): If True, suppress progress bar. Defaults to False.
2874
2920
  **kwargs: Additional arguments.
2875
2921
 
@@ -3056,10 +3102,19 @@ def semantic_inference_on_image(
3056
3102
  / count_accumulator[valid_pixels]
3057
3103
  )
3058
3104
 
3059
- # Take argmax to get final class predictions
3060
- mask[valid_pixels] = np.argmax(
3061
- normalized_probs[:, valid_pixels], axis=0
3062
- ).astype(np.uint8)
3105
+ # Apply threshold for binary classification or use argmax
3106
+ if probability_threshold is not None and num_classes == 2:
3107
+ # Use threshold: classify as class 1 if probability >= threshold
3108
+ mask[valid_pixels] = (
3109
+ normalized_probs[1, valid_pixels] >= probability_threshold
3110
+ ).astype(np.uint8)
3111
+ if not quiet:
3112
+ print(f"Using probability threshold: {probability_threshold}")
3113
+ else:
3114
+ # Take argmax to get final class predictions
3115
+ mask[valid_pixels] = np.argmax(
3116
+ normalized_probs[:, valid_pixels], axis=0
3117
+ ).astype(np.uint8)
3063
3118
 
3064
3119
  # Check class distribution in predictions before binary conversion
3065
3120
  unique_classes, class_counts = np.unique(mask, return_counts=True)
@@ -3116,6 +3171,40 @@ def semantic_inference_on_image(
3116
3171
  if not quiet:
3117
3172
  print(f"Saved prediction to {output_path}")
3118
3173
 
3174
+ # Save probability map if requested
3175
+ if probability_path is not None:
3176
+ prob_dir = os.path.abspath(os.path.dirname(probability_path))
3177
+ os.makedirs(prob_dir, exist_ok=True)
3178
+
3179
+ # For regular images, we'll save as a multi-channel TIFF
3180
+ # since we need to preserve floating point values
3181
+ import rasterio
3182
+ from rasterio.transform import from_bounds
3183
+
3184
+ # Create a simple pixel-space affine transform (unit pixel size, north-up: row 0 maps to y = height, so not a strict identity)
3185
+ transform = from_bounds(0, 0, width, height, width, height)
3186
+
3187
+ # Prepare probability output metadata
3188
+ prob_meta = {
3189
+ "driver": "GTiff",
3190
+ "height": height,
3191
+ "width": width,
3192
+ "count": num_classes,
3193
+ "dtype": "float32",
3194
+ "transform": transform,
3195
+ }
3196
+
3197
+ # Save normalized probabilities
3198
+ with rasterio.open(probability_path, "w", **prob_meta) as dst:
3199
+ for class_idx in range(num_classes):
3200
+ # Copy the already-normalized probabilities for this class (pixels outside valid_pixels stay 0)
3201
+ prob_band = np.zeros((height, width), dtype=np.float32)
3202
+ prob_band[valid_pixels] = normalized_probs[class_idx, valid_pixels]
3203
+ dst.write(prob_band, class_idx + 1)
3204
+
3205
+ if not quiet:
3206
+ print(f"Saved probability map to {probability_path}")
3207
+
3119
3208
  return output_path, inference_time
3120
3209
 
3121
3210
 
@@ -3131,6 +3220,8 @@ def semantic_segmentation(
3131
3220
  overlap: int = 256,
3132
3221
  batch_size: int = 4,
3133
3222
  device: Optional[torch.device] = None,
3223
+ probability_path: Optional[str] = None,
3224
+ probability_threshold: Optional[float] = None,
3134
3225
  quiet: bool = False,
3135
3226
  **kwargs: Any,
3136
3227
  ) -> None:
@@ -3152,6 +3243,12 @@ def semantic_segmentation(
3152
3243
  overlap (int): Overlap between adjacent windows.
3153
3244
  batch_size (int): Batch size for inference.
3154
3245
  device (torch.device, optional): Device to run inference on.
3246
+ probability_path (str, optional): Path to save probability map. If provided,
3247
+ the normalized class probabilities will be saved as a multi-band raster.
3248
+ probability_threshold (float, optional): Probability threshold for binary classification.
3249
+ Only used when num_classes=2. If provided, pixels with class 1 probability >= threshold
3250
+ are classified as class 1, otherwise class 0. If None (default), uses argmax.
3251
+ Must be between 0 and 1.
3155
3252
  quiet (bool): If True, suppress progress bar. Defaults to False.
3156
3253
  **kwargs: Additional arguments.
3157
3254
 
@@ -3205,6 +3302,15 @@ def semantic_segmentation(
3205
3302
  model.to(device)
3206
3303
  model.eval()
3207
3304
 
3305
+ # Validate probability_threshold
3306
+ if probability_threshold is not None:
3307
+ if not (0 <= probability_threshold <= 1):
3308
+ raise ValueError("probability_threshold must be between 0 and 1")
3309
+ if num_classes != 2:
3310
+ raise ValueError(
3311
+ "probability_threshold is only supported for binary classification (num_classes=2)"
3312
+ )
3313
+
3208
3314
  # Use appropriate inference function based on file format
3209
3315
  if is_geotiff:
3210
3316
  semantic_inference_on_geotiff(
@@ -3217,6 +3323,8 @@ def semantic_segmentation(
3217
3323
  num_channels=num_channels,
3218
3324
  num_classes=num_classes,
3219
3325
  device=device,
3326
+ probability_path=probability_path,
3327
+ probability_threshold=probability_threshold,
3220
3328
  quiet=quiet,
3221
3329
  **kwargs,
3222
3330
  )
@@ -3235,6 +3343,8 @@ def semantic_segmentation(
3235
3343
  num_classes=num_classes,
3236
3344
  device=device,
3237
3345
  binary_output=True, # Convert to binary output for better visualization
3346
+ probability_path=probability_path,
3347
+ probability_threshold=probability_threshold,
3238
3348
  quiet=quiet,
3239
3349
  **kwargs,
3240
3350
  )
@@ -3115,8 +3115,9 @@ def export_geotiff_tiles(
3115
3115
 
3116
3116
  def export_geotiff_tiles_batch(
3117
3117
  images_folder,
3118
- masks_folder,
3119
- output_folder,
3118
+ masks_folder=None,
3119
+ masks_file=None,
3120
+ output_folder=None,
3120
3121
  tile_size=256,
3121
3122
  stride=128,
3122
3123
  class_value_field="class",
@@ -3128,21 +3129,34 @@ def export_geotiff_tiles_batch(
3128
3129
  skip_empty_tiles=False,
3129
3130
  image_extensions=None,
3130
3131
  mask_extensions=None,
3132
+ match_by_name=True,
3131
3133
  ) -> Dict[str, Any]:
3132
3134
  """
3133
- Export georeferenced GeoTIFF tiles from folders of images and masks.
3135
+ Export georeferenced GeoTIFF tiles from images and masks.
3134
3136
 
3135
- This function processes multiple image-mask pairs from input folders,
3136
- generating tiles for each pair. All image tiles are saved to a single
3137
- 'images' folder and all mask tiles to a single 'masks' folder.
3137
+ This function supports three mask input modes:
3138
+ 1. Single vector file covering all images (masks_file parameter)
3139
+ 2. Multiple vector files, one per image (masks_folder parameter)
3140
+ 3. Multiple raster mask files (masks_folder parameter)
3138
3141
 
3139
- Images and masks are paired by their sorted order (alphabetically), not by
3140
- filename matching. The number of images and masks must be equal.
3142
+ For mode 1 (single vector file), specify masks_file path. The function will
3143
+ use spatial intersection to determine which features apply to each image.
3144
+
3145
+ For mode 2/3 (multiple mask files), specify masks_folder path. Images and masks
3146
+ are paired either by matching filenames (match_by_name=True) or by sorted order
3147
+ (match_by_name=False).
3148
+
3149
+ All image tiles are saved to a single 'images' folder and all mask tiles to a
3150
+ single 'masks' folder within the output directory.
3141
3151
 
3142
3152
  Args:
3143
3153
  images_folder (str): Path to folder containing raster images
3144
- masks_folder (str): Path to folder containing classification masks/vectors
3145
- output_folder (str): Path to output folder
3154
+ masks_folder (str, optional): Path to folder containing classification masks/vectors.
3155
+ Use this for multiple mask files (one per image or raster masks).
3156
+ masks_file (str, optional): Path to a single vector file covering all images.
3157
+ Use this for a single GeoJSON/Shapefile that covers multiple images.
3158
+ output_folder (str, optional): Path to output folder. If None, creates 'tiles'
3159
+ subfolder in images_folder.
3146
3160
  tile_size (int): Size of tiles in pixels (square)
3147
3161
  stride (int): Step size between tiles
3148
3162
  class_value_field (str): Field containing class values (for vector data)
@@ -3154,18 +3168,61 @@ def export_geotiff_tiles_batch(
3154
3168
  skip_empty_tiles (bool): If True, skip tiles with no features
3155
3169
  image_extensions (list): List of image file extensions to process (default: common raster formats)
3156
3170
  mask_extensions (list): List of mask file extensions to process (default: common raster/vector formats)
3171
+ match_by_name (bool): If True, match image and mask files by base filename.
3172
+ If False, match by sorted order (alphabetically). Only applies when masks_folder is used.
3157
3173
 
3158
3174
  Returns:
3159
3175
  Dict[str, Any]: Dictionary containing batch processing statistics
3160
3176
 
3161
3177
  Raises:
3162
- ValueError: If no images or masks found, or if counts don't match
3178
+ ValueError: If no images found, or if masks_folder and masks_file are both specified,
3179
+ or if neither is specified, or if counts don't match when using masks_folder with
3180
+ match_by_name=False.
3181
+
3182
+ Examples:
3183
+ # Single vector file covering all images
3184
+ >>> stats = export_geotiff_tiles_batch(
3185
+ ... images_folder='data/images',
3186
+ ... masks_file='data/buildings.geojson',
3187
+ ... output_folder='output/tiles'
3188
+ ... )
3189
+
3190
+ # Multiple vector files, matched by filename
3191
+ >>> stats = export_geotiff_tiles_batch(
3192
+ ... images_folder='data/images',
3193
+ ... masks_folder='data/masks',
3194
+ ... output_folder='output/tiles',
3195
+ ... match_by_name=True
3196
+ ... )
3197
+
3198
+ # Multiple mask files, matched by sorted order
3199
+ >>> stats = export_geotiff_tiles_batch(
3200
+ ... images_folder='data/images',
3201
+ ... masks_folder='data/masks',
3202
+ ... output_folder='output/tiles',
3203
+ ... match_by_name=False
3204
+ ... )
3163
3205
  """
3164
3206
 
3165
3207
  import logging
3166
3208
 
3167
3209
  logging.getLogger("rasterio").setLevel(logging.ERROR)
3168
3210
 
3211
+ # Validate input parameters
3212
+ if masks_folder is not None and masks_file is not None:
3213
+ raise ValueError(
3214
+ "Cannot specify both masks_folder and masks_file. Please use only one."
3215
+ )
3216
+
3217
+ if masks_folder is None and masks_file is None:
3218
+ raise ValueError(
3219
+ "Must specify either masks_folder or masks_file for mask data source."
3220
+ )
3221
+
3222
+ # Default output folder if not specified
3223
+ if output_folder is None:
3224
+ output_folder = os.path.join(images_folder, "tiles")
3225
+
3169
3226
  # Default extensions if not provided
3170
3227
  if image_extensions is None:
3171
3228
  image_extensions = [".tif", ".tiff", ".jpg", ".jpeg", ".png", ".jp2", ".img"]
@@ -3202,30 +3259,88 @@ def export_geotiff_tiles_batch(
3202
3259
  pattern = os.path.join(images_folder, f"*{ext}")
3203
3260
  image_files.extend(glob.glob(pattern))
3204
3261
 
3205
- # Get list of mask files
3206
- mask_files = []
3207
- for ext in mask_extensions:
3208
- pattern = os.path.join(masks_folder, f"*{ext}")
3209
- mask_files.extend(glob.glob(pattern))
3210
-
3211
3262
  # Sort files for consistent processing
3212
3263
  image_files.sort()
3213
- mask_files.sort()
3214
3264
 
3215
3265
  if not image_files:
3216
3266
  raise ValueError(
3217
3267
  f"No image files found in {images_folder} with extensions {image_extensions}"
3218
3268
  )
3219
3269
 
3220
- if not mask_files:
3221
- raise ValueError(
3222
- f"No mask files found in {masks_folder} with extensions {mask_extensions}"
3223
- )
3270
+ # Handle different mask input modes
3271
+ use_single_mask_file = masks_file is not None
3272
+ mask_files = []
3273
+ image_mask_pairs = []
3224
3274
 
3225
- if len(image_files) != len(mask_files):
3226
- raise ValueError(
3227
- f"Number of image files ({len(image_files)}) does not match number of mask files ({len(mask_files)})"
3228
- )
3275
+ if use_single_mask_file:
3276
+ # Mode 1: Single vector file covering all images
3277
+ if not os.path.exists(masks_file):
3278
+ raise ValueError(f"Mask file not found: {masks_file}")
3279
+
3280
+ # Load the single mask file once - will be spatially filtered per image
3281
+ single_mask_gdf = gpd.read_file(masks_file)
3282
+
3283
+ if not quiet:
3284
+ print(f"Using single mask file: {masks_file}")
3285
+ print(
3286
+ f"Mask contains {len(single_mask_gdf)} features in CRS: {single_mask_gdf.crs}"
3287
+ )
3288
+
3289
+ # Create pairs with the same mask file for all images
3290
+ for image_file in image_files:
3291
+ image_mask_pairs.append((image_file, masks_file, single_mask_gdf))
3292
+
3293
+ else:
3294
+ # Mode 2/3: Multiple mask files (vector or raster)
3295
+ # Get list of mask files
3296
+ for ext in mask_extensions:
3297
+ pattern = os.path.join(masks_folder, f"*{ext}")
3298
+ mask_files.extend(glob.glob(pattern))
3299
+
3300
+ # Sort files for consistent processing
3301
+ mask_files.sort()
3302
+
3303
+ if not mask_files:
3304
+ raise ValueError(
3305
+ f"No mask files found in {masks_folder} with extensions {mask_extensions}"
3306
+ )
3307
+
3308
+ # Match images to masks
3309
+ if match_by_name:
3310
+ # Match by base filename
3311
+ image_dict = {
3312
+ os.path.splitext(os.path.basename(f))[0]: f for f in image_files
3313
+ }
3314
+ mask_dict = {
3315
+ os.path.splitext(os.path.basename(f))[0]: f for f in mask_files
3316
+ }
3317
+
3318
+ # Find matching pairs
3319
+ for img_base, img_path in image_dict.items():
3320
+ if img_base in mask_dict:
3321
+ image_mask_pairs.append((img_path, mask_dict[img_base], None))
3322
+ else:
3323
+ if not quiet:
3324
+ print(f"Warning: No mask found for image {img_base}")
3325
+
3326
+ if not image_mask_pairs:
3327
+ raise ValueError(
3328
+ "No matching image-mask pairs found when matching by filename. "
3329
+ "Check that image and mask files have matching base names."
3330
+ )
3331
+
3332
+ else:
3333
+ # Match by sorted order
3334
+ if len(image_files) != len(mask_files):
3335
+ raise ValueError(
3336
+ f"Number of image files ({len(image_files)}) does not match "
3337
+ f"number of mask files ({len(mask_files)}) when matching by sorted order. "
3338
+ f"Use match_by_name=True for filename-based matching."
3339
+ )
3340
+
3341
+ # Create pairs by sorted order
3342
+ for image_file, mask_file in zip(image_files, mask_files):
3343
+ image_mask_pairs.append((image_file, mask_file, None))
3229
3344
 
3230
3345
  # Initialize batch statistics
3231
3346
  batch_stats = {
@@ -3239,23 +3354,24 @@ def export_geotiff_tiles_batch(
3239
3354
  }
3240
3355
 
3241
3356
  if not quiet:
3242
- print(
3243
- f"Found {len(image_files)} image files and {len(mask_files)} mask files to process"
3244
- )
3245
- print(f"Processing batch from {images_folder} and {masks_folder}")
3357
+ if use_single_mask_file:
3358
+ print(f"Found {len(image_files)} image files to process")
3359
+ print(f"Using single mask file: {masks_file}")
3360
+ else:
3361
+ print(f"Found {len(image_mask_pairs)} matching image-mask pairs to process")
3362
+ print(f"Processing batch from {images_folder} and {masks_folder}")
3246
3363
  print(f"Output folder: {output_folder}")
3247
3364
  print("-" * 60)
3248
3365
 
3249
3366
  # Global tile counter for unique naming
3250
3367
  global_tile_counter = 0
3251
3368
 
3252
- # Process each image-mask pair by sorted order
3253
- for idx, (image_file, mask_file) in enumerate(
3369
+ # Process each image-mask pair
3370
+ for idx, (image_file, mask_file, mask_gdf) in enumerate(
3254
3371
  tqdm(
3255
- zip(image_files, mask_files),
3372
+ image_mask_pairs,
3256
3373
  desc="Processing image pairs",
3257
3374
  disable=quiet,
3258
- total=len(image_files),
3259
3375
  )
3260
3376
  ):
3261
3377
  batch_stats["total_image_pairs"] += 1
@@ -3267,9 +3383,12 @@ def export_geotiff_tiles_batch(
3267
3383
  if not quiet:
3268
3384
  print(f"\nProcessing: {base_name}")
3269
3385
  print(f" Image: {os.path.basename(image_file)}")
3270
- print(f" Mask: {os.path.basename(mask_file)}")
3386
+ if use_single_mask_file:
3387
+ print(f" Mask: {os.path.basename(mask_file)} (spatially filtered)")
3388
+ else:
3389
+ print(f" Mask: {os.path.basename(mask_file)}")
3271
3390
 
3272
- # Process the image-mask pair manually to get direct control over tile saving
3391
+ # Process the image-mask pair
3273
3392
  tiles_generated = _process_image_mask_pair(
3274
3393
  image_file=image_file,
3275
3394
  mask_file=mask_file,
@@ -3285,6 +3404,8 @@ def export_geotiff_tiles_batch(
3285
3404
  all_touched=all_touched,
3286
3405
  skip_empty_tiles=skip_empty_tiles,
3287
3406
  quiet=quiet,
3407
+ mask_gdf=mask_gdf, # Pass pre-loaded GeoDataFrame if using single mask
3408
+ use_single_mask_file=use_single_mask_file,
3288
3409
  )
3289
3410
 
3290
3411
  # Update counters
@@ -3362,10 +3483,16 @@ def _process_image_mask_pair(
3362
3483
  all_touched=True,
3363
3484
  skip_empty_tiles=False,
3364
3485
  quiet=False,
3486
+ mask_gdf=None,
3487
+ use_single_mask_file=False,
3365
3488
  ):
3366
3489
  """
3367
3490
  Process a single image-mask pair and save tiles directly to output directories.
3368
3491
 
3492
+ Args:
3493
+ mask_gdf (GeoDataFrame, optional): Pre-loaded GeoDataFrame when using single mask file
3494
+ use_single_mask_file (bool): If True, spatially filter mask_gdf to image bounds
3495
+
3369
3496
  Returns:
3370
3497
  dict: Statistics for this image-mask pair
3371
3498
  """
@@ -3433,11 +3560,36 @@ def _process_image_mask_pair(
3433
3560
  else:
3434
3561
  # Load vector class data
3435
3562
  try:
3436
- gdf = gpd.read_file(mask_file)
3563
+ if use_single_mask_file and mask_gdf is not None:
3564
+ # Using pre-loaded single mask file - spatially filter to image bounds
3565
+ # Get image bounds
3566
+ image_bounds = box(*src.bounds)
3567
+ image_gdf = gpd.GeoDataFrame(
3568
+ {"geometry": [image_bounds]}, crs=src.crs
3569
+ )
3437
3570
 
3438
- # Always reproject to match raster CRS
3439
- if gdf.crs != src.crs:
3440
- gdf = gdf.to_crs(src.crs)
3571
+ # Reproject mask if needed
3572
+ if mask_gdf.crs != src.crs:
3573
+ mask_gdf_reprojected = mask_gdf.to_crs(src.crs)
3574
+ else:
3575
+ mask_gdf_reprojected = mask_gdf
3576
+
3577
+ # Spatially filter features that intersect with image bounds
3578
+ gdf = mask_gdf_reprojected[
3579
+ mask_gdf_reprojected.intersects(image_bounds)
3580
+ ].copy()
3581
+
3582
+ if not quiet and len(gdf) > 0:
3583
+ print(
3584
+ f" Filtered to {len(gdf)} features intersecting image bounds"
3585
+ )
3586
+ else:
3587
+ # Load individual mask file
3588
+ gdf = gpd.read_file(mask_file)
3589
+
3590
+ # Always reproject to match raster CRS
3591
+ if gdf.crs != src.crs:
3592
+ gdf = gdf.to_crs(src.crs)
3441
3593
 
3442
3594
  # Apply buffer if specified
3443
3595
  if buffer_radius > 0:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: geoai-py
3
- Version: 0.13.1
3
+ Version: 0.14.0
4
4
  Summary: A Python package for using Artificial Intelligence (AI) with geospatial data
5
5
  Author-email: Qiusheng Wu <giswqs@gmail.com>
6
6
  License: MIT License
@@ -159,13 +159,17 @@ Comprehensive documentation is available at [https://opengeoai.org](https://open
159
159
 
160
160
  ## 📺 Video Tutorials
161
161
 
162
- Check out this 2-hour video tutorial on using GeoAI for geospatial data analysis and visualization.
162
+ ### GeoAI Made Easy: Learn the Python Package Step-by-Step (Beginner Friendly)
163
163
 
164
- [![cover](https://github.com/user-attachments/assets/1c14e651-65b9-41ae-b42d-3ad028b3eeb8)](https://youtu.be/jdK-cleFUkc)
164
+ [![intro](https://github.com/user-attachments/assets/7e60ce05-573d-4d0d-9876-5289b87e5136)](https://youtu.be/VIl29Rca6zE?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
165
165
 
166
- To learn more about GeoAI, you can watch the following video tutorials:
166
+ ### GeoAI Workshop: Unlocking the Power of GeoAI with Python
167
167
 
168
- [![cover](https://github.com/user-attachments/assets/3cde9547-ab62-4d70-b23a-3e5ed27c7407)](https://tinyurl.com/GeoAI-Tutorials)
168
+ [![cover](https://github.com/user-attachments/assets/1c14e651-65b9-41ae-b42d-3ad028b3eeb8)](https://youtu.be/jdK-cleFUkc?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
169
+
170
+ ### GeoAI Tutorials Playlist
171
+
172
+ [![cover](https://github.com/user-attachments/assets/3cde9547-ab62-4d70-b23a-3e5ed27c7407)](https://www.youtube.com/playlist?list=PLAxJ4-o7ZoPcvENqwaPa_QwbbkZ5sctZE)
169
173
 
170
174
  ## 🤝 Contributing
171
175
 
@@ -108,6 +108,7 @@ nav:
108
108
  - examples/edit_vector.ipynb
109
109
  - examples/image_chips.ipynb
110
110
  - examples/image_tiling.ipynb
111
+ - examples/create_training_data.ipynb
111
112
  - examples/building_footprints_usa.ipynb
112
113
  - examples/building_footprints_africa.ipynb
113
114
  - examples/building_footprints_china.ipynb
@@ -121,6 +122,7 @@ nav:
121
122
  - examples/data_visualization.ipynb
122
123
  - examples/train_object_detection_model.ipynb
123
124
  - examples/train_segmentation_model.ipynb
125
+ - examples/train_instance_segmentation_model.ipynb
124
126
  - examples/train_landcover_classification.ipynb
125
127
  - examples/train_building_footprints_usa.ipynb
126
128
  - examples/train_solar_panel_detection.ipynb
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "geoai-py"
3
- version = "0.13.1"
3
+ version = "0.14.0"
4
4
  dynamic = [
5
5
  "dependencies",
6
6
  ]
@@ -44,7 +44,7 @@ universal = true
44
44
 
45
45
 
46
46
  [tool.bumpversion]
47
- current_version = "0.13.1"
47
+ current_version = "0.14.0"
48
48
  commit = true
49
49
  tag = true
50
50
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes