geoai-py 0.2.3__py2.py3-none-any.whl → 0.3.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geoai/geoai.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """Main module."""
2
2
 
3
- from .common import *
3
+ from .utils import *
4
4
  from .preprocess import *
5
5
  from .extract import *
geoai/preprocess.py CHANGED
@@ -13,13 +13,14 @@ import pandas as pd
13
13
  from rasterio.windows import Window
14
14
  from rasterio import features
15
15
  from rasterio.plot import show
16
- from shapely.geometry import box, shape, mapping
16
+ from shapely.geometry import box, shape, mapping, Polygon
17
17
  import matplotlib.pyplot as plt
18
18
  from tqdm import tqdm
19
19
  from torchvision.transforms import RandomRotation
20
20
  from shapely.affinity import rotate
21
21
  import torchgeo
22
22
  import torch
23
+ import cv2
23
24
 
24
25
 
25
26
  def download_file(url, output_path=None, overwrite=False):
@@ -115,6 +116,43 @@ def get_raster_info(raster_path):
115
116
  return info
116
117
 
117
118
 
119
def get_raster_stats(raster_path, divide_by=1.0):
    """Compute per-band summary statistics for a raster dataset.

    Opens the raster at *raster_path* and, for every band, records the
    minimum, maximum, mean, and standard deviation of the masked pixel
    values, each divided by ``divide_by``.

    Args:
        raster_path (str): Path to the raster file.
        divide_by (float, optional): Value to divide each statistic by.
            Defaults to 1.0, which keeps the original pixel scale.

    Returns:
        dict: Dictionary with keys ``"min"``, ``"max"``, ``"mean"``, and
            ``"std"``, each mapping to a list holding one value per band.
    """
    # One list per statistic; bands are appended in order.
    results = {key: [] for key in ("min", "max", "mean", "std")}

    with rasterio.open(raster_path) as dataset:
        # Bands are 1-indexed in rasterio; masked reads exclude nodata
        # pixels from the statistics.
        for band_index in range(1, dataset.count + 1):
            band = dataset.read(band_index, masked=True)

            results["min"].append(float(band.min()) / divide_by)
            results["max"].append(float(band.max()) / divide_by)
            results["mean"].append(float(band.mean()) / divide_by)
            results["std"].append(float(band.std()) / divide_by)

    return results
154
+
155
+
118
156
  def print_raster_info(raster_path, show_preview=True, figsize=(10, 8)):
119
157
  """Print formatted information about a raster dataset and optionally show a preview.
120
158
 
@@ -2762,3 +2800,202 @@ def export_training_data(
2762
2800
 
2763
2801
  # Return statistics
2764
2802
  return stats, out_folder
2803
+
2804
+
2805
def masks_to_vector(
    mask_path,
    output_path=None,
    simplify_tolerance=1.0,
    mask_threshold=0.5,
    min_area=100,
    nms_iou_threshold=0.5,
):
    """
    Convert a building mask GeoTIFF to vector polygons and save as a vector dataset.

    The mask is binarized, cleaned with a morphological closing, split into
    connected components, and each component's external contour is converted
    to a georeferenced Shapely polygon. Overlapping polygons are then removed
    with non-maximum suppression (NMS).

    Args:
        mask_path: Path to the building masks GeoTIFF.
        output_path: Path to save the output vector file. If None, the result
            is returned but not written to disk.
        simplify_tolerance: Fraction of a contour's arc length used as the
            Douglas-Peucker epsilon when simplifying long contours.
        mask_threshold: Threshold (0-1) for mask binarization; pixels above
            ``mask_threshold * 255`` are treated as building.
        min_area: Minimum area in pixels to keep a building.
        nms_iou_threshold: IoU threshold for non-maximum suppression.

    Returns:
        GeoDataFrame with building footprints, or None if no valid polygon
        was found.
    """
    print("Converting mask to GeoJSON with parameters:")
    print(f"- Mask threshold: {mask_threshold}")
    print(f"- Min building area: {min_area}")
    print(f"- Simplify tolerance: {simplify_tolerance}")
    print(f"- NMS IoU threshold: {nms_iou_threshold}")

    # Open the mask raster and pull out the data plus georeferencing.
    with rasterio.open(mask_path) as src:
        mask_data = src.read(1)
        transform = src.transform
        crs = src.crs

    # Print mask statistics
    print(f"Mask dimensions: {mask_data.shape}")
    print(f"Mask value range: {mask_data.min()} to {mask_data.max()}")

    # Binarize the mask. NOTE(review): the mask is assumed to be scaled
    # 0-255 (a float 0-1 mask would binarize to all zeros here) — confirm
    # masks are written as uint8.
    binary_mask = (mask_data > (mask_threshold * 255)).astype(np.uint8)

    # Morphological closing fills small holes/gaps before labeling.
    kernel = np.ones((3, 3), np.uint8)
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)

    # Label connected components (8-connectivity); label 0 is the background.
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
        binary_mask, connectivity=8
    )

    print(f"Found {num_labels-1} potential buildings")  # Subtract 1 for background

    # Collected polygons and their confidence proxies.
    all_polygons = []
    all_confidences = []

    # Process each component (skip the first one which is background)
    for i in tqdm(range(1, num_labels)):
        area = stats[i, cv2.CC_STAT_AREA]

        # Skip components that are too small to be buildings.
        if area < min_area:
            continue

        # Binary mask containing only this component.
        building_mask = (labels == i).astype(np.uint8)

        # External contours only; collinear points are compressed.
        contours, _ = cv2.findContours(
            building_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for contour in contours:
            # A polygon needs at least three vertices.
            if contour.shape[0] < 3:
                continue

            # Simplify very detailed contours (Douglas-Peucker).
            if contour.shape[0] > 50 and simplify_tolerance > 0:
                epsilon = simplify_tolerance * cv2.arcLength(contour, True)
                contour = cv2.approxPolyDP(contour, epsilon, True)

            # Flatten to a list of (x, y) pixel coordinates.
            polygon_points = contour.reshape(-1, 2)

            # Map pixel (col, row) coordinates to geographic coordinates
            # via the raster's affine transform.
            geo_points = []
            for x, y in polygon_points:
                gx, gy = transform * (x, y)
                geo_points.append((gx, gy))

            # Create Shapely polygon
            if len(geo_points) >= 3:
                try:
                    shapely_poly = Polygon(geo_points)
                    if shapely_poly.is_valid and shapely_poly.area > 0:
                        all_polygons.append(shapely_poly)

                        # No model score is available, so use normalized
                        # component size (capped at 1.0) as a confidence proxy.
                        normalized_size = min(1.0, area / 1000)
                        all_confidences.append(normalized_size)
                except Exception as e:
                    print(f"Error creating polygon: {e}")

    print(f"Created {len(all_polygons)} valid polygons")

    # Bail out early when nothing survived filtering.
    if not all_polygons:
        print("No valid polygons found")
        return None

    gdf = gpd.GeoDataFrame(
        {
            "geometry": all_polygons,
            "confidence": all_confidences,
            "class": 1,  # Building class
        },
        crs=crs,
    )

    def filter_overlapping_polygons(gdf, **kwargs):
        """
        Filter overlapping polygons using non-maximum suppression.

        Polygons are visited in descending confidence order; a polygon is
        kept only if its IoU with every already-kept polygon does not
        exceed the threshold.

        Args:
            gdf: GeoDataFrame with polygons
            **kwargs: Optional parameters:
                nms_iou_threshold: IoU threshold for filtering

        Returns:
            Filtered GeoDataFrame
        """
        if len(gdf) <= 1:
            return gdf

        # Get parameters from kwargs or fall back to the enclosing default.
        iou_threshold = kwargs.get("nms_iou_threshold", nms_iou_threshold)

        # Highest-confidence polygons get priority.
        gdf = gdf.sort_values("confidence", ascending=False)

        # Repair invalid geometries with the zero-buffer trick.
        gdf["geometry"] = gdf["geometry"].apply(
            lambda geom: geom.buffer(0) if not geom.is_valid else geom
        )

        keep_indices = []
        polygons = gdf.geometry.values

        for i in range(len(polygons)):
            keep = True
            for j in keep_indices:
                # Skip invalid geometries
                if not polygons[i].is_valid or not polygons[j].is_valid:
                    continue

                # Calculate IoU
                try:
                    intersection = polygons[i].intersection(polygons[j]).area
                    union = polygons[i].area + polygons[j].area - intersection
                    iou = intersection / union if union > 0 else 0

                    if iou > iou_threshold:
                        keep = False
                        break
                except Exception:
                    # Skip on topology exceptions
                    continue

            if keep:
                keep_indices.append(i)

        return gdf.iloc[keep_indices]

    # Apply non-maximum suppression to remove overlapping polygons
    gdf = filter_overlapping_polygons(gdf, nms_iou_threshold=nms_iou_threshold)

    print(f"Final building count after filtering: {len(gdf)}")

    # Save to file
    if output_path is not None:
        gdf.to_file(output_path)
        print(f"Saved {len(gdf)} building footprints to {output_path}")

    return gdf