geoai-py 0.2.3__py2.py3-none-any.whl → 0.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geoai/geoai.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """Main module."""
2
2
 
3
- from .common import *
3
+ from .utils import *
4
4
  from .preprocess import *
5
5
  from .extract import *
geoai/preprocess.py CHANGED
@@ -13,13 +13,20 @@ import pandas as pd
13
13
  from rasterio.windows import Window
14
14
  from rasterio import features
15
15
  from rasterio.plot import show
16
- from shapely.geometry import box, shape, mapping
16
+ from shapely.geometry import box, shape, mapping, Polygon
17
17
  import matplotlib.pyplot as plt
18
18
  from tqdm import tqdm
19
19
  from torchvision.transforms import RandomRotation
20
20
  from shapely.affinity import rotate
21
- import torchgeo
22
21
  import torch
22
+ import cv2
23
+
24
+ try:
25
+ import torchgeo
26
+ except ImportError as e:
27
+ raise ImportError(
28
+ "Your torchgeo version is too old. Please upgrade to the latest version using 'pip install -U torchgeo'."
29
+ )
23
30
 
24
31
 
25
32
  def download_file(url, output_path=None, overwrite=False):
@@ -115,6 +122,43 @@ def get_raster_info(raster_path):
115
122
  return info
116
123
 
117
124
 
125
def get_raster_stats(raster_path, divide_by=1.0):
    """Calculate statistics for each band in a raster dataset.

    This function computes min, max, mean, and standard deviation values
    for each band in the provided raster, returning results in a dictionary
    with lists for each statistic type.

    Args:
        raster_path (str): Path to the raster file.
        divide_by (float, optional): Value to divide pixel statistics by.
            Defaults to 1.0, which keeps the original pixel values.

    Returns:
        dict: Dictionary containing lists of statistics with keys:
            - 'min': List of minimum values for each band
            - 'max': List of maximum values for each band
            - 'mean': List of mean values for each band
            - 'std': List of standard deviation values for each band

    Raises:
        ValueError: If divide_by is zero.
    """
    # Fail fast with a clear message instead of a ZeroDivisionError
    # partway through reading the bands.
    if divide_by == 0:
        raise ValueError("divide_by must be non-zero.")

    # Initialize the results dictionary with empty lists
    stats = {"min": [], "max": [], "mean": [], "std": []}

    # Open the raster dataset
    with rasterio.open(raster_path) as src:
        # Calculate statistics for each band (rasterio bands are 1-indexed)
        for band_index in range(1, src.count + 1):
            # masked=True excludes nodata pixels from the statistics
            band = src.read(band_index, masked=True)

            # Append scaled statistics for this band to each list.
            # Dividing std by the scale factor is correct because std
            # scales linearly under a constant multiplicative rescale.
            stats["min"].append(float(band.min()) / divide_by)
            stats["max"].append(float(band.max()) / divide_by)
            stats["mean"].append(float(band.mean()) / divide_by)
            stats["std"].append(float(band.std()) / divide_by)

    return stats
160
+
161
+
118
162
  def print_raster_info(raster_path, show_preview=True, figsize=(10, 8)):
119
163
  """Print formatted information about a raster dataset and optionally show a preview.
120
164
 
@@ -2762,3 +2806,202 @@ def export_training_data(
2762
2806
 
2763
2807
  # Return statistics
2764
2808
  return stats, out_folder
2809
+
2810
+
2811
def masks_to_vector(
    mask_path,
    output_path=None,
    simplify_tolerance=1.0,
    mask_threshold=0.5,
    min_area=100,
    nms_iou_threshold=0.5,
):
    """
    Convert a building mask GeoTIFF to vector polygons and save as a vector dataset.

    The mask is binarized, split into connected components, and each
    component's outer contours are vectorized into georeferenced polygons.
    Overlapping polygons are then removed with an IoU-based non-maximum
    suppression, keeping the higher-"confidence" (larger) polygon.

    Args:
        mask_path: Path to the building masks GeoTIFF.
        output_path: Path to save the output vector file. If None, nothing
            is written to disk and only the GeoDataFrame is returned.
        simplify_tolerance: Tolerance for polygon simplification; only
            applied to contours with more than 50 points.
        mask_threshold: Threshold for mask binarization, expressed as a
            fraction of 255.
            NOTE(review): this assumes an 8-bit mask raster (values 0-255);
            a 0-1 float mask would binarize to all zeros here — confirm
            against whatever writes the mask.
        min_area: Minimum area in pixels to keep a building.
        nms_iou_threshold: IoU threshold for non-maximum suppression.

    Returns:
        GeoDataFrame with building footprints ('geometry', 'confidence',
        'class' columns), or None if no valid polygons were found.
    """
    # Plain string — there are no placeholders to interpolate here.
    print("Converting mask to GeoJSON with parameters:")
    print(f"- Mask threshold: {mask_threshold}")
    print(f"- Min building area: {min_area}")
    print(f"- Simplify tolerance: {simplify_tolerance}")
    print(f"- NMS IoU threshold: {nms_iou_threshold}")

    # Open the mask raster and pull everything we need into memory so the
    # dataset can be closed before the (potentially long) vectorization.
    with rasterio.open(mask_path) as src:
        mask_data = src.read(1)
        transform = src.transform
        crs = src.crs

    # Print mask statistics
    print(f"Mask dimensions: {mask_data.shape}")
    print(f"Mask value range: {mask_data.min()} to {mask_data.max()}")

    # Binarize the mask based on threshold (see the 8-bit assumption above)
    binary_mask = (mask_data > (mask_threshold * 255)).astype(np.uint8)

    # Morphological closing fills small holes/gaps before component analysis
    kernel = np.ones((3, 3), np.uint8)
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)

    # Find connected components (label 0 is the background)
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
        binary_mask, connectivity=8
    )

    print(f"Found {num_labels-1} potential buildings")  # Subtract 1 for background

    # Polygons and their pseudo-confidence values, built up per component
    all_polygons = []
    all_confidences = []

    # Process each component (skip the first one which is background)
    for label_id in tqdm(range(1, num_labels)):
        area = stats[label_id, cv2.CC_STAT_AREA]

        # Skip if too small
        if area < min_area:
            continue

        # Binary mask containing only this building
        building_mask = (labels == label_id).astype(np.uint8)

        # Outer contours only; interior holes are discarded
        contours, _ = cv2.findContours(
            building_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        for contour in contours:
            # A polygon needs at least 3 vertices
            if contour.shape[0] < 3:
                continue

            # Simplify contour if it has many points
            if contour.shape[0] > 50 and simplify_tolerance > 0:
                epsilon = simplify_tolerance * cv2.arcLength(contour, True)
                contour = cv2.approxPolyDP(contour, epsilon, True)

            # Convert to a flat array of (x, y) pixel coordinates
            polygon_points = contour.reshape(-1, 2)

            # Pixel (col, row) -> geographic (x, y) via the raster's affine
            # transform. NOTE(review): this maps pixel corners, not centers
            # (no +0.5 offset) — confirm that is the intended convention.
            geo_points = [transform * (px, py) for px, py in polygon_points]

            # Create Shapely polygon
            if len(geo_points) >= 3:
                try:
                    shapely_poly = Polygon(geo_points)
                    if shapely_poly.is_valid and shapely_poly.area > 0:
                        all_polygons.append(shapely_poly)

                        # Calculate "confidence" as normalized size.
                        # This is a proxy since we don't have model
                        # confidence scores; capped at 1.0.
                        normalized_size = min(1.0, area / 1000)
                        all_confidences.append(normalized_size)
                except Exception as e:
                    print(f"Error creating polygon: {e}")

    print(f"Created {len(all_polygons)} valid polygons")

    # Create GeoDataFrame
    if not all_polygons:
        print("No valid polygons found")
        return None

    gdf = gpd.GeoDataFrame(
        {
            "geometry": all_polygons,
            "confidence": all_confidences,
            "class": 1,  # Building class
        },
        crs=crs,
    )

    def filter_overlapping_polygons(gdf, **kwargs):
        """
        Filter overlapping polygons using non-maximum suppression.

        Higher-confidence polygons are kept first; any later polygon whose
        IoU with an already-kept polygon exceeds the threshold is dropped.

        Args:
            gdf: GeoDataFrame with polygons
            **kwargs: Optional parameters:
                nms_iou_threshold: IoU threshold for filtering

        Returns:
            Filtered GeoDataFrame
        """
        if len(gdf) <= 1:
            return gdf

        # Allow the caller to override the enclosing function's threshold
        iou_threshold = kwargs.get("nms_iou_threshold", nms_iou_threshold)

        # Sort by confidence so better candidates are considered first
        gdf = gdf.sort_values("confidence", ascending=False)

        # Fix any invalid geometries (buffer(0) is the standard repair)
        gdf["geometry"] = gdf["geometry"].apply(
            lambda geom: geom.buffer(0) if not geom.is_valid else geom
        )

        keep_indices = []
        polygons = gdf.geometry.values

        # NOTE: the original also tested `if i in keep_indices: continue`
        # at the top of this loop; that branch is unreachable (each i is
        # visited exactly once and only appended after processing), so it
        # has been removed.
        for i in range(len(polygons)):
            keep = True
            for j in keep_indices:
                # Skip invalid geometries
                if not polygons[i].is_valid or not polygons[j].is_valid:
                    continue

                # Calculate IoU against an already-kept polygon
                try:
                    intersection = polygons[i].intersection(polygons[j]).area
                    union = polygons[i].area + polygons[j].area - intersection
                    iou = intersection / union if union > 0 else 0

                    if iou > iou_threshold:
                        keep = False
                        break
                except Exception:
                    # Skip on topology exceptions (treat pair as disjoint)
                    continue

            if keep:
                keep_indices.append(i)

        # keep_indices are positional within the sorted frame
        return gdf.iloc[keep_indices]

    # Apply non-maximum suppression to remove overlapping polygons
    gdf = filter_overlapping_polygons(gdf, nms_iou_threshold=nms_iou_threshold)

    print(f"Final building count after filtering: {len(gdf)}")

    # Save to file
    if output_path is not None:
        gdf.to_file(output_path)
        print(f"Saved {len(gdf)} building footprints to {output_path}")

    return gdf