geoai-py 0.2.3__py2.py3-none-any.whl → 0.3.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoai/__init__.py +1 -1
- geoai/extract.py +1006 -67
- geoai/geoai.py +1 -1
- geoai/preprocess.py +245 -2
- geoai/{common.py → utils.py} +463 -4
- {geoai_py-0.2.3.dist-info → geoai_py-0.3.1.dist-info}/METADATA +1 -1
- geoai_py-0.3.1.dist-info/RECORD +13 -0
- geoai_py-0.2.3.dist-info/RECORD +0 -13
- {geoai_py-0.2.3.dist-info → geoai_py-0.3.1.dist-info}/LICENSE +0 -0
- {geoai_py-0.2.3.dist-info → geoai_py-0.3.1.dist-info}/WHEEL +0 -0
- {geoai_py-0.2.3.dist-info → geoai_py-0.3.1.dist-info}/entry_points.txt +0 -0
- {geoai_py-0.2.3.dist-info → geoai_py-0.3.1.dist-info}/top_level.txt +0 -0
geoai/geoai.py
CHANGED
geoai/preprocess.py
CHANGED
@@ -13,13 +13,20 @@ import pandas as pd
 from rasterio.windows import Window
 from rasterio import features
 from rasterio.plot import show
-from shapely.geometry import box, shape, mapping
+from shapely.geometry import box, shape, mapping, Polygon
 import matplotlib.pyplot as plt
 from tqdm import tqdm
 from torchvision.transforms import RandomRotation
 from shapely.affinity import rotate
-import torchgeo
 import torch
+import cv2
+
+try:
+    import torchgeo
+except ImportError as e:
+    raise ImportError(
+        "Your torchgeo version is too old. Please upgrade to the latest version using 'pip install -U torchgeo'."
+    )
 
 
 def download_file(url, output_path=None, overwrite=False):
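Two dependency changes are worth noting in the import hunk above: OpenCV (`cv2`) becomes a hard import of `geoai.preprocess`, and `torchgeo` is now imported inside a try/except, so any `ImportError` (a missing install as well as an outdated one) is re-raised with upgrade advice. A minimal pre-upgrade environment check, written here as a hypothetical sketch rather than anything shipped with the package:

import importlib.util

# Both modules must be importable before geoai.preprocess will load
# (assumption based on the module-level imports above; 'cv2' is
# provided by the opencv-python distribution).
for module, pip_name in (("cv2", "opencv-python"), ("torchgeo", "torchgeo")):
    if importlib.util.find_spec(module) is None:
        print(f"Missing dependency: {module} -- try 'pip install -U {pip_name}'")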
@@ -115,6 +122,43 @@ def get_raster_info(raster_path):
     return info
 
 
+def get_raster_stats(raster_path, divide_by=1.0):
+    """Calculate statistics for each band in a raster dataset.
+
+    This function computes min, max, mean, and standard deviation values
+    for each band in the provided raster, returning results in a dictionary
+    with lists for each statistic type.
+
+    Args:
+        raster_path (str): Path to the raster file.
+        divide_by (float, optional): Value to divide pixel values by.
+            Defaults to 1.0, which keeps the original pixel values.
+
+    Returns:
+        dict: Dictionary containing lists of statistics with keys:
+            - 'min': List of minimum values for each band
+            - 'max': List of maximum values for each band
+            - 'mean': List of mean values for each band
+            - 'std': List of standard deviation values for each band
+    """
+    # Initialize the results dictionary with empty lists
+    stats = {"min": [], "max": [], "mean": [], "std": []}
+
+    # Open the raster dataset
+    with rasterio.open(raster_path) as src:
+        # Calculate statistics for each band
+        for i in range(1, src.count + 1):
+            band = src.read(i, masked=True)
+
+            # Append statistics for this band to each list
+            stats["min"].append(float(band.min()) / divide_by)
+            stats["max"].append(float(band.max()) / divide_by)
+            stats["mean"].append(float(band.mean()) / divide_by)
+            stats["std"].append(float(band.std()) / divide_by)
+
+    return stats
+
+
 def print_raster_info(raster_path, show_preview=True, figsize=(10, 8)):
     """Print formatted information about a raster dataset and optionally show a preview.
 
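`get_raster_stats` returns parallel lists with one entry per band, so the results are easy to zip together. A short usage sketch (the file name is hypothetical; `divide_by=255.0` rescales 8-bit imagery statistics to the 0-1 range):

from geoai.preprocess import get_raster_stats

# Hypothetical 3-band, 8-bit GeoTIFF; divide_by rescales the statistics to [0, 1].
stats = get_raster_stats("naip_tile.tif", divide_by=255.0)
for band, (lo, hi, mean, std) in enumerate(
    zip(stats["min"], stats["max"], stats["mean"], stats["std"]), start=1
):
    print(f"Band {band}: min={lo:.3f} max={hi:.3f} mean={mean:.3f} std={std:.3f}")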
@@ -2762,3 +2806,202 @@ def export_training_data(
 
     # Return statistics
     return stats, out_folder
+
+
+def masks_to_vector(
+    mask_path,
+    output_path=None,
+    simplify_tolerance=1.0,
+    mask_threshold=0.5,
+    min_area=100,
+    nms_iou_threshold=0.5,
+):
+    """
+    Convert a building mask GeoTIFF to vector polygons and save as a vector dataset.
+
+    Args:
+        mask_path: Path to the building masks GeoTIFF
+        output_path: Path to save the output GeoJSON (default: None; if None, no file is written)
+        simplify_tolerance: Tolerance for polygon simplification (default: 1.0)
+        mask_threshold: Threshold for mask binarization (default: 0.5)
+        min_area: Minimum area in pixels to keep a building (default: 100)
+        nms_iou_threshold: IoU threshold for non-maximum suppression (default: 0.5)
+
+    Returns:
+        GeoDataFrame with building footprints
+    """
+    # Set default output path if not provided
+    # if output_path is None:
+    #     output_path = os.path.splitext(mask_path)[0] + ".geojson"
+
+    print("Converting mask to GeoJSON with parameters:")
+    print(f"- Mask threshold: {mask_threshold}")
+    print(f"- Min building area: {min_area}")
+    print(f"- Simplify tolerance: {simplify_tolerance}")
+    print(f"- NMS IoU threshold: {nms_iou_threshold}")
+
+    # Open the mask raster
+    with rasterio.open(mask_path) as src:
+        # Read the mask data
+        mask_data = src.read(1)
+        transform = src.transform
+        crs = src.crs
+
+    # Print mask statistics
+    print(f"Mask dimensions: {mask_data.shape}")
+    print(f"Mask value range: {mask_data.min()} to {mask_data.max()}")
+
+    # Prepare for connected component analysis
+    # Binarize the mask based on threshold
+    binary_mask = (mask_data > (mask_threshold * 255)).astype(np.uint8)
+
+    # Apply morphological operations for better results (optional)
+    kernel = np.ones((3, 3), np.uint8)
+    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
+
+    # Find connected components
+    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
+        binary_mask, connectivity=8
+    )
+
+    print(f"Found {num_labels - 1} potential buildings")  # Subtract 1 for background
+
+    # Create lists to store polygons and confidence values
+    all_polygons = []
+    all_confidences = []
+
+    # Process each component (skip the first one, which is the background)
+    for i in tqdm(range(1, num_labels)):
+        # Extract this building
+        area = stats[i, cv2.CC_STAT_AREA]
+
+        # Skip if too small
+        if area < min_area:
+            continue
+
+        # Create a mask for this building
+        building_mask = (labels == i).astype(np.uint8)
+
+        # Find contours
+        contours, _ = cv2.findContours(
+            building_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+        )
+
+        # Process each contour
+        for contour in contours:
+            # Skip if too few points
+            if contour.shape[0] < 3:
+                continue
+
+            # Simplify contour if it has many points
+            if contour.shape[0] > 50 and simplify_tolerance > 0:
+                epsilon = simplify_tolerance * cv2.arcLength(contour, True)
+                contour = cv2.approxPolyDP(contour, epsilon, True)
+
+            # Convert to list of (x, y) coordinates
+            polygon_points = contour.reshape(-1, 2)
+
+            # Convert pixel coordinates to geographic coordinates
+            geo_points = []
+            for x, y in polygon_points:
+                gx, gy = transform * (x, y)
+                geo_points.append((gx, gy))
+
+            # Create Shapely polygon
+            if len(geo_points) >= 3:
+                try:
+                    shapely_poly = Polygon(geo_points)
+                    if shapely_poly.is_valid and shapely_poly.area > 0:
+                        all_polygons.append(shapely_poly)
+
+                        # Calculate "confidence" as normalized size
+                        # This is a proxy since we don't have model confidence scores
+                        normalized_size = min(1.0, area / 1000)  # Cap at 1.0
+                        all_confidences.append(normalized_size)
+                except Exception as e:
+                    print(f"Error creating polygon: {e}")
+
+    print(f"Created {len(all_polygons)} valid polygons")
+
+    # Create GeoDataFrame
+    if not all_polygons:
+        print("No valid polygons found")
+        return None
+
+    gdf = gpd.GeoDataFrame(
+        {
+            "geometry": all_polygons,
+            "confidence": all_confidences,
+            "class": 1,  # Building class
+        },
+        crs=crs,
+    )
+
+    def filter_overlapping_polygons(gdf, **kwargs):
+        """
+        Filter overlapping polygons using non-maximum suppression.
+
+        Args:
+            gdf: GeoDataFrame with polygons
+            **kwargs: Optional parameters:
+                nms_iou_threshold: IoU threshold for filtering
+
+        Returns:
+            Filtered GeoDataFrame
+        """
+        if len(gdf) <= 1:
+            return gdf
+
+        # Get parameters from kwargs or fall back to the enclosing function's arguments
+        iou_threshold = kwargs.get("nms_iou_threshold", nms_iou_threshold)
+
+        # Sort by confidence
+        gdf = gdf.sort_values("confidence", ascending=False)
+
+        # Fix any invalid geometries
+        gdf["geometry"] = gdf["geometry"].apply(
+            lambda geom: geom.buffer(0) if not geom.is_valid else geom
+        )
+
+        keep_indices = []
+        polygons = gdf.geometry.values
+
+        for i in range(len(polygons)):
+            if i in keep_indices:
+                continue
+
+            keep = True
+            for j in keep_indices:
+                # Skip invalid geometries
+                if not polygons[i].is_valid or not polygons[j].is_valid:
+                    continue
+
+                # Calculate IoU
+                try:
+                    intersection = polygons[i].intersection(polygons[j]).area
+                    union = polygons[i].area + polygons[j].area - intersection
+                    iou = intersection / union if union > 0 else 0
+
+                    if iou > iou_threshold:
+                        keep = False
+                        break
+                except Exception:
+                    # Skip on topology exceptions
+                    continue
+
+            if keep:
+                keep_indices.append(i)
+
+        return gdf.iloc[keep_indices]
+
+    # Apply non-maximum suppression to remove overlapping polygons
+    gdf = filter_overlapping_polygons(gdf, nms_iou_threshold=nms_iou_threshold)
+
+    print(f"Final building count after filtering: {len(gdf)}")
+
+    # Save to file
+    if output_path is not None:
+        gdf.to_file(output_path)
+        print(f"Saved {len(gdf)} building footprints to {output_path}")
+
+    return gdf
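`masks_to_vector` chains four steps: threshold the mask, label connected components with OpenCV, trace and simplify each component's contour into a georeferenced shapely `Polygon`, and greedily drop polygons whose IoU with an already-kept polygon exceeds `nms_iou_threshold`. Two parameter details follow directly from the code: the threshold is applied as `mask_threshold * 255`, so the function expects an 8-bit (0-255) probability mask rather than 0-1 floats, and `simplify_tolerance` multiplies the contour's arc length to get the Douglas-Peucker epsilon, so the default of 1.0 simplifies very aggressively once a contour exceeds 50 points. A usage sketch under those assumptions (file names are hypothetical):

from geoai.preprocess import masks_to_vector

# Hypothetical 8-bit building-probability mask produced by a segmentation model.
gdf = masks_to_vector(
    "buildings_mask.tif",
    output_path="buildings.geojson",  # written via GeoDataFrame.to_file
    simplify_tolerance=0.01,          # fraction of contour perimeter, not map units
    mask_threshold=0.5,               # compared against pixel values as 0.5 * 255
    min_area=100,                     # connected-component area in pixels
    nms_iou_threshold=0.5,
)
if gdf is not None:                   # None is returned when no polygons survive
    print(gdf[["confidence", "class"]].describe())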