geoai-py 0.3.5__py2.py3-none-any.whl → 0.4.0__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
geoai/utils.py CHANGED
@@ -1,34 +1,36 @@
  """The utils module contains common functions and classes used by the other modules."""

+ # Standard Library
  import json
  import math
  import os
- from collections.abc import Iterable
- from PIL import Image
- from pathlib import Path
- import requests
  import warnings
  import xml.etree.ElementTree as ET
+ from collections.abc import Iterable
+ from pathlib import Path
  from typing import Any, Dict, List, Optional, Tuple, Union
- import numpy as np
- import matplotlib.pyplot as plt
+
+ # Third-Party Libraries
+ import cv2
  import geopandas as gpd
  import leafmap
+ import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
- import xarray as xr
- import rioxarray as rxr
  import rasterio
- from torchvision.transforms import RandomRotation
- from rasterio.windows import Window
+ import requests
+ import rioxarray as rxr
+ import torch
+ import torchgeo
+ import xarray as xr
+ from PIL import Image
  from rasterio import features
  from rasterio.plot import show
- from shapely.geometry import box, shape, mapping, Polygon, MultiPolygon
+ from rasterio.windows import Window
  from shapely.affinity import rotate
+ from shapely.geometry import MultiPolygon, Polygon, box, mapping, shape
+ from torchvision.transforms import RandomRotation
  from tqdm import tqdm
- import torch
- import torchgeo
- import cv2

  try:
  from torchgeo.datasets import RasterDataset, unbind_samples
@@ -54,6 +56,7 @@ def view_raster(
  array_args: Optional[Dict] = {},
  client_args: Optional[Dict] = {"cors_all": False},
  basemap: Optional[str] = "OpenStreetMap",
+ basemap_args: Optional[Dict] = None,
  backend: Optional[str] = "folium",
  **kwargs,
  ):
@@ -76,6 +79,7 @@ def view_raster(
  array_args (Optional[Dict], optional): Additional arguments for array processing. Defaults to {}.
  client_args (Optional[Dict], optional): Additional arguments for the client. Defaults to {"cors_all": False}.
  basemap (Optional[str], optional): The basemap to use. Defaults to "OpenStreetMap".
+ basemap_args (Optional[Dict], optional): Additional arguments for the basemap. Defaults to None.
  **kwargs (Any): Additional keyword arguments.

  Returns:
@@ -87,7 +91,23 @@ def view_raster(
  else:
  import leafmap.leafmap as leafmap

- m = leafmap.Map(basemap=basemap)
+ if basemap_args is None:
+ basemap_args = {}
+
+ m = leafmap.Map()
+
+ if isinstance(basemap, str):
+ if basemap.lower().endswith(".tif"):
+ if basemap.lower().startswith("http"):
+ if "name" not in basemap_args:
+ basemap_args["name"] = "Basemap"
+ m.add_cog_layer(basemap, **basemap_args)
+ else:
+ if "layer_name" not in basemap_args:
+ basemap_args["layer_name"] = "Basemap"
+ m.add_raster(basemap, **basemap_args)
+ else:
+ m.add_basemap(basemap, **basemap_args)

  if isinstance(source, dict):
  source = dict_to_image(source)
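With this change, `view_raster()` builds the map first and then resolves the basemap: a string ending in `.tif` is added via `add_cog_layer()` (remote) or `add_raster()` (local), anything else goes through `add_basemap()`, and `basemap_args` is forwarded to whichever call is used. A minimal usage sketch, assuming the first positional argument is still the raster source and the function returns the leafmap map as in 0.3.x; the file paths are hypothetical:

```python
from geoai.utils import view_raster

# Named basemap, as before; extra options travel through basemap_args.
m = view_raster("https://example.com/imagery.tif", basemap="OpenStreetMap")

# A local GeoTIFF basemap is routed through m.add_raster(); "layer_name" is the
# key the new code fills in by default, so overriding it here is optional.
m = view_raster(
    "https://example.com/imagery.tif",      # hypothetical COG to display
    basemap="naip_mosaic.tif",              # hypothetical local GeoTIFF basemap
    basemap_args={"layer_name": "NAIP mosaic"},
)
m
```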
@@ -493,6 +513,7 @@ def view_vector(
  title=None,
  legend=True,
  basemap=False,
+ basemap_type="streets",
  alpha=0.7,
  edge_color="black",
  classification="quantiles",
@@ -522,6 +543,8 @@ def view_vector(
  legend (bool, optional): Whether to display a legend. Defaults to True.
  basemap (bool, optional): Whether to add a web basemap. Requires contextily.
  Defaults to False.
+ basemap_type (str, optional): Type of basemap to use. Options: 'streets', 'satellite'.
+ Defaults to 'streets'.
  alpha (float, optional): Transparency of the vector features, between 0-1.
  Defaults to 0.7.
  edge_color (str, optional): Color for feature edges. Defaults to "black".
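A short sketch of the new `basemap_type` option documented above, assuming `view_vector()` still takes a GeoDataFrame as its first argument and that contextily is installed; the input file is hypothetical:

```python
import geopandas as gpd
from geoai.utils import view_vector

gdf = gpd.read_file("buildings.geojson")  # hypothetical vector file

view_vector(gdf, basemap=True, basemap_type="streets")    # OpenStreetMap Mapnik (default)
view_vector(gdf, basemap=True, basemap_type="satellite")  # Esri WorldImagery tiles
```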
@@ -609,10 +632,13 @@ def view_vector(
  ax=ax, color=highlight_color, edgecolor="black", linewidth=2, zorder=5
  )

- # Add basemap if requested
  if basemap:
  try:
- ctx.add_basemap(ax, crs=gdf.crs, source=ctx.providers.OpenStreetMap.Mapnik)
+ basemap_options = {
+ "streets": ctx.providers.OpenStreetMap.Mapnik,
+ "satellite": ctx.providers.Esri.WorldImagery,
+ }
+ ctx.add_basemap(ax, crs=gdf.crs, source=basemap_options[basemap_type])
  except Exception as e:
  print(f"Could not add basemap: {e}")

@@ -667,8 +693,8 @@ def view_vector_interactive(
  """
  import folium
  import folium.plugins as plugins
- from localtileserver import get_folium_tile_layer, TileClient
  from leafmap import cog_tile
+ from localtileserver import TileClient, get_folium_tile_layer

  google_tiles = {
  "Roadmap": {
@@ -719,7 +745,10 @@ def view_vector_interactive(
  kwargs["max_zoom"] = 30

  if isinstance(vector_data, str):
- vector_data = gpd.read_file(vector_data)
+ if vector_data.endswith(".parquet"):
+ vector_data = gpd.read_parquet(vector_data)
+ else:
+ vector_data = gpd.read_file(vector_data)

  # Check if input is a GeoDataFrame
  if not isinstance(vector_data, gpd.GeoDataFrame):
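`view_vector_interactive()` now detects GeoParquet input by file extension and reads it with `gpd.read_parquet()`; any other path still goes through `gpd.read_file()`. A minimal sketch with hypothetical file names:

```python
from geoai.utils import view_vector_interactive

view_vector_interactive("buildings.parquet")  # new: read via gpd.read_parquet()
view_vector_interactive("buildings.geojson")  # unchanged: read via gpd.read_file()
```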
@@ -767,9 +796,9 @@ def regularization(
  Returns:
  GeoDataFrame or list of shapely Polygons with regularized building footprints
  """
- from shapely.geometry import Polygon, shape
- from shapely.affinity import rotate, translate
  from shapely import wkt
+ from shapely.affinity import rotate, translate
+ from shapely.geometry import Polygon, shape

  regularized_buildings = []

@@ -888,8 +917,8 @@ def hybrid_regularization(building_polygons):
  Returns:
  GeoDataFrame or list of shapely Polygons with regularized building footprints
  """
- from shapely.geometry import Polygon
  from shapely.affinity import rotate
+ from shapely.geometry import Polygon

  # Use minimum_rotated_rectangle instead of oriented_envelope
  try:
@@ -1003,8 +1032,8 @@ def adaptive_regularization(
  Returns:
  GeoDataFrame or list of shapely Polygons with regularized building footprints
  """
- from shapely.geometry import Polygon
  from shapely.affinity import rotate
+ from shapely.geometry import Polygon

  # Analyze the overall dataset to set appropriate parameters
  if is_gdf := isinstance(building_polygons, gpd.GeoDataFrame):
@@ -2558,6 +2587,8 @@ def export_geotiff_tiles(
  print(f"\nRaster info for {in_raster}:")
  print(f" CRS: {src.crs}")
  print(f" Dimensions: {src.width} x {src.height}")
+ print(f" Resolution: {src.res}")
+ print(f" Bands: {src.count}")
  print(f" Bounds: {src.bounds}")

  # Calculate number of tiles
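The two new lines report the dataset's pixel resolution and band count. For reference, this standalone snippet prints the same rasterio attributes for any raster (the path is hypothetical):

```python
import rasterio

with rasterio.open("input.tif") as src:
    print(f" Resolution: {src.res}")  # (x, y) pixel size in CRS units
    print(f" Bands: {src.count}")     # number of bands
```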
@@ -4239,9 +4270,10 @@ def read_vector(source, layer=None, **kwargs):
  >>> gdf = read_vector("path/to/data.gpkg", layer="layer_name")
  """

- import fiona
  import urllib.parse

+ import fiona
+
  # Determine if source is a URL or local file
  parsed_url = urllib.parse.urlparse(source)
  is_url = parsed_url.scheme in ["http", "https"]
@@ -4315,6 +4347,7 @@ def read_raster(source, band=None, masked=True, **kwargs):
  >>> raster = read_raster("path/to/data.tif", masked=False)
  """
  import urllib.parse
+
  from rasterio.errors import RasterioIOError

  # Determine if source is a URL or local file
@@ -4409,8 +4442,8 @@ def region_groups(
  Returns:
  Union[Tuple[np.ndarray, pd.DataFrame], Tuple[xr.DataArray, pd.DataFrame]]: Labeled image and properties DataFrame.
  """
- from skimage import measure
  import scipy.ndimage as ndi
+ from skimage import measure

  if isinstance(image, str):
  ds = rxr.open_rasterio(image)
@@ -4976,3 +5009,808 @@ def add_geometric_properties(data, properties=None, area_unit="m2", length_unit=
  result["complexity"] = result.geometry.apply(calc_complexity)

  return result
+
+
+ def orthogonalize(
+ input_path,
+ output_path=None,
+ epsilon=0.2,
+ min_area=10,
+ min_segments=4,
+ area_tolerance=0.7,
+ detect_triangles=True,
+ ):
+ """
+ Orthogonalizes object masks in a GeoTIFF file.
+
+ This function reads a GeoTIFF containing object masks (binary or labeled regions),
+ converts the raster masks to vector polygons, applies orthogonalization to each polygon,
+ and optionally writes the result to a GeoJSON file.
+ The source code is adapted from the Solar Panel Detection algorithm by Esri.
+ See https://www.arcgis.com/home/item.html?id=c2508d72f2614104bfcfd5ccf1429284.
+ Credits to Esri for the original code.
+
+ Args:
+ input_path (str): Path to the input GeoTIFF file.
+ output_path (str, optional): Path to save the output GeoJSON file. If None, no file is saved.
+ epsilon (float, optional): Simplification tolerance for the Douglas-Peucker algorithm.
+ Higher values result in more simplification. Default is 0.2.
+ min_area (float, optional): Minimum area of polygons to process (smaller ones are kept as-is).
+ min_segments (int, optional): Minimum number of segments to keep after simplification.
+ Default is 4 (for rectangular shapes).
+ area_tolerance (float, optional): Allowed ratio of area change. Values less than 1.0 restrict
+ area change. Default is 0.7 (allows reduction to 70% of original area).
+ detect_triangles (bool, optional): If True, performs additional check to avoid creating triangular shapes.
+
+ Returns:
+ geopandas.GeoDataFrame: A GeoDataFrame containing the orthogonalized features.
+ """
+
+ from functools import partial
+
+ def orthogonalize_ring(ring, epsilon=0.2, min_segments=4):
+ """
+ Orthogonalizes a ring (list of coordinates).
+
+ Args:
+ ring (list): List of [x, y] coordinates forming a ring
+ epsilon (float, optional): Simplification tolerance
+ min_segments (int, optional): Minimum number of segments to keep
+
+ Returns:
+ list: Orthogonalized list of coordinates
+ """
+ if len(ring) <= 3:
+ return ring
+
+ # Convert to numpy array
+ ring_arr = np.array(ring)
+
+ # Get orientation
+ angle = math.degrees(get_orientation(ring_arr))
+
+ # Simplify using Ramer-Douglas-Peucker algorithm
+ ring_arr = simplify(ring_arr, eps=epsilon)
+
+ # If simplified too much, adjust epsilon to maintain minimum segments
+ if len(ring_arr) < min_segments:
+ # Try with smaller epsilon until we get at least min_segments points
+ for adjust_factor in [0.75, 0.5, 0.25, 0.1]:
+ test_arr = simplify(np.array(ring), eps=epsilon * adjust_factor)
+ if len(test_arr) >= min_segments:
+ ring_arr = test_arr
+ break
+
+ # Convert to dataframe for processing
+ df = to_dataframe(ring_arr)
+
+ # Add orientation information
+ add_orientation(df, angle)
+
+ # Align segments to orthogonal directions
+ df = align(df)
+
+ # Merge collinear line segments
+ df = merge_lines(df)
+
+ if len(df) == 0:
+ return ring
+
+ # If we have a triangle-like result (3 segments), return the original shape
+ if len(df) <= 3:
+ return ring
+
+ # Join the orthogonalized segments back into a ring
+ joined_ring = join_ring(df)
+
+ # If the join operation didn't produce a valid ring, return the original
+ if len(joined_ring) == 0 or len(joined_ring[0]) < 3:
+ return ring
+
+ # Basic validation: if result has 3 or fewer points (triangle), use original
+ if len(joined_ring[0]) <= 3:
+ return ring
+
+ # Convert back to a list and ensure it's closed
+ result = joined_ring[0].tolist()
+ if len(result) > 0 and (result[0] != result[-1]):
+ result.append(result[0])
+
+ return result
+
+ def vectorize_mask(mask, transform):
+ """
+ Converts a binary mask to vector polygons.
+
+ Args:
+ mask (numpy.ndarray): Binary mask where non-zero values represent objects
+ transform (rasterio.transform.Affine): Affine transformation matrix
+
+ Returns:
+ list: List of GeoJSON features
+ """
+ shapes = features.shapes(mask, transform=transform)
+ features_list = []
+
+ for shape, value in shapes:
+ if value > 0: # Only process non-zero values (actual objects)
+ features_list.append(
+ {
+ "type": "Feature",
+ "properties": {"value": int(value)},
+ "geometry": shape,
+ }
+ )
+
+ return features_list
+
+ def rasterize_features(features, shape, transform, dtype=np.uint8):
+ """
+ Converts vector features back to a raster mask.
+
+ Args:
+ features (list): List of GeoJSON features
+ shape (tuple): Shape of the output raster (height, width)
+ transform (rasterio.transform.Affine): Affine transformation matrix
+ dtype (numpy.dtype, optional): Data type of the output raster
+
+ Returns:
+ numpy.ndarray: Rasterized mask
+ """
+ mask = features.rasterize(
+ [
+ (feature["geometry"], feature["properties"]["value"])
+ for feature in features
+ ],
+ out_shape=shape,
+ transform=transform,
+ fill=0,
+ dtype=dtype,
+ )
+
+ return mask
+
+ # The following helper functions are from the original code
+ def get_orientation(contour):
+ """
+ Calculate the orientation angle of a contour.
+
+ Args:
+ contour (numpy.ndarray): Array of shape (n, 2) containing point coordinates
+
+ Returns:
+ float: Orientation angle in radians
+ """
+ box = cv2.minAreaRect(contour.astype(int))
+ (cx, cy), (w, h), angle = box
+ return math.radians(angle)
+
+ def simplify(contour, eps=0.2):
+ """
+ Simplify a contour using the Ramer-Douglas-Peucker algorithm.
+
+ Args:
+ contour (numpy.ndarray): Array of shape (n, 2) containing point coordinates
+ eps (float, optional): Epsilon value for simplification
+
+ Returns:
+ numpy.ndarray: Simplified contour
+ """
+ return rdp(contour, epsilon=eps)
+
+ def to_dataframe(ring):
+ """
+ Convert a ring to a pandas DataFrame with line segment information.
+
+ Args:
+ ring (numpy.ndarray): Array of shape (n, 2) containing point coordinates
+
+ Returns:
+ pandas.DataFrame: DataFrame with line segment information
+ """
+ df = pd.DataFrame(ring, columns=["x1", "y1"])
+ df["x2"] = df["x1"].shift(-1)
+ df["y2"] = df["y1"].shift(-1)
+ df.dropna(inplace=True)
+ df["angle_atan"] = np.arctan2((df["y2"] - df["y1"]), (df["x2"] - df["x1"]))
+ df["angle_atan_deg"] = df["angle_atan"] * 57.2958
+ df["len"] = np.sqrt((df["y2"] - df["y1"]) ** 2 + (df["x2"] - df["x1"]) ** 2)
+ df["cx"] = (df["x2"] + df["x1"]) / 2.0
+ df["cy"] = (df["y2"] + df["y1"]) / 2.0
+ return df
+
+ def add_orientation(df, angle):
+ """
+ Add orientation information to the DataFrame.
+
+ Args:
+ df (pandas.DataFrame): DataFrame with line segment information
+ angle (float): Orientation angle in degrees
+
+ Returns:
+ None: Modifies the DataFrame in-place
+ """
+ rtangle = angle + 90
+ is_parallel = (
+ (df["angle_atan_deg"] > (angle - 45))
+ & (df["angle_atan_deg"] < (angle + 45))
+ ) | (
+ (df["angle_atan_deg"] + 180 > (angle - 45))
+ & (df["angle_atan_deg"] + 180 < (angle + 45))
+ )
+ df["angle"] = math.radians(angle)
+ df["angle"] = df["angle"].where(is_parallel, math.radians(rtangle))
+
+ def align(df):
+ """
+ Align line segments to their nearest orthogonal direction.
+
+ Args:
+ df (pandas.DataFrame): DataFrame with line segment information
+
+ Returns:
+ pandas.DataFrame: DataFrame with aligned line segments
+ """
+ # Handle edge case with empty dataframe
+ if len(df) == 0:
+ return df.copy()
+
+ df_clone = df.copy()
+
+ # Ensure angle column exists and has valid values
+ if "angle" not in df_clone.columns or df_clone["angle"].isna().any():
+ # If angle data is missing, add default angles based on atan2
+ df_clone["angle"] = df_clone["angle_atan"]
+
+ # Ensure length and center point data is valid
+ if "len" not in df_clone.columns or df_clone["len"].isna().any():
+ # Recalculate lengths if missing
+ df_clone["len"] = np.sqrt(
+ (df_clone["x2"] - df_clone["x1"]) ** 2
+ + (df_clone["y2"] - df_clone["y1"]) ** 2
+ )
+
+ if "cx" not in df_clone.columns or df_clone["cx"].isna().any():
+ df_clone["cx"] = (df_clone["x1"] + df_clone["x2"]) / 2.0
+
+ if "cy" not in df_clone.columns or df_clone["cy"].isna().any():
+ df_clone["cy"] = (df_clone["y1"] + df_clone["y2"]) / 2.0
+
+ # Apply orthogonal alignment
+ df_clone["x1"] = df_clone["cx"] - ((df_clone["len"] / 2) * np.cos(df["angle"]))
+ df_clone["x2"] = df_clone["cx"] + ((df_clone["len"] / 2) * np.cos(df["angle"]))
+ df_clone["y1"] = df_clone["cy"] - ((df_clone["len"] / 2) * np.sin(df["angle"]))
+ df_clone["y2"] = df_clone["cy"] + ((df_clone["len"] / 2) * np.sin(df["angle"]))
+
+ return df_clone
+
+ def merge_lines(df_aligned):
+ """
+ Merge collinear line segments.
+
+ Args:
+ df_aligned (pandas.DataFrame): DataFrame with aligned line segments
+
+ Returns:
+ pandas.DataFrame: DataFrame with merged line segments
+ """
+ ortho_lines = []
+ groups = df_aligned.groupby(
+ (df_aligned["angle"].shift() != df_aligned["angle"]).cumsum()
+ )
+ for x, y in groups:
+ group_cx = (y["cx"] * y["len"]).sum() / y["len"].sum()
+ group_cy = (y["cy"] * y["len"]).sum() / y["len"].sum()
+ cumlen = y["len"].sum()
+
+ ortho_lines.append((group_cx, group_cy, cumlen, y["angle"].iloc[0]))
+
+ ortho_list = []
+ for cx, cy, length, rot_angle in ortho_lines:
+ X1 = cx - (length / 2) * math.cos(rot_angle)
+ X2 = cx + (length / 2) * math.cos(rot_angle)
+ Y1 = cy - (length / 2) * math.sin(rot_angle)
+ Y2 = cy + (length / 2) * math.sin(rot_angle)
+
+ ortho_list.append(
+ {
+ "x1": X1,
+ "y1": Y1,
+ "x2": X2,
+ "y2": Y2,
+ "len": length,
+ "cx": cx,
+ "cy": cy,
+ "angle": rot_angle,
+ }
+ )
+
+ if (
+ len(ortho_list) > 0 and ortho_list[0]["angle"] == ortho_list[-1]["angle"]
+ ): # join first and last segment if they're in same direction
+ totlen = ortho_list[0]["len"] + ortho_list[-1]["len"]
+ merge_cx = (
+ (ortho_list[0]["cx"] * ortho_list[0]["len"])
+ + (ortho_list[-1]["cx"] * ortho_list[-1]["len"])
+ ) / totlen
+
+ merge_cy = (
+ (ortho_list[0]["cy"] * ortho_list[0]["len"])
+ + (ortho_list[-1]["cy"] * ortho_list[-1]["len"])
+ ) / totlen
+
+ rot_angle = ortho_list[0]["angle"]
+ X1 = merge_cx - (totlen / 2) * math.cos(rot_angle)
+ X2 = merge_cx + (totlen / 2) * math.cos(rot_angle)
+ Y1 = merge_cy - (totlen / 2) * math.sin(rot_angle)
+ Y2 = merge_cy + (totlen / 2) * math.sin(rot_angle)
+
+ ortho_list[-1] = {
+ "x1": X1,
+ "y1": Y1,
+ "x2": X2,
+ "y2": Y2,
+ "len": totlen,
+ "cx": merge_cx,
+ "cy": merge_cy,
+ "angle": rot_angle,
+ }
+ ortho_list = ortho_list[1:]
+ ortho_df = pd.DataFrame(ortho_list)
+ return ortho_df
+
+ def find_intersection(x1, y1, x2, y2, x3, y3, x4, y4):
+ """
+ Find the intersection point of two line segments.
+
+ Args:
+ x1, y1, x2, y2: Coordinates of the first line segment
+ x3, y3, x4, y4: Coordinates of the second line segment
+
+ Returns:
+ list: [x, y] coordinates of the intersection point
+
+ Raises:
+ ZeroDivisionError: If the lines are parallel or collinear
+ """
+ # Calculate the denominator of the intersection formula
+ denominator = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
+
+ # Check if lines are parallel or collinear (denominator close to zero)
+ if abs(denominator) < 1e-10:
+ raise ZeroDivisionError("Lines are parallel or collinear")
+
+ px = (
+ (x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)
+ ) / denominator
+ py = (
+ (x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)
+ ) / denominator
+
+ # Check if the intersection point is within a reasonable distance
+ # from both line segments to avoid extreme extrapolation
+ def point_on_segment(x, y, x1, y1, x2, y2, tolerance=2.0):
+ # Check if point (x,y) is near the line segment from (x1,y1) to (x2,y2)
+ # First check if it's near the infinite line
+ line_len = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
+ if line_len < 1e-10:
+ return np.sqrt((x - x1) ** 2 + (y - y1) ** 2) <= tolerance
+
+ t = ((x - x1) * (x2 - x1) + (y - y1) * (y2 - y1)) / (line_len**2)
+
+ # Check distance to the infinite line
+ proj_x = x1 + t * (x2 - x1)
+ proj_y = y1 + t * (y2 - y1)
+ dist_to_line = np.sqrt((x - proj_x) ** 2 + (y - proj_y) ** 2)
+
+ # Check if the projection is near the segment, not just the infinite line
+ if t < -tolerance or t > 1 + tolerance:
+ # If far from the segment, compute distance to the nearest endpoint
+ dist_to_start = np.sqrt((x - x1) ** 2 + (y - y1) ** 2)
+ dist_to_end = np.sqrt((x - x2) ** 2 + (y - y2) ** 2)
+ return min(dist_to_start, dist_to_end) <= tolerance * 2
+
+ return dist_to_line <= tolerance
+
+ # Check if intersection is reasonably close to both line segments
+ if not (
+ point_on_segment(px, py, x1, y1, x2, y2)
+ and point_on_segment(px, py, x3, y3, x4, y4)
+ ):
+ # If intersection is far from segments, it's probably extrapolating too much
+ raise ValueError("Intersection point too far from line segments")
+
+ return [px, py]
+
+ def join_ring(merged_df):
+ """
+ Join line segments to form a closed ring.
+
+ Args:
+ merged_df (pandas.DataFrame): DataFrame with merged line segments
+
+ Returns:
+ numpy.ndarray: Array of shape (1, n, 2) containing the ring coordinates
+ """
+ # Handle edge cases
+ if len(merged_df) < 3:
+ # Not enough segments to form a valid polygon
+ return np.array([[]])
+
+ ring = []
+
+ # Find intersections between adjacent line segments
+ for i in range(len(merged_df) - 1):
+ x1, y1, x2, y2, *_ = merged_df.iloc[i]
+ x3, y3, x4, y4, *_ = merged_df.iloc[i + 1]
+
+ try:
+ intersection = find_intersection(x1, y1, x2, y2, x3, y3, x4, y4)
+
+ # Check if the intersection point is too far from either line segment
+ # This helps prevent extending edges beyond reasonable bounds
+ dist_to_seg1 = min(
+ np.sqrt((intersection[0] - x1) ** 2 + (intersection[1] - y1) ** 2),
+ np.sqrt((intersection[0] - x2) ** 2 + (intersection[1] - y2) ** 2),
+ )
+ dist_to_seg2 = min(
+ np.sqrt((intersection[0] - x3) ** 2 + (intersection[1] - y3) ** 2),
+ np.sqrt((intersection[0] - x4) ** 2 + (intersection[1] - y4) ** 2),
+ )
+
+ # Use the maximum of line segment lengths as a reference
+ max_len = max(
+ np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2),
+ np.sqrt((x4 - x3) ** 2 + (y4 - y3) ** 2),
+ )
+
+ # If intersection is too far away, use the endpoint of the first segment instead
+ if dist_to_seg1 > max_len * 0.5 or dist_to_seg2 > max_len * 0.5:
+ ring.append([x2, y2])
+ else:
+ ring.append(intersection)
+ except Exception as e:
+ # If intersection calculation fails, use the endpoint of the first segment
+ ring.append([x2, y2])
+
+ # Connect last segment with first segment
+ x1, y1, x2, y2, *_ = merged_df.iloc[-1]
+ x3, y3, x4, y4, *_ = merged_df.iloc[0]
+
+ try:
+ intersection = find_intersection(x1, y1, x2, y2, x3, y3, x4, y4)
+
+ # Check if the intersection point is too far from either line segment
+ dist_to_seg1 = min(
+ np.sqrt((intersection[0] - x1) ** 2 + (intersection[1] - y1) ** 2),
+ np.sqrt((intersection[0] - x2) ** 2 + (intersection[1] - y2) ** 2),
+ )
+ dist_to_seg2 = min(
+ np.sqrt((intersection[0] - x3) ** 2 + (intersection[1] - y3) ** 2),
+ np.sqrt((intersection[0] - x4) ** 2 + (intersection[1] - y4) ** 2),
+ )
+
+ max_len = max(
+ np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2),
+ np.sqrt((x4 - x3) ** 2 + (y4 - y3) ** 2),
+ )
+
+ if dist_to_seg1 > max_len * 0.5 or dist_to_seg2 > max_len * 0.5:
+ ring.append([x2, y2])
+ else:
+ ring.append(intersection)
+ except Exception as e:
+ # If intersection calculation fails, use the endpoint of the last segment
+ ring.append([x2, y2])
+
+ # Ensure the ring is closed
+ if len(ring) > 0 and (ring[0][0] != ring[-1][0] or ring[0][1] != ring[-1][1]):
+ ring.append(ring[0])
+
+ return np.array([ring])
+
+ def rdp(M, epsilon=0, dist=None, algo="iter", return_mask=False):
+ """
+ Simplifies a given array of points using the Ramer-Douglas-Peucker algorithm.
+
+ Args:
+ M (numpy.ndarray): Array of shape (n, d) containing point coordinates
+ epsilon (float, optional): Epsilon value for simplification
+ dist (callable, optional): Distance function
+ algo (str, optional): Algorithm to use ('iter' or 'rec')
+ return_mask (bool, optional): Whether to return a mask instead of the simplified array
+
+ Returns:
+ numpy.ndarray or list: Simplified points or mask
+ """
+ if dist is None:
+ dist = pldist
+
+ if algo == "iter":
+ algo = partial(rdp_iter, return_mask=return_mask)
+ elif algo == "rec":
+ if return_mask:
+ raise NotImplementedError(
+ 'return_mask=True not supported with algo="rec"'
+ )
+ algo = rdp_rec
+
+ if "numpy" in str(type(M)):
+ return algo(M, epsilon, dist)
+
+ return algo(np.array(M), epsilon, dist).tolist()
+
+ def pldist(point, start, end):
+ """
+ Calculates the distance from 'point' to the line given by 'start' and 'end'.
+
+ Args:
+ point (numpy.ndarray): Point coordinates
+ start (numpy.ndarray): Start point of the line
+ end (numpy.ndarray): End point of the line
+
+ Returns:
+ float: Distance from point to line
+ """
+ if np.all(np.equal(start, end)):
+ return np.linalg.norm(point - start)
+
+ # Fix for NumPy 2.0 deprecation warning - handle 2D vectors properly
+ # Instead of using cross product directly, calculate the area of the
+ # parallelogram formed by the vectors and divide by the length of the line
+ line_vec = end - start
+ point_vec = point - start
+
+ # Area of parallelogram = |a|*|b|*sin(θ)
+ # For 2D vectors: |a×b| = |a|*|b|*sin(θ) = determinant([ax, ay], [bx, by])
+ area = abs(line_vec[0] * point_vec[1] - line_vec[1] * point_vec[0])
+
+ # Distance = Area / |line_vec|
+ return area / np.linalg.norm(line_vec)
+
+ def rdp_rec(M, epsilon, dist=pldist):
+ """
+ Recursive implementation of the Ramer-Douglas-Peucker algorithm.
+
+ Args:
+ M (numpy.ndarray): Array of shape (n, d) containing point coordinates
+ epsilon (float): Epsilon value for simplification
+ dist (callable, optional): Distance function
+
+ Returns:
+ numpy.ndarray: Simplified points
+ """
+ dmax = 0.0
+ index = -1
+
+ for i in range(1, M.shape[0]):
+ d = dist(M[i], M[0], M[-1])
+
+ if d > dmax:
+ index = i
+ dmax = d
+
+ if dmax > epsilon:
+ r1 = rdp_rec(M[: index + 1], epsilon, dist)
+ r2 = rdp_rec(M[index:], epsilon, dist)
+
+ return np.vstack((r1[:-1], r2))
+ else:
+ return np.vstack((M[0], M[-1]))
+
+ def _rdp_iter(M, start_index, last_index, epsilon, dist=pldist):
+ """
+ Internal iterative implementation of the Ramer-Douglas-Peucker algorithm.
+
+ Args:
+ M (numpy.ndarray): Array of shape (n, d) containing point coordinates
+ start_index (int): Start index
+ last_index (int): Last index
+ epsilon (float): Epsilon value for simplification
+ dist (callable, optional): Distance function
+
+ Returns:
+ numpy.ndarray: Boolean mask of points to keep
+ """
+ stk = []
+ stk.append([start_index, last_index])
+ global_start_index = start_index
+ indices = np.ones(last_index - start_index + 1, dtype=bool)
+
+ while stk:
+ start_index, last_index = stk.pop()
+
+ dmax = 0.0
+ index = start_index
+
+ for i in range(index + 1, last_index):
+ if indices[i - global_start_index]:
+ d = dist(M[i], M[start_index], M[last_index])
+ if d > dmax:
+ index = i
+ dmax = d
+
+ if dmax > epsilon:
+ stk.append([start_index, index])
+ stk.append([index, last_index])
+ else:
+ for i in range(start_index + 1, last_index):
+ indices[i - global_start_index] = False
+
+ return indices
+
+ def rdp_iter(M, epsilon, dist=pldist, return_mask=False):
+ """
+ Iterative implementation of the Ramer-Douglas-Peucker algorithm.
+
+ Args:
+ M (numpy.ndarray): Array of shape (n, d) containing point coordinates
+ epsilon (float): Epsilon value for simplification
+ dist (callable, optional): Distance function
+ return_mask (bool, optional): Whether to return a mask instead of the simplified array
+
+ Returns:
+ numpy.ndarray: Simplified points or boolean mask
+ """
+ mask = _rdp_iter(M, 0, len(M) - 1, epsilon, dist)
+
+ if return_mask:
+ return mask
+
+ return M[mask]
+
+ # Read the raster data
+ with rasterio.open(input_path) as src:
+ # Read the first band (assuming it contains the mask)
+ mask = src.read(1)
+ transform = src.transform
+ crs = src.crs
+
+ # Extract shapes from the raster mask
+ shapes = features.shapes(mask, transform=transform)
+
+ # Convert shapes to GeoJSON features
+ features_list = []
+ for shape, value in shapes:
+ if value > 0: # Only process non-zero values (actual objects)
+ # Convert GeoJSON geometry to Shapely polygon
+ polygon = Polygon(shape["coordinates"][0])
+
+ # Skip tiny polygons
+ if polygon.area < min_area:
+ features_list.append(
+ {
+ "type": "Feature",
+ "properties": {"value": int(value)},
+ "geometry": shape,
+ }
+ )
+ continue
+
+ # Check if shape is triangular and if we want to avoid triangular shapes
+ if detect_triangles:
+ # Create a simplified version to check number of vertices
+ simple_polygon = polygon.simplify(epsilon)
+ if (
+ len(simple_polygon.exterior.coords) <= 4
+ ): # 3 points + closing point
+ # Likely a triangular shape - skip orthogonalization
+ features_list.append(
+ {
+ "type": "Feature",
+ "properties": {"value": int(value)},
+ "geometry": shape,
+ }
+ )
+ continue
+
+ # Process larger, non-triangular polygons
+ try:
+ # Convert shapely polygon to a ring format for orthogonalization
+ exterior_ring = list(polygon.exterior.coords)
+ interior_rings = [
+ list(interior.coords) for interior in polygon.interiors
+ ]
+
+ # Calculate bounding box aspect ratio to help with parameter tuning
+ minx, miny, maxx, maxy = polygon.bounds
+ width = maxx - minx
+ height = maxy - miny
+ aspect_ratio = max(width, height) / max(1.0, min(width, height))
+
+ # Determine if this shape is likely to be a building/rectangular object
+ # Long thin objects might require different treatment
+ is_rectangular = aspect_ratio < 3.0
+
+ # Rectangular objects usually need more careful orthogonalization
+ epsilon_adjusted = epsilon
+ min_segments_adjusted = min_segments
+
+ if is_rectangular:
+ # For rectangular objects, use more conservative epsilon
+ epsilon_adjusted = epsilon * 0.75
+ # Ensure we get at least 4 points for a proper rectangle
+ min_segments_adjusted = max(4, min_segments)
+
+ # Orthogonalize the exterior and interior rings
+ orthogonalized_exterior = orthogonalize_ring(
+ exterior_ring,
+ epsilon=epsilon_adjusted,
+ min_segments=min_segments_adjusted,
+ )
+
+ orthogonalized_interiors = [
+ orthogonalize_ring(
+ ring,
+ epsilon=epsilon_adjusted,
+ min_segments=min_segments_adjusted,
+ )
+ for ring in interior_rings
+ ]
+
+ # Validate the result - calculate area change
+ original_area = polygon.area
+ orthogonalized_poly = Polygon(orthogonalized_exterior)
+
+ if orthogonalized_poly.is_valid:
+ area_ratio = (
+ orthogonalized_poly.area / original_area
+ if original_area > 0
+ else 0
+ )
+
+ # If area changed too much, revert to original
+ if area_ratio < area_tolerance or area_ratio > (
+ 1.0 / area_tolerance
+ ):
+ # Use original polygon instead
+ geometry = shape
+ else:
+ # Create a new geometry with orthogonalized rings
+ geometry = {
+ "type": "Polygon",
+ "coordinates": [orthogonalized_exterior],
+ }
+
+ # Add interior rings if they exist
+ if orthogonalized_interiors:
+ geometry["coordinates"].extend(
+ [ring for ring in orthogonalized_interiors]
+ )
+ else:
+ # If resulting polygon is invalid, use original
+ geometry = shape
+
+ # Add the feature to the list
+ features_list.append(
+ {
+ "type": "Feature",
+ "properties": {"value": int(value)},
+ "geometry": geometry,
+ }
+ )
+ except Exception as e:
+ # Keep the original shape if orthogonalization fails
+ features_list.append(
+ {
+ "type": "Feature",
+ "properties": {"value": int(value)},
+ "geometry": shape,
+ }
+ )
+
+ # Create the final GeoJSON structure
+ geojson = {
+ "type": "FeatureCollection",
+ "crs": {"type": "name", "properties": {"name": str(crs)}},
+ "features": features_list,
+ }
+
+ # Convert to GeoDataFrame and set the CRS
+ gdf = gpd.GeoDataFrame.from_features(geojson["features"], crs=crs)
+
+ # Save to file if output_path is provided
+ if output_path:
+ gdf.to_file(output_path)
+
+ return gdf
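A usage sketch for the new `orthogonalize()` function, based on the signature above; the paths are hypothetical and the input is expected to be a GeoTIFF mask whose non-zero pixels mark objects (for example, predicted building footprints):

```python
from geoai.utils import orthogonalize

gdf = orthogonalize(
    "building_masks.tif",                         # hypothetical input mask
    output_path="buildings_orthogonal.geojson",   # optional; omit to skip writing
    epsilon=0.2,         # Douglas-Peucker simplification tolerance
    min_area=10,         # smaller polygons are passed through unchanged
    area_tolerance=0.7,  # revert shapes whose area changes too much
)
print(gdf.head())
```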