geoai-py 0.3.5__py2.py3-none-any.whl → 0.3.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
geoai/utils.py CHANGED
@@ -1,34 +1,43 @@
1
1
  """The utils module contains common functions and classes used by the other modules."""
2
2
 
3
+ # Standard Library
3
4
  import json
4
5
  import math
5
6
  import os
6
- from collections.abc import Iterable
7
- from PIL import Image
8
- from pathlib import Path
9
- import requests
10
7
  import warnings
11
8
  import xml.etree.ElementTree as ET
9
+ from collections.abc import Iterable
10
+ from pathlib import Path
12
11
  from typing import Any, Dict, List, Optional, Tuple, Union
13
- import numpy as np
14
- import matplotlib.pyplot as plt
12
+
13
+ # Third-Party Libraries
14
+ import cv2
15
15
  import geopandas as gpd
16
16
  import leafmap
17
+ import matplotlib.pyplot as plt
17
18
  import numpy as np
18
19
  import pandas as pd
19
- import xarray as xr
20
- import rioxarray as rxr
21
20
  import rasterio
22
- from torchvision.transforms import RandomRotation
23
- from rasterio.windows import Window
21
+ import requests
22
+ import rioxarray as rxr
23
+ import torch
24
+ import torchgeo
25
+ import xarray as xr
26
+ from PIL import Image
24
27
  from rasterio import features
25
28
  from rasterio.plot import show
26
- from shapely.geometry import box, shape, mapping, Polygon, MultiPolygon
29
+ from rasterio.windows import Window
27
30
  from shapely.affinity import rotate
31
+ from shapely.geometry import (
32
+ MultiPolygon,
33
+ Polygon,
34
+ box,
35
+ mapping,
36
+ shape,
37
+ )
38
+ from torchvision.transforms import RandomRotation
28
39
  from tqdm import tqdm
29
- import torch
30
- import torchgeo
31
- import cv2
40
+
32
41
 
33
42
  try:
34
43
  from torchgeo.datasets import RasterDataset, unbind_samples
@@ -54,6 +63,7 @@ def view_raster(
54
63
  array_args: Optional[Dict] = {},
55
64
  client_args: Optional[Dict] = {"cors_all": False},
56
65
  basemap: Optional[str] = "OpenStreetMap",
66
+ basemap_args: Optional[Dict] = None,
57
67
  backend: Optional[str] = "folium",
58
68
  **kwargs,
59
69
  ):
@@ -76,6 +86,7 @@ def view_raster(
76
86
  array_args (Optional[Dict], optional): Additional arguments for array processing. Defaults to {}.
77
87
  client_args (Optional[Dict], optional): Additional arguments for the client. Defaults to {"cors_all": False}.
78
88
  basemap (Optional[str], optional): The basemap to use. Defaults to "OpenStreetMap".
89
+ basemap_args (Optional[Dict], optional): Additional arguments for the basemap. Defaults to None.
79
90
  **kwargs (Any): Additional keyword arguments.
80
91
 
81
92
  Returns:
@@ -87,7 +98,23 @@ def view_raster(
87
98
  else:
88
99
  import leafmap.leafmap as leafmap
89
100
 
90
- m = leafmap.Map(basemap=basemap)
101
+ if basemap_args is None:
102
+ basemap_args = {}
103
+
104
+ m = leafmap.Map()
105
+
106
+ if isinstance(basemap, str):
107
+ if basemap.lower().endswith(".tif"):
108
+ if basemap.lower().startswith("http"):
109
+ if "name" not in basemap_args:
110
+ basemap_args["name"] = "Basemap"
111
+ m.add_cog_layer(basemap, **basemap_args)
112
+ else:
113
+ if "layer_name" not in basemap_args:
114
+ basemap_args["layer_name"] = "Basemap"
115
+ m.add_raster(basemap, **basemap_args)
116
+ else:
117
+ m.add_basemap(basemap, **basemap_args)
91
118
 
92
119
  if isinstance(source, dict):
93
120
  source = dict_to_image(source)
@@ -493,6 +520,7 @@ def view_vector(
493
520
  title=None,
494
521
  legend=True,
495
522
  basemap=False,
523
+ basemap_type="streets",
496
524
  alpha=0.7,
497
525
  edge_color="black",
498
526
  classification="quantiles",
@@ -522,6 +550,8 @@ def view_vector(
522
550
  legend (bool, optional): Whether to display a legend. Defaults to True.
523
551
  basemap (bool, optional): Whether to add a web basemap. Requires contextily.
524
552
  Defaults to False.
553
+ basemap_type (str, optional): Type of basemap to use. Options: 'streets', 'satellite'.
554
+ Defaults to 'streets'.
525
555
  alpha (float, optional): Transparency of the vector features, between 0-1.
526
556
  Defaults to 0.7.
527
557
  edge_color (str, optional): Color for feature edges. Defaults to "black".
@@ -609,10 +639,13 @@ def view_vector(
609
639
  ax=ax, color=highlight_color, edgecolor="black", linewidth=2, zorder=5
610
640
  )
611
641
 
612
- # Add basemap if requested
613
642
  if basemap:
614
643
  try:
615
- ctx.add_basemap(ax, crs=gdf.crs, source=ctx.providers.OpenStreetMap.Mapnik)
644
+ basemap_options = {
645
+ "streets": ctx.providers.OpenStreetMap.Mapnik,
646
+ "satellite": ctx.providers.Esri.WorldImagery,
647
+ }
648
+ ctx.add_basemap(ax, crs=gdf.crs, source=basemap_options[basemap_type])
616
649
  except Exception as e:
617
650
  print(f"Could not add basemap: {e}")
618
651
 
@@ -719,7 +752,10 @@ def view_vector_interactive(
719
752
  kwargs["max_zoom"] = 30
720
753
 
721
754
  if isinstance(vector_data, str):
722
- vector_data = gpd.read_file(vector_data)
755
+ if vector_data.endswith(".parquet"):
756
+ vector_data = gpd.read_parquet(vector_data)
757
+ else:
758
+ vector_data = gpd.read_file(vector_data)
723
759
 
724
760
  # Check if input is a GeoDataFrame
725
761
  if not isinstance(vector_data, gpd.GeoDataFrame):
@@ -4976,3 +5012,808 @@ def add_geometric_properties(data, properties=None, area_unit="m2", length_unit=
4976
5012
  result["complexity"] = result.geometry.apply(calc_complexity)
4977
5013
 
4978
5014
  return result
5015
+
5016
+
5017
def orthogonalize(
    input_path: str,
    output_path: Optional[str] = None,
    epsilon: float = 0.2,
    min_area: float = 10,
    min_segments: int = 4,
    area_tolerance: float = 0.7,
    detect_triangles: bool = True,
) -> "gpd.GeoDataFrame":
    """
    Orthogonalizes object masks in a GeoTIFF file.

    This function reads the first band of a raster containing object masks
    (binary or labeled regions), converts every region with a value greater
    than zero to a vector polygon, snaps the edges of each polygon to the two
    perpendicular directions of its minimum-area bounding rectangle, and
    optionally writes the result to a vector file.
    The source code is adapted from the Solar Panel Detection algorithm by Esri.
    See https://www.arcgis.com/home/item.html?id=c2508d72f2614104bfcfd5ccf1429284.
    Credits to Esri for the original code.

    A polygon is kept unchanged whenever orthogonalization is skipped (too
    small, triangle-like), fails, produces an invalid polygon, or changes the
    exterior area by more than ``area_tolerance`` allows.

    Args:
        input_path (str): Path to the input GeoTIFF file.
        output_path (str, optional): Path to save the output vector file
            (e.g. GeoJSON), written with ``GeoDataFrame.to_file``. If None,
            no file is saved.
        epsilon (float, optional): Simplification tolerance for the
            Douglas-Peucker algorithm, in the units of the raster CRS.
            Higher values result in more simplification. Default is 0.2.
        min_area (float, optional): Minimum exterior area of polygons to
            process (smaller ones are kept as-is). Default is 10.
        min_segments (int, optional): Minimum number of points to keep after
            simplification. Default is 4 (for rectangular shapes).
        area_tolerance (float, optional): Allowed ratio of area change. The
            orthogonalized exterior is rejected when its area divided by the
            original exterior area falls outside
            ``[area_tolerance, 1 / area_tolerance]``. Default is 0.7.
        detect_triangles (bool, optional): If True, polygons whose simplified
            outline has three or fewer distinct vertices are kept as-is to
            avoid creating triangular shapes. Default is True.

    Returns:
        geopandas.GeoDataFrame: A GeoDataFrame containing the orthogonalized
        features, each with a ``value`` column holding the raster value.
    """

    def get_orientation(contour):
        """
        Return the rotation angle (degrees) of the contour's minimum-area rectangle.

        Args:
            contour (numpy.ndarray): Array of shape (n, 2) with point coordinates.

        Returns:
            float: Angle reported by ``cv2.minAreaRect``, in degrees.
        """
        # Orientation is translation invariant, so shift to a local origin
        # first: float32 then keeps sub-unit precision even for large projected
        # coordinates, and fractional/geographic coordinates are not truncated
        # the way an integer cast would truncate them.
        local = (contour - contour.min(axis=0)).astype(np.float32)
        _, _, angle = cv2.minAreaRect(local)
        return angle

    def point_line_distance(point, start, end):
        """
        Distance from ``point`` to the infinite line through ``start`` and ``end``.

        Falls back to the point-to-point distance when ``start == end`` (this
        is the case for the first split of a closed ring).
        """
        if np.all(np.equal(start, end)):
            return np.linalg.norm(point - start)

        line_vec = end - start
        point_vec = point - start
        # |a x b| is the parallelogram area; dividing by |a| gives the height.
        area = abs(line_vec[0] * point_vec[1] - line_vec[1] * point_vec[0])
        return area / np.linalg.norm(line_vec)

    def simplify(contour, eps=0.2):
        """
        Simplify a contour with the iterative Ramer-Douglas-Peucker algorithm.

        Args:
            contour (numpy.ndarray): Array of shape (n, 2) with point coordinates.
            eps (float, optional): Maximum allowed deviation of dropped points.

        Returns:
            numpy.ndarray: The retained points, in their original order.
        """
        keep = np.ones(len(contour), dtype=bool)
        pending = [(0, len(contour) - 1)]

        while pending:
            first, last = pending.pop()
            farthest, dmax = first, 0.0

            for i in range(first + 1, last):
                if keep[i]:
                    d = point_line_distance(contour[i], contour[first], contour[last])
                    if d > dmax:
                        farthest, dmax = i, d

            if dmax > eps:
                pending.append((first, farthest))
                pending.append((farthest, last))
            else:
                # Every point strictly between the two ends is within eps.
                keep[first + 1 : last] = False

        return contour[keep]

    def to_dataframe(ring):
        """
        Convert a ring to a DataFrame with one row per line segment.

        Args:
            ring (numpy.ndarray): Array of shape (n, 2) with point coordinates.

        Returns:
            pandas.DataFrame: Columns x1, y1, x2, y2, angle_atan (radians),
            angle_atan_deg, len, cx, cy.
        """
        df = pd.DataFrame(ring, columns=["x1", "y1"])
        df["x2"] = df["x1"].shift(-1)
        df["y2"] = df["y1"].shift(-1)
        # The last point has no successor, so its row is dropped here.
        df.dropna(inplace=True)
        dx = df["x2"] - df["x1"]
        dy = df["y2"] - df["y1"]
        df["angle_atan"] = np.arctan2(dy, dx)
        df["angle_atan_deg"] = np.degrees(df["angle_atan"])
        df["len"] = np.sqrt(dy**2 + dx**2)
        df["cx"] = (df["x2"] + df["x1"]) / 2.0
        df["cy"] = (df["y2"] + df["y1"]) / 2.0
        return df

    def add_orientation(df, angle):
        """
        Assign each segment the dominant direction it is closest to.

        Adds an ``angle`` column (radians) set to ``angle`` for segments that
        are roughly parallel to it and to ``angle + 90`` otherwise.

        Args:
            df (pandas.DataFrame): DataFrame with line segment information.
            angle (float): Orientation angle in degrees.

        Returns:
            None: Modifies the DataFrame in-place.
        """
        # Compare directions modulo 180 degrees so that anti-parallel segments
        # and segments on the other side of the +/-180 wrap-around are also
        # recognised as parallel. The result lies in [-90, 90).
        deviation = (df["angle_atan_deg"] - angle + 90.0) % 180.0 - 90.0
        is_parallel = deviation.abs() < 45.0
        df["angle"] = math.radians(angle)
        df["angle"] = df["angle"].where(is_parallel, math.radians(angle + 90))

    def align(df):
        """
        Rotate every segment about its midpoint onto its assigned direction.

        Args:
            df (pandas.DataFrame): DataFrame with line segment information.

        Returns:
            pandas.DataFrame: Copy of ``df`` with updated end points.
        """
        aligned = df.copy()
        if len(aligned) == 0:
            return aligned

        # Fill in any missing derived columns before using them.
        if "angle" not in aligned.columns or aligned["angle"].isna().any():
            aligned["angle"] = aligned["angle_atan"]

        if "len" not in aligned.columns or aligned["len"].isna().any():
            aligned["len"] = np.sqrt(
                (aligned["x2"] - aligned["x1"]) ** 2
                + (aligned["y2"] - aligned["y1"]) ** 2
            )

        if "cx" not in aligned.columns or aligned["cx"].isna().any():
            aligned["cx"] = (aligned["x1"] + aligned["x2"]) / 2.0

        if "cy" not in aligned.columns or aligned["cy"].isna().any():
            aligned["cy"] = (aligned["y1"] + aligned["y2"]) / 2.0

        # Read the angle from the copy: it may just have been filled in above.
        half_dx = (aligned["len"] / 2) * np.cos(aligned["angle"])
        half_dy = (aligned["len"] / 2) * np.sin(aligned["angle"])
        aligned["x1"] = aligned["cx"] - half_dx
        aligned["x2"] = aligned["cx"] + half_dx
        aligned["y1"] = aligned["cy"] - half_dy
        aligned["y2"] = aligned["cy"] + half_dy

        return aligned

    def segment_record(cx, cy, length, angle):
        """Build a segment row of the given length centred on (cx, cy)."""
        half_dx = (length / 2) * math.cos(angle)
        half_dy = (length / 2) * math.sin(angle)
        return {
            "x1": cx - half_dx,
            "y1": cy - half_dy,
            "x2": cx + half_dx,
            "y2": cy + half_dy,
            "len": length,
            "cx": cx,
            "cy": cy,
            "angle": angle,
        }

    def merge_lines(df_aligned):
        """
        Merge consecutive segments that share the same direction.

        Each run is replaced by one segment whose length is the summed length
        and whose centre is the length-weighted mean of the run's centres.

        Args:
            df_aligned (pandas.DataFrame): DataFrame with aligned line segments.

        Returns:
            pandas.DataFrame: DataFrame with merged line segments.
        """
        # A new run starts wherever the angle differs from the previous row.
        run_ids = (df_aligned["angle"].shift() != df_aligned["angle"]).cumsum()

        merged = []
        for _, run in df_aligned.groupby(run_ids):
            total = run["len"].sum()
            merged.append(
                segment_record(
                    (run["cx"] * run["len"]).sum() / total,
                    (run["cy"] * run["len"]).sum() / total,
                    total,
                    run["angle"].iloc[0],
                )
            )

        # The ring is cyclic: if the first and last runs point the same way
        # they are really one edge, so fold the first into the last.
        if len(merged) > 1 and merged[0]["angle"] == merged[-1]["angle"]:
            head, tail = merged[0], merged[-1]
            total = head["len"] + tail["len"]
            merged[-1] = segment_record(
                (head["cx"] * head["len"] + tail["cx"] * tail["len"]) / total,
                (head["cy"] * head["len"] + tail["cy"] * tail["len"]) / total,
                total,
                head["angle"],
            )
            merged = merged[1:]

        return pd.DataFrame(merged)

    def find_intersection(x1, y1, x2, y2, x3, y3, x4, y4):
        """
        Find the intersection point of the lines through two segments.

        Args:
            x1, y1, x2, y2: Coordinates of the first line segment.
            x3, y3, x4, y4: Coordinates of the second line segment.

        Returns:
            list: [x, y] coordinates of the intersection point.

        Raises:
            ZeroDivisionError: If the lines are parallel or collinear.
            ValueError: If the intersection lies too far from either segment.
        """
        denominator = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)

        if abs(denominator) < 1e-10:
            raise ZeroDivisionError("Lines are parallel or collinear")

        first_det = x1 * y2 - y1 * x2
        second_det = x3 * y4 - y3 * x4
        px = (first_det * (x3 - x4) - (x1 - x2) * second_det) / denominator
        py = (first_det * (y3 - y4) - (y1 - y2) * second_det) / denominator

        def near_segment(x, y, ax, ay, bx, by, tolerance=2.0):
            """Return whether (x, y) is acceptably close to segment a-b."""
            seg_len = np.sqrt((bx - ax) ** 2 + (by - ay) ** 2)
            if seg_len < 1e-10:
                # Degenerate segment: compare against its single point.
                return np.sqrt((x - ax) ** 2 + (y - ay) ** 2) <= tolerance

            # Position of the projection along the segment (0 = a, 1 = b).
            t = ((x - ax) * (bx - ax) + (y - ay) * (by - ay)) / (seg_len**2)

            if t < -tolerance or t > 1 + tolerance:
                # Projection is far beyond the ends: fall back to the
                # distance to the nearest end point.
                to_start = np.sqrt((x - ax) ** 2 + (y - ay) ** 2)
                to_end = np.sqrt((x - bx) ** 2 + (y - by) ** 2)
                return min(to_start, to_end) <= tolerance * 2

            proj_x = ax + t * (bx - ax)
            proj_y = ay + t * (by - ay)
            return np.sqrt((x - proj_x) ** 2 + (y - proj_y) ** 2) <= tolerance

        if not (
            near_segment(px, py, x1, y1, x2, y2)
            and near_segment(px, py, x3, y3, x4, y4)
        ):
            raise ValueError("Intersection point too far from line segments")

        return [px, py]

    def join_ring(merged_df):
        """
        Join line segments to form a closed ring.

        Each corner is the intersection of a segment with its successor (the
        last segment is paired with the first). When no usable intersection
        exists, the end point of the first segment of the pair is used.

        Args:
            merged_df (pandas.DataFrame): DataFrame with merged line segments.

        Returns:
            numpy.ndarray: Array of shape (1, n, 2) containing the ring
            coordinates, or an empty array when fewer than 3 segments exist.
        """
        if len(merged_df) < 3:
            # Not enough segments to form a valid polygon.
            return np.array([[]])

        segments = merged_df[["x1", "y1", "x2", "y2"]].to_numpy()
        count = len(segments)
        ring = []

        for i in range(count):
            x1, y1, x2, y2 = segments[i]
            x3, y3, x4, y4 = segments[(i + 1) % count]
            corner = [x2, y2]

            try:
                px, py = find_intersection(x1, y1, x2, y2, x3, y3, x4, y4)
            except (ZeroDivisionError, ValueError):
                # Parallel or far-apart segments: keep the segment end point.
                pass
            else:
                to_first = min(
                    math.hypot(px - x1, py - y1), math.hypot(px - x2, py - y2)
                )
                to_second = min(
                    math.hypot(px - x3, py - y3), math.hypot(px - x4, py - y4)
                )
                # Reject corners farther than half the longer segment from
                # either segment's end points, to avoid over-extending edges.
                limit = 0.5 * max(
                    math.hypot(x2 - x1, y2 - y1), math.hypot(x4 - x3, y4 - y3)
                )
                if not (to_first > limit or to_second > limit):
                    corner = [px, py]

            ring.append(corner)

        # Ensure the ring is closed.
        if ring[0][0] != ring[-1][0] or ring[0][1] != ring[-1][1]:
            ring.append(ring[0])

        return np.array([ring])

    def orthogonalize_ring(ring, epsilon=0.2, min_segments=4):
        """
        Orthogonalizes a ring (list of coordinates).

        Args:
            ring (list): List of (x, y) coordinates forming a ring.
            epsilon (float, optional): Simplification tolerance.
            min_segments (int, optional): Minimum number of points to keep.

        Returns:
            list: Orthogonalized list of coordinates, or ``ring`` itself when
            no result with more than three edges could be produced.
        """
        if len(ring) <= 3:
            return ring

        points = np.array(ring)
        angle = get_orientation(points)

        simplified = simplify(points, eps=epsilon)
        if len(simplified) < min_segments:
            # Simplified too much: retry with progressively smaller epsilon.
            for factor in (0.75, 0.5, 0.25, 0.1):
                candidate = simplify(points, eps=epsilon * factor)
                if len(candidate) >= min_segments:
                    simplified = candidate
                    break

        df = to_dataframe(simplified)
        add_orientation(df, angle)
        df = merge_lines(align(df))

        # Three or fewer edges would give a triangle or a degenerate shape.
        if len(df) <= 3:
            return ring

        joined = join_ring(df)
        if len(joined) == 0 or len(joined[0]) <= 3:
            return ring

        result = joined[0].tolist()
        if result[0] != result[-1]:
            result.append(result[0])

        return result

    def make_feature(geometry, value):
        """Wrap a GeoJSON geometry and its raster value as a GeoJSON feature."""
        return {
            "type": "Feature",
            "properties": {"value": int(value)},
            "geometry": geometry,
        }

    def orthogonalize_geometry(geom, polygon):
        """
        Orthogonalize one GeoJSON polygon geometry.

        Args:
            geom (dict): GeoJSON polygon geometry from ``features.shapes``.
            polygon (shapely.geometry.Polygon): Polygon built from the
                exterior ring of ``geom``.

        Returns:
            dict: The orthogonalized geometry, or ``geom`` when the result is
            invalid or its exterior area changed too much.
        """
        minx, miny, maxx, maxy = polygon.bounds
        width = maxx - minx
        height = maxy - miny
        aspect_ratio = max(width, height) / max(1.0, min(width, height))

        ring_epsilon = epsilon
        ring_min_segments = min_segments
        if aspect_ratio < 3.0:
            # Compact (building-like) shapes: simplify more conservatively
            # and insist on at least 4 points for a proper rectangle.
            ring_epsilon = epsilon * 0.75
            ring_min_segments = max(4, min_segments)

        exterior = orthogonalize_ring(
            list(polygon.exterior.coords),
            epsilon=ring_epsilon,
            min_segments=ring_min_segments,
        )
        # Holes come from the GeoJSON geometry itself: ``polygon`` is built
        # from the exterior ring only and therefore never has interiors.
        interiors = [
            orthogonalize_ring(
                list(hole),
                epsilon=ring_epsilon,
                min_segments=ring_min_segments,
            )
            for hole in geom["coordinates"][1:]
        ]

        candidate = Polygon(exterior)
        if not candidate.is_valid:
            return geom

        original_area = polygon.area
        area_ratio = candidate.area / original_area if original_area > 0 else 0
        if area_ratio < area_tolerance or area_ratio > (1.0 / area_tolerance):
            return geom

        # Orthogonalized holes may touch or cross the new exterior; keep the
        # original geometry rather than emit an invalid polygon.
        if interiors and not Polygon(exterior, interiors).is_valid:
            return geom

        return {"type": "Polygon", "coordinates": [exterior, *interiors]}

    # Read the first band (assumed to contain the mask) and its georeferencing.
    with rasterio.open(input_path) as src:
        mask = src.read(1)
        transform = src.transform
        crs = src.crs

    features_list = []
    for geom, value in features.shapes(mask, transform=transform):
        if not (value > 0):  # Only process non-zero values (actual objects)
            continue

        polygon = Polygon(geom["coordinates"][0])

        # Tiny polygons are kept as-is.
        if polygon.area < min_area:
            features_list.append(make_feature(geom, value))
            continue

        # Triangle-like outlines (3 points + closing point) are kept as-is.
        if detect_triangles:
            simple_polygon = polygon.simplify(epsilon)
            if len(simple_polygon.exterior.coords) <= 4:
                features_list.append(make_feature(geom, value))
                continue

        try:
            geometry = orthogonalize_geometry(geom, polygon)
        except Exception:
            # Deliberate best-effort: keep the original shape if
            # orthogonalization fails for any reason.
            geometry = geom

        features_list.append(make_feature(geometry, value))

    # Convert to GeoDataFrame and set the CRS
    gdf = gpd.GeoDataFrame.from_features(features_list, crs=crs)

    # Save to file if output_path is provided
    if output_path:
        gdf.to_file(output_path)

    return gdf