geoai-py 0.3.5__py2.py3-none-any.whl → 0.4.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoai/__init__.py +1 -1
- geoai/download.py +9 -8
- geoai/extract.py +158 -38
- geoai/geoai.py +4 -1
- geoai/hf.py +447 -0
- geoai/segment.py +306 -0
- geoai/segmentation.py +8 -7
- geoai/train.py +1039 -0
- geoai/utils.py +863 -25
- {geoai_py-0.3.5.dist-info → geoai_py-0.4.0.dist-info}/METADATA +5 -1
- geoai_py-0.4.0.dist-info/RECORD +15 -0
- {geoai_py-0.3.5.dist-info → geoai_py-0.4.0.dist-info}/WHEEL +1 -1
- geoai/preprocess.py +0 -3021
- geoai_py-0.3.5.dist-info/RECORD +0 -13
- {geoai_py-0.3.5.dist-info → geoai_py-0.4.0.dist-info}/LICENSE +0 -0
- {geoai_py-0.3.5.dist-info → geoai_py-0.4.0.dist-info}/entry_points.txt +0 -0
- {geoai_py-0.3.5.dist-info → geoai_py-0.4.0.dist-info}/top_level.txt +0 -0
geoai/utils.py
CHANGED
|
@@ -1,34 +1,36 @@
|
|
|
1
1
|
"""The utils module contains common functions and classes used by the other modules."""
|
|
2
2
|
|
|
3
|
+
# Standard Library
|
|
3
4
|
import json
|
|
4
5
|
import math
|
|
5
6
|
import os
|
|
6
|
-
from collections.abc import Iterable
|
|
7
|
-
from PIL import Image
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
import requests
|
|
10
7
|
import warnings
|
|
11
8
|
import xml.etree.ElementTree as ET
|
|
9
|
+
from collections.abc import Iterable
|
|
10
|
+
from pathlib import Path
|
|
12
11
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
13
|
-
|
|
14
|
-
|
|
12
|
+
|
|
13
|
+
# Third-Party Libraries
|
|
14
|
+
import cv2
|
|
15
15
|
import geopandas as gpd
|
|
16
16
|
import leafmap
|
|
17
|
+
import matplotlib.pyplot as plt
|
|
17
18
|
import numpy as np
|
|
18
19
|
import pandas as pd
|
|
19
|
-
import xarray as xr
|
|
20
|
-
import rioxarray as rxr
|
|
21
20
|
import rasterio
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
import requests
|
|
22
|
+
import rioxarray as rxr
|
|
23
|
+
import torch
|
|
24
|
+
import torchgeo
|
|
25
|
+
import xarray as xr
|
|
26
|
+
from PIL import Image
|
|
24
27
|
from rasterio import features
|
|
25
28
|
from rasterio.plot import show
|
|
26
|
-
from
|
|
29
|
+
from rasterio.windows import Window
|
|
27
30
|
from shapely.affinity import rotate
|
|
31
|
+
from shapely.geometry import MultiPolygon, Polygon, box, mapping, shape
|
|
32
|
+
from torchvision.transforms import RandomRotation
|
|
28
33
|
from tqdm import tqdm
|
|
29
|
-
import torch
|
|
30
|
-
import torchgeo
|
|
31
|
-
import cv2
|
|
32
34
|
|
|
33
35
|
try:
|
|
34
36
|
from torchgeo.datasets import RasterDataset, unbind_samples
|
|
@@ -54,6 +56,7 @@ def view_raster(
|
|
|
54
56
|
array_args: Optional[Dict] = {},
|
|
55
57
|
client_args: Optional[Dict] = {"cors_all": False},
|
|
56
58
|
basemap: Optional[str] = "OpenStreetMap",
|
|
59
|
+
basemap_args: Optional[Dict] = None,
|
|
57
60
|
backend: Optional[str] = "folium",
|
|
58
61
|
**kwargs,
|
|
59
62
|
):
|
|
@@ -76,6 +79,7 @@ def view_raster(
|
|
|
76
79
|
array_args (Optional[Dict], optional): Additional arguments for array processing. Defaults to {}.
|
|
77
80
|
client_args (Optional[Dict], optional): Additional arguments for the client. Defaults to {"cors_all": False}.
|
|
78
81
|
basemap (Optional[str], optional): The basemap to use. Defaults to "OpenStreetMap".
|
|
82
|
+
basemap_args (Optional[Dict], optional): Additional arguments for the basemap. Defaults to None.
|
|
79
83
|
**kwargs (Any): Additional keyword arguments.
|
|
80
84
|
|
|
81
85
|
Returns:
|
|
@@ -87,7 +91,23 @@ def view_raster(
|
|
|
87
91
|
else:
|
|
88
92
|
import leafmap.leafmap as leafmap
|
|
89
93
|
|
|
90
|
-
|
|
94
|
+
if basemap_args is None:
|
|
95
|
+
basemap_args = {}
|
|
96
|
+
|
|
97
|
+
m = leafmap.Map()
|
|
98
|
+
|
|
99
|
+
if isinstance(basemap, str):
|
|
100
|
+
if basemap.lower().endswith(".tif"):
|
|
101
|
+
if basemap.lower().startswith("http"):
|
|
102
|
+
if "name" not in basemap_args:
|
|
103
|
+
basemap_args["name"] = "Basemap"
|
|
104
|
+
m.add_cog_layer(basemap, **basemap_args)
|
|
105
|
+
else:
|
|
106
|
+
if "layer_name" not in basemap_args:
|
|
107
|
+
basemap_args["layer_name"] = "Basemap"
|
|
108
|
+
m.add_raster(basemap, **basemap_args)
|
|
109
|
+
else:
|
|
110
|
+
m.add_basemap(basemap, **basemap_args)
|
|
91
111
|
|
|
92
112
|
if isinstance(source, dict):
|
|
93
113
|
source = dict_to_image(source)
|
|
@@ -493,6 +513,7 @@ def view_vector(
|
|
|
493
513
|
title=None,
|
|
494
514
|
legend=True,
|
|
495
515
|
basemap=False,
|
|
516
|
+
basemap_type="streets",
|
|
496
517
|
alpha=0.7,
|
|
497
518
|
edge_color="black",
|
|
498
519
|
classification="quantiles",
|
|
@@ -522,6 +543,8 @@ def view_vector(
|
|
|
522
543
|
legend (bool, optional): Whether to display a legend. Defaults to True.
|
|
523
544
|
basemap (bool, optional): Whether to add a web basemap. Requires contextily.
|
|
524
545
|
Defaults to False.
|
|
546
|
+
basemap_type (str, optional): Type of basemap to use. Options: 'streets', 'satellite'.
|
|
547
|
+
Defaults to 'streets'.
|
|
525
548
|
alpha (float, optional): Transparency of the vector features, between 0-1.
|
|
526
549
|
Defaults to 0.7.
|
|
527
550
|
edge_color (str, optional): Color for feature edges. Defaults to "black".
|
|
@@ -609,10 +632,13 @@ def view_vector(
|
|
|
609
632
|
ax=ax, color=highlight_color, edgecolor="black", linewidth=2, zorder=5
|
|
610
633
|
)
|
|
611
634
|
|
|
612
|
-
# Add basemap if requested
|
|
613
635
|
if basemap:
|
|
614
636
|
try:
|
|
615
|
-
|
|
637
|
+
basemap_options = {
|
|
638
|
+
"streets": ctx.providers.OpenStreetMap.Mapnik,
|
|
639
|
+
"satellite": ctx.providers.Esri.WorldImagery,
|
|
640
|
+
}
|
|
641
|
+
ctx.add_basemap(ax, crs=gdf.crs, source=basemap_options[basemap_type])
|
|
616
642
|
except Exception as e:
|
|
617
643
|
print(f"Could not add basemap: {e}")
|
|
618
644
|
|
|
@@ -667,8 +693,8 @@ def view_vector_interactive(
|
|
|
667
693
|
"""
|
|
668
694
|
import folium
|
|
669
695
|
import folium.plugins as plugins
|
|
670
|
-
from localtileserver import get_folium_tile_layer, TileClient
|
|
671
696
|
from leafmap import cog_tile
|
|
697
|
+
from localtileserver import TileClient, get_folium_tile_layer
|
|
672
698
|
|
|
673
699
|
google_tiles = {
|
|
674
700
|
"Roadmap": {
|
|
@@ -719,7 +745,10 @@ def view_vector_interactive(
|
|
|
719
745
|
kwargs["max_zoom"] = 30
|
|
720
746
|
|
|
721
747
|
if isinstance(vector_data, str):
|
|
722
|
-
vector_data
|
|
748
|
+
if vector_data.endswith(".parquet"):
|
|
749
|
+
vector_data = gpd.read_parquet(vector_data)
|
|
750
|
+
else:
|
|
751
|
+
vector_data = gpd.read_file(vector_data)
|
|
723
752
|
|
|
724
753
|
# Check if input is a GeoDataFrame
|
|
725
754
|
if not isinstance(vector_data, gpd.GeoDataFrame):
|
|
@@ -767,9 +796,9 @@ def regularization(
|
|
|
767
796
|
Returns:
|
|
768
797
|
GeoDataFrame or list of shapely Polygons with regularized building footprints
|
|
769
798
|
"""
|
|
770
|
-
from shapely.geometry import Polygon, shape
|
|
771
|
-
from shapely.affinity import rotate, translate
|
|
772
799
|
from shapely import wkt
|
|
800
|
+
from shapely.affinity import rotate, translate
|
|
801
|
+
from shapely.geometry import Polygon, shape
|
|
773
802
|
|
|
774
803
|
regularized_buildings = []
|
|
775
804
|
|
|
@@ -888,8 +917,8 @@ def hybrid_regularization(building_polygons):
|
|
|
888
917
|
Returns:
|
|
889
918
|
GeoDataFrame or list of shapely Polygons with regularized building footprints
|
|
890
919
|
"""
|
|
891
|
-
from shapely.geometry import Polygon
|
|
892
920
|
from shapely.affinity import rotate
|
|
921
|
+
from shapely.geometry import Polygon
|
|
893
922
|
|
|
894
923
|
# Use minimum_rotated_rectangle instead of oriented_envelope
|
|
895
924
|
try:
|
|
@@ -1003,8 +1032,8 @@ def adaptive_regularization(
|
|
|
1003
1032
|
Returns:
|
|
1004
1033
|
GeoDataFrame or list of shapely Polygons with regularized building footprints
|
|
1005
1034
|
"""
|
|
1006
|
-
from shapely.geometry import Polygon
|
|
1007
1035
|
from shapely.affinity import rotate
|
|
1036
|
+
from shapely.geometry import Polygon
|
|
1008
1037
|
|
|
1009
1038
|
# Analyze the overall dataset to set appropriate parameters
|
|
1010
1039
|
if is_gdf := isinstance(building_polygons, gpd.GeoDataFrame):
|
|
@@ -2558,6 +2587,8 @@ def export_geotiff_tiles(
|
|
|
2558
2587
|
print(f"\nRaster info for {in_raster}:")
|
|
2559
2588
|
print(f" CRS: {src.crs}")
|
|
2560
2589
|
print(f" Dimensions: {src.width} x {src.height}")
|
|
2590
|
+
print(f" Resolution: {src.res}")
|
|
2591
|
+
print(f" Bands: {src.count}")
|
|
2561
2592
|
print(f" Bounds: {src.bounds}")
|
|
2562
2593
|
|
|
2563
2594
|
# Calculate number of tiles
|
|
@@ -4239,9 +4270,10 @@ def read_vector(source, layer=None, **kwargs):
|
|
|
4239
4270
|
>>> gdf = read_vector("path/to/data.gpkg", layer="layer_name")
|
|
4240
4271
|
"""
|
|
4241
4272
|
|
|
4242
|
-
import fiona
|
|
4243
4273
|
import urllib.parse
|
|
4244
4274
|
|
|
4275
|
+
import fiona
|
|
4276
|
+
|
|
4245
4277
|
# Determine if source is a URL or local file
|
|
4246
4278
|
parsed_url = urllib.parse.urlparse(source)
|
|
4247
4279
|
is_url = parsed_url.scheme in ["http", "https"]
|
|
@@ -4315,6 +4347,7 @@ def read_raster(source, band=None, masked=True, **kwargs):
|
|
|
4315
4347
|
>>> raster = read_raster("path/to/data.tif", masked=False)
|
|
4316
4348
|
"""
|
|
4317
4349
|
import urllib.parse
|
|
4350
|
+
|
|
4318
4351
|
from rasterio.errors import RasterioIOError
|
|
4319
4352
|
|
|
4320
4353
|
# Determine if source is a URL or local file
|
|
@@ -4409,8 +4442,8 @@ def region_groups(
|
|
|
4409
4442
|
Returns:
|
|
4410
4443
|
Union[Tuple[np.ndarray, pd.DataFrame], Tuple[xr.DataArray, pd.DataFrame]]: Labeled image and properties DataFrame.
|
|
4411
4444
|
"""
|
|
4412
|
-
from skimage import measure
|
|
4413
4445
|
import scipy.ndimage as ndi
|
|
4446
|
+
from skimage import measure
|
|
4414
4447
|
|
|
4415
4448
|
if isinstance(image, str):
|
|
4416
4449
|
ds = rxr.open_rasterio(image)
|
|
@@ -4976,3 +5009,808 @@ def add_geometric_properties(data, properties=None, area_unit="m2", length_unit=
|
|
|
4976
5009
|
result["complexity"] = result.geometry.apply(calc_complexity)
|
|
4977
5010
|
|
|
4978
5011
|
return result
|
|
5012
|
+
|
|
5013
|
+
|
|
5014
|
+
def orthogonalize(
|
|
5015
|
+
input_path,
|
|
5016
|
+
output_path=None,
|
|
5017
|
+
epsilon=0.2,
|
|
5018
|
+
min_area=10,
|
|
5019
|
+
min_segments=4,
|
|
5020
|
+
area_tolerance=0.7,
|
|
5021
|
+
detect_triangles=True,
|
|
5022
|
+
):
|
|
5023
|
+
"""
|
|
5024
|
+
Orthogonalizes object masks in a GeoTIFF file.
|
|
5025
|
+
|
|
5026
|
+
This function reads a GeoTIFF containing object masks (binary or labeled regions),
|
|
5027
|
+
converts the raster masks to vector polygons, applies orthogonalization to each polygon,
|
|
5028
|
+
and optionally writes the result to a GeoJSON file.
|
|
5029
|
+
The source code is adapted from the Solar Panel Detection algorithm by Esri.
|
|
5030
|
+
See https://www.arcgis.com/home/item.html?id=c2508d72f2614104bfcfd5ccf1429284.
|
|
5031
|
+
Credits to Esri for the original code.
|
|
5032
|
+
|
|
5033
|
+
Args:
|
|
5034
|
+
input_path (str): Path to the input GeoTIFF file.
|
|
5035
|
+
output_path (str, optional): Path to save the output GeoJSON file. If None, no file is saved.
|
|
5036
|
+
epsilon (float, optional): Simplification tolerance for the Douglas-Peucker algorithm.
|
|
5037
|
+
Higher values result in more simplification. Default is 0.2.
|
|
5038
|
+
min_area (float, optional): Minimum area of polygons to process (smaller ones are kept as-is).
|
|
5039
|
+
min_segments (int, optional): Minimum number of segments to keep after simplification.
|
|
5040
|
+
Default is 4 (for rectangular shapes).
|
|
5041
|
+
area_tolerance (float, optional): Allowed ratio of area change. Values less than 1.0 restrict
|
|
5042
|
+
area change. Default is 0.7 (allows reduction to 70% of original area).
|
|
5043
|
+
detect_triangles (bool, optional): If True, performs additional check to avoid creating triangular shapes.
|
|
5044
|
+
|
|
5045
|
+
Returns:
|
|
5046
|
+
geopandas.GeoDataFrame: A GeoDataFrame containing the orthogonalized features.
|
|
5047
|
+
"""
|
|
5048
|
+
|
|
5049
|
+
from functools import partial
|
|
5050
|
+
|
|
5051
|
+
def orthogonalize_ring(ring, epsilon=0.2, min_segments=4):
|
|
5052
|
+
"""
|
|
5053
|
+
Orthogonalizes a ring (list of coordinates).
|
|
5054
|
+
|
|
5055
|
+
Args:
|
|
5056
|
+
ring (list): List of [x, y] coordinates forming a ring
|
|
5057
|
+
epsilon (float, optional): Simplification tolerance
|
|
5058
|
+
min_segments (int, optional): Minimum number of segments to keep
|
|
5059
|
+
|
|
5060
|
+
Returns:
|
|
5061
|
+
list: Orthogonalized list of coordinates
|
|
5062
|
+
"""
|
|
5063
|
+
if len(ring) <= 3:
|
|
5064
|
+
return ring
|
|
5065
|
+
|
|
5066
|
+
# Convert to numpy array
|
|
5067
|
+
ring_arr = np.array(ring)
|
|
5068
|
+
|
|
5069
|
+
# Get orientation
|
|
5070
|
+
angle = math.degrees(get_orientation(ring_arr))
|
|
5071
|
+
|
|
5072
|
+
# Simplify using Ramer-Douglas-Peucker algorithm
|
|
5073
|
+
ring_arr = simplify(ring_arr, eps=epsilon)
|
|
5074
|
+
|
|
5075
|
+
# If simplified too much, adjust epsilon to maintain minimum segments
|
|
5076
|
+
if len(ring_arr) < min_segments:
|
|
5077
|
+
# Try with smaller epsilon until we get at least min_segments points
|
|
5078
|
+
for adjust_factor in [0.75, 0.5, 0.25, 0.1]:
|
|
5079
|
+
test_arr = simplify(np.array(ring), eps=epsilon * adjust_factor)
|
|
5080
|
+
if len(test_arr) >= min_segments:
|
|
5081
|
+
ring_arr = test_arr
|
|
5082
|
+
break
|
|
5083
|
+
|
|
5084
|
+
# Convert to dataframe for processing
|
|
5085
|
+
df = to_dataframe(ring_arr)
|
|
5086
|
+
|
|
5087
|
+
# Add orientation information
|
|
5088
|
+
add_orientation(df, angle)
|
|
5089
|
+
|
|
5090
|
+
# Align segments to orthogonal directions
|
|
5091
|
+
df = align(df)
|
|
5092
|
+
|
|
5093
|
+
# Merge collinear line segments
|
|
5094
|
+
df = merge_lines(df)
|
|
5095
|
+
|
|
5096
|
+
if len(df) == 0:
|
|
5097
|
+
return ring
|
|
5098
|
+
|
|
5099
|
+
# If we have a triangle-like result (3 segments), return the original shape
|
|
5100
|
+
if len(df) <= 3:
|
|
5101
|
+
return ring
|
|
5102
|
+
|
|
5103
|
+
# Join the orthogonalized segments back into a ring
|
|
5104
|
+
joined_ring = join_ring(df)
|
|
5105
|
+
|
|
5106
|
+
# If the join operation didn't produce a valid ring, return the original
|
|
5107
|
+
if len(joined_ring) == 0 or len(joined_ring[0]) < 3:
|
|
5108
|
+
return ring
|
|
5109
|
+
|
|
5110
|
+
# Basic validation: if result has 3 or fewer points (triangle), use original
|
|
5111
|
+
if len(joined_ring[0]) <= 3:
|
|
5112
|
+
return ring
|
|
5113
|
+
|
|
5114
|
+
# Convert back to a list and ensure it's closed
|
|
5115
|
+
result = joined_ring[0].tolist()
|
|
5116
|
+
if len(result) > 0 and (result[0] != result[-1]):
|
|
5117
|
+
result.append(result[0])
|
|
5118
|
+
|
|
5119
|
+
return result
|
|
5120
|
+
|
|
5121
|
+
def vectorize_mask(mask, transform):
|
|
5122
|
+
"""
|
|
5123
|
+
Converts a binary mask to vector polygons.
|
|
5124
|
+
|
|
5125
|
+
Args:
|
|
5126
|
+
mask (numpy.ndarray): Binary mask where non-zero values represent objects
|
|
5127
|
+
transform (rasterio.transform.Affine): Affine transformation matrix
|
|
5128
|
+
|
|
5129
|
+
Returns:
|
|
5130
|
+
list: List of GeoJSON features
|
|
5131
|
+
"""
|
|
5132
|
+
shapes = features.shapes(mask, transform=transform)
|
|
5133
|
+
features_list = []
|
|
5134
|
+
|
|
5135
|
+
for shape, value in shapes:
|
|
5136
|
+
if value > 0: # Only process non-zero values (actual objects)
|
|
5137
|
+
features_list.append(
|
|
5138
|
+
{
|
|
5139
|
+
"type": "Feature",
|
|
5140
|
+
"properties": {"value": int(value)},
|
|
5141
|
+
"geometry": shape,
|
|
5142
|
+
}
|
|
5143
|
+
)
|
|
5144
|
+
|
|
5145
|
+
return features_list
|
|
5146
|
+
|
|
5147
|
+
def rasterize_features(features, shape, transform, dtype=np.uint8):
|
|
5148
|
+
"""
|
|
5149
|
+
Converts vector features back to a raster mask.
|
|
5150
|
+
|
|
5151
|
+
Args:
|
|
5152
|
+
features (list): List of GeoJSON features
|
|
5153
|
+
shape (tuple): Shape of the output raster (height, width)
|
|
5154
|
+
transform (rasterio.transform.Affine): Affine transformation matrix
|
|
5155
|
+
dtype (numpy.dtype, optional): Data type of the output raster
|
|
5156
|
+
|
|
5157
|
+
Returns:
|
|
5158
|
+
numpy.ndarray: Rasterized mask
|
|
5159
|
+
"""
|
|
5160
|
+
mask = features.rasterize(
|
|
5161
|
+
[
|
|
5162
|
+
(feature["geometry"], feature["properties"]["value"])
|
|
5163
|
+
for feature in features
|
|
5164
|
+
],
|
|
5165
|
+
out_shape=shape,
|
|
5166
|
+
transform=transform,
|
|
5167
|
+
fill=0,
|
|
5168
|
+
dtype=dtype,
|
|
5169
|
+
)
|
|
5170
|
+
|
|
5171
|
+
return mask
|
|
5172
|
+
|
|
5173
|
+
# The following helper functions are from the original code
|
|
5174
|
+
def get_orientation(contour):
|
|
5175
|
+
"""
|
|
5176
|
+
Calculate the orientation angle of a contour.
|
|
5177
|
+
|
|
5178
|
+
Args:
|
|
5179
|
+
contour (numpy.ndarray): Array of shape (n, 2) containing point coordinates
|
|
5180
|
+
|
|
5181
|
+
Returns:
|
|
5182
|
+
float: Orientation angle in radians
|
|
5183
|
+
"""
|
|
5184
|
+
box = cv2.minAreaRect(contour.astype(int))
|
|
5185
|
+
(cx, cy), (w, h), angle = box
|
|
5186
|
+
return math.radians(angle)
|
|
5187
|
+
|
|
5188
|
+
def simplify(contour, eps=0.2):
|
|
5189
|
+
"""
|
|
5190
|
+
Simplify a contour using the Ramer-Douglas-Peucker algorithm.
|
|
5191
|
+
|
|
5192
|
+
Args:
|
|
5193
|
+
contour (numpy.ndarray): Array of shape (n, 2) containing point coordinates
|
|
5194
|
+
eps (float, optional): Epsilon value for simplification
|
|
5195
|
+
|
|
5196
|
+
Returns:
|
|
5197
|
+
numpy.ndarray: Simplified contour
|
|
5198
|
+
"""
|
|
5199
|
+
return rdp(contour, epsilon=eps)
|
|
5200
|
+
|
|
5201
|
+
def to_dataframe(ring):
|
|
5202
|
+
"""
|
|
5203
|
+
Convert a ring to a pandas DataFrame with line segment information.
|
|
5204
|
+
|
|
5205
|
+
Args:
|
|
5206
|
+
ring (numpy.ndarray): Array of shape (n, 2) containing point coordinates
|
|
5207
|
+
|
|
5208
|
+
Returns:
|
|
5209
|
+
pandas.DataFrame: DataFrame with line segment information
|
|
5210
|
+
"""
|
|
5211
|
+
df = pd.DataFrame(ring, columns=["x1", "y1"])
|
|
5212
|
+
df["x2"] = df["x1"].shift(-1)
|
|
5213
|
+
df["y2"] = df["y1"].shift(-1)
|
|
5214
|
+
df.dropna(inplace=True)
|
|
5215
|
+
df["angle_atan"] = np.arctan2((df["y2"] - df["y1"]), (df["x2"] - df["x1"]))
|
|
5216
|
+
df["angle_atan_deg"] = df["angle_atan"] * 57.2958
|
|
5217
|
+
df["len"] = np.sqrt((df["y2"] - df["y1"]) ** 2 + (df["x2"] - df["x1"]) ** 2)
|
|
5218
|
+
df["cx"] = (df["x2"] + df["x1"]) / 2.0
|
|
5219
|
+
df["cy"] = (df["y2"] + df["y1"]) / 2.0
|
|
5220
|
+
return df
|
|
5221
|
+
|
|
5222
|
+
def add_orientation(df, angle):
|
|
5223
|
+
"""
|
|
5224
|
+
Add orientation information to the DataFrame.
|
|
5225
|
+
|
|
5226
|
+
Args:
|
|
5227
|
+
df (pandas.DataFrame): DataFrame with line segment information
|
|
5228
|
+
angle (float): Orientation angle in degrees
|
|
5229
|
+
|
|
5230
|
+
Returns:
|
|
5231
|
+
None: Modifies the DataFrame in-place
|
|
5232
|
+
"""
|
|
5233
|
+
rtangle = angle + 90
|
|
5234
|
+
is_parallel = (
|
|
5235
|
+
(df["angle_atan_deg"] > (angle - 45))
|
|
5236
|
+
& (df["angle_atan_deg"] < (angle + 45))
|
|
5237
|
+
) | (
|
|
5238
|
+
(df["angle_atan_deg"] + 180 > (angle - 45))
|
|
5239
|
+
& (df["angle_atan_deg"] + 180 < (angle + 45))
|
|
5240
|
+
)
|
|
5241
|
+
df["angle"] = math.radians(angle)
|
|
5242
|
+
df["angle"] = df["angle"].where(is_parallel, math.radians(rtangle))
|
|
5243
|
+
|
|
5244
|
+
def align(df):
|
|
5245
|
+
"""
|
|
5246
|
+
Align line segments to their nearest orthogonal direction.
|
|
5247
|
+
|
|
5248
|
+
Args:
|
|
5249
|
+
df (pandas.DataFrame): DataFrame with line segment information
|
|
5250
|
+
|
|
5251
|
+
Returns:
|
|
5252
|
+
pandas.DataFrame: DataFrame with aligned line segments
|
|
5253
|
+
"""
|
|
5254
|
+
# Handle edge case with empty dataframe
|
|
5255
|
+
if len(df) == 0:
|
|
5256
|
+
return df.copy()
|
|
5257
|
+
|
|
5258
|
+
df_clone = df.copy()
|
|
5259
|
+
|
|
5260
|
+
# Ensure angle column exists and has valid values
|
|
5261
|
+
if "angle" not in df_clone.columns or df_clone["angle"].isna().any():
|
|
5262
|
+
# If angle data is missing, add default angles based on atan2
|
|
5263
|
+
df_clone["angle"] = df_clone["angle_atan"]
|
|
5264
|
+
|
|
5265
|
+
# Ensure length and center point data is valid
|
|
5266
|
+
if "len" not in df_clone.columns or df_clone["len"].isna().any():
|
|
5267
|
+
# Recalculate lengths if missing
|
|
5268
|
+
df_clone["len"] = np.sqrt(
|
|
5269
|
+
(df_clone["x2"] - df_clone["x1"]) ** 2
|
|
5270
|
+
+ (df_clone["y2"] - df_clone["y1"]) ** 2
|
|
5271
|
+
)
|
|
5272
|
+
|
|
5273
|
+
if "cx" not in df_clone.columns or df_clone["cx"].isna().any():
|
|
5274
|
+
df_clone["cx"] = (df_clone["x1"] + df_clone["x2"]) / 2.0
|
|
5275
|
+
|
|
5276
|
+
if "cy" not in df_clone.columns or df_clone["cy"].isna().any():
|
|
5277
|
+
df_clone["cy"] = (df_clone["y1"] + df_clone["y2"]) / 2.0
|
|
5278
|
+
|
|
5279
|
+
# Apply orthogonal alignment
|
|
5280
|
+
df_clone["x1"] = df_clone["cx"] - ((df_clone["len"] / 2) * np.cos(df["angle"]))
|
|
5281
|
+
df_clone["x2"] = df_clone["cx"] + ((df_clone["len"] / 2) * np.cos(df["angle"]))
|
|
5282
|
+
df_clone["y1"] = df_clone["cy"] - ((df_clone["len"] / 2) * np.sin(df["angle"]))
|
|
5283
|
+
df_clone["y2"] = df_clone["cy"] + ((df_clone["len"] / 2) * np.sin(df["angle"]))
|
|
5284
|
+
|
|
5285
|
+
return df_clone
|
|
5286
|
+
|
|
5287
|
+
def merge_lines(df_aligned):
|
|
5288
|
+
"""
|
|
5289
|
+
Merge collinear line segments.
|
|
5290
|
+
|
|
5291
|
+
Args:
|
|
5292
|
+
df_aligned (pandas.DataFrame): DataFrame with aligned line segments
|
|
5293
|
+
|
|
5294
|
+
Returns:
|
|
5295
|
+
pandas.DataFrame: DataFrame with merged line segments
|
|
5296
|
+
"""
|
|
5297
|
+
ortho_lines = []
|
|
5298
|
+
groups = df_aligned.groupby(
|
|
5299
|
+
(df_aligned["angle"].shift() != df_aligned["angle"]).cumsum()
|
|
5300
|
+
)
|
|
5301
|
+
for x, y in groups:
|
|
5302
|
+
group_cx = (y["cx"] * y["len"]).sum() / y["len"].sum()
|
|
5303
|
+
group_cy = (y["cy"] * y["len"]).sum() / y["len"].sum()
|
|
5304
|
+
cumlen = y["len"].sum()
|
|
5305
|
+
|
|
5306
|
+
ortho_lines.append((group_cx, group_cy, cumlen, y["angle"].iloc[0]))
|
|
5307
|
+
|
|
5308
|
+
ortho_list = []
|
|
5309
|
+
for cx, cy, length, rot_angle in ortho_lines:
|
|
5310
|
+
X1 = cx - (length / 2) * math.cos(rot_angle)
|
|
5311
|
+
X2 = cx + (length / 2) * math.cos(rot_angle)
|
|
5312
|
+
Y1 = cy - (length / 2) * math.sin(rot_angle)
|
|
5313
|
+
Y2 = cy + (length / 2) * math.sin(rot_angle)
|
|
5314
|
+
|
|
5315
|
+
ortho_list.append(
|
|
5316
|
+
{
|
|
5317
|
+
"x1": X1,
|
|
5318
|
+
"y1": Y1,
|
|
5319
|
+
"x2": X2,
|
|
5320
|
+
"y2": Y2,
|
|
5321
|
+
"len": length,
|
|
5322
|
+
"cx": cx,
|
|
5323
|
+
"cy": cy,
|
|
5324
|
+
"angle": rot_angle,
|
|
5325
|
+
}
|
|
5326
|
+
)
|
|
5327
|
+
|
|
5328
|
+
if (
|
|
5329
|
+
len(ortho_list) > 0 and ortho_list[0]["angle"] == ortho_list[-1]["angle"]
|
|
5330
|
+
): # join first and last segment if they're in same direction
|
|
5331
|
+
totlen = ortho_list[0]["len"] + ortho_list[-1]["len"]
|
|
5332
|
+
merge_cx = (
|
|
5333
|
+
(ortho_list[0]["cx"] * ortho_list[0]["len"])
|
|
5334
|
+
+ (ortho_list[-1]["cx"] * ortho_list[-1]["len"])
|
|
5335
|
+
) / totlen
|
|
5336
|
+
|
|
5337
|
+
merge_cy = (
|
|
5338
|
+
(ortho_list[0]["cy"] * ortho_list[0]["len"])
|
|
5339
|
+
+ (ortho_list[-1]["cy"] * ortho_list[-1]["len"])
|
|
5340
|
+
) / totlen
|
|
5341
|
+
|
|
5342
|
+
rot_angle = ortho_list[0]["angle"]
|
|
5343
|
+
X1 = merge_cx - (totlen / 2) * math.cos(rot_angle)
|
|
5344
|
+
X2 = merge_cx + (totlen / 2) * math.cos(rot_angle)
|
|
5345
|
+
Y1 = merge_cy - (totlen / 2) * math.sin(rot_angle)
|
|
5346
|
+
Y2 = merge_cy + (totlen / 2) * math.sin(rot_angle)
|
|
5347
|
+
|
|
5348
|
+
ortho_list[-1] = {
|
|
5349
|
+
"x1": X1,
|
|
5350
|
+
"y1": Y1,
|
|
5351
|
+
"x2": X2,
|
|
5352
|
+
"y2": Y2,
|
|
5353
|
+
"len": totlen,
|
|
5354
|
+
"cx": merge_cx,
|
|
5355
|
+
"cy": merge_cy,
|
|
5356
|
+
"angle": rot_angle,
|
|
5357
|
+
}
|
|
5358
|
+
ortho_list = ortho_list[1:]
|
|
5359
|
+
ortho_df = pd.DataFrame(ortho_list)
|
|
5360
|
+
return ortho_df
|
|
5361
|
+
|
|
5362
|
+
def find_intersection(x1, y1, x2, y2, x3, y3, x4, y4):
|
|
5363
|
+
"""
|
|
5364
|
+
Find the intersection point of two line segments.
|
|
5365
|
+
|
|
5366
|
+
Args:
|
|
5367
|
+
x1, y1, x2, y2: Coordinates of the first line segment
|
|
5368
|
+
x3, y3, x4, y4: Coordinates of the second line segment
|
|
5369
|
+
|
|
5370
|
+
Returns:
|
|
5371
|
+
list: [x, y] coordinates of the intersection point
|
|
5372
|
+
|
|
5373
|
+
Raises:
|
|
5374
|
+
ZeroDivisionError: If the lines are parallel or collinear
|
|
5375
|
+
"""
|
|
5376
|
+
# Calculate the denominator of the intersection formula
|
|
5377
|
+
denominator = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
|
|
5378
|
+
|
|
5379
|
+
# Check if lines are parallel or collinear (denominator close to zero)
|
|
5380
|
+
if abs(denominator) < 1e-10:
|
|
5381
|
+
raise ZeroDivisionError("Lines are parallel or collinear")
|
|
5382
|
+
|
|
5383
|
+
px = (
|
|
5384
|
+
(x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)
|
|
5385
|
+
) / denominator
|
|
5386
|
+
py = (
|
|
5387
|
+
(x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)
|
|
5388
|
+
) / denominator
|
|
5389
|
+
|
|
5390
|
+
# Check if the intersection point is within a reasonable distance
|
|
5391
|
+
# from both line segments to avoid extreme extrapolation
|
|
5392
|
+
def point_on_segment(x, y, x1, y1, x2, y2, tolerance=2.0):
|
|
5393
|
+
# Check if point (x,y) is near the line segment from (x1,y1) to (x2,y2)
|
|
5394
|
+
# First check if it's near the infinite line
|
|
5395
|
+
line_len = np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
|
|
5396
|
+
if line_len < 1e-10:
|
|
5397
|
+
return np.sqrt((x - x1) ** 2 + (y - y1) ** 2) <= tolerance
|
|
5398
|
+
|
|
5399
|
+
t = ((x - x1) * (x2 - x1) + (y - y1) * (y2 - y1)) / (line_len**2)
|
|
5400
|
+
|
|
5401
|
+
# Check distance to the infinite line
|
|
5402
|
+
proj_x = x1 + t * (x2 - x1)
|
|
5403
|
+
proj_y = y1 + t * (y2 - y1)
|
|
5404
|
+
dist_to_line = np.sqrt((x - proj_x) ** 2 + (y - proj_y) ** 2)
|
|
5405
|
+
|
|
5406
|
+
# Check if the projection is near the segment, not just the infinite line
|
|
5407
|
+
if t < -tolerance or t > 1 + tolerance:
|
|
5408
|
+
# If far from the segment, compute distance to the nearest endpoint
|
|
5409
|
+
dist_to_start = np.sqrt((x - x1) ** 2 + (y - y1) ** 2)
|
|
5410
|
+
dist_to_end = np.sqrt((x - x2) ** 2 + (y - y2) ** 2)
|
|
5411
|
+
return min(dist_to_start, dist_to_end) <= tolerance * 2
|
|
5412
|
+
|
|
5413
|
+
return dist_to_line <= tolerance
|
|
5414
|
+
|
|
5415
|
+
# Check if intersection is reasonably close to both line segments
|
|
5416
|
+
if not (
|
|
5417
|
+
point_on_segment(px, py, x1, y1, x2, y2)
|
|
5418
|
+
and point_on_segment(px, py, x3, y3, x4, y4)
|
|
5419
|
+
):
|
|
5420
|
+
# If intersection is far from segments, it's probably extrapolating too much
|
|
5421
|
+
raise ValueError("Intersection point too far from line segments")
|
|
5422
|
+
|
|
5423
|
+
return [px, py]
|
|
5424
|
+
|
|
5425
|
+
def join_ring(merged_df):
|
|
5426
|
+
"""
|
|
5427
|
+
Join line segments to form a closed ring.
|
|
5428
|
+
|
|
5429
|
+
Args:
|
|
5430
|
+
merged_df (pandas.DataFrame): DataFrame with merged line segments
|
|
5431
|
+
|
|
5432
|
+
Returns:
|
|
5433
|
+
numpy.ndarray: Array of shape (1, n, 2) containing the ring coordinates
|
|
5434
|
+
"""
|
|
5435
|
+
# Handle edge cases
|
|
5436
|
+
if len(merged_df) < 3:
|
|
5437
|
+
# Not enough segments to form a valid polygon
|
|
5438
|
+
return np.array([[]])
|
|
5439
|
+
|
|
5440
|
+
ring = []
|
|
5441
|
+
|
|
5442
|
+
# Find intersections between adjacent line segments
|
|
5443
|
+
for i in range(len(merged_df) - 1):
|
|
5444
|
+
x1, y1, x2, y2, *_ = merged_df.iloc[i]
|
|
5445
|
+
x3, y3, x4, y4, *_ = merged_df.iloc[i + 1]
|
|
5446
|
+
|
|
5447
|
+
try:
|
|
5448
|
+
intersection = find_intersection(x1, y1, x2, y2, x3, y3, x4, y4)
|
|
5449
|
+
|
|
5450
|
+
# Check if the intersection point is too far from either line segment
|
|
5451
|
+
# This helps prevent extending edges beyond reasonable bounds
|
|
5452
|
+
dist_to_seg1 = min(
|
|
5453
|
+
np.sqrt((intersection[0] - x1) ** 2 + (intersection[1] - y1) ** 2),
|
|
5454
|
+
np.sqrt((intersection[0] - x2) ** 2 + (intersection[1] - y2) ** 2),
|
|
5455
|
+
)
|
|
5456
|
+
dist_to_seg2 = min(
|
|
5457
|
+
np.sqrt((intersection[0] - x3) ** 2 + (intersection[1] - y3) ** 2),
|
|
5458
|
+
np.sqrt((intersection[0] - x4) ** 2 + (intersection[1] - y4) ** 2),
|
|
5459
|
+
)
|
|
5460
|
+
|
|
5461
|
+
# Use the maximum of line segment lengths as a reference
|
|
5462
|
+
max_len = max(
|
|
5463
|
+
np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2),
|
|
5464
|
+
np.sqrt((x4 - x3) ** 2 + (y4 - y3) ** 2),
|
|
5465
|
+
)
|
|
5466
|
+
|
|
5467
|
+
# If intersection is too far away, use the endpoint of the first segment instead
|
|
5468
|
+
if dist_to_seg1 > max_len * 0.5 or dist_to_seg2 > max_len * 0.5:
|
|
5469
|
+
ring.append([x2, y2])
|
|
5470
|
+
else:
|
|
5471
|
+
ring.append(intersection)
|
|
5472
|
+
except Exception as e:
|
|
5473
|
+
# If intersection calculation fails, use the endpoint of the first segment
|
|
5474
|
+
ring.append([x2, y2])
|
|
5475
|
+
|
|
5476
|
+
# Connect last segment with first segment
|
|
5477
|
+
x1, y1, x2, y2, *_ = merged_df.iloc[-1]
|
|
5478
|
+
x3, y3, x4, y4, *_ = merged_df.iloc[0]
|
|
5479
|
+
|
|
5480
|
+
try:
|
|
5481
|
+
intersection = find_intersection(x1, y1, x2, y2, x3, y3, x4, y4)
|
|
5482
|
+
|
|
5483
|
+
# Check if the intersection point is too far from either line segment
|
|
5484
|
+
dist_to_seg1 = min(
|
|
5485
|
+
np.sqrt((intersection[0] - x1) ** 2 + (intersection[1] - y1) ** 2),
|
|
5486
|
+
np.sqrt((intersection[0] - x2) ** 2 + (intersection[1] - y2) ** 2),
|
|
5487
|
+
)
|
|
5488
|
+
dist_to_seg2 = min(
|
|
5489
|
+
np.sqrt((intersection[0] - x3) ** 2 + (intersection[1] - y3) ** 2),
|
|
5490
|
+
np.sqrt((intersection[0] - x4) ** 2 + (intersection[1] - y4) ** 2),
|
|
5491
|
+
)
|
|
5492
|
+
|
|
5493
|
+
max_len = max(
|
|
5494
|
+
np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2),
|
|
5495
|
+
np.sqrt((x4 - x3) ** 2 + (y4 - y3) ** 2),
|
|
5496
|
+
)
|
|
5497
|
+
|
|
5498
|
+
if dist_to_seg1 > max_len * 0.5 or dist_to_seg2 > max_len * 0.5:
|
|
5499
|
+
ring.append([x2, y2])
|
|
5500
|
+
else:
|
|
5501
|
+
ring.append(intersection)
|
|
5502
|
+
except Exception as e:
|
|
5503
|
+
# If intersection calculation fails, use the endpoint of the last segment
|
|
5504
|
+
ring.append([x2, y2])
|
|
5505
|
+
|
|
5506
|
+
# Ensure the ring is closed
|
|
5507
|
+
if len(ring) > 0 and (ring[0][0] != ring[-1][0] or ring[0][1] != ring[-1][1]):
|
|
5508
|
+
ring.append(ring[0])
|
|
5509
|
+
|
|
5510
|
+
return np.array([ring])
|
|
5511
|
+
|
|
5512
|
+
def rdp(M, epsilon=0, dist=None, algo="iter", return_mask=False):
|
|
5513
|
+
"""
|
|
5514
|
+
Simplifies a given array of points using the Ramer-Douglas-Peucker algorithm.
|
|
5515
|
+
|
|
5516
|
+
Args:
|
|
5517
|
+
M (numpy.ndarray): Array of shape (n, d) containing point coordinates
|
|
5518
|
+
epsilon (float, optional): Epsilon value for simplification
|
|
5519
|
+
dist (callable, optional): Distance function
|
|
5520
|
+
algo (str, optional): Algorithm to use ('iter' or 'rec')
|
|
5521
|
+
return_mask (bool, optional): Whether to return a mask instead of the simplified array
|
|
5522
|
+
|
|
5523
|
+
Returns:
|
|
5524
|
+
numpy.ndarray or list: Simplified points or mask
|
|
5525
|
+
"""
|
|
5526
|
+
if dist is None:
|
|
5527
|
+
dist = pldist
|
|
5528
|
+
|
|
5529
|
+
if algo == "iter":
|
|
5530
|
+
algo = partial(rdp_iter, return_mask=return_mask)
|
|
5531
|
+
elif algo == "rec":
|
|
5532
|
+
if return_mask:
|
|
5533
|
+
raise NotImplementedError(
|
|
5534
|
+
'return_mask=True not supported with algo="rec"'
|
|
5535
|
+
)
|
|
5536
|
+
algo = rdp_rec
|
|
5537
|
+
|
|
5538
|
+
if "numpy" in str(type(M)):
|
|
5539
|
+
return algo(M, epsilon, dist)
|
|
5540
|
+
|
|
5541
|
+
return algo(np.array(M), epsilon, dist).tolist()
|
|
5542
|
+
|
|
5543
|
+
def pldist(point, start, end):
    """
    Compute the perpendicular distance from ``point`` to the line through
    ``start`` and ``end``.

    If ``start`` and ``end`` coincide, the Euclidean distance from ``point``
    to ``start`` is returned instead.

    Args:
        point (numpy.ndarray): Point coordinates.
        start (numpy.ndarray): Start point of the line.
        end (numpy.ndarray): End point of the line.

    Returns:
        float: Distance from the point to the line.
    """
    # Degenerate segment: fall back to plain point-to-point distance.
    if np.all(np.equal(start, end)):
        return np.linalg.norm(point - start)

    # The magnitude of the 2D cross product equals the area of the
    # parallelogram spanned by (end - start) and (point - start);
    # dividing by the base length gives the perpendicular height.
    # (Avoids np.cross on 2D vectors, deprecated in NumPy 2.0.)
    dx, dy = end[0] - start[0], end[1] - start[1]
    px, py = point[0] - start[0], point[1] - start[1]
    parallelogram_area = abs(dx * py - dy * px)

    return parallelogram_area / np.linalg.norm(end - start)
|
|
5570
|
+
|
|
5571
|
+
def rdp_rec(M, epsilon, dist=pldist):
    """
    Recursive implementation of the Ramer-Douglas-Peucker algorithm.

    Args:
        M (numpy.ndarray): Array of shape (n, d) containing point coordinates.
        epsilon (float): Simplification tolerance; points farther than this
            from the current chord are retained.
        dist (callable, optional): Distance function ``dist(point, start, end)``.

    Returns:
        numpy.ndarray: Simplified points.
    """
    # Locate the point farthest from the chord joining the endpoints.
    farthest_dist = 0.0
    farthest_idx = -1

    for candidate in range(1, M.shape[0]):
        candidate_dist = dist(M[candidate], M[0], M[-1])
        if candidate_dist > farthest_dist:
            farthest_idx = candidate
            farthest_dist = candidate_dist

    if farthest_dist <= epsilon:
        # Everything lies within tolerance of the chord: keep endpoints only.
        return np.vstack((M[0], M[-1]))

    # Split at the farthest point and recurse on both halves, dropping the
    # duplicated split point from the left half before stacking.
    left = rdp_rec(M[: farthest_idx + 1], epsilon, dist)
    right = rdp_rec(M[farthest_idx:], epsilon, dist)
    return np.vstack((left[:-1], right))
|
|
5600
|
+
|
|
5601
|
+
def _rdp_iter(M, start_index, last_index, epsilon, dist=pldist):
    """
    Internal iterative implementation of the Ramer-Douglas-Peucker algorithm.

    Uses an explicit stack of (start, last) index pairs instead of recursion,
    clearing discarded points in a boolean mask.

    Args:
        M (numpy.ndarray): Array of shape (n, d) containing point coordinates.
        start_index (int): Index of the first point of the range to simplify.
        last_index (int): Index of the last point of the range to simplify.
        epsilon (float): Simplification tolerance.
        dist (callable, optional): Distance function ``dist(point, start, end)``.

    Returns:
        numpy.ndarray: Boolean mask over ``M[start_index:last_index + 1]``
        marking the points to keep.
    """
    origin = start_index
    keep = np.ones(last_index - start_index + 1, dtype=bool)
    pending = [(start_index, last_index)]

    while pending:
        lo, hi = pending.pop()

        # Find the still-kept interior point farthest from chord M[lo]..M[hi].
        dmax = 0.0
        split = lo
        for i in range(lo + 1, hi):
            if keep[i - origin]:
                d = dist(M[i], M[lo], M[hi])
                if d > dmax:
                    split = i
                    dmax = d

        if dmax > epsilon:
            # The far point survives; simplify both sub-ranges around it.
            pending.append((lo, split))
            pending.append((split, hi))
        else:
            # All interior points lie within tolerance: discard them.
            keep[lo + 1 - origin : hi - origin] = False

    return keep
|
|
5641
|
+
|
|
5642
|
+
def rdp_iter(M, epsilon, dist=pldist, return_mask=False):
    """
    Iterative implementation of the Ramer-Douglas-Peucker algorithm.

    Args:
        M (numpy.ndarray): Array of shape (n, d) containing point coordinates.
        epsilon (float): Simplification tolerance.
        dist (callable, optional): Distance function ``dist(point, start, end)``.
        return_mask (bool, optional): If True, return the boolean keep-mask
            rather than the simplified points.

    Returns:
        numpy.ndarray: Simplified points, or a boolean mask when
        ``return_mask`` is True.
    """
    keep = _rdp_iter(M, 0, len(M) - 1, epsilon, dist)
    return keep if return_mask else M[keep]
|
|
5661
|
+
|
|
5662
|
+
# Read the raster data
|
|
5663
|
+
with rasterio.open(input_path) as src:
|
|
5664
|
+
# Read the first band (assuming it contains the mask)
|
|
5665
|
+
mask = src.read(1)
|
|
5666
|
+
transform = src.transform
|
|
5667
|
+
crs = src.crs
|
|
5668
|
+
|
|
5669
|
+
# Extract shapes from the raster mask
|
|
5670
|
+
shapes = features.shapes(mask, transform=transform)
|
|
5671
|
+
|
|
5672
|
+
# Convert shapes to GeoJSON features
|
|
5673
|
+
features_list = []
|
|
5674
|
+
for shape, value in shapes:
|
|
5675
|
+
if value > 0: # Only process non-zero values (actual objects)
|
|
5676
|
+
# Convert GeoJSON geometry to Shapely polygon
|
|
5677
|
+
polygon = Polygon(shape["coordinates"][0])
|
|
5678
|
+
|
|
5679
|
+
# Skip tiny polygons
|
|
5680
|
+
if polygon.area < min_area:
|
|
5681
|
+
features_list.append(
|
|
5682
|
+
{
|
|
5683
|
+
"type": "Feature",
|
|
5684
|
+
"properties": {"value": int(value)},
|
|
5685
|
+
"geometry": shape,
|
|
5686
|
+
}
|
|
5687
|
+
)
|
|
5688
|
+
continue
|
|
5689
|
+
|
|
5690
|
+
# Check if shape is triangular and if we want to avoid triangular shapes
|
|
5691
|
+
if detect_triangles:
|
|
5692
|
+
# Create a simplified version to check number of vertices
|
|
5693
|
+
simple_polygon = polygon.simplify(epsilon)
|
|
5694
|
+
if (
|
|
5695
|
+
len(simple_polygon.exterior.coords) <= 4
|
|
5696
|
+
): # 3 points + closing point
|
|
5697
|
+
# Likely a triangular shape - skip orthogonalization
|
|
5698
|
+
features_list.append(
|
|
5699
|
+
{
|
|
5700
|
+
"type": "Feature",
|
|
5701
|
+
"properties": {"value": int(value)},
|
|
5702
|
+
"geometry": shape,
|
|
5703
|
+
}
|
|
5704
|
+
)
|
|
5705
|
+
continue
|
|
5706
|
+
|
|
5707
|
+
# Process larger, non-triangular polygons
|
|
5708
|
+
try:
|
|
5709
|
+
# Convert shapely polygon to a ring format for orthogonalization
|
|
5710
|
+
exterior_ring = list(polygon.exterior.coords)
|
|
5711
|
+
interior_rings = [
|
|
5712
|
+
list(interior.coords) for interior in polygon.interiors
|
|
5713
|
+
]
|
|
5714
|
+
|
|
5715
|
+
# Calculate bounding box aspect ratio to help with parameter tuning
|
|
5716
|
+
minx, miny, maxx, maxy = polygon.bounds
|
|
5717
|
+
width = maxx - minx
|
|
5718
|
+
height = maxy - miny
|
|
5719
|
+
aspect_ratio = max(width, height) / max(1.0, min(width, height))
|
|
5720
|
+
|
|
5721
|
+
# Determine if this shape is likely to be a building/rectangular object
|
|
5722
|
+
# Long thin objects might require different treatment
|
|
5723
|
+
is_rectangular = aspect_ratio < 3.0
|
|
5724
|
+
|
|
5725
|
+
# Rectangular objects usually need more careful orthogonalization
|
|
5726
|
+
epsilon_adjusted = epsilon
|
|
5727
|
+
min_segments_adjusted = min_segments
|
|
5728
|
+
|
|
5729
|
+
if is_rectangular:
|
|
5730
|
+
# For rectangular objects, use more conservative epsilon
|
|
5731
|
+
epsilon_adjusted = epsilon * 0.75
|
|
5732
|
+
# Ensure we get at least 4 points for a proper rectangle
|
|
5733
|
+
min_segments_adjusted = max(4, min_segments)
|
|
5734
|
+
|
|
5735
|
+
# Orthogonalize the exterior and interior rings
|
|
5736
|
+
orthogonalized_exterior = orthogonalize_ring(
|
|
5737
|
+
exterior_ring,
|
|
5738
|
+
epsilon=epsilon_adjusted,
|
|
5739
|
+
min_segments=min_segments_adjusted,
|
|
5740
|
+
)
|
|
5741
|
+
|
|
5742
|
+
orthogonalized_interiors = [
|
|
5743
|
+
orthogonalize_ring(
|
|
5744
|
+
ring,
|
|
5745
|
+
epsilon=epsilon_adjusted,
|
|
5746
|
+
min_segments=min_segments_adjusted,
|
|
5747
|
+
)
|
|
5748
|
+
for ring in interior_rings
|
|
5749
|
+
]
|
|
5750
|
+
|
|
5751
|
+
# Validate the result - calculate area change
|
|
5752
|
+
original_area = polygon.area
|
|
5753
|
+
orthogonalized_poly = Polygon(orthogonalized_exterior)
|
|
5754
|
+
|
|
5755
|
+
if orthogonalized_poly.is_valid:
|
|
5756
|
+
area_ratio = (
|
|
5757
|
+
orthogonalized_poly.area / original_area
|
|
5758
|
+
if original_area > 0
|
|
5759
|
+
else 0
|
|
5760
|
+
)
|
|
5761
|
+
|
|
5762
|
+
# If area changed too much, revert to original
|
|
5763
|
+
if area_ratio < area_tolerance or area_ratio > (
|
|
5764
|
+
1.0 / area_tolerance
|
|
5765
|
+
):
|
|
5766
|
+
# Use original polygon instead
|
|
5767
|
+
geometry = shape
|
|
5768
|
+
else:
|
|
5769
|
+
# Create a new geometry with orthogonalized rings
|
|
5770
|
+
geometry = {
|
|
5771
|
+
"type": "Polygon",
|
|
5772
|
+
"coordinates": [orthogonalized_exterior],
|
|
5773
|
+
}
|
|
5774
|
+
|
|
5775
|
+
# Add interior rings if they exist
|
|
5776
|
+
if orthogonalized_interiors:
|
|
5777
|
+
geometry["coordinates"].extend(
|
|
5778
|
+
[ring for ring in orthogonalized_interiors]
|
|
5779
|
+
)
|
|
5780
|
+
else:
|
|
5781
|
+
# If resulting polygon is invalid, use original
|
|
5782
|
+
geometry = shape
|
|
5783
|
+
|
|
5784
|
+
# Add the feature to the list
|
|
5785
|
+
features_list.append(
|
|
5786
|
+
{
|
|
5787
|
+
"type": "Feature",
|
|
5788
|
+
"properties": {"value": int(value)},
|
|
5789
|
+
"geometry": geometry,
|
|
5790
|
+
}
|
|
5791
|
+
)
|
|
5792
|
+
except Exception as e:
|
|
5793
|
+
# Keep the original shape if orthogonalization fails
|
|
5794
|
+
features_list.append(
|
|
5795
|
+
{
|
|
5796
|
+
"type": "Feature",
|
|
5797
|
+
"properties": {"value": int(value)},
|
|
5798
|
+
"geometry": shape,
|
|
5799
|
+
}
|
|
5800
|
+
)
|
|
5801
|
+
|
|
5802
|
+
# Create the final GeoJSON structure
|
|
5803
|
+
geojson = {
|
|
5804
|
+
"type": "FeatureCollection",
|
|
5805
|
+
"crs": {"type": "name", "properties": {"name": str(crs)}},
|
|
5806
|
+
"features": features_list,
|
|
5807
|
+
}
|
|
5808
|
+
|
|
5809
|
+
# Convert to GeoDataFrame and set the CRS
|
|
5810
|
+
gdf = gpd.GeoDataFrame.from_features(geojson["features"], crs=crs)
|
|
5811
|
+
|
|
5812
|
+
# Save to file if output_path is provided
|
|
5813
|
+
if output_path:
|
|
5814
|
+
gdf.to_file(output_path)
|
|
5815
|
+
|
|
5816
|
+
return gdf
|