giga-spatial 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -948,6 +948,9 @@ def aggregate_polygons_to_zones(
948
948
  if not isinstance(zones, gpd.GeoDataFrame):
949
949
  raise TypeError("zones must be a GeoDataFrame")
950
950
 
951
+ if zones.empty:
952
+ raise ValueError("zones GeoDataFrame is empty")
953
+
951
954
  if zone_id_column not in zones.columns:
952
955
  raise ValueError(f"Zone ID column '{zone_id_column}' not found in zones")
953
956
 
@@ -960,11 +963,17 @@ def aggregate_polygons_to_zones(
960
963
  if not isinstance(polygons, gpd.GeoDataFrame):
961
964
  try:
962
965
  polygons_gdf = convert_to_geodataframe(polygons)
963
- except:
964
- raise TypeError("polygons must be a GeoDataFrame or convertible to one")
966
+ except Exception as e:
967
+ raise TypeError(
968
+ f"polygons must be a GeoDataFrame or convertible to one: {e}"
969
+ )
965
970
  else:
966
971
  polygons_gdf = polygons.copy()
967
972
 
973
+ if polygons_gdf.empty:
974
+ LOGGER.warning("Empty polygons GeoDataFrame provided")
975
+ return zones
976
+
968
977
  # Validate geometry types
969
978
  non_polygon_geoms = [
970
979
  geom_type
@@ -991,8 +1000,53 @@ def aggregate_polygons_to_zones(
991
1000
  polygons_gdf = polygons_gdf.to_crs(zones.crs)
992
1001
 
993
1002
  # Handle aggregation method
1003
+ agg_funcs = _process_aggregation_methods(aggregation, value_columns)
1004
+
1005
+ # Prepare minimal zones for spatial operations (only zone_id_column and geometry)
1006
+ minimal_zones = zones[[zone_id_column, "geometry"]].copy()
1007
+
1008
+ if predicate == "fractional":
1009
+ aggregated_data = _fractional_aggregation(
1010
+ polygons_gdf, minimal_zones, value_columns, agg_funcs, zone_id_column
1011
+ )
1012
+ else:
1013
+ aggregated_data = _simple_aggregation(
1014
+ polygons_gdf,
1015
+ minimal_zones,
1016
+ value_columns,
1017
+ agg_funcs,
1018
+ zone_id_column,
1019
+ predicate,
1020
+ )
1021
+
1022
+ # Merge aggregated results back to complete zones data
1023
+ result = zones.merge(
1024
+ aggregated_data[[col for col in aggregated_data.columns if col != "geometry"]],
1025
+ on=zone_id_column,
1026
+ how="left",
1027
+ )
1028
+
1029
+ # Fill NaN values with zeros for the newly aggregated columns only
1030
+ aggregated_cols = [col for col in result.columns if col not in zones.columns]
1031
+ for col in aggregated_cols:
1032
+ if pd.api.types.is_numeric_dtype(result[col]):
1033
+ result[col] = result[col].fillna(0)
1034
+
1035
+ # Apply output suffix consistently to result columns only
1036
+ if output_suffix:
1037
+ rename_dict = {col: f"{col}{output_suffix}" for col in aggregated_cols}
1038
+ result = result.rename(columns=rename_dict)
1039
+
1040
+ if drop_geometry:
1041
+ result = result.drop(columns=["geometry"])
1042
+
1043
+ return result
1044
+
1045
+
1046
+ def _process_aggregation_methods(aggregation, value_columns):
1047
+ """Process and validate aggregation methods"""
994
1048
  if isinstance(aggregation, str):
995
- agg_funcs = {col: aggregation for col in value_columns}
1049
+ return {col: aggregation for col in value_columns}
996
1050
  elif isinstance(aggregation, dict):
997
1051
  # Validate dictionary keys
998
1052
  missing_aggs = [col for col in value_columns if col not in aggregation]
@@ -1005,112 +1059,98 @@ def aggregate_polygons_to_zones(
1005
1059
  f"Aggregation methods specified for non-existent columns: {extra_aggs}"
1006
1060
  )
1007
1061
 
1008
- agg_funcs = aggregation
1062
+ return aggregation
1009
1063
  else:
1010
1064
  raise TypeError("aggregation must be a string or dictionary")
1011
1065
 
1012
- # Create a copy of the zones
1013
- result = zones.copy()
1014
1066
 
1015
- if predicate == "fractional":
1016
- # Use area-weighted aggregation with polygon overlay
1067
+ def _fractional_aggregation(
1068
+ polygons_gdf, zones, value_columns, agg_funcs, zone_id_column
1069
+ ):
1070
+ """Perform area-weighted (fractional) aggregation"""
1071
+ try:
1072
+ # Compute UTM CRS for accurate area calculations
1017
1073
  try:
1018
- # Compute UTM CRS for accurate area calculations
1019
- try:
1020
- overlay_utm_crs = polygons_gdf.estimate_utm_crs()
1021
- except Exception as e:
1022
- LOGGER.warning(
1023
- f"Warning: UTM CRS estimation failed, using Web Mercator. Error: {e}"
1024
- )
1025
- overlay_utm_crs = "EPSG:3857" # Fallback to Web Mercator
1074
+ overlay_utm_crs = polygons_gdf.estimate_utm_crs()
1075
+ except Exception as e:
1076
+ LOGGER.warning(f"UTM CRS estimation failed, using Web Mercator. Error: {e}")
1077
+ overlay_utm_crs = "EPSG:3857" # Fallback to Web Mercator
1026
1078
 
1027
- # Prepare polygons for overlay
1028
- polygons_utm = polygons_gdf.to_crs(overlay_utm_crs)
1029
- polygons_utm["orig_area"] = polygons_utm.area
1079
+ # Prepare polygons for overlay - only necessary columns
1080
+ polygons_utm = polygons_gdf.to_crs(overlay_utm_crs)
1081
+ polygons_utm["orig_area"] = polygons_utm.area
1030
1082
 
1031
- # Keep only necessary columns
1032
- overlay_cols = value_columns + ["geometry", "orig_area"]
1033
- overlay_gdf = polygons_utm[overlay_cols].copy()
1083
+ # Keep only necessary columns
1084
+ overlay_cols = value_columns + ["geometry", "orig_area"]
1085
+ overlay_gdf = polygons_utm[overlay_cols].copy()
1034
1086
 
1035
- # Prepare zones for overlay
1036
- zones_utm = zones.to_crs(overlay_utm_crs)
1087
+ # Prepare zones for overlay
1088
+ zones_utm = zones.to_crs(overlay_utm_crs)
1037
1089
 
1038
- # Perform the spatial overlay
1039
- gdf_overlayed = gpd.overlay(
1040
- overlay_gdf, zones_utm[[zone_id_column, "geometry"]], how="intersection"
1041
- )
1090
+ # Perform the spatial overlay
1091
+ gdf_overlayed = gpd.overlay(overlay_gdf, zones_utm, how="intersection")
1042
1092
 
1043
- # Calculate fractional areas
1044
- gdf_overlayed["intersection_area"] = gdf_overlayed.area
1045
- gdf_overlayed["area_fraction"] = (
1046
- gdf_overlayed["intersection_area"] / gdf_overlayed["orig_area"]
1047
- )
1093
+ if gdf_overlayed.empty:
1094
+ LOGGER.warning("No intersections found during fractional aggregation")
1095
+ return zones
1048
1096
 
1049
- # Apply area weighting to value columns
1050
- for col in value_columns:
1051
- gdf_overlayed[col] = gdf_overlayed[col] * gdf_overlayed["area_fraction"]
1097
+ # Calculate fractional areas
1098
+ gdf_overlayed["intersection_area"] = gdf_overlayed.area
1099
+ gdf_overlayed["area_fraction"] = (
1100
+ gdf_overlayed["intersection_area"] / gdf_overlayed["orig_area"]
1101
+ )
1052
1102
 
1053
- # Aggregate by zone ID
1054
- aggregated = gdf_overlayed.groupby(zone_id_column)[value_columns].agg(
1055
- agg_funcs
1056
- )
1103
+ # Apply area weighting to value columns
1104
+ for col in value_columns:
1105
+ gdf_overlayed[col] = gdf_overlayed[col] * gdf_overlayed["area_fraction"]
1057
1106
 
1058
- # Handle column naming for multi-level index
1059
- if isinstance(aggregated.columns, pd.MultiIndex):
1060
- aggregated.columns = [
1061
- f"{col[0]}_{col[1]}{output_suffix}" for col in aggregated.columns
1062
- ]
1107
+ # Aggregate by zone ID
1108
+ aggregated = gdf_overlayed.groupby(zone_id_column)[value_columns].agg(agg_funcs)
1063
1109
 
1064
- # Reset index
1065
- aggregated = aggregated.reset_index()
1110
+ # Handle column naming for multi-level index
1111
+ aggregated = _handle_multiindex_columns(aggregated)
1066
1112
 
1067
- # Merge aggregated values back to the zones
1068
- result = result.merge(aggregated, on=zone_id_column, how="left")
1113
+ # Reset index and merge back to zones
1114
+ aggregated = aggregated.reset_index()
1069
1115
 
1070
- # Fill NaN values with zeros
1071
- for col in result.columns:
1072
- if (
1073
- col != zone_id_column
1074
- and col != "geometry"
1075
- and pd.api.types.is_numeric_dtype(result[col])
1076
- ):
1077
- result[col] = result[col].fillna(0)
1116
+ # Return only the aggregated data (will be merged with full zones later)
1117
+ return aggregated
1078
1118
 
1079
- except Exception as e:
1080
- raise RuntimeError(f"Error during area-weighted aggregation: {e}")
1119
+ except Exception as e:
1120
+ raise RuntimeError(f"Error during area-weighted aggregation: {e}")
1081
1121
 
1082
- else:
1083
- # Non-weighted aggregation - simpler approach
1084
- # Perform spatial join
1085
- joined = gpd.sjoin(polygons_gdf, zones, how="inner", predicate=predicate)
1086
1122
 
1087
- # Remove geometry column for aggregation
1088
- if "geometry" in joined.columns:
1089
- joined = joined.drop(columns=["geometry"])
1123
+ def _simple_aggregation(
1124
+ polygons_gdf, zones, value_columns, agg_funcs, zone_id_column, predicate
1125
+ ):
1126
+ """Perform simple (non-weighted) aggregation"""
1127
+ # Perform spatial join
1128
+ joined = gpd.sjoin(polygons_gdf, zones, how="inner", predicate=predicate)
1090
1129
 
1091
- # Group by zone ID and aggregate
1092
- aggregated = joined.groupby(zone_id_column)[value_columns].agg(agg_funcs)
1130
+ if joined.empty:
1131
+ LOGGER.warning(f"No {predicate} relationships found during spatial join")
1132
+ return zones
1093
1133
 
1094
- # Handle column naming for multi-level index
1095
- if isinstance(aggregated.columns, pd.MultiIndex):
1096
- aggregated.columns = [
1097
- f"{col[0]}_{col[1]}{output_suffix}" for col in aggregated.columns
1098
- ]
1134
+ # Remove geometry column for aggregation (keep only necessary columns)
1135
+ agg_cols = value_columns + [zone_id_column]
1136
+ joined_subset = joined[agg_cols].copy()
1099
1137
 
1100
- # Reset index and merge back to zones
1101
- aggregated = aggregated.reset_index()
1102
- result = result.merge(aggregated, on=zone_id_column, how="left")
1138
+ # Group by zone ID and aggregate
1139
+ aggregated = joined_subset.groupby(zone_id_column)[value_columns].agg(agg_funcs)
1103
1140
 
1104
- # Fill NaN values with zeros
1105
- for col in result.columns:
1106
- if (
1107
- col != zone_id_column
1108
- and col != "geometry"
1109
- and pd.api.types.is_numeric_dtype(result[col])
1110
- ):
1111
- result[col] = result[col].fillna(0)
1141
+ # Handle column naming for multi-level index
1142
+ aggregated = _handle_multiindex_columns(aggregated)
1112
1143
 
1113
- if drop_geometry:
1114
- result = result.drop(columns=["geometry"])
1144
+ # Reset index and merge back to zones
1145
+ aggregated = aggregated.reset_index()
1115
1146
 
1116
- return result
1147
+ # Return only the aggregated data (will be merged with full zones later)
1148
+ return aggregated
1149
+
1150
+
1151
+ def _handle_multiindex_columns(aggregated):
1152
+ """Handle multi-level column index from groupby aggregation"""
1153
+ if isinstance(aggregated.columns, pd.MultiIndex):
1154
+ # Flatten multi-level columns: combine column name with aggregation method
1155
+ aggregated.columns = [f"{col[0]}_{col[1]}" for col in aggregated.columns]
1156
+ return aggregated