giga-spatial 0.6.5__py3-none-any.whl → 0.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/METADATA +2 -1
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/RECORD +17 -17
- gigaspatial/__init__.py +1 -1
- gigaspatial/core/io/data_api.py +3 -1
- gigaspatial/core/io/database.py +4 -1
- gigaspatial/generators/poi.py +75 -12
- gigaspatial/generators/zonal/base.py +34 -35
- gigaspatial/generators/zonal/geometry.py +87 -32
- gigaspatial/handlers/__init__.py +8 -1
- gigaspatial/handlers/base.py +26 -6
- gigaspatial/handlers/boundaries.py +50 -0
- gigaspatial/handlers/ghsl.py +15 -3
- gigaspatial/handlers/worldpop.py +771 -186
- gigaspatial/processing/geo.py +127 -87
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/WHEEL +0 -0
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/licenses/LICENSE +0 -0
- {giga_spatial-0.6.5.dist-info → giga_spatial-0.6.6.dist-info}/top_level.txt +0 -0
gigaspatial/processing/geo.py
CHANGED
@@ -948,6 +948,9 @@ def aggregate_polygons_to_zones(
|
|
948
948
|
if not isinstance(zones, gpd.GeoDataFrame):
|
949
949
|
raise TypeError("zones must be a GeoDataFrame")
|
950
950
|
|
951
|
+
if zones.empty:
|
952
|
+
raise ValueError("zones GeoDataFrame is empty")
|
953
|
+
|
951
954
|
if zone_id_column not in zones.columns:
|
952
955
|
raise ValueError(f"Zone ID column '{zone_id_column}' not found in zones")
|
953
956
|
|
@@ -960,11 +963,17 @@ def aggregate_polygons_to_zones(
|
|
960
963
|
if not isinstance(polygons, gpd.GeoDataFrame):
|
961
964
|
try:
|
962
965
|
polygons_gdf = convert_to_geodataframe(polygons)
|
963
|
-
except:
|
964
|
-
raise TypeError(
|
966
|
+
except Exception as e:
|
967
|
+
raise TypeError(
|
968
|
+
f"polygons must be a GeoDataFrame or convertible to one: {e}"
|
969
|
+
)
|
965
970
|
else:
|
966
971
|
polygons_gdf = polygons.copy()
|
967
972
|
|
973
|
+
if polygons_gdf.empty:
|
974
|
+
LOGGER.warning("Empty polygons GeoDataFrame provided")
|
975
|
+
return zones
|
976
|
+
|
968
977
|
# Validate geometry types
|
969
978
|
non_polygon_geoms = [
|
970
979
|
geom_type
|
@@ -991,8 +1000,53 @@ def aggregate_polygons_to_zones(
|
|
991
1000
|
polygons_gdf = polygons_gdf.to_crs(zones.crs)
|
992
1001
|
|
993
1002
|
# Handle aggregation method
|
1003
|
+
agg_funcs = _process_aggregation_methods(aggregation, value_columns)
|
1004
|
+
|
1005
|
+
# Prepare minimal zones for spatial operations (only zone_id_column and geometry)
|
1006
|
+
minimal_zones = zones[[zone_id_column, "geometry"]].copy()
|
1007
|
+
|
1008
|
+
if predicate == "fractional":
|
1009
|
+
aggregated_data = _fractional_aggregation(
|
1010
|
+
polygons_gdf, minimal_zones, value_columns, agg_funcs, zone_id_column
|
1011
|
+
)
|
1012
|
+
else:
|
1013
|
+
aggregated_data = _simple_aggregation(
|
1014
|
+
polygons_gdf,
|
1015
|
+
minimal_zones,
|
1016
|
+
value_columns,
|
1017
|
+
agg_funcs,
|
1018
|
+
zone_id_column,
|
1019
|
+
predicate,
|
1020
|
+
)
|
1021
|
+
|
1022
|
+
# Merge aggregated results back to complete zones data
|
1023
|
+
result = zones.merge(
|
1024
|
+
aggregated_data[[col for col in aggregated_data.columns if col != "geometry"]],
|
1025
|
+
on=zone_id_column,
|
1026
|
+
how="left",
|
1027
|
+
)
|
1028
|
+
|
1029
|
+
# Fill NaN values with zeros for the newly aggregated columns only
|
1030
|
+
aggregated_cols = [col for col in result.columns if col not in zones.columns]
|
1031
|
+
for col in aggregated_cols:
|
1032
|
+
if pd.api.types.is_numeric_dtype(result[col]):
|
1033
|
+
result[col] = result[col].fillna(0)
|
1034
|
+
|
1035
|
+
# Apply output suffix consistently to result columns only
|
1036
|
+
if output_suffix:
|
1037
|
+
rename_dict = {col: f"{col}{output_suffix}" for col in aggregated_cols}
|
1038
|
+
result = result.rename(columns=rename_dict)
|
1039
|
+
|
1040
|
+
if drop_geometry:
|
1041
|
+
result = result.drop(columns=["geometry"])
|
1042
|
+
|
1043
|
+
return result
|
1044
|
+
|
1045
|
+
|
1046
|
+
def _process_aggregation_methods(aggregation, value_columns):
|
1047
|
+
"""Process and validate aggregation methods"""
|
994
1048
|
if isinstance(aggregation, str):
|
995
|
-
|
1049
|
+
return {col: aggregation for col in value_columns}
|
996
1050
|
elif isinstance(aggregation, dict):
|
997
1051
|
# Validate dictionary keys
|
998
1052
|
missing_aggs = [col for col in value_columns if col not in aggregation]
|
@@ -1005,112 +1059,98 @@ def aggregate_polygons_to_zones(
|
|
1005
1059
|
f"Aggregation methods specified for non-existent columns: {extra_aggs}"
|
1006
1060
|
)
|
1007
1061
|
|
1008
|
-
|
1062
|
+
return aggregation
|
1009
1063
|
else:
|
1010
1064
|
raise TypeError("aggregation must be a string or dictionary")
|
1011
1065
|
|
1012
|
-
# Create a copy of the zones
|
1013
|
-
result = zones.copy()
|
1014
1066
|
|
1015
|
-
|
1016
|
-
|
1067
|
+
def _fractional_aggregation(
|
1068
|
+
polygons_gdf, zones, value_columns, agg_funcs, zone_id_column
|
1069
|
+
):
|
1070
|
+
"""Perform area-weighted (fractional) aggregation"""
|
1071
|
+
try:
|
1072
|
+
# Compute UTM CRS for accurate area calculations
|
1017
1073
|
try:
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
LOGGER.warning(
|
1023
|
-
f"Warning: UTM CRS estimation failed, using Web Mercator. Error: {e}"
|
1024
|
-
)
|
1025
|
-
overlay_utm_crs = "EPSG:3857" # Fallback to Web Mercator
|
1074
|
+
overlay_utm_crs = polygons_gdf.estimate_utm_crs()
|
1075
|
+
except Exception as e:
|
1076
|
+
LOGGER.warning(f"UTM CRS estimation failed, using Web Mercator. Error: {e}")
|
1077
|
+
overlay_utm_crs = "EPSG:3857" # Fallback to Web Mercator
|
1026
1078
|
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1079
|
+
# Prepare polygons for overlay - only necessary columns
|
1080
|
+
polygons_utm = polygons_gdf.to_crs(overlay_utm_crs)
|
1081
|
+
polygons_utm["orig_area"] = polygons_utm.area
|
1030
1082
|
|
1031
|
-
|
1032
|
-
|
1033
|
-
|
1083
|
+
# Keep only necessary columns
|
1084
|
+
overlay_cols = value_columns + ["geometry", "orig_area"]
|
1085
|
+
overlay_gdf = polygons_utm[overlay_cols].copy()
|
1034
1086
|
|
1035
|
-
|
1036
|
-
|
1087
|
+
# Prepare zones for overlay
|
1088
|
+
zones_utm = zones.to_crs(overlay_utm_crs)
|
1037
1089
|
|
1038
|
-
|
1039
|
-
|
1040
|
-
overlay_gdf, zones_utm[[zone_id_column, "geometry"]], how="intersection"
|
1041
|
-
)
|
1090
|
+
# Perform the spatial overlay
|
1091
|
+
gdf_overlayed = gpd.overlay(overlay_gdf, zones_utm, how="intersection")
|
1042
1092
|
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
gdf_overlayed["intersection_area"] / gdf_overlayed["orig_area"]
|
1047
|
-
)
|
1093
|
+
if gdf_overlayed.empty:
|
1094
|
+
LOGGER.warning("No intersections found during fractional aggregation")
|
1095
|
+
return zones
|
1048
1096
|
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1097
|
+
# Calculate fractional areas
|
1098
|
+
gdf_overlayed["intersection_area"] = gdf_overlayed.area
|
1099
|
+
gdf_overlayed["area_fraction"] = (
|
1100
|
+
gdf_overlayed["intersection_area"] / gdf_overlayed["orig_area"]
|
1101
|
+
)
|
1052
1102
|
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1056
|
-
)
|
1103
|
+
# Apply area weighting to value columns
|
1104
|
+
for col in value_columns:
|
1105
|
+
gdf_overlayed[col] = gdf_overlayed[col] * gdf_overlayed["area_fraction"]
|
1057
1106
|
|
1058
|
-
|
1059
|
-
|
1060
|
-
aggregated.columns = [
|
1061
|
-
f"{col[0]}_{col[1]}{output_suffix}" for col in aggregated.columns
|
1062
|
-
]
|
1107
|
+
# Aggregate by zone ID
|
1108
|
+
aggregated = gdf_overlayed.groupby(zone_id_column)[value_columns].agg(agg_funcs)
|
1063
1109
|
|
1064
|
-
|
1065
|
-
|
1110
|
+
# Handle column naming for multi-level index
|
1111
|
+
aggregated = _handle_multiindex_columns(aggregated)
|
1066
1112
|
|
1067
|
-
|
1068
|
-
|
1113
|
+
# Reset index and merge back to zones
|
1114
|
+
aggregated = aggregated.reset_index()
|
1069
1115
|
|
1070
|
-
|
1071
|
-
|
1072
|
-
if (
|
1073
|
-
col != zone_id_column
|
1074
|
-
and col != "geometry"
|
1075
|
-
and pd.api.types.is_numeric_dtype(result[col])
|
1076
|
-
):
|
1077
|
-
result[col] = result[col].fillna(0)
|
1116
|
+
# Return only the aggregated data (will be merged with full zones later)
|
1117
|
+
return aggregated
|
1078
1118
|
|
1079
|
-
|
1080
|
-
|
1119
|
+
except Exception as e:
|
1120
|
+
raise RuntimeError(f"Error during area-weighted aggregation: {e}")
|
1081
1121
|
|
1082
|
-
else:
|
1083
|
-
# Non-weighted aggregation - simpler approach
|
1084
|
-
# Perform spatial join
|
1085
|
-
joined = gpd.sjoin(polygons_gdf, zones, how="inner", predicate=predicate)
|
1086
1122
|
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1123
|
+
def _simple_aggregation(
|
1124
|
+
polygons_gdf, zones, value_columns, agg_funcs, zone_id_column, predicate
|
1125
|
+
):
|
1126
|
+
"""Perform simple (non-weighted) aggregation"""
|
1127
|
+
# Perform spatial join
|
1128
|
+
joined = gpd.sjoin(polygons_gdf, zones, how="inner", predicate=predicate)
|
1090
1129
|
|
1091
|
-
|
1092
|
-
|
1130
|
+
if joined.empty:
|
1131
|
+
LOGGER.warning(f"No {predicate} relationships found during spatial join")
|
1132
|
+
return zones
|
1093
1133
|
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
f"{col[0]}_{col[1]}{output_suffix}" for col in aggregated.columns
|
1098
|
-
]
|
1134
|
+
# Remove geometry column for aggregation (keep only necessary columns)
|
1135
|
+
agg_cols = value_columns + [zone_id_column]
|
1136
|
+
joined_subset = joined[agg_cols].copy()
|
1099
1137
|
|
1100
|
-
|
1101
|
-
|
1102
|
-
result = result.merge(aggregated, on=zone_id_column, how="left")
|
1138
|
+
# Group by zone ID and aggregate
|
1139
|
+
aggregated = joined_subset.groupby(zone_id_column)[value_columns].agg(agg_funcs)
|
1103
1140
|
|
1104
|
-
|
1105
|
-
|
1106
|
-
if (
|
1107
|
-
col != zone_id_column
|
1108
|
-
and col != "geometry"
|
1109
|
-
and pd.api.types.is_numeric_dtype(result[col])
|
1110
|
-
):
|
1111
|
-
result[col] = result[col].fillna(0)
|
1141
|
+
# Handle column naming for multi-level index
|
1142
|
+
aggregated = _handle_multiindex_columns(aggregated)
|
1112
1143
|
|
1113
|
-
|
1114
|
-
|
1144
|
+
# Reset index and merge back to zones
|
1145
|
+
aggregated = aggregated.reset_index()
|
1115
1146
|
|
1116
|
-
|
1147
|
+
# Return only the aggregated data (will be merged with full zones later)
|
1148
|
+
return aggregated
|
1149
|
+
|
1150
|
+
|
1151
|
+
def _handle_multiindex_columns(aggregated):
|
1152
|
+
"""Handle multi-level column index from groupby aggregation"""
|
1153
|
+
if isinstance(aggregated.columns, pd.MultiIndex):
|
1154
|
+
# Flatten multi-level columns: combine column name with aggregation method
|
1155
|
+
aggregated.columns = [f"{col[0]}_{col[1]}" for col in aggregated.columns]
|
1156
|
+
return aggregated
|
File without changes
|
File without changes
|
File without changes
|