PyPI - giga-spatial - Versions diffs - 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl - Mend

giga-spatial 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/METADATA +2 -1
{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/RECORD +17 -17
gigaspatial/__init__.py +1 -1
gigaspatial/core/io/data_api.py +3 -1
gigaspatial/core/io/database.py +4 -1
gigaspatial/generators/poi.py +75 -12
gigaspatial/generators/zonal/base.py +34 -35
gigaspatial/generators/zonal/geometry.py +87 -32
gigaspatial/handlers/__init__.py +8 -1
gigaspatial/handlers/base.py +26 -6
gigaspatial/handlers/boundaries.py +50 -0
gigaspatial/handlers/ghsl.py +15 -3
gigaspatial/handlers/worldpop.py +771 -186
gigaspatial/processing/geo.py +127 -87
{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/WHEEL +0 -0
{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/licenses/LICENSE +0 -0
{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/top_level.txt +0 -0

gigaspatial/processing/geo.py CHANGED Viewed

@@ -948,6 +948,9 @@ def aggregate_polygons_to_zones(
     if not isinstance(zones, gpd.GeoDataFrame):
         raise TypeError("zones must be a GeoDataFrame")
+    if zones.empty:
+        raise ValueError("zones GeoDataFrame is empty")
     if zone_id_column not in zones.columns:
         raise ValueError(f"Zone ID column '{zone_id_column}' not found in zones")
@@ -960,11 +963,17 @@ def aggregate_polygons_to_zones(
     if not isinstance(polygons, gpd.GeoDataFrame):
         try:
             polygons_gdf = convert_to_geodataframe(polygons)
-        except:
-            raise TypeError("polygons must be a GeoDataFrame or convertible to one")
+        except Exception as e:
+            raise TypeError(
+                f"polygons must be a GeoDataFrame or convertible to one: {e}"
+            )
     else:
         polygons_gdf = polygons.copy()
+    if polygons_gdf.empty:
+        LOGGER.warning("Empty polygons GeoDataFrame provided")
+        return zones
     # Validate geometry types
     non_polygon_geoms = [
         geom_type
@@ -991,8 +1000,53 @@ def aggregate_polygons_to_zones(
         polygons_gdf = polygons_gdf.to_crs(zones.crs)
     # Handle aggregation method
+    agg_funcs = _process_aggregation_methods(aggregation, value_columns)
+    # Prepare minimal zones for spatial operations (only zone_id_column and geometry)
+    minimal_zones = zones[[zone_id_column, "geometry"]].copy()
+    if predicate == "fractional":
+        aggregated_data = _fractional_aggregation(
+            polygons_gdf, minimal_zones, value_columns, agg_funcs, zone_id_column
+        )
+    else:
+        aggregated_data = _simple_aggregation(
+            polygons_gdf,
+            minimal_zones,
+            value_columns,
+            agg_funcs,
+            zone_id_column,
+            predicate,
+        )
+    # Merge aggregated results back to complete zones data
+    result = zones.merge(
+        aggregated_data[[col for col in aggregated_data.columns if col != "geometry"]],
+        on=zone_id_column,
+        how="left",
+    )
+    # Fill NaN values with zeros for the newly aggregated columns only
+    aggregated_cols = [col for col in result.columns if col not in zones.columns]
+    for col in aggregated_cols:
+        if pd.api.types.is_numeric_dtype(result[col]):
+            result[col] = result[col].fillna(0)
+    # Apply output suffix consistently to result columns only
+    if output_suffix:
+        rename_dict = {col: f"{col}{output_suffix}" for col in aggregated_cols}
+        result = result.rename(columns=rename_dict)
+    if drop_geometry:
+        result = result.drop(columns=["geometry"])
+    return result
+def _process_aggregation_methods(aggregation, value_columns):
+    """Process and validate aggregation methods"""
     if isinstance(aggregation, str):
-        agg_funcs = {col: aggregation for col in value_columns}
+        return {col: aggregation for col in value_columns}
     elif isinstance(aggregation, dict):
         # Validate dictionary keys
         missing_aggs = [col for col in value_columns if col not in aggregation]
@@ -1005,112 +1059,98 @@ def aggregate_polygons_to_zones(
                 f"Aggregation methods specified for non-existent columns: {extra_aggs}"
             )
-        agg_funcs = aggregation
+        return aggregation
     else:
         raise TypeError("aggregation must be a string or dictionary")
-    # Create a copy of the zones
-    result = zones.copy()
-    if predicate == "fractional":
-        # Use area-weighted aggregation with polygon overlay
+def _fractional_aggregation(
+    polygons_gdf, zones, value_columns, agg_funcs, zone_id_column
+):
+    """Perform area-weighted (fractional) aggregation"""
+    try:
+        # Compute UTM CRS for accurate area calculations
         try:
-            # Compute UTM CRS for accurate area calculations
-            try:
-                overlay_utm_crs = polygons_gdf.estimate_utm_crs()
-            except Exception as e:
-                LOGGER.warning(
-                    f"Warning: UTM CRS estimation failed, using Web Mercator. Error: {e}"
-                )
-                overlay_utm_crs = "EPSG:3857"  # Fallback to Web Mercator
+            overlay_utm_crs = polygons_gdf.estimate_utm_crs()
+        except Exception as e:
+            LOGGER.warning(f"UTM CRS estimation failed, using Web Mercator. Error: {e}")
+            overlay_utm_crs = "EPSG:3857"  # Fallback to Web Mercator
-            # Prepare polygons for overlay
-            polygons_utm = polygons_gdf.to_crs(overlay_utm_crs)
-            polygons_utm["orig_area"] = polygons_utm.area
+        # Prepare polygons for overlay - only necessary columns
+        polygons_utm = polygons_gdf.to_crs(overlay_utm_crs)
+        polygons_utm["orig_area"] = polygons_utm.area
-            # Keep only necessary columns
-            overlay_cols = value_columns + ["geometry", "orig_area"]
-            overlay_gdf = polygons_utm[overlay_cols].copy()
+        # Keep only necessary columns
+        overlay_cols = value_columns + ["geometry", "orig_area"]
+        overlay_gdf = polygons_utm[overlay_cols].copy()
-            # Prepare zones for overlay
-            zones_utm = zones.to_crs(overlay_utm_crs)
+        # Prepare zones for overlay
+        zones_utm = zones.to_crs(overlay_utm_crs)
-            # Perform the spatial overlay
-            gdf_overlayed = gpd.overlay(
-                overlay_gdf, zones_utm[[zone_id_column, "geometry"]], how="intersection"
-            )
+        # Perform the spatial overlay
+        gdf_overlayed = gpd.overlay(overlay_gdf, zones_utm, how="intersection")
-            # Calculate fractional areas
-            gdf_overlayed["intersection_area"] = gdf_overlayed.area
-            gdf_overlayed["area_fraction"] = (
-                gdf_overlayed["intersection_area"] / gdf_overlayed["orig_area"]
-            )
+        if gdf_overlayed.empty:
+            LOGGER.warning("No intersections found during fractional aggregation")
+            return zones
-            # Apply area weighting to value columns
-            for col in value_columns:
-                gdf_overlayed[col] = gdf_overlayed[col] * gdf_overlayed["area_fraction"]
+        # Calculate fractional areas
+        gdf_overlayed["intersection_area"] = gdf_overlayed.area
+        gdf_overlayed["area_fraction"] = (
+            gdf_overlayed["intersection_area"] / gdf_overlayed["orig_area"]
+        )
-            # Aggregate by zone ID
-            aggregated = gdf_overlayed.groupby(zone_id_column)[value_columns].agg(
-                agg_funcs
-            )
+        # Apply area weighting to value columns
+        for col in value_columns:
+            gdf_overlayed[col] = gdf_overlayed[col] * gdf_overlayed["area_fraction"]
-            # Handle column naming for multi-level index
-            if isinstance(aggregated.columns, pd.MultiIndex):
-                aggregated.columns = [
-                    f"{col[0]}_{col[1]}{output_suffix}" for col in aggregated.columns
-                ]
+        # Aggregate by zone ID
+        aggregated = gdf_overlayed.groupby(zone_id_column)[value_columns].agg(agg_funcs)
-            # Reset index
-            aggregated = aggregated.reset_index()
+        # Handle column naming for multi-level index
+        aggregated = _handle_multiindex_columns(aggregated)
-            # Merge aggregated values back to the zones
-            result = result.merge(aggregated, on=zone_id_column, how="left")
+        # Reset index and merge back to zones
+        aggregated = aggregated.reset_index()
-            # Fill NaN values with zeros
-            for col in result.columns:
-                if (
-                    col != zone_id_column
-                    and col != "geometry"
-                    and pd.api.types.is_numeric_dtype(result[col])
-                ):
-                    result[col] = result[col].fillna(0)
+        # Return only the aggregated data (will be merged with full zones later)
+        return aggregated
-        except Exception as e:
-            raise RuntimeError(f"Error during area-weighted aggregation: {e}")
+    except Exception as e:
+        raise RuntimeError(f"Error during area-weighted aggregation: {e}")
-    else:
-        # Non-weighted aggregation - simpler approach
-        # Perform spatial join
-        joined = gpd.sjoin(polygons_gdf, zones, how="inner", predicate=predicate)
-        # Remove geometry column for aggregation
-        if "geometry" in joined.columns:
-            joined = joined.drop(columns=["geometry"])
+def _simple_aggregation(
+    polygons_gdf, zones, value_columns, agg_funcs, zone_id_column, predicate
+):
+    """Perform simple (non-weighted) aggregation"""
+    # Perform spatial join
+    joined = gpd.sjoin(polygons_gdf, zones, how="inner", predicate=predicate)
-        # Group by zone ID and aggregate
-        aggregated = joined.groupby(zone_id_column)[value_columns].agg(agg_funcs)
+    if joined.empty:
+        LOGGER.warning(f"No {predicate} relationships found during spatial join")
+        return zones
-        # Handle column naming for multi-level index
-        if isinstance(aggregated.columns, pd.MultiIndex):
-            aggregated.columns = [
-                f"{col[0]}_{col[1]}{output_suffix}" for col in aggregated.columns
-            ]
+    # Remove geometry column for aggregation (keep only necessary columns)
+    agg_cols = value_columns + [zone_id_column]
+    joined_subset = joined[agg_cols].copy()
-        # Reset index and merge back to zones
-        aggregated = aggregated.reset_index()
-        result = result.merge(aggregated, on=zone_id_column, how="left")
+    # Group by zone ID and aggregate
+    aggregated = joined_subset.groupby(zone_id_column)[value_columns].agg(agg_funcs)
-        # Fill NaN values with zeros
-        for col in result.columns:
-            if (
-                col != zone_id_column
-                and col != "geometry"
-                and pd.api.types.is_numeric_dtype(result[col])
-            ):
-                result[col] = result[col].fillna(0)
+    # Handle column naming for multi-level index
+    aggregated = _handle_multiindex_columns(aggregated)
-    if drop_geometry:
-        result = result.drop(columns=["geometry"])
+    # Reset index and merge back to zones
+    aggregated = aggregated.reset_index()
-    return result
+    # Return only the aggregated data (will be merged with full zones later)
+    return aggregated
+def _handle_multiindex_columns(aggregated):
+    """Handle multi-level column index from groupby aggregation"""
+    if isinstance(aggregated.columns, pd.MultiIndex):
+        # Flatten multi-level columns: combine column name with aggregation method
+        aggregated.columns = [f"{col[0]}_{col[1]}" for col in aggregated.columns]
+    return aggregated

{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{giga_spatial-0.6.5.dist-info → giga_spatial-0.6.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

giga-spatial 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl

giga-spatial 0.6.5__py3-none-any.whl → 0.6.7__py3-none-any.whl