PyPI - openforis-whisp - Versions diffs - 2.0.0b1__py3-none-any.whl → 2.0.0b3__py3-none-any.whl - Mend

openforis-whisp 2.0.0b1__py3-none-any.whl → 2.0.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

openforis_whisp/__init__.py +2 -1
openforis_whisp/data_conversion.py +11 -0
openforis_whisp/datasets.py +207 -247
openforis_whisp/parameters/lookup_gee_datasets.csv +2 -5
openforis_whisp/risk.py +29 -29
openforis_whisp/stats.py +297 -47
openforis_whisp/utils.py +298 -5
{openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b3.dist-info}/METADATA +1 -1
openforis_whisp-2.0.0b3.dist-info/RECORD +16 -0
openforis_whisp/parameters/__init__.py +0 -15
openforis_whisp-2.0.0b1.dist-info/RECORD +0 -17
{openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b3.dist-info}/LICENSE +0 -0
{openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b3.dist-info}/WHEEL +0 -0

openforis_whisp/parameters/lookup_gee_datasets.csv CHANGED Viewed

@@ -2,7 +2,7 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
 EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
 GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
 TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
-GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_gfc_tc_2020_prep
+GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
 TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
@@ -163,13 +163,11 @@ GFT_planted_plantation,1900,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_gft_
 IIASA_planted_plantation,1910,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_iiasa_planted_prep
 TMF_regrowth_2023,2000,,NA,treecover_after_2020,0,1,0,float32,1,0,g_tmf_regrowth_prep
 ESRI_2023_TC,2010,,NA,treecover_after_2020,0,1,0,float32,1,0,g_esri_2023_tc_prep
-GLC_FCS30D_TC_2022,2020,,NA,treecover_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_tc_2022_prep
 Oil_palm_2023_FDaP,2100,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_palm_2023_prep
 Rubber_2023_FDaP,2110,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_rubber_2023_prep
 Coffee_FDaP_2023,2111,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_coffee_2023_prep
 Cocoa_2023_FDaP,2120,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_cocoa_2023_prep
-ESRI_2023_crop,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2023_crop_prep
-GLC_FCS30D_crop_2022,2140,,NA,agri_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_crop_2022_prep
+ESRI_crop_gain_2020_2023,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2020_2023_crop_prep
 GFW_logging_before_2020,2200,,NA,logging_concession,0,1,0,float32,1,0,g_logging_concessions_prep
 nCO_ideam_forest_2020,2310,CO,treecover,NA,1,1,0,float32,1,0,nco_ideam_forest_2020_prep
 nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,1,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
@@ -199,4 +197,3 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
 nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
 nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
 nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep

openforis_whisp/risk.py CHANGED Viewed

@@ -161,9 +161,8 @@ def whisp_risk(
             lookup_df_copy, custom_bands_info, df.columns
         )
         print(f"Including custom bands: {list(custom_bands_info.keys())}")
-        # print(f"appended custom bands info to lookup table")
     if national_codes:
-        print(f"Filtering by national codes: {national_codes}")
+        print(f"Including additional national data for: {national_codes}")
     # Filter by national codes
     filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
         lookup_df=lookup_df_copy,
@@ -473,7 +472,6 @@ def add_indicators(
     return df
-# Update add_indicator_column to use the unit_type parameter
 def add_indicator_column(
     df: data_lookup_type,
     input_columns: list[str],
@@ -482,49 +480,51 @@ def add_indicator_column(
     low_name: str = "no",
     high_name: str = "yes",
     sum_comparison: bool = False,
-    unit_type: str = None,  # unit_type parameter
+    unit_type: str = None,
 ) -> data_lookup_type:
-    """
-    Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
+    """Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign."""
-    Parameters:
-        df (data_lookup_type): The pandas DataFrame to which the column will be added.
-        input_columns (list): List of column names to check for threshold.
-        threshold (float): The threshold value to compare against.
-        new_column_name (str): The name of the new column to be added.
-        The '>' sign is used for comparisons.
-        When 'sum comparison' == True, then the threshold is compared to the sum of all those listed in 'input_columns', as opposed to when Flalse, when each column in the list is compared to the threshold individually
-        low_name (str): The name for the value when below or equal to threshold (default is 'no').
-        high_name (str): The name for the value when above threshold (default is 'yes').
-        sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
-        unit_type (str): Whether values are in "ha" or "percent".
-    Returns:
-        data_lookup_type: The DataFrame with the new column added.
-    """
     # Create a new column and initialize with low_name
     new_column = pd.Series(low_name, index=df.index, name=new_column_name)
-    # Default behavior: use '>' for single column comparison
     if sum_comparison:
         # Sum all values in specified columns and compare to threshold
         sum_values = df[input_columns].sum(axis=1)
         new_column[sum_values > threshold] = high_name
     else:
-        # Check if any values in specified columns are above the threshold and update the new column accordingly
+        # Check if any values in specified columns are above the threshold
         for col in input_columns:
-            # So that threshold is always in percent, if outputs are in ha, the code converts to percent (based on dividing by the geometry_area_column column.
-            # Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
             if unit_type == "ha":
                 df[geometry_area_column] = pd.to_numeric(
                     df[geometry_area_column], errors="coerce"
                 )
-                val_to_check = clamp(
-                    ((df[col] / df[geometry_area_column]) * 100), 0, 100
-                )
+                # Handle points (Area = 0) separately
+                is_point = df[geometry_area_column] == 0
+                # For points: any value > 0 exceeds threshold
+                point_mask = is_point & (df[col] > 0)
+                new_column[point_mask] = high_name
+                # For polygons: convert to percentage and check threshold
+                polygon_mask = ~is_point
+                if polygon_mask.any():
+                    val_to_check = clamp(
+                        (
+                            (
+                                df.loc[polygon_mask, col]
+                                / df.loc[polygon_mask, geometry_area_column]
+                            )
+                            * 100
+                        ),
+                        0,
+                        100,
+                    )
+                    new_column[polygon_mask & (val_to_check > threshold)] = high_name
             else:
+                # For percentage values, use direct comparison
                 val_to_check = df[col]
-            new_column[val_to_check > threshold] = high_name
+                new_column[val_to_check > threshold] = high_name
     # Concatenate the new column to the DataFrame
     df = pd.concat([df, new_column], axis=1)

openforis_whisp/stats.py CHANGED Viewed

@@ -34,6 +34,53 @@ from .reformat import (
 # NB functions that included "formatted" in the name apply a schema for validation and reformatting of the output dataframe. The schema is created from lookup tables.
+# ============================================================================
+# PERFORMANCE OPTIMIZATION: Cache expensive Earth Engine datasets
+# ============================================================================
+# These images/collections are loaded once and reused across all features
+# to avoid repeated expensive operations. This saves 7-15 seconds per analysis.
+_WATER_FLAG_IMAGE = None
+_GEOBOUNDARIES_FC = None
+def get_water_flag_image():
+    """
+    Get cached water flag image.
+    OPTIMIZATION: Water flag image is created once and reused for all features.
+    This avoids recreating ocean/water datasets for every feature (previously
+    called in get_type_and_location for each feature).
+    Returns
+    -------
+    ee.Image
+        Cached water flag image
+    """
+    global _WATER_FLAG_IMAGE
+    if _WATER_FLAG_IMAGE is None:
+        _WATER_FLAG_IMAGE = water_flag_all_prep()
+    return _WATER_FLAG_IMAGE
+def get_geoboundaries_fc():
+    """
+    Get cached geoboundaries feature collection.
+    OPTIMIZATION: Geoboundaries collection is loaded once and reused for all features.
+    This avoids loading the large FeatureCollection for every feature (previously
+    called in get_geoboundaries_info for each feature).
+    Returns
+    -------
+    ee.FeatureCollection
+        Cached geoboundaries feature collection
+    """
+    global _GEOBOUNDARIES_FC
+    if _GEOBOUNDARIES_FC is None:
+        _GEOBOUNDARIES_FC = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM1")
+    return _GEOBOUNDARIES_FC
 def whisp_formatted_stats_geojson_to_df(
     input_geojson_filepath: Path | str,
@@ -425,7 +472,9 @@ def whisp_stats_ee_to_ee(
     national_codes=None,
     unit_type="ha",
     keep_properties=None,
-    whisp_image=None,  # New parameter
+    whisp_image=None,
+    validate_external_id=True,
+    validate_bands=False,  # New parameter
 ):
     """
     Process a feature collection to get statistics for each feature.
@@ -442,19 +491,25 @@ def whisp_stats_ee_to_ee(
         whisp_image (ee.Image, optional): Pre-combined multiband Earth Engine Image containing
             all Whisp datasets. If provided, this image will be used instead of combining
             datasets based on national_codes.
+        validate_external_id (bool, optional): If True, validates that external_id_column exists
+            in all features (default: True). Set to False to skip validation and save 2-4 seconds.
+            Only disable if you're confident the column exists in all features.
     Returns:
         ee.FeatureCollection: The output feature collection with statistics.
     """
     if external_id_column is not None:
         try:
-            # Validate that the external_id_column exists in all features
-            validation_result = validate_external_id_column(
-                feature_collection, external_id_column
-            )
+            # OPTIMIZATION: Make validation optional to save 2-4 seconds
+            # Validation includes multiple .getInfo() calls which are slow
+            if validate_external_id:
+                # Validate that the external_id_column exists in all features
+                validation_result = validate_external_id_column(
+                    feature_collection, external_id_column
+                )
-            if not validation_result["is_valid"]:
-                raise ValueError(validation_result["error_message"])
+                if not validation_result["is_valid"]:
+                    raise ValueError(validation_result["error_message"])
             # First handle property selection, but preserve the external_id_column
             if keep_properties is not None:
@@ -506,19 +561,27 @@ def whisp_stats_ee_to_ee(
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,  # Pass through
+        validate_bands=validate_bands,
     )
     return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
 def _keep_fc_properties(feature_collection, keep_properties):
+    """
+    Filter feature collection properties based on keep_properties parameter.
+    OPTIMIZATION: When keep_properties is True, we no longer call .getInfo()
+    to get property names. Instead, we simply return the collection as-is,
+    since True means "keep all properties". This saves 1-2 seconds.
+    """
     # If keep_properties is specified, select only those properties
     if keep_properties is None:
         feature_collection = feature_collection.select([])
     elif keep_properties == True:
-        # If keep_properties is true, select all properties
-        first_feature_props = feature_collection.first().propertyNames().getInfo()
-        feature_collection = feature_collection.select(first_feature_props)
+        # If keep_properties is true, keep all properties
+        # No need to call .select() or .getInfo() - just return as-is
+        pass
     elif isinstance(keep_properties, list):
         feature_collection = feature_collection.select(keep_properties)
     else:
@@ -534,7 +597,8 @@ def whisp_stats_ee_to_df(
     remove_geom=False,
     national_codes=None,
     unit_type="ha",
-    whisp_image=None,  # New parameter
+    whisp_image=None,
+    validate_bands=False,  # New parameter
 ) -> pd.DataFrame:
     """
     Convert a Google Earth Engine FeatureCollection to a pandas DataFrame and convert ISO3 to ISO2 country codes.
@@ -561,27 +625,52 @@ def whisp_stats_ee_to_df(
     """
     # First, do the whisp processing to get the EE feature collection with stats
     try:
-        stats_feature_collection = whisp_stats_ee_to_ee(
-            feature_collection,
-            external_id_column,
-            national_codes=national_codes,
-            unit_type=unit_type,
-            whisp_image=whisp_image,  # Pass through
-        )
-    except Exception as e:
-        print(f"An error occurred during Whisp stats processing: {e}")
-        raise e
+        try:
+            stats_feature_collection = whisp_stats_ee_to_ee(
+                feature_collection,
+                external_id_column,
+                national_codes=national_codes,
+                unit_type=unit_type,
+                whisp_image=whisp_image,  # Pass through
+                validate_bands=False,  # try without validation first
+            )
+        except Exception as e:
+            print(f"An error occurred during Whisp stats processing: {e}")
+            raise e
-    # Then, convert the EE feature collection to DataFrame
-    try:
-        df_stats = convert_ee_to_df(
-            ee_object=stats_feature_collection,
-            remove_geom=remove_geom,
-        )
-    except Exception as e:
-        print(f"An error occurred during the conversion from EE to DataFrame: {e}")
-        raise e
+        # Then, convert the EE feature collection to DataFrame
+        try:
+            df_stats = convert_ee_to_df(
+                ee_object=stats_feature_collection,
+                remove_geom=remove_geom,
+            )
+        except Exception as e:
+            print(f"An error occurred during the conversion from EE to DataFrame: {e}")
+            raise e
+    except:  # retry with validation of whisp input datasets
+        try:
+            stats_feature_collection = whisp_stats_ee_to_ee(
+                feature_collection,
+                external_id_column,
+                national_codes=national_codes,
+                unit_type=unit_type,
+                whisp_image=whisp_image,
+                validate_bands=True,  # If error, try with validation
+            )
+        except Exception as e:
+            print(f"An error occurred during Whisp stats processing: {e}")
+            raise e
+        # Then, convert the EE feature collection to DataFrame
+        try:
+            df_stats = convert_ee_to_df(
+                ee_object=stats_feature_collection,
+                remove_geom=remove_geom,
+            )
+        except Exception as e:
+            print(f"An error occurred during the conversion from EE to DataFrame: {e}")
+            raise e
     try:
         df_stats = convert_iso3_to_iso2(
             df=df_stats,
@@ -592,9 +681,52 @@ def whisp_stats_ee_to_df(
         print(f"An error occurred during the ISO3 to ISO2 conversion: {e}")
         return pd.DataFrame()  # Return an empty DataFrame in case of error
+    # NEW: Set area to 0 for point geometries
+    try:
+        df_stats = set_point_geometry_area_to_zero(df_stats)
+    except Exception as e:
+        print(f"An error occurred during point geometry area adjustment: {e}")
+        # Continue without the adjustment rather than failing completely
     return df_stats
+def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Set the geometry area column to 0 for features with Point geometry type.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing geometry type and area columns
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with area set to 0 for Point geometries
+    """
+    # Check if required columns exist
+    if geometry_type_column not in df.columns:
+        print(
+            f"Warning: {geometry_type_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+    # Create a copy to avoid modifying the original
+    df_modified = df.copy()
+    # Set area to 0 where geometry type is Point
+    point_mask = df_modified[geometry_type_column] == "Point"
+    df_modified.loc[point_mask, geometry_area_column] = 0.0
+    # Log the changes
+    num_points = point_mask.sum()
+    if num_points > 0:
+        print(f"Set area to 0 for {num_points} Point geometries")
+    return df_modified
 def whisp_stats_ee_to_drive(
     feature_collection: ee.FeatureCollection,
     external_id_column=None,
@@ -647,7 +779,11 @@ def whisp_stats_ee_to_drive(
 # Get stats for a feature or feature collection
 def get_stats(
-    feature_or_feature_col, national_codes=None, unit_type="ha", whisp_image=None
+    feature_or_feature_col,
+    national_codes=None,
+    unit_type="ha",
+    whisp_image=None,
+    validate_bands=False,
 ):
     """
     Get stats for a feature or feature collection with optional pre-combined image.
@@ -676,16 +812,25 @@ def get_stats(
         img_combined = whisp_image
         print("Using provided whisp_image")
     else:
-        img_combined = combine_datasets(national_codes=national_codes)
+        img_combined = combine_datasets(
+            national_codes=national_codes, validate_bands=validate_bands
+        )
         print(f"Combining datasets with national_codes: {national_codes}")
     # Check if the input is a Feature or a FeatureCollection
     if isinstance(feature_or_feature_col, ee.Feature):
         print("Processing single feature")
+        # OPTIMIZATION: Create cached images for single feature processing
+        water_all = get_water_flag_image()
+        gbounds_ADM0 = get_geoboundaries_fc()
         output = ee.FeatureCollection(
             [
                 get_stats_feature(
-                    feature_or_feature_col, img_combined, unit_type=unit_type
+                    feature_or_feature_col,
+                    img_combined,
+                    unit_type=unit_type,
+                    water_all=water_all,
+                    gbounds_ADM0=gbounds_ADM0,
                 )
             ]
         )
@@ -707,6 +852,10 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
     """
     Calculate statistics for a feature collection using Whisp datasets.
+    OPTIMIZATION: Creates water flag and geoboundaries images once and reuses
+    them for all features instead of recreating them for each feature.
+    This saves 7-15 seconds per analysis.
     Parameters
     ----------
     feature_col : ee.FeatureCollection
@@ -726,15 +875,19 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
     ee.FeatureCollection
         Feature collection with calculated statistics
     """
-    # # Use provided image or combine datasets
-    # if img_combined is None:
-    #     img_combined = combine_datasets(national_codes=national_codes)
+    # OPTIMIZATION: Create cached images once before processing features
+    # These will be reused for all features instead of being recreated each time
+    water_all = get_water_flag_image()
+    gbounds_ADM0 = get_geoboundaries_fc()
     out_feature_col = ee.FeatureCollection(
         feature_col.map(
             lambda feature: get_stats_feature(
-                feature, img_combined, unit_type=unit_type
+                feature,
+                img_combined,
+                unit_type=unit_type,
+                water_all=water_all,
+                gbounds_ADM0=gbounds_ADM0,
             )
         )
     )
@@ -747,10 +900,15 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
 # Note: This function doesn't need whisp_image parameter since it already accepts img_combined directly
-def get_stats_feature(feature, img_combined, unit_type="ha"):
+def get_stats_feature(
+    feature, img_combined, unit_type="ha", water_all=None, gbounds_ADM0=None
+):
     """
     Get statistics for a single feature using a pre-combined image.
+    OPTIMIZATION: Accepts cached water/geoboundaries images to avoid recreating
+    them for every feature.
     Parameters
     ----------
     feature : ee.Feature
@@ -759,6 +917,10 @@ def get_stats_feature(feature, img_combined, unit_type="ha"):
         Pre-combined image with all the datasets
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    water_all : ee.Image, optional
+        Cached water flag image
+    gbounds_ADM0 : ee.FeatureCollection, optional
+        Cached geoboundaries feature collection
     Returns
     -------
@@ -773,8 +935,8 @@ def get_stats_feature(feature, img_combined, unit_type="ha"):
         tileScale=8,
     )
-    # Get basic feature information
-    feature_info = get_type_and_location(feature)
+    # Get basic feature information with cached images
+    feature_info = get_type_and_location(feature, water_all, gbounds_ADM0)
     # add statistics unit type (e.g., percentage or hectares) to dictionary
     stats_unit_type = ee.Dictionary({stats_unit_type_column: unit_type})
@@ -823,22 +985,47 @@ def get_stats_feature(feature, img_combined, unit_type="ha"):
 # Get basic feature information - uses admin and water datasets in gee.
-def get_type_and_location(feature):
-    """Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags."""
+def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
+    """
+    Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags.
+    OPTIMIZATION: Accepts cached water flag image and geoboundaries collection
+    to avoid recreating them for every feature (saves 7-15 seconds per analysis).
+    Parameters
+    ----------
+    feature : ee.Feature
+        The feature to extract information from
+    water_all : ee.Image, optional
+        Cached water flag image. If None, creates it.
+    gbounds_ADM0 : ee.FeatureCollection, optional
+        Cached geoboundaries feature collection. If None, loads it.
+    Returns
+    -------
+    ee.Dictionary
+        Dictionary with feature information
+    """
     # Get centroid of the feature's geometry
     centroid = feature.geometry().centroid(1)
+    # OPTIMIZATION: Use cached geoboundaries
+    if gbounds_ADM0 is None:
+        gbounds_ADM0 = get_geoboundaries_fc()
     # Fetch location info from geoboundaries (country, admin)
-    location = ee.Dictionary(get_geoboundaries_info(centroid))
+    location = ee.Dictionary(get_geoboundaries_info(centroid, gbounds_ADM0))
     country = ee.Dictionary({iso3_country_column: location.get("shapeGroup")})
     admin_1 = ee.Dictionary(
         {admin_1_column: location.get("shapeName")}
     )  # Administrative level 1 (if available)
+    # OPTIMIZATION: Use cached water flag image
+    if water_all is None:
+        water_all = get_water_flag_image()
     # Prepare the water flag information
-    water_all = water_flag_all_prep()
     water_flag_dict = value_at_point_flag(
         point=centroid, image=water_all, band_name=water_flag, output_name=water_flag
     )
@@ -890,8 +1077,28 @@ def percent_and_format(val, area_ha):
 # geoboundaries - admin units from a frequently updated database, allows commercial use (CC BY 4.0 DEED) (disputed territories may need checking)
-def get_geoboundaries_info(geometry):
-    gbounds_ADM0 = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM1")
+def get_geoboundaries_info(geometry, gbounds_ADM0=None):
+    """
+    Get geoboundaries info for a geometry.
+    OPTIMIZATION: Accepts cached geoboundaries FeatureCollection to avoid
+    reloading it for every feature (saves 2-5 seconds per analysis).
+    Parameters
+    ----------
+    geometry : ee.Geometry
+        The geometry to query
+    gbounds_ADM0 : ee.FeatureCollection, optional
+        Cached geoboundaries feature collection. If None, loads it.
+    Returns
+    -------
+    ee.Dictionary
+        Dictionary with shapeGroup and shapeName
+    """
+    if gbounds_ADM0 is None:
+        gbounds_ADM0 = get_geoboundaries_fc()
     polygonsIntersectPoint = gbounds_ADM0.filterBounds(geometry)
     backup_dict = ee.Dictionary({"shapeGroup": "Unknown", "shapeName": "Unknown"})
     return ee.Algorithms.If(
@@ -1226,3 +1433,46 @@ def debug_feature_collection_properties(feature_collection, max_features=5):
     except Exception as e:
         return {"error": f"Error during debugging: {str(e)}"}
+# helper function to set area to 0 for point geometries
+def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Set the geometry area column to 0 for features with Point geometry type.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing geometry type and area columns
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with area set to 0 for Point geometries
+    """
+    # Check if required columns exist
+    if geometry_type_column not in df.columns:
+        print(
+            f"Warning: {geometry_type_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+    if geometry_area_column not in df.columns:
+        print(
+            f"Warning: {geometry_area_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+    # Create a copy to avoid modifying the original
+    df_modified = df.copy()
+    # Set area to 0 where geometry type is Point
+    point_mask = df_modified[geometry_type_column] == "Point"
+    df_modified.loc[point_mask, geometry_area_column] = 0.0
+    # Log the changes
+    num_points = point_mask.sum()
+    # if num_points > 0:
+    #     print(f"Set area to 0 for {num_points} Point geometries")
+    return df_modified

openforis-whisp 2.0.0b1__py3-none-any.whl → 2.0.0b3__py3-none-any.whl

openforis-whisp 2.0.0b1__py3-none-any.whl → 2.0.0b3__py3-none-any.whl