PyPI - openforis-whisp - Versions diffs - 2.0.0a5__py3-none-any.whl → 2.0.0b1__py3-none-any.whl - Mend

openforis-whisp 2.0.0a5__py3-none-any.whl → 2.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

openforis_whisp/__init__.py +2 -3
openforis_whisp/datasets.py +29 -36
openforis_whisp/parameters/lookup_gee_datasets.csv +2 -1
openforis_whisp/reformat.py +362 -161
openforis_whisp/risk.py +85 -8
openforis_whisp/stats.py +145 -51
openforis_whisp/utils.py +40 -0
{openforis_whisp-2.0.0a5.dist-info → openforis_whisp-2.0.0b1.dist-info}/METADATA +2 -2
openforis_whisp-2.0.0b1.dist-info/RECORD +17 -0
openforis_whisp-2.0.0a5.dist-info/RECORD +0 -17
{openforis_whisp-2.0.0a5.dist-info → openforis_whisp-2.0.0b1.dist-info}/LICENSE +0 -0
{openforis_whisp-2.0.0a5.dist-info → openforis_whisp-2.0.0b1.dist-info}/WHEEL +0 -0

openforis_whisp/risk.py CHANGED Viewed

@@ -112,6 +112,7 @@ def whisp_risk(
     high_name: str = "yes",
     explicit_unit_type: str = None,
     national_codes: list[str] = None,  # List of ISO2 country codes to filter by
+    custom_bands_info: dict = None,  # New parameter for custom band risk info
 ) -> data_lookup_type:
     """
     Adds the EUDR (European Union Deforestation Regulation) risk column to the DataFrame based on indicator values.
@@ -134,25 +135,43 @@ def whisp_risk(
         high_name (str, optional): Value shown in table if more than the threshold. Defaults to "yes".
         explicit_unit_type (str, optional): Override the autodetected unit type ('ha' or 'percent').
                                       If not provided, will detect from dataframe 'unit' column.
+        custom_bands_info (dict, optional): Custom band risk information. Dict format:
+            {
+                'band_name': {
+                    'theme': 'treecover',  # or 'commodities', 'disturbance_before', 'disturbance_after'
+                    'theme_timber': 'primary',  # or 'naturally_reg_2020', 'planted_plantation_2020', etc.
+                    'use_for_risk': 1,  # 0 or 1
+                    'use_for_risk_timber': 1,  # 0 or 1
+                }
+            }
+            If None, custom bands won't be included in risk calculations.
     Returns:
-        data_lookup_type: DataFrame with added 'EUDR_risk' column.
+        data_lookup_type: DataFrame with added risk columns.
     """
-    # Determine the unit type to use based on input data and overrid
+    # Determine the unit type
     unit_type = detect_unit_type(df, explicit_unit_type)
     print(f"Using unit type: {unit_type}")
     lookup_df_copy = lookup_gee_datasets_df.copy()
-    # filter by national codes (even if None - this removes all country columns unless specified)
+    # Add custom bands to lookup if provided
+    if custom_bands_info:
+        lookup_df_copy = add_custom_bands_info_to_lookup(
+            lookup_df_copy, custom_bands_info, df.columns
+        )
+        print(f"Including custom bands: {list(custom_bands_info.keys())}")
+        # print(f"appended custom bands info to lookup table")
+    if national_codes:
+        print(f"Filtering by national codes: {national_codes}")
+    # Filter by national codes
     filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
         lookup_df=lookup_df_copy,
         filter_col="ISO2_code",
         national_codes=national_codes,
     )
-    # Rest of the function remains the same, but pass unit_type to add_indicators
+    # Get indicator columns (now includes custom bands)
     if ind_1_input_columns is None:
         ind_1_input_columns = get_cols_ind_01_treecover(filtered_lookup_gee_datasets_df)
     if ind_2_input_columns is None:
@@ -393,7 +412,7 @@ def add_eudr_risk_timber_col(
     """
     for index, row in df.iterrows():
-        # If there is a commodity in 2020 (ind_2_name)
+        # If there is a commodity in 2020 (ind_2_name)
         # OR if there is planted-plantation in 2020 (ind_7_name) AND no agriculture in 2023 (ind_10_name), set EUDR_risk_timber to "low"
         if row[ind_2_name] == "yes" or (
             row[ind_7_name] == "yes" and row[ind_10_name] == "no"
@@ -411,7 +430,7 @@ def add_eudr_risk_timber_col(
             ind_8_name
         ] == "yes":
             df.at[index, "risk_timber"] = "high"
-        # No data yet on OWL conversion
+        # No data yet on OWL conversion
         # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
         # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
         #    df.at[index, 'EUDR_risk_timber'] = "high"
@@ -699,7 +718,7 @@ def get_cols_ind_09_treecover_after_2020(lookup_gee_datasets_df):
     return list(
         lookup_gee_datasets_df["name"][
             (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
-            & (lookup_gee_datasets_df["theme_timber"] == "treecover_post2020")
+            & (lookup_gee_datasets_df["theme_timber"] == "treecover_after_2020")
         ]
     )
@@ -769,3 +788,61 @@ def clamp(
 def check_range(value: float) -> None:
     """Raise ValueError unless ``value`` lies in the inclusive range 0 to 100."""
     # Chained comparison is kept on purpose: NaN fails it and is rejected.
     if 0 <= value <= 100:
         return
     raise ValueError("Value must be between 0 and 100.")
+def add_custom_bands_info_to_lookup(
+    lookup_df: pd.DataFrame, custom_bands_info: dict, df_columns: list
+) -> pd.DataFrame:
+    """
+    Append one lookup row per custom band so it can take part in risk calculations.
+    Bands listed in ``custom_bands_info`` that are not found in ``df_columns``
+    are skipped. ``lookup_df`` itself is not modified; it is returned as-is
+    when no custom band matches.
+    Parameters
+    ----------
+    lookup_df : pd.DataFrame
+        Original lookup DataFrame
+    custom_bands_info : dict
+        Maps each band name to a dict that may define 'theme', 'theme_timber',
+        'use_for_risk' and 'use_for_risk_timber'
+    df_columns : list
+        Column names of the actual data DataFrame (any container supporting ``in``)
+    Returns
+    -------
+    pd.DataFrame
+        Lookup DataFrame with the matching custom bands appended
+    """
+    def _lookup_row(band: str, info: dict) -> dict:
+        # Build the lookup entry for one custom band; risk settings that the
+        # caller left out fall back to "no theme" / "not used for risk".
+        return {
+            "name": band,  # the band name exactly as supplied
+            "theme": info.get("theme", pd.NA),
+            "theme_timber": info.get("theme_timber", pd.NA),
+            "use_for_risk": info.get("use_for_risk", 0),
+            "use_for_risk_timber": info.get("use_for_risk_timber", 0),
+            "exclude_from_output": 0,  # 0 so custom bands are not excluded
+            # No country code (NA, not an empty string); presumably treated as
+            # a global dataset by the country filter - TODO confirm.
+            "ISO2_code": pd.NA,
+            # Remaining lookup columns get fixed defaults
+            "col_type": "float64",
+            "is_nullable": 1,
+            "is_required": 0,
+            "order": 9999,  # large value intended to place the band last
+            "corresponding_variable": pd.NA,  # not necessary for custom bands
+        }
+    # Only keep bands that actually exist in the data DataFrame
+    new_rows = [
+        _lookup_row(band, info)
+        for band, info in custom_bands_info.items()
+        if band in df_columns
+    ]
+    if not new_rows:
+        return lookup_df
+    # Combine with the original lookup, renumbering the index
+    return pd.concat([lookup_df, pd.DataFrame(new_rows)], ignore_index=True)

openforis_whisp/stats.py CHANGED Viewed

@@ -27,7 +27,10 @@ from .data_conversion import (
     # convert_csv_to_geojson,
     convert_df_to_geojson,
 )  # copied functions from whisp-api and geemap (accessed 2024) to avoid dependency
-from .reformat import validate_dataframe_using_lookups
+from .reformat import (
+    validate_dataframe_using_lookups,
+    validate_dataframe_using_lookups_flexible,
+)
 # NB functions that included "formatted" in the name apply a schema for validation and reformatting of the output dataframe. The schema is created from lookup tables.
@@ -38,6 +41,8 @@ def whisp_formatted_stats_geojson_to_df(
     remove_geom=False,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,
+    custom_bands=None,  # New parameter
 ) -> pd.DataFrame:
     """
         Main function for most users.
@@ -65,6 +70,15 @@ def whisp_formatted_stats_geojson_to_df(
             List of ISO2 country codes to include national datasets.
         unit_type: str, optional
             Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+        whisp_image : ee.Image, optional
+            Pre-combined multiband Earth Engine Image containing all Whisp datasets.
+            If provided, this image will be used instead of combining datasets based on national_codes.
+            If None, datasets will be combined automatically using national_codes parameter.
+        custom_bands : list or dict, optional
+            Custom band information for extra columns. Can be:
+            - List of band names: ['Aa_test', 'elevation']
+            - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
+            - None: preserves all extra columns automatically
     Returns
         -------
@@ -78,7 +92,9 @@ def whisp_formatted_stats_geojson_to_df(
         external_id_column,
         remove_geom,
         national_codes=national_codes,
-        unit_type=unit_type,  # Fixed: now it's a keyword argument
+        unit_type=unit_type,
+        whisp_image=whisp_image,
+        custom_bands=custom_bands,  # Pass through
     )
@@ -89,6 +105,7 @@ def whisp_formatted_stats_geojson_to_geojson(
     geo_column: str = "geo",
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ):
     """
     Convert a formatted GeoJSON file with a geo column into a GeoJSON file containing Whisp stats.
@@ -107,6 +124,8 @@ def whisp_formatted_stats_geojson_to_geojson(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
@@ -117,6 +136,7 @@ def whisp_formatted_stats_geojson_to_geojson(
         external_id_column=external_id_column,
         national_codes=national_codes,
         unit_type=unit_type,
+        whisp_image=whisp_image,  # Pass through
     )
     # Convert the df to GeoJSON
     convert_df_to_geojson(df, output_geojson_filepath, geo_column)
@@ -131,6 +151,7 @@ def whisp_formatted_stats_ee_to_geojson(
     geo_column: str = "geo",
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ):
     """
     Convert an Earth Engine FeatureCollection to a GeoJSON file containing Whisp stats.
@@ -149,6 +170,8 @@ def whisp_formatted_stats_ee_to_geojson(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
     None
@@ -159,6 +182,7 @@ def whisp_formatted_stats_ee_to_geojson(
         external_id_column,
         national_codes=national_codes,
         unit_type=unit_type,
+        whisp_image=whisp_image,  # Pass through
     )
     # Convert the df to GeoJSON
@@ -173,6 +197,8 @@ def whisp_formatted_stats_ee_to_df(
     remove_geom=False,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,
+    custom_bands=None,  # New parameter
 ) -> pd.DataFrame:
     """
     Convert a feature collection to a validated DataFrame with Whisp statistics.
@@ -189,6 +215,10 @@ def whisp_formatted_stats_ee_to_df(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
+    custom_bands : list or dict, optional
+        Custom band information for extra columns.
     Returns
     -------
@@ -202,11 +232,12 @@ def whisp_formatted_stats_ee_to_df(
         remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
+        whisp_image=whisp_image,
     )
-    # Pass national_codes to validation function to filter schema
-    validated_df = validate_dataframe_using_lookups(
-        df_stats, national_codes=national_codes
+    # Use flexible validation that handles custom bands
+    validated_df = validate_dataframe_using_lookups_flexible(
+        df_stats, national_codes=national_codes, custom_bands=custom_bands
     )
     return validated_df
@@ -220,6 +251,7 @@ def whisp_stats_geojson_to_df(
     remove_geom=False,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ) -> pd.DataFrame:
     """
     Convert a GeoJSON file to a pandas DataFrame with Whisp statistics.
@@ -236,6 +268,8 @@ def whisp_stats_geojson_to_df(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
@@ -250,6 +284,7 @@ def whisp_stats_geojson_to_df(
         remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
+        whisp_image=whisp_image,  # Pass through
     )
@@ -257,6 +292,7 @@ def whisp_stats_geojson_to_ee(
     input_geojson_filepath: Path | str,
     external_id_column=None,
     national_codes=None,
+    whisp_image=None,  # New parameter
 ) -> ee.FeatureCollection:
     """
     Convert a GeoJSON file to an Earth Engine FeatureCollection with Whisp statistics.
@@ -269,6 +305,8 @@ def whisp_stats_geojson_to_ee(
         The name of the external ID column, by default None.
     national_codes : list, optional
         List of ISO2 country codes to include national datasets.
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
@@ -278,7 +316,10 @@ def whisp_stats_geojson_to_ee(
     feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
     return whisp_stats_ee_to_ee(
-        feature_collection, external_id_column, national_codes=national_codes
+        feature_collection,
+        external_id_column,
+        national_codes=national_codes,
+        whisp_image=whisp_image,  # Pass through
     )
@@ -288,6 +329,7 @@ def whisp_stats_geojson_to_geojson(
     external_id_column=None,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ):
     """
     Convert a GeoJSON file to a GeoJSON object containing Whisp stats for the input ROI.
@@ -304,6 +346,8 @@ def whisp_stats_geojson_to_geojson(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
@@ -318,6 +362,7 @@ def whisp_stats_geojson_to_geojson(
         external_id_column,
         national_codes=national_codes,
         unit_type=unit_type,
+        whisp_image=whisp_image,  # Pass through
     )
     # Convert the stats FeatureCollection to GeoJSON
@@ -333,6 +378,7 @@ def whisp_stats_geojson_to_drive(
     external_id_column=None,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ):
     """
     Export Whisp statistics for a GeoJSON file to Google Drive.
@@ -347,6 +393,8 @@ def whisp_stats_geojson_to_drive(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
@@ -364,6 +412,7 @@ def whisp_stats_geojson_to_drive(
             external_id_column,
             national_codes=national_codes,
             unit_type=unit_type,
+            whisp_image=whisp_image,  # Pass through
         )
     except Exception as e:
@@ -376,6 +425,7 @@ def whisp_stats_ee_to_ee(
     national_codes=None,
     unit_type="ha",
     keep_properties=None,
+    whisp_image=None,  # New parameter
 ):
     """
     Process a feature collection to get statistics for each feature.
@@ -389,6 +439,9 @@ def whisp_stats_ee_to_ee(
             - None: Remove all properties (default behavior)
             - True: Keep all properties
             - list: Keep only the specified properties
+        whisp_image (ee.Image, optional): Pre-combined multiband Earth Engine Image containing
+            all Whisp datasets. If provided, this image will be used instead of combining
+            datasets based on national_codes.
     Returns:
         ee.FeatureCollection: The output feature collection with statistics.
@@ -449,7 +502,10 @@ def whisp_stats_ee_to_ee(
         feature_collection = _keep_fc_properties(feature_collection, keep_properties)
     fc = get_stats(
-        feature_collection, national_codes=national_codes, unit_type=unit_type
+        feature_collection,
+        national_codes=national_codes,
+        unit_type=unit_type,
+        whisp_image=whisp_image,  # Pass through
     )
     return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
@@ -478,6 +534,7 @@ def whisp_stats_ee_to_df(
     remove_geom=False,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ) -> pd.DataFrame:
     """
     Convert a Google Earth Engine FeatureCollection to a pandas DataFrame and convert ISO3 to ISO2 country codes.
@@ -494,25 +551,36 @@ def whisp_stats_ee_to_df(
         List of ISO2 country codes to include national datasets.
     unit_type : str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
     Returns
     -------
     df_stats : pd.DataFrame
         The dataframe containing the Whisp stats for the input ROI.
     """
+    # First, do the whisp processing to get the EE feature collection with stats
+    try:
+        stats_feature_collection = whisp_stats_ee_to_ee(
+            feature_collection,
+            external_id_column,
+            national_codes=national_codes,
+            unit_type=unit_type,
+            whisp_image=whisp_image,  # Pass through
+        )
+    except Exception as e:
+        print(f"An error occurred during Whisp stats processing: {e}")
+        raise e
+    # Then, convert the EE feature collection to DataFrame
     try:
         df_stats = convert_ee_to_df(
-            ee_object=whisp_stats_ee_to_ee(
-                feature_collection,
-                external_id_column,
-                national_codes=national_codes,
-                unit_type=unit_type,
-            ),
+            ee_object=stats_feature_collection,
             remove_geom=remove_geom,
         )
     except Exception as e:
         print(f"An error occurred during the conversion from EE to DataFrame: {e}")
-        return pd.DataFrame()  # Return an empty DataFrame in case of error
+        raise e
     try:
         df_stats = convert_iso3_to_iso2(
@@ -532,6 +600,7 @@ def whisp_stats_ee_to_drive(
     external_id_column=None,
     national_codes=None,
     unit_type="ha",
+    whisp_image=None,  # New parameter
 ):
     """
      Export Whisp statistics for a feature collection to Google Drive.
@@ -546,6 +615,8 @@ def whisp_stats_ee_to_drive(
          List of ISO2 country codes to include national datasets.
     unit_type : str, optional
          Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+         Pre-combined multiband Earth Engine Image containing all Whisp datasets.
      Returns
      -------
      None
@@ -557,6 +628,7 @@ def whisp_stats_ee_to_drive(
                 external_id_column,
                 national_codes=national_codes,
                 unit_type=unit_type,
+                whisp_image=whisp_image,  # Pass through
             ),
             description="whisp_output_table",
             # folder="whisp_results",
@@ -574,29 +646,42 @@ def whisp_stats_ee_to_drive(
 # Get stats for a feature or feature collection
-def get_stats(feature_or_feature_col, national_codes=None, unit_type="ha"):
+def get_stats(
+    feature_or_feature_col, national_codes=None, unit_type="ha", whisp_image=None
+):
     """
-     Get stats for a feature or feature collection with optional filtering by national codes.
+    Get stats for a feature or feature collection with optional pre-combined image.
-     Parameters
-     ----------
-     feature_or_feature_col : ee.Feature or ee.FeatureCollection
-         The input feature or feature collection to analyze
-     national_codes : list, optional
-         List of ISO2 country codes to include national datasets
+    Parameters
+    ----------
+    feature_or_feature_col : ee.Feature or ee.FeatureCollection
+        The input feature or feature collection to analyze
+    national_codes : list, optional
+        List of ISO2 country codes to include national datasets.
+        Only used if whisp_image is None.
     unit_type : str, optional
-         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
-     Returns
-     -------
-     ee.FeatureCollection
-         Feature collection with calculated statistics
+        Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
+        If provided, this will be used instead of combining datasets based on national_codes.
+        If None, datasets will be combined automatically using national_codes parameter.
+    Returns
+    -------
+    ee.FeatureCollection
+        Feature collection with calculated statistics
     """
+    # Use provided image or combine datasets
+    if whisp_image is not None:
+        img_combined = whisp_image
+        print("Using provided whisp_image")
+    else:
+        img_combined = combine_datasets(national_codes=national_codes)
+        print(f"Combining datasets with national_codes: {national_codes}")
     # Check if the input is a Feature or a FeatureCollection
     if isinstance(feature_or_feature_col, ee.Feature):
-        # If the input is a Feature, call the server-side function for processing
-        print("feature")
-        # For a single feature, we need to combine datasets with the national_codes filter
-        img_combined = combine_datasets(national_codes=national_codes)
+        print("Processing single feature")
         output = ee.FeatureCollection(
             [
                 get_stats_feature(
@@ -605,9 +690,12 @@ def get_stats(feature_or_feature_col, national_codes=None, unit_type="ha"):
             ]
         )
     elif isinstance(feature_or_feature_col, ee.FeatureCollection):
-        # If the input is a FeatureCollection, call the server-side function for processing
+        print("Processing feature collection")
         output = get_stats_fc(
-            feature_or_feature_col, national_codes=national_codes, unit_type=unit_type
+            feature_or_feature_col,
+            national_codes=national_codes,
+            unit_type=unit_type,
+            img_combined=img_combined,  # Pass the image directly
         )
     else:
         output = "Check inputs: not an ee.Feature or ee.FeatureCollection"
@@ -615,28 +703,33 @@ def get_stats(feature_or_feature_col, national_codes=None, unit_type="ha"):
 # Get statistics for a feature collection
-def get_stats_fc(feature_col, national_codes=None, unit_type="ha"):
+def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=None):
     """
-     Calculate statistics for a feature collection using Whisp datasets.
+    Calculate statistics for a feature collection using Whisp datasets.
-     Parameters
-     ----------
-     feature_col : ee.FeatureCollection
-         The input feature collection to analyze
-     national_codes : list, optional
-         List of ISO2 country codes (e.g., ["BR", "US"]) to include national datasets.
-         If provided, only national datasets for these countries and global datasets will be used.
-         If None (default), only global datasets will be used.
+    Parameters
+    ----------
+    feature_col : ee.FeatureCollection
+        The input feature collection to analyze
+    national_codes : list, optional
+        List of ISO2 country codes (e.g., ["BR", "US"]) to include national datasets.
+        If provided, only national datasets for these countries and global datasets will be used.
+        If None (default), only global datasets will be used.
+        Only used if img_combined is None.
     unit_type : str, optional
-         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
-     Returns
-     -------
-     ee.FeatureCollection
-         Feature collection with calculated statistics
+        Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    img_combined : ee.Image, optional
+        Pre-combined multiband image containing all Whisp datasets.
+        If provided, this will be used instead of combining datasets based on national_codes.
+    Returns
+    -------
+    ee.FeatureCollection
+        Feature collection with calculated statistics
     """
-    img_combined = combine_datasets(
-        national_codes=national_codes
-    )  # Pass national_codes to combine_datasets
+    # # Use provided image or combine datasets
+    # if img_combined is None:
+    #     img_combined = combine_datasets(national_codes=national_codes)
     out_feature_col = ee.FeatureCollection(
         feature_col.map(
@@ -651,6 +744,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha"):
 # Get statistics for a single feature
+# Note: This function doesn't need whisp_image parameter since it already accepts img_combined directly
 def get_stats_feature(feature, img_combined, unit_type="ha"):

openforis_whisp/utils.py CHANGED Viewed

@@ -4,6 +4,9 @@ import math
 import os
 import pandas as pd
+import urllib.request
+import os
 import importlib.resources as pkg_resources
 from dotenv import load_dotenv
@@ -152,3 +155,40 @@ class DotEnvNotFoundError(FileNotFoundError):
             "Running tests requires setting an appropriate '.env' in the root directory or in your current working "
             "directory. You may copy and edit the '.env.template' file from the root directory or from the README.",
         )
+def get_example_geojson(filename="geojson_example.geojson", cache=True):
+    """
+    Download example geojson file for testing whisp functionality.
+    Parameters:
+    -----------
+    filename : str
+        Local filename to save the geojson
+    cache : bool
+        If True, store the file in ``~/.whisp_cache`` and return the existing
+        copy without downloading when it is already there. If False, download
+        to ``filename`` as given on every call.
+    Returns:
+    --------
+    str
+        Path to the local geojson file
+    Raises:
+    -------
+    RuntimeError
+        If the download fails; the underlying error is attached as the cause.
+    """
+    url = "https://raw.githubusercontent.com/forestdatapartnership/whisp/main/tests/fixtures/geojson_example.geojson"
+    if cache:
+        cache_dir = os.path.join(os.path.expanduser("~"), ".whisp_cache")
+        os.makedirs(cache_dir, exist_ok=True)
+        filepath = os.path.join(cache_dir, filename)
+        if os.path.exists(filepath):
+            return filepath
+    else:
+        filepath = filename
+    # Download to a sibling ".part" file and rename it into place only on
+    # success, so an interrupted download never leaves a truncated file that
+    # later calls would return as a valid cached copy.
+    partial_path = f"{filepath}.part"
+    try:
+        urllib.request.urlretrieve(url, partial_path)
+        os.replace(partial_path, filepath)
+    except Exception as e:
+        # Best-effort removal of the incomplete download; the download
+        # failure itself is what gets reported to the caller.
+        try:
+            os.remove(partial_path)
+        except OSError:
+            pass
+        raise RuntimeError(f"Failed to download example geojson: {e}") from e
+    return filepath

{openforis_whisp-2.0.0a5.dist-info → openforis_whisp-2.0.0b1.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 2.0.0a5
+Version: 2.0.0b1
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
 Author: Andy Arnell
-Author-email: and.arnell@fao.org
+Author-email: andrew.arnell@fao.org
 Requires-Python: >=3.10
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers

openforis-whisp 2.0.0a5__py3-none-any.whl → 2.0.0b1__py3-none-any.whl

openforis-whisp 2.0.0a5__py3-none-any.whl → 2.0.0b1__py3-none-any.whl