openforis-whisp 3.0.0a7__py3-none-any.whl → 3.0.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,7 @@ import subprocess
 from contextlib import redirect_stdout, contextmanager
 from pathlib import Path
 from typing import Optional, List, Dict, Any, Tuple, Union
+from importlib.metadata import version as get_version
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import tempfile
 
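The new `get_version` import lets the metadata hunks further down stamp outputs with the installed package version instead of the stale hard-coded `"3.0.0a1"` string. A minimal sketch of the stdlib behaviour, assuming the package is installed:

```python
from importlib.metadata import version as get_version

# Reads the version string from the installed distribution's metadata,
# e.g. "3.0.0a8", so it can never drift from the released package.
print(get_version("openforis-whisp"))
```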
@@ -924,10 +925,67 @@ def clean_geodataframe(
 
 
 # ============================================================================
-# BATCH RETRY HELPER
+# AUDIT TRAIL HELPER
 # ============================================================================
 
 
+def _add_geometry_audit_trail(
+    df_validated: pd.DataFrame,
+    input_geojson_filepath: str,
+    gdf_original_geoms: gpd.GeoDataFrame = None,
+    logger: logging.Logger = None,
+) -> pd.DataFrame:
+    """
+    Add original input geometries as geo_original column for audit trail.
+
+    Parameters
+    ----------
+    df_validated : pd.DataFrame
+        Validated DataFrame to add audit trail to
+    input_geojson_filepath : str
+        Path to original GeoJSON file
+    gdf_original_geoms : gpd.GeoDataFrame, optional
+        Pre-loaded original geometries (to avoid reloading)
+    logger : logging.Logger, optional
+        Logger for output
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with geo_original column added
+    """
+    import json
+    from shapely.geometry import mapping
+
+    logger = logger or logging.getLogger("whisp")
+
+    try:
+        # Load original geometries if not provided
+        if gdf_original_geoms is None:
+            logger.warning("Original geometries not pre-loaded, loading now...")
+            gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
+
+        # Create DataFrame with plotId and geo_original
+        df_original_geom = pd.DataFrame(
+            {
+                "plotId": df_validated["plotId"].values[: len(gdf_original_geoms)],
+                "geo_original": gdf_original_geoms["geometry"].apply(
+                    lambda g: json.dumps(mapping(g)) if g is not None else None
+                ),
+            }
+        )
+
+        # Merge original geometries back
+        df_result = df_validated.merge(df_original_geom, on="plotId", how="left")
+        logger.info("Audit trail added: geo_original column")
+        return df_result
+
+    except Exception as e:
+        logger.warning(f"Error adding audit trail: {e}")
+        # Return original DataFrame if audit trail fails
+        return df_validated
+
+
 # ============================================================================
 # BATCH RETRY HELPER - DEPRECATED (removed due to semaphore deadlock issues)
 # ============================================================================
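Each `geo_original` value produced by this helper is the input geometry serialised to a GeoJSON string with `shapely.geometry.mapping`. A small round-trip illustration (the polygon here is invented for the example):

```python
import json
from shapely.geometry import Polygon, mapping, shape

poly = Polygon([(0, 0), (1, 0), (1, 1), (0, 0)])

# What ends up in the geo_original column: a GeoJSON string
geo_original = json.dumps(mapping(poly))

# An auditor can recover the exact input geometry later
restored = shape(json.loads(geo_original))
assert restored.equals(poly)
```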
@@ -1727,8 +1785,7 @@ def whisp_stats_geojson_to_df_concurrent(
         logger.warning(f"{plot_id_column} column missing, regenerating...")
         formatted.insert(0, plot_id_column, range(1, len(formatted) + 1))
 
-    # Sort by plot_id to ensure consistent output order
-    formatted = formatted.sort_values(by=plot_id_column).reset_index(drop=True)
+    # Note: Sorting is handled by format_stats_dataframe in the formatted wrapper functions
 
     logger.info(f"Processing complete: {len(formatted):,} features")
     return formatted
@@ -1981,10 +2038,11 @@ def whisp_stats_geojson_to_df_sequential(
         convert_water_flag=True,
     )
 
-    # Ensure plot_id exists and sort by it
+    # Ensure plot_id exists
     if plot_id_column not in formatted.columns:
         formatted.insert(0, plot_id_column, range(1, len(formatted) + 1))
-    formatted = formatted.sort_values(by=plot_id_column).reset_index(drop=True)
+
+    # Note: Sorting is handled by format_stats_dataframe in the formatted wrapper functions
 
     logger.info(f"Processing complete: {len(formatted):,} features")
 
@@ -2154,50 +2212,21 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
            custom_bands=custom_bands,
        )
 
-        # Step 2c: Add audit trail columns (AFTER validation to preserve columns)
+        # Step 2c: Add audit trail column (AFTER validation to preserve columns)
         if geometry_audit_trail:
-            logger.debug("Adding audit trail columns...")
-            try:
-                # Use pre-loaded original geometries (loaded at wrapper start to avoid reloading)
-                if gdf_original_geoms is None:
-                    logger.warning("Original geometries not pre-loaded, loading now...")
-                    gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
-
-                # Use plotId from df_validated to maintain mapping
-                df_original_geom = pd.DataFrame(
-                    {
-                        "plotId": df_validated["plotId"].values[: len(gdf_original_geoms)],
-                        "geo_original": gdf_original_geoms["geometry"].apply(
-                            lambda g: json.dumps(mapping(g)) if g is not None else None
-                        ),
-                    }
-                )
-
-                # Merge original geometries back
-                df_validated = df_validated.merge(df_original_geom, on="plotId", how="left")
-
-                # Store processing metadata
-                df_validated.attrs["processing_metadata"] = {
-                    "whisp_version": "3.0.0a1",
-                    "processing_date": datetime.now().isoformat(),
-                    "processing_mode": "concurrent",
-                    "ee_endpoint": "high_volume",
-                    "validate_geometries": validate_geometries,
-                    "datasets_used": national_codes or [],
-                    "geometry_audit_trail": True,
-                }
-
-                logger.info(f"Audit trail added: geo_original column")
-
-            except Exception as e:
-                logger.warning(f"Error adding audit trail: {e}")
-                # Continue without audit trail if something fails
+            logger.debug("Adding geo_original column for audit trail...")
+            df_validated = _add_geometry_audit_trail(
+                df_validated=df_validated,
+                input_geojson_filepath=input_geojson_filepath,
+                gdf_original_geoms=gdf_original_geoms,
+                logger=logger,
+            )
 
         # Add processing metadata column using pd.concat to avoid fragmentation warning
         metadata_dict = {
-            "whisp_version": "3.0.0a1",
+            "whisp_version": get_version("openforis-whisp"),
             "processing_timestamp_utc": datetime.now(timezone.utc).strftime(
-                "%Y-%m-%d %H:%M:%S UTC"
+                "%Y-%m-%d %H:%M:%S%z"
             ),
         }
         metadata_series = pd.Series(
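Two things change in the metadata dict: the version is now read from the installed distribution (see the import hunk above), and the timestamp swaps the hard-coded ` UTC` suffix for the `%z` offset directive. A quick stdlib comparison of the two formats:

```python
from datetime import datetime, timezone

now = datetime.now(timezone.utc)
print(now.strftime("%Y-%m-%d %H:%M:%S UTC"))  # old: e.g. "2025-06-01 12:00:00 UTC"
print(now.strftime("%Y-%m-%d %H:%M:%S%z"))    # new: e.g. "2025-06-01 12:00:00+0000"
```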
@@ -2349,49 +2378,21 @@ def whisp_formatted_stats_geojson_to_df_sequential(
            custom_bands=custom_bands,
        )
 
-        # Step 2c: Add audit trail columns (AFTER validation to preserve columns)
+        # Step 2c: Add audit trail column (AFTER validation to preserve columns)
         if geometry_audit_trail:
-            logger.debug("Adding audit trail columns...")
-            try:
-                # Use pre-loaded original geometries (loaded at wrapper start to avoid reloading)
-                if gdf_original_geoms is None:
-                    logger.warning("Original geometries not pre-loaded, loading now...")
-                    gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
-
-                # Use plotId from df_validated to maintain mapping
-                df_original_geom = pd.DataFrame(
-                    {
-                        "plotId": df_validated["plotId"].values[: len(gdf_original_geoms)],
-                        "geo_original": gdf_original_geoms["geometry"].apply(
-                            lambda g: json.dumps(mapping(g)) if g is not None else None
-                        ),
-                    }
-                )
-
-                # Merge original geometries back
-                df_validated = df_validated.merge(df_original_geom, on="plotId", how="left")
-
-                # Store processing metadata
-                df_validated.attrs["processing_metadata"] = {
-                    "whisp_version": "3.0.0a1",
-                    "processing_date": datetime.now().isoformat(),
-                    "processing_mode": "sequential",
-                    "ee_endpoint": "standard",
-                    "datasets_used": national_codes or [],
-                    "geometry_audit_trail": True,
-                }
-
-                logger.info(f"Audit trail added: geo_original column")
-
-            except Exception as e:
-                logger.warning(f"Error adding audit trail: {e}")
-                # Continue without audit trail if something fails
+            logger.debug("Adding geo_original column for audit trail...")
+            df_validated = _add_geometry_audit_trail(
+                df_validated=df_validated,
+                input_geojson_filepath=input_geojson_filepath,
+                gdf_original_geoms=gdf_original_geoms,
+                logger=logger,
+            )
 
         # Add processing metadata column using pd.concat to avoid fragmentation warning
         metadata_dict = {
-            "whisp_version": "3.0.0a1",
+            "whisp_version": get_version("openforis-whisp"),
             "processing_timestamp_utc": datetime.now(timezone.utc).strftime(
-                "%Y-%m-%d %H:%M:%S UTC"
+                "%Y-%m-%d %H:%M:%S%z"
             ),
         }
         metadata_series = pd.Series(
@@ -374,14 +374,12 @@ def g_esri_2020_2023_crop_prep():
 
 # RADD_year_2019 to RADD_year_< current year >
 def g_radd_year_prep():
-    from datetime import datetime
-
     radd = ee.ImageCollection("projects/radar-wur/raddalert/v1")
     radd_date = (
         radd.filterMetadata("layer", "contains", "alert").select("Date").mosaic()
     )
     start_year = 19
-    current_year = datetime.now().year % 100
+    current_year = CURRENT_YEAR_2DIGIT
 
     def make_band(year, img_stack):
         start = year * 1000
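`g_radd_year_prep` now reuses a shared `CURRENT_YEAR_2DIGIT` constant instead of importing `datetime` inside the function. The constant's definition is not shown in this diff; judging from the expression it replaces, it is presumably a module-level equivalent of:

```python
from datetime import datetime

# Hypothetical reconstruction: the real definition of CURRENT_YEAR_2DIGIT
# lives elsewhere in the package and is not part of this diff.
CURRENT_YEAR_2DIGIT = datetime.now().year % 100  # e.g. 25 in 2025
```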
@@ -859,12 +859,14 @@ def format_stats_dataframe(
     )
     df.rename(columns={area_col: area_col_stripped}, inplace=True)
 
-    # 10) reorder by plotId column if present
-    df = (
-        df.sort_values(sort_column).reset_index(drop=True)
-        if sort_column in df.columns
-        else df
-    )
+    # 10) reorder by plotId column numerically if present (column is string but contains int values)
+    if sort_column in df.columns:
+        df["_sort_key"] = pd.to_numeric(df[sort_column], errors="coerce")
+        df = (
+            df.sort_values(by="_sort_key")
+            .drop(columns=["_sort_key"])
+            .reset_index(drop=True)
+        )
 
     # 11) Defragment final DataFrame and return
     return df.copy()
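The rewritten step 10 fixes a lexicographic-sort pitfall: the plotId column holds integers stored as strings, so sorting the raw strings orders "10" before "2". A minimal pandas illustration of the old versus new behaviour:

```python
import pandas as pd

df = pd.DataFrame({"plotId": ["10", "2", "1"]})

# Old behaviour: string sort -> ['1', '10', '2']
print(df.sort_values("plotId")["plotId"].tolist())

# New behaviour: numeric sort via a temporary key -> ['1', '2', '10']
df["_sort_key"] = pd.to_numeric(df["plotId"], errors="coerce")
print(df.sort_values("_sort_key")["plotId"].tolist())
```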
openforis_whisp/risk.py CHANGED
@@ -1,12 +1,14 @@
 import pandas as pd
 
 from .pd_schemas import data_lookup_type
+from .logger import StdoutLogger
 
 
 from openforis_whisp.parameters.config_runtime import (
     geometry_area_column,
     DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH,
-    stats_unit_type_column,  # Add this import
+    DEFAULT_CONTEXT_LOOKUP_TABLE_PATH,
+    stats_unit_type_column,
 )
 
 from openforis_whisp.reformat import filter_lookup_by_country_codes
@@ -16,6 +18,8 @@ lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
     DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH
 )
 
+logger = StdoutLogger(__name__)
+
 
 # requires lookup_gee_datasets_df
@@ -113,9 +117,10 @@ def whisp_risk(
     explicit_unit_type: str = None,
     national_codes: list[str] = None,  # List of ISO2 country codes to filter by
     custom_bands_info: dict = None,  # New parameter for custom band risk info
+    drop_unused_columns: bool = False,  # Remove columns not used in risk calculations
 ) -> data_lookup_type:
     """
-    Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
+    Adds the risk column to the DataFrame based on indicator values.
 
     Args:
         df (DataFrame): Input DataFrame.
@@ -145,6 +150,9 @@ def whisp_risk(
            }
        }
        If None, custom bands won't be included in risk calculations.
+        drop_unused_columns (bool, optional): If True, removes dataset columns not used in risk calculations,
+            keeping only context/metadata columns, datasets used in indicators, indicator columns,
+            and final risk columns. Defaults to False (backward compatible).
 
     Returns:
         data_lookup_type: DataFrame with added risk columns.
@@ -278,7 +286,8 @@ def whisp_risk(
         unit_type,  # Pass the unit type
     )
 
-    df_w_indicators_and_risk_pcrop = add_eudr_risk_pcrop_col(
+    # these "add_" functions modify the 'df_w_indicators' dataframe in place
+    add_risk_pcrop_col(
         df=df_w_indicators,
         ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
@@ -286,14 +295,14 @@ def whisp_risk(
         ind_4_name=ind_4_name,
     )
 
-    df_w_indicators_and_risk_acrop = add_eudr_risk_acrop_col(
+    add_risk_acrop_col(
         df=df_w_indicators,
         ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
         ind_4_name=ind_4_name,
     )
 
-    df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
+    add_risk_timber_col(
         df=df_w_indicators,
         ind_2_name=ind_2_name,
         ind_5_name=ind_5_name,
@@ -305,10 +314,14 @@ def whisp_risk(
         ind_11_name=ind_11_name,
     )
 
-    return df_w_indicators_and_risk_timber
+    # Filter to risk-relevant columns if requested (after all columns added)
+    if drop_unused_columns:
+        df_w_indicators = filter_to_risk_columns(df_w_indicators, input_cols, names)
+
+    return df_w_indicators
 
 
-def add_eudr_risk_pcrop_col(
+def add_risk_pcrop_col(
     df: data_lookup_type,
     ind_1_name: str,
     ind_2_name: str,
@@ -316,7 +329,7 @@ def add_eudr_risk_pcrop_col(
     ind_4_name: str,
 ) -> data_lookup_type:
     """
-    Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
+    Adds the risk column to the DataFrame based on indicator values.
 
     Args:
         df (DataFrame): Input DataFrame.
@@ -326,35 +339,35 @@ def add_eudr_risk_pcrop_col(
         ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
-        DataFrame: DataFrame with added 'EUDR_risk' column.
+        DataFrame: DataFrame with added 'risk' column.
     """
 
     for index, row in df.iterrows():
-        # If any of the first three indicators suggest low risk, set EUDR_risk to "low"
+        # If any of the first three indicators suggest low risk, set risk to "low"
         if (
             row[ind_1_name] == "no"
             or row[ind_2_name] == "yes"
             or row[ind_3_name] == "yes"
         ):
             df.at[index, "risk_pcrop"] = "low"
-        # If none of the first three indicators suggest low risk and Indicator 4 suggests no risk, set EUDR_risk to "more_info_needed"
+        # If none of the first three indicators suggest low risk and Indicator 4 suggests no risk, set risk to "more_info_needed"
         elif row[ind_4_name] == "no":
             df.at[index, "risk_pcrop"] = "more_info_needed"
-        # If none of the above conditions are met, set EUDR_risk to "high"
+        # If none of the above conditions are met, set risk to "high"
         else:
             df.at[index, "risk_pcrop"] = "high"
 
     return df
 
 
-def add_eudr_risk_acrop_col(
+def add_risk_acrop_col(
     df: data_lookup_type,
     ind_1_name: str,
     ind_2_name: str,
     ind_4_name: str,
 ) -> data_lookup_type:
     """
-    Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
+    Adds the risk column to the DataFrame based on indicator values.
 
     Args:
         df (DataFrame): Input DataFrame.
@@ -363,25 +376,25 @@ def add_eudr_risk_acrop_col(
         ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
-        DataFrame: DataFrame with added 'EUDR_risk' column.
+        DataFrame: DataFrame with added 'risk' column.
     """
 
     # soy risk
     for index, row in df.iterrows():
-        # If there is no tree cover in 2020, set EUDR_risk_soy to "low"
+        # If there is no tree cover in 2020, set risk_soy to "low"
         if row[ind_1_name] == "no" or row[ind_2_name] == "yes":
             df.at[index, "risk_acrop"] = "low"
-        # If there is tree cover in 2020 and disturbances post 2020, set EUDR_risk_soy to "high"
+        # If there is tree cover in 2020 and disturbances post 2020, set risk_soy to "high"
         elif row[ind_1_name] == "yes" and row[ind_4_name] == "yes":
             df.at[index, "risk_acrop"] = "high"
-        # If tree cover and no disturbances post 2020, set EUDR_risk to "more_info_needed"
+        # If tree cover and no disturbances post 2020, set risk to "more_info_needed"
         else:
             df.at[index, "risk_acrop"] = "more_info_needed"
 
     return df
 
 
-def add_eudr_risk_timber_col(
+def add_risk_timber_col(
     df: data_lookup_type,
     ind_2_name: str,
     ind_5_name: str,
@@ -393,7 +406,7 @@ def add_eudr_risk_timber_col(
     ind_11_name: str,
 ) -> data_lookup_type:
     """
-    Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
+    Adds the risk column to the DataFrame based on indicator values.
 
     Args:
         df (DataFrame): Input DataFrame.
@@ -407,42 +420,42 @@ def add_eudr_risk_timber_col(
         ind_11_name (str, optional): Name of eleventh indicator column. Defaults to "Ind_11_logging_concession_before_2020".
 
     Returns:
-        DataFrame: DataFrame with added 'EUDR_risk' column.
+        DataFrame: DataFrame with added risk column.
     """
 
     for index, row in df.iterrows():
         # If there is a commodity in 2020 (ind_2_name)
-        # OR if there is planted-plantation in 2020 (ind_7_name) AND no agriculture in 2023 (ind_10_name), set EUDR_risk_timber to "low"
+        # OR if there is planted-plantation in 2020 (ind_7_name) AND no agriculture in 2023 (ind_10_name), set risk_timber to "low"
         if row[ind_2_name] == "yes" or (
             row[ind_7_name] == "yes" and row[ind_10_name] == "no"
         ):
             df.at[index, "risk_timber"] = "low"
-        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) or planted forest (ind_7_name) in 2020 AND agricultural after 2020 (ind_10_name), set EUDR_timber to high
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) or planted forest (ind_7_name) in 2020 AND agricultural after 2020 (ind_10_name), set risk_timber to high
         elif (
             row[ind_5_name] == "yes"
             or row[ind_6_name] == "yes"
             or row[ind_7_name] == "yes"
         ) and row[ind_10_name] == "yes":
             df.at[index, "risk_timber"] = "high"
-        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) AND planted after 2020 (ind_8_name), set EUDR_risk to "high"
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) AND planted after 2020 (ind_8_name), set risk to "high"
         elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
             ind_8_name
         ] == "yes":
             df.at[index, "risk_timber"] = "high"
         # No data yet on OWL conversion
-        # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
+        # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set risk to high
         # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
-        #     df.at[index, 'EUDR_risk_timber'] = "high"
+        #     df.at[index, 'risk_timber'] = "high"
 
-        # If there is a natural primary forest (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) AND information on management practice any time (ind_11_name) OR tree cover or regrowth post 2020 (ind_9_name), set EUDR_risk_timber to "low"
+        # If there is a natural primary forest (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) AND information on management practice any time (ind_11_name) OR tree cover or regrowth post 2020 (ind_9_name), set risk_timber to "low"
         elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
             row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
         ):
             df.at[index, "risk_timber"] = "low"
-        # If primary (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) and no other info, set EUDR_risk to "more_info_needed"
+        # If primary (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) and no other info, set risk to "more_info_needed"
         elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
             df.at[index, "risk_timber"] = "more_info_needed"
-        # If none of the above conditions are met, set EUDR_risk to "low"
+        # If none of the above conditions are met, set risk to "low"
         else:
             df.at[index, "risk_timber"] = "low"
 
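The three renamed `add_risk_*_col` functions each encode a small decision tree over "yes"/"no" indicator columns. A toy run of the simplest one, `add_risk_pcrop_col`, with indicator column names assumed for illustration (only `Ind_04_disturbance_after_2020` and `Ind_11_logging_concession_before_2020` are confirmed by the docstrings):

```python
import pandas as pd
from openforis_whisp.risk import add_risk_pcrop_col

toy = pd.DataFrame(
    {
        # Row 1: no tree cover in 2020                 -> "low"
        # Row 2: tree cover, no disturbance after 2020 -> "more_info_needed"
        # Row 3: tree cover, disturbance after 2020    -> "high"
        "Ind_01_treecover": ["no", "yes", "yes"],
        "Ind_02_commodities": ["no", "no", "no"],
        "Ind_03_disturbance_before_2020": ["no", "no", "no"],
        "Ind_04_disturbance_after_2020": ["no", "no", "yes"],
    }
)

# Modifies `toy` in place, adding the risk_pcrop column
add_risk_pcrop_col(
    df=toy,
    ind_1_name="Ind_01_treecover",
    ind_2_name="Ind_02_commodities",
    ind_3_name="Ind_03_disturbance_before_2020",
    ind_4_name="Ind_04_disturbance_after_2020",
)
print(toy["risk_pcrop"].tolist())  # ['low', 'more_info_needed', 'high']
```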
@@ -790,6 +803,77 @@ def check_range(value: float) -> None:
         raise ValueError("Value must be between 0 and 100.")
 
 
+def get_context_metadata_columns() -> list[str]:
+    """
+    Get list of context/metadata column names from lookup CSV.
+
+    Returns
+    -------
+    list[str]
+        List of column names marked as context_and_metadata
+    """
+    lookup_df = pd.read_csv(DEFAULT_CONTEXT_LOOKUP_TABLE_PATH)
+    return list(lookup_df["name"])
+
+
+def filter_to_risk_columns(
+    df: pd.DataFrame, input_cols: list[list[str]], names: list[str]
+) -> pd.DataFrame:
+    """
+    Filter DataFrame to only columns relevant for risk calculations.
+
+    Keeps:
+    - Context/metadata columns (plotId, Area, Country, etc.)
+    - Dataset columns used in risk indicators
+    - Indicator columns (Ind_01_treecover, etc.)
+    - Risk columns (risk_pcrop, risk_acrop, risk_timber, risk_livestock)
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame with all columns
+    input_cols : list[list[str]]
+        List of lists containing dataset column names used in each indicator
+    names : list[str]
+        Names of indicator columns
+
+    Returns
+    -------
+    pd.DataFrame
+        Filtered DataFrame with only risk-relevant columns
+    """
+    # Get context/metadata columns
+    context_cols = get_context_metadata_columns()
+
+    # Flatten input_cols to get dataset columns used in risk
+    dataset_cols = []
+    for col_list in input_cols:
+        dataset_cols.extend(col_list)
+
+    # Risk output columns (present in df if function called at end)
+    risk_cols = ["risk_pcrop", "risk_acrop", "risk_timber", "risk_livestock"]
+
+    # Post-processing metadata columns (added after validation, not in schema CSV)
+    metadata_cols = ["whisp_processing_metadata", "geo_original"]
+
+    # Build set of all columns to keep (for fast lookup)
+    cols_to_keep_set = set(
+        context_cols + dataset_cols + names + risk_cols + metadata_cols
+    )
+
+    # Preserve original DataFrame column order, filter to only columns we want to keep
+    cols_to_keep = [col for col in df.columns if col in cols_to_keep_set]
+
+    # Log dropped columns at debug level
+    dropped_cols = [col for col in df.columns if col not in cols_to_keep_set]
+    if dropped_cols:
+        logger.debug(
+            f"Dropped {len(dropped_cols)} columns: {', '.join(sorted(dropped_cols))}"
+        )
+
+    return df[cols_to_keep]
+
+
 def add_custom_bands_info_to_lookup(
     lookup_df: pd.DataFrame, custom_bands_info: dict, df_columns: list
 ) -> pd.DataFrame:
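The new `drop_unused_columns` flag on `whisp_risk` is the public entry point to `filter_to_risk_columns`. A hedged usage sketch, assuming `stats_df` is a DataFrame produced by one of the Whisp stats functions and that the remaining parameters keep their defaults:

```python
from openforis_whisp.risk import whisp_risk

# stats_df: a formatted Whisp stats DataFrame (assumed to exist)
# Default behaviour is unchanged: every input column is kept.
full = whisp_risk(df=stats_df)

# Opt in to the slimmer output: only context/metadata columns, the
# dataset columns feeding the indicators, the indicator columns, and
# the final risk_* columns survive.
slim = whisp_risk(df=stats_df, drop_unused_columns=True)
assert len(slim.columns) <= len(full.columns)
```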
@@ -1,9 +1,8 @@
-Metadata-Version: 2.4
+Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 3.0.0a7
+Version: 3.0.0a8
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
-License-File: LICENSE
 Keywords: whisp,geospatial,data-processing
 Author: Andy Arnell
 Author-email: andrew.arnell@fao.org
@@ -17,7 +16,6 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: 3.14
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Dist: country_converter (>=0.7,<2.0.0)
 Requires-Dist: earthengine-api
@@ -31,6 +29,7 @@ Requires-Dist: pydantic-core (>=2.14.0,<3.0.0)
 Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
 Requires-Dist: rsa (>=4.2,<5.0.0)
 Requires-Dist: shapely (>=2.0.2,<3.0.0)
+Project-URL: Changelog, https://github.com/forestdatapartnership/whisp/releases
 Project-URL: Documentation, https://github.com/forestdatapartnership/whisp#readme
 Project-URL: Development Branch, https://github.com/forestdatapartnership/whisp/tree/main
 Project-URL: Issues, https://github.com/forestdatapartnership/whisp/issues
@@ -64,6 +63,7 @@ Description-Content-Type: text/markdown
 - [Add data layers](#whisp_add_data)
 - [Contribute to the code](#whisp_contribute)
 - [Code of conduct](#whisp_conduct)
+- [Feedback](#whisp_feedback)
 
 <br>
 
@@ -71,11 +71,11 @@ Description-Content-Type: text/markdown
 ***Whisp*** can currently be used directly or implemented in your own code through three different pathways:
 
 
-1. The Whisp App with its simple interface can be used [right here](https://whisp.openforis.org/) or called from other software by [API](https://whisp.openforis.org/documentation/api-guide). The Whisp App currently supports the processing of up to 3,000 geometries per job. The original JS & Python code behind the Whisp App and API can be found [here](https://github.com/forestdatapartnership/whisp-app).
+1. The Whisp App with its simple interface can be accessed [here](https://whisp.openforis.org/) or called from other software by [API](https://whisp.openforis.org/documentation/api-guide). The Whisp App currently supports the processing of up to 3,000 geometries per job. The original JS & Python code behind the Whisp App and API can be found [here](https://github.com/forestdatapartnership/whisp-app).
 
 2. [Whisp in Earthmap](https://whisp.earthmap.org/?aoi=WHISP&boundary=plot1&layers=%7B%22CocoaETH%22%3A%7B%22opacity%22%3A1%7D%2C%22JRCForestMask%22%3A%7B%22opacity%22%3A1%7D%2C%22planet_rgb%22%3A%7B%22opacity%22%3A1%2C%22date%22%3A%222020-12%22%7D%7D&map=%7B%22center%22%3A%7B%22lat%22%3A7%2C%22lng%22%3A4%7D%2C%22zoom%22%3A3%2C%22mapType%22%3A%22satellite%22%7D&statisticsOpen=true) supports the visualization of geometries on actual maps with the possibility to toggle different relevant map products around tree cover, commodities and deforestation. It is practical for demonstration purposes and spot checks of single geometries but not recommended for larger datasets.
 
-3. Datasets of any size, especially when holding more than 3,000 geometries, can be analyzed with Whisp through the [python package on pip](https://pypi.org/project/openforis-whisp/). See example [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb) for implementation with a geojson input. For the detailed procedure please go to the section [Whisp notebooks](#whisp_notebooks).
+3. Datasets of any size, especially when holding more than 3,000 geometries, can be analyzed with Whisp through the [python package on pip](https://pypi.org/project/openforis-whisp/). See the example [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb) for implementation with a geojson input. For further notebook processing options, see [Whisp notebooks](#whisp_notebooks).
 
 
 ## Whisp datasets <a name="whisp_datasets"></a>
@@ -98,7 +98,7 @@ Additional categories are specific for the timber commodity, considering a harve
 
 There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
 
-### Whisp risk assessment <a name="whisp_risk"></a>
+### Whisp risk assessment <a name="whisp_risk"></a>
 
 Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
 
 
@@ -155,24 +155,24 @@ The **relevant risk assessment column depends on the commodity** in question:
 
 *The Whisp algorithm for **Perennial Crops** visualized:*
 ![CoE_Graphic 5](https://github.com/user-attachments/assets/007b5f50-3939-4707-95fa-98be4d56745f)
-
+
 If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
 If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
-If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
+If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as the EUDR (EU Deforestation Regulation), and is therefore not considered high risk.
 
 Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
 However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
 
 
 ## Run Whisp python package from a notebook <a name="whisp_notebooks"></a>
-
+
 For most users we suggest using the Whisp App to process their plot data. But for some, using the python package directly will fit their workflow.
 
-A simple example of the package functionality can be seen in this [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
+An example of the package functionality can be seen in this [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
 
-For an example notebook adapted for running locally (or in Sepal), see: [whisp_geojson_to_csv.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_csv.ipynb) or if datasets are very large, see [whisp_geojson_to_drive.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_drive.ipynb)
+For running locally (or in Sepal), see: [whisp_geojson_to_csv.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_csv.ipynb) or, if datasets are very large (e.g., >100,000 features), see [whisp_ee_asset_to_drive.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_ee_asset_to_drive.ipynb)
 
 ### Requirements for running the package
 
@@ -180,8 +180,6 @@ The **relevant risk assessment column depends on the commodity** in question:
 - A registered cloud GEE project.
 - Some experience in Python or a similar language.
 
-More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
-
 
 ### Python package installation
 
@@ -195,7 +193,6 @@ The **relevant risk assessment column depends on the commodity** in question:
 pip install --pre openforis-whisp
 ```
 
-If running the package locally we recommend a [virtual environment](https://docs.python.org/3/library/venv.html) to keep your main python installation clean. For users running the package in Sepal see [here](https://docs.sepal.io/en/latest/cli/python.html#virtual-environment).
 
 The package relies upon the google earth engine api being setup correctly using a registered cloud project.
 
@@ -242,129 +239,46 @@ Before submitting a request, consider the following:
 ---
 
 
-
 ### Adding your own data directly
 
+The python notebooks allow the user to add custom data layers. You can edit the Prepare layers section to do this in the [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
+To add your own data directly you will need some coding experience as well as familiarity with Google Earth Engine.
 
-To add your own data you will need some coding experience as well as familiarity with GitHub and Google Earth Engine.
-
-This approach is for those who want to run a bespoke analysis combining their own data with those already in Whisp.
-
-Firstly follow the steps below to install the package in editable mode.
-
-As with the regular pip installation, we recommend a separate [virtual environment](https://docs.python.org/3/library/venv.html) for running in editable mode. For Sepal users see [here](https://docs.sepal.io/en/latest/cli/python.html#virtual-environment).
-
-```bash
-
-git clone https://github.com/forestdatapartnership/whisp.git
-
-cd whisp/
-
-pip install -e .[dev]
-
-```
-Once in editable mode you are running the Whisp package locally based on a cloned version of the code.
-
-
-
-There are two files to edit to add your own data:
-
-- `src/openforis_whisp/datasets.py`
-
-- `src/openforis_whisp/parameters/lookup_gee_datasets.csv`
-
-
-
-The `datasets.py` file is a Python script that defines functions which return GEE images composed of one or more bands.
-
-
-
-#### To add your own dataset:
-
-1. Add code to `datasets.py` in the form of a function that returns a **single-band binary image** for your dataset. See notes at the top of the file and example functions for formatting.
-
-2. Edit the `lookup_gee_datasets.csv` and add a row for your dataset.
-
-
-
-**NB:** You need to know what the dataset represents and define how it will be used in the different risk decision trees (if at all).
-
-For example, if it is a dataset for tree cover in 2000, then add `'treecover'` under the `Theme` column.
-
-
-
-#### Example function in `datasets.py`:
-
-
-
-```python
-
-def my_custom_dataset_prep():
-
-    image = ee.Image("MY/GEE/DATASET")
-
-    binary = image.gt(10)  # Example threshold
-
-    return binary.rename("My_custom_dataset")
-
-```
-
-
-
----
-
-
-We are working on ways to make this process smoother. However, in the meantime do contact us through the [issues page on GitHub](https://github.com/forestdatapartnership/whisp/issues), or via the Open Foris email, if this functionality is useful to you or you need help.
-
-
-
----
-
-
-
-## Contributing to the Whisp code base <a name="whisp_contribute"></a>
-
-Contributions to the Whisp code in GitHub are welcome. These could be additional functionality, datasets or just cleaner code! Contributions can be made by forking the repository, making and pushing the required changes, then making a pull request to the Whisp repository. After briefly reviewing the request, we can make a branch for which to make a new pull request to. After final checks, we can then incorporate the code into the main branch. If in doubt, get in contact first or log as an issue [here](https://github.com/forestdatapartnership/whisp/issues/).
-
-
-Install the package in editable mode (see Adding your own data directly above):
-
-Then add additional dependencies required for testing and running pre-commit hooks:
-
-
-```bash
-
-pre-commit install
 
-```
+## Contributing <a name="whisp_contribute"></a>
 
+Contributions are welcome!
+- Fork the repo, make changes, and open a pull request.
+- For adding new datasets to the codebase and for project-specific coding standards see [.github/copilot-instructions.md](.github/copilot-instructions.md)
 
-You should be able to run the Pytest suite by simply running the `pytest` command from the repo's root folder.
+## Code of Conduct <a name="whisp_conduct"></a>
 
+**Purpose**
+We are dedicated to maintaining a safe and respectful environment for all users. Harassment or abusive behavior will not be tolerated. <br>
 
-Please read the [contributing guidelines](contributing_guidelines.md) for good practice recommendations
+**Scope**
+This Code applies to all interactions on the repository and on the app.
 
+**Expectations** <br>
+*- Respect others:* Treat all contributors and users with courtesy and kindness. <br>
+*- Constructive communication:* Engage respectfully, even in disagreements. <br>
+*- Protect privacy:* Do not share personal information without consent.
 
-## Code of Conduct <a name="whisp_conduct"></a>
+**Prohibited Conduct** <br>
+*- Harassment:* Unwanted or abusive communication, stalking, threats, or bullying.<br>
+*- Discrimination:* Any form of hate speech or exclusion based on race, gender, orientation, or other identities.<br>
+*- Inappropriate Content:* Posting offensive, harmful, or explicit material.
 
-**Purpose**
-We are dedicated to maintaining a safe and respectful environment for all users. Harassment or abusive behavior will not be tolerated. <br>
+**Reporting** <br>
+Users can report violations of this Code of Conduct confidentially by contacting the Open Foris team at
+[open-foris@fao.org](mailto:open-foris@fao.org).
 
-**Scope**
-This Code applies to all interactions on the repository and on the app.
 
-**Expectations** <br>
-*- Respect others:* Treat all contributors and users with courtesy and kindness. <br>
-*- Constructive communication:* Engage respectfully, even in disagreements. <br>
-*- Protect privacy:* Do not share personal information without consent.
+## Feedback <a name="whisp_feedback"></a>
+- For issues or feature requests [open a GitHub issue](https://github.com/forestdatapartnership/whisp/issues).
+- For general questions, feedback or support, email [open-foris@fao.org](mailto:open-foris@fao.org).
 
-**Prohibited Conduct** <br>
-*- Harassment:* Unwanted or abusive communication, stalking, threats, or bullying.<br>
-*- Discrimination:* Any form of hate speech or exclusion based on race, gender, orientation, or other identities.<br>
-*- Inappropriate Content:* Posting offensive, harmful, or explicit material.
+We welcome all feedback and contributions!
 
-**Reporting**
-Users can report violations directly to us by emailing the address listed in the "Contact Us" section of the website:
-https://openforis.org/solutions/whisp/
 
 
@@ -1,8 +1,8 @@
 openforis_whisp/__init__.py,sha256=YihdNrybfFygwcwa2Bis59V7sYpNR9aAxL-VNO4dqEI,3659
-openforis_whisp/advanced_stats.py,sha256=1ZhIwdlZjephXvXVChVrNmouPgN_urXvYXYGeCs0Ay0,99731
+openforis_whisp/advanced_stats.py,sha256=yXwPIimbHZV3jxRL-mLMQoWZk9_UEec30I-0flNsOx8,99055
 openforis_whisp/data_checks.py,sha256=jxShBiihtX0rel__Vkzu1bZfqgVQIx_l-uPP1OeCaKY,37015
 openforis_whisp/data_conversion.py,sha256=L2IsiUyQUt3aHgSYGbIhgPGwM7eyS3nLVEoNO9YqQeM,21888
-openforis_whisp/datasets.py,sha256=05m-8dj1r11CWTQd5xAStV3JEStmfiNuBm2zjyiTr0Y,53898
+openforis_whisp/datasets.py,sha256=fAGj1jaeoPszWm60p8N00x2qrw398-iDklX-4nkC6mI,53855
 openforis_whisp/logger.py,sha256=gFkRTwJDJKIBWcHDOK74Uln3JM7fAybURo7pQpGL790,3395
 openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
@@ -10,11 +10,11 @@ openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq
 openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
 openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=7KdnFocEgbZO5m8JmWQchzZTurg9rJ96y17z8UyLtI0,17537
 openforis_whisp/pd_schemas.py,sha256=0z-oPmYIDUIn7mNY41W_uUpmTwjoR7e254mOCoHVsOg,2878
-openforis_whisp/reformat.py,sha256=gvhIa-_kTT5BSO8LuVmJ1TQcf_NwheskXboFM9e0KJY,32758
-openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
+openforis_whisp/reformat.py,sha256=i_ckmxuOirrfRHbeY05_5JajrJ00T5MoZ_jgzj_h0wA,32939
+openforis_whisp/risk.py,sha256=tVkgVdRpdxaCBtyCjw8Z8MQt7EV9lGy34Bz8r_1Qb8Y,37135
 openforis_whisp/stats.py,sha256=RJ_PJSXyvz9FnoHeQ3tqrfhhWibXjz9AlX27suSKiO4,63319
 openforis_whisp/utils.py,sha256=AISWF-MpfFdYkhd6bei4BViw2Iag20mmq61ykrF9YTk,31287
-openforis_whisp-3.0.0a7.dist-info/licenses/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
-openforis_whisp-3.0.0a7.dist-info/METADATA,sha256=U-VC2XOZJ1DIz_Ar8ZIuXqJFhasA7NkzufKP_ykl2NY,16760
-openforis_whisp-3.0.0a7.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-openforis_whisp-3.0.0a7.dist-info/RECORD,,
+openforis_whisp-3.0.0a8.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+openforis_whisp-3.0.0a8.dist-info/METADATA,sha256=2kDHgW5mjXMry11nvYsX7auboQMf4Mzj6BVgVa8TIsI,14173
+openforis_whisp-3.0.0a8.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openforis_whisp-3.0.0a8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.2.1
+Generator: poetry-core 2.1.1
 Root-Is-Purelib: true
 Tag: py3-none-any