PyPI - openforis-whisp - Versions diffs - 0.1.0a8__py3-none-any.whl → 2.0.0a1__py3-none-any.whl - Mend

openforis-whisp 0.1.0a8__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

openforis_whisp/__init__.py +6 -3
openforis_whisp/data_conversion.py +36 -13
openforis_whisp/datasets.py +787 -90
openforis_whisp/logger.py +38 -2
openforis_whisp/parameters/config_runtime.py +4 -7
openforis_whisp/parameters/lookup_context_and_metadata.csv +13 -13
openforis_whisp/parameters/lookup_gee_datasets.csv +202 -155
openforis_whisp/reformat.py +245 -96
openforis_whisp/risk.py +472 -24
openforis_whisp/stats.py +271 -70
openforis_whisp-2.0.0a1.dist-info/METADATA +381 -0
openforis_whisp-2.0.0a1.dist-info/RECORD +17 -0
openforis_whisp-0.1.0a8.dist-info/METADATA +0 -188
openforis_whisp-0.1.0a8.dist-info/RECORD +0 -17
{openforis_whisp-0.1.0a8.dist-info → openforis_whisp-2.0.0a1.dist-info}/LICENSE +0 -0
{openforis_whisp-0.1.0a8.dist-info → openforis_whisp-2.0.0a1.dist-info}/WHEEL +0 -0

openforis_whisp/risk.py CHANGED Viewed

@@ -2,12 +2,15 @@ import pandas as pd
 from .pd_schemas import data_lookup_type
 from openforis_whisp.parameters.config_runtime import (
-    percent_or_ha,
     geometry_area_column,
     DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH,
+    stats_unit_type_column,  # Add this import
 )
+from openforis_whisp.reformat import filter_lookup_by_country_codes
 # could embed this in each function below that uses lookup_gee_datasets_df.
 lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
     DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH
@@ -17,22 +20,98 @@ lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
 # requires lookup_gee_datasets_df
+# Add function to detect unit type from dataframe
+def detect_unit_type(df, explicit_unit_type=None):
+    """
+    Determine the unit type from the dataframe or use the override value.
+    Args:
+        df (DataFrame): Input DataFrame. Only inspected when no override is given.
+        explicit_unit_type (str, optional): Override unit type ('ha' or 'percent').
+    Returns:
+        str: The unit type to use for calculations.
+    Raises:
+        ValueError: If the override is not 'ha' or 'percent', if the unit type
+                   column is missing or holds no values, if there are mixed unit
+                   types in the dataframe, or if the single value found is not
+                   'ha' or 'percent'.
+    """
+    valid_unit_types = ("ha", "percent")
+    # If override is provided, validate and use it without looking at the dataframe
+    if explicit_unit_type is not None:
+        if explicit_unit_type not in valid_unit_types:
+            raise ValueError(
+                f"Invalid unit type: {explicit_unit_type}. Must be 'ha' or 'percent'."
+            )
+        return explicit_unit_type
+    # Check if unit type column exists in the dataframe
+    if stats_unit_type_column not in df.columns:
+        raise ValueError(
+            f"Column '{stats_unit_type_column}' not found in dataframe. "
+            "Please provide 'explicit_unit_type' parameter to specify the unit type."
+        )
+    # Get unique values from the column
+    unit_types = df[stats_unit_type_column].unique()
+    # A dataframe with no rows yields no values; report that as the documented
+    # ValueError rather than letting the [0] lookup below raise IndexError
+    if len(unit_types) == 0:
+        raise ValueError(
+            f"Column '{stats_unit_type_column}' contains no values. "
+            "Please provide 'explicit_unit_type' parameter to specify the unit type."
+        )
+    # Check for mixed unit types
+    if len(unit_types) > 1:
+        raise ValueError(
+            f"Mixed unit types in dataframe: {unit_types}. All rows must use the same unit type."
+        )
+    # Get the single unit type
+    unit_type = unit_types[0]
+    # Validate that the unit type is recognized
+    if unit_type not in valid_unit_types:
+        raise ValueError(
+            f"Unrecognized unit type: {unit_type}. Must be 'ha' or 'percent'."
+        )
+    return unit_type
+# Update whisp_risk to accept and pass the unit_type parameter
 def whisp_risk(
     df: data_lookup_type,  # CHECK THIS
     ind_1_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
     ind_2_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
     ind_3_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
     ind_4_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_5_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_6_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_7_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_8_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_9_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_10_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
+    ind_11_pcent_threshold: float = 10,  # default values (draft decision tree and parameters)
     ind_1_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
     ind_2_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
     ind_3_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
     ind_4_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
-    ind_1_name: str = "Indicator_1_treecover",
-    ind_2_name: str = "Indicator_2_commodities",
-    ind_3_name: str = "Indicator_3_disturbance_before_2020",
-    ind_4_name: str = "Indicator_4_disturbance_after_2020",
+    ind_5_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_6_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_7_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_8_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_9_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_10_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_11_input_columns: pd.Series = None,  # see lookup_gee_datasets for details
+    ind_1_name: str = "Ind_01_treecover",
+    ind_2_name: str = "Ind_02_commodities",
+    ind_3_name: str = "Ind_03_disturbance_before_2020",
+    ind_4_name: str = "Ind_04_disturbance_after_2020",
+    ind_5_name: str = "Ind_05_primary_2020",
+    ind_6_name: str = "Ind_06_nat_reg_forest_2020",
+    ind_7_name: str = "Ind_07_planted_plantations_2020",
+    ind_8_name: str = "Ind_08_planted_plantations_after_2020",
+    ind_9_name: str = "Ind_09_treecover_after_2020",
+    ind_10_name: str = "Ind_10_agri_after_2020",
+    ind_11_name: str = "Ind_11_logging_concession_before_2020",
     low_name: str = "no",
     high_name: str = "yes",
+    explicit_unit_type: str = None,
+    national_codes: list[str] = None,  # List of ISO2 country codes to filter by
 ) -> data_lookup_type:
     """
     Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
@@ -53,39 +132,122 @@ def whisp_risk(
         ind_4_name (str, optional): Name of the fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
         low_name (str, optional): Value shown in table if less than or equal to the threshold. Defaults to "no".
         high_name (str, optional): Value shown in table if more than the threshold. Defaults to "yes".
+        explicit_unit_type (str, optional): Override the autodetected unit type ('ha' or 'percent').
+                                      If not provided, will detect from dataframe 'unit' column.
     Returns:
         data_lookup_type: DataFrame with added 'EUDR_risk' column.
     """
+    # Determine the unit type to use based on input data and overrid
+    unit_type = detect_unit_type(df, explicit_unit_type)
+    print(f"Using unit type: {unit_type}")
+    lookup_df_copy = lookup_gee_datasets_df.copy()
+    # filter by national codes (even if None - this removes all country columns unless specified)
+    filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
+        lookup_df=lookup_df_copy,
+        filter_col="ISO2_code",
+        national_codes=national_codes,
+    )
+    # Rest of the function remains the same, but pass unit_type to add_indicators
     if ind_1_input_columns is None:
-        ind_1_input_columns = get_cols_ind_1_treecover(lookup_gee_datasets_df)
+        ind_1_input_columns = get_cols_ind_01_treecover(filtered_lookup_gee_datasets_df)
     if ind_2_input_columns is None:
-        ind_2_input_columns = get_cols_ind_2_commodities(lookup_gee_datasets_df)
+        ind_2_input_columns = get_cols_ind_02_commodities(
+            filtered_lookup_gee_datasets_df
+        )
     if ind_3_input_columns is None:
-        ind_3_input_columns = get_cols_ind_3_dist_before_2020(lookup_gee_datasets_df)
+        ind_3_input_columns = get_cols_ind_03_dist_before_2020(
+            filtered_lookup_gee_datasets_df
+        )
     if ind_4_input_columns is None:
-        ind_4_input_columns = get_cols_ind_4_dist_after_2020(lookup_gee_datasets_df)
+        ind_4_input_columns = get_cols_ind_04_dist_after_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_5_input_columns is None:
+        ind_5_input_columns = get_cols_ind_05_primary_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_6_input_columns is None:
+        ind_6_input_columns = get_cols_ind_06_nat_reg_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_7_input_columns is None:
+        ind_7_input_columns = get_cols_ind_07_planted_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_8_input_columns is None:
+        ind_8_input_columns = get_cols_ind_08_planted_after_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_9_input_columns is None:
+        ind_9_input_columns = get_cols_ind_09_treecover_after_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_10_input_columns is None:
+        ind_10_input_columns = get_cols_ind_10_agri_after_2020(
+            filtered_lookup_gee_datasets_df
+        )
+    if ind_11_input_columns is None:
+        ind_11_input_columns = get_cols_ind_11_logging_before_2020(
+            filtered_lookup_gee_datasets_df
+        )
     # Check range of values
     check_range(ind_1_pcent_threshold)
     check_range(ind_2_pcent_threshold)
     check_range(ind_3_pcent_threshold)
     check_range(ind_4_pcent_threshold)
+    check_range(ind_5_pcent_threshold)
+    check_range(ind_6_pcent_threshold)
+    check_range(ind_7_pcent_threshold)
+    check_range(ind_8_pcent_threshold)
+    check_range(ind_9_pcent_threshold)
+    check_range(ind_10_pcent_threshold)
+    check_range(ind_11_pcent_threshold)
     input_cols = [
         ind_1_input_columns,
         ind_2_input_columns,
         ind_3_input_columns,
         ind_4_input_columns,
+        ind_5_input_columns,
+        ind_6_input_columns,
+        ind_7_input_columns,
+        ind_8_input_columns,
+        ind_9_input_columns,
+        ind_10_input_columns,
+        ind_11_input_columns,
     ]
     thresholds = [
         ind_1_pcent_threshold,
         ind_2_pcent_threshold,
         ind_3_pcent_threshold,
         ind_4_pcent_threshold,
+        ind_5_pcent_threshold,
+        ind_6_pcent_threshold,
+        ind_7_pcent_threshold,
+        ind_8_pcent_threshold,
+        ind_9_pcent_threshold,
+        ind_10_pcent_threshold,
+        ind_11_pcent_threshold,
+    ]
+    names = [
+        ind_1_name,
+        ind_2_name,
+        ind_3_name,
+        ind_4_name,
+        ind_5_name,
+        ind_6_name,
+        ind_7_name,
+        ind_8_name,
+        ind_9_name,
+        ind_10_name,
+        ind_11_name,
     ]
-    names = [ind_1_name, ind_2_name, ind_3_name, ind_4_name]
     [check_range(threshold) for threshold in thresholds]
     df_w_indicators = add_indicators(
@@ -95,9 +257,18 @@ def whisp_risk(
         names,
         low_name,
         high_name,
+        unit_type,  # Pass the unit type
+    )
+    df_w_indicators_and_risk_pcrop = add_eudr_risk_pcrop_col(
+        df=df_w_indicators,
+        ind_1_name=ind_1_name,
+        ind_2_name=ind_2_name,
+        ind_3_name=ind_3_name,
+        ind_4_name=ind_4_name,
     )
-    df_w_indicators_and_risk = add_eudr_risk_col(
+    df_w_indicators_and_risk_acrop = add_eudr_risk_acrop_col(
         df=df_w_indicators,
         ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
@@ -105,10 +276,25 @@ def whisp_risk(
         ind_4_name=ind_4_name,
     )
-    return df_w_indicators_and_risk
+    df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
+        df=df_w_indicators,
+        ind_1_name=ind_1_name,
+        ind_2_name=ind_2_name,
+        ind_3_name=ind_3_name,
+        ind_4_name=ind_4_name,
+        ind_5_name=ind_5_name,
+        ind_6_name=ind_6_name,
+        ind_7_name=ind_7_name,
+        ind_8_name=ind_8_name,
+        ind_9_name=ind_9_name,
+        ind_10_name=ind_10_name,
+        ind_11_name=ind_11_name,
+    )
+    return df_w_indicators_and_risk_timber
-def add_eudr_risk_col(
+def add_eudr_risk_pcrop_col(
     df: data_lookup_type,
     ind_1_name: str,
     ind_2_name: str,
@@ -136,13 +322,117 @@ def add_eudr_risk_col(
             or row[ind_2_name] == "yes"
             or row[ind_3_name] == "yes"
         ):
-            df.at[index, "EUDR_risk"] = "low"
+            df.at[index, "risk_pcrop"] = "low"
         # If none of the first three indicators suggest low risk and Indicator 4 suggests no risk, set EUDR_risk to "more_info_needed"
         elif row[ind_4_name] == "no":
-            df.at[index, "EUDR_risk"] = "more_info_needed"
+            df.at[index, "risk_pcrop"] = "more_info_needed"
+        # If none of the above conditions are met, set EUDR_risk to "high"
+        else:
+            df.at[index, "risk_pcrop"] = "high"
+    return df
+def add_eudr_risk_acrop_col(
+    df: data_lookup_type,
+    ind_1_name: str,
+    ind_2_name: str,
+    ind_3_name: str,
+    ind_4_name: str,
+) -> data_lookup_type:
+    """
+    Adds the 'risk_acrop' column to the DataFrame based on indicator values.
+    The column is written on the DataFrame passed in, which is also returned.
+    Rules, in order of precedence:
+        "low"              - indicator 1 is "no" or indicator 2 is "yes"
+        "high"             - indicator 1 is "yes" and indicator 4 is "yes"
+        "more_info_needed" - every other row
+    Args:
+        df (DataFrame): Input DataFrame holding the indicator columns.
+        ind_1_name (str): Name of first indicator column.
+        ind_2_name (str): Name of second indicator column.
+        ind_3_name (str): Name of third indicator column. Not read by this function.
+        ind_4_name (str): Name of fourth indicator column.
+    Returns:
+        DataFrame: DataFrame with added 'risk_acrop' column.
+    """
+    low_risk = (df[ind_1_name] == "no") | (df[ind_2_name] == "yes")
+    high_risk = (df[ind_1_name] == "yes") & (df[ind_4_name] == "yes")
+    # Assign from lowest to highest precedence so "low" overrides "high",
+    # which reproduces an if / elif / else evaluated per row. Assigning the
+    # whole column also creates it when the DataFrame has no rows, and the
+    # boolean masks are positional, so repeated index labels are handled.
+    df["risk_acrop"] = "more_info_needed"
+    df.loc[high_risk, "risk_acrop"] = "high"
+    df.loc[low_risk, "risk_acrop"] = "low"
+    return df
+def add_eudr_risk_timber_col(
+    df: data_lookup_type,
+    ind_1_name: str,
+    ind_2_name: str,
+    ind_3_name: str,
+    ind_4_name: str,
+    ind_5_name: str,
+    ind_6_name: str,
+    ind_7_name: str,
+    ind_8_name: str,
+    ind_9_name: str,
+    ind_10_name: str,
+    ind_11_name: str,
+) -> data_lookup_type:
+    """
+    Adds the 'risk_timber' column to the DataFrame based on indicator values.
+    Rows are classified one at a time as "low", "high" or "more_info_needed" by the
+    first matching rule in the if / elif chain below; each indicator cell is compared
+    against the literal strings "yes" and "no". The value is written on the DataFrame
+    passed in, row by row, so a DataFrame with no rows comes back without the column.
+    Args:
+        df (DataFrame): Input DataFrame holding the indicator columns.
+        ind_1_name (str): Name of first indicator column. Only referenced by a disabled rule.
+        ind_2_name (str): Name of second indicator column (commodities).
+        ind_3_name (str): Name of third indicator column. Only referenced by a disabled rule.
+        ind_4_name (str): Name of fourth indicator column. Not read by this function.
+        ind_5_name (str): Name of fifth indicator column (primary forest).
+        ind_6_name (str): Name of sixth indicator column (naturally regenerating forest).
+        ind_7_name (str): Name of seventh indicator column (planted / plantation forest).
+        ind_8_name (str): Name of eighth indicator column (planted / plantation after 2020).
+        ind_9_name (str): Name of ninth indicator column (tree cover after 2020).
+        ind_10_name (str): Name of tenth indicator column (agriculture after 2020).
+        ind_11_name (str): Name of eleventh indicator column (logging concession).
+    Returns:
+        DataFrame: DataFrame with added 'risk_timber' column.
+    """
+    for index, row in df.iterrows():
+        # Commodity present (ind 2), OR planted/plantation (ind 7) with no agriculture afterwards (ind 10): "low"
+        if row[ind_2_name] == "yes" or (
+            row[ind_7_name] == "yes" and row[ind_10_name] == "no"
+        ):
+            df.at[index, "risk_timber"] = "low"
+        # Disabled rule, kept for reference: "no tree cover -> low" was not adopted because of unstocked forests
+        # if row[ind_1_name] == "no" or row[ind_3_name] == "yes" or row[ind_7_name] == "yes":
+        #   df.at[index, 'EUDR_risk_degrad'] = "low"
+        # Primary (ind 5), naturally regenerating (ind 6) or planted (ind 7) forest AND agriculture afterwards (ind 10): "high"
+        elif (
+            row[ind_5_name] == "yes"
+            or row[ind_6_name] == "yes"
+            or row[ind_7_name] == "yes"
+        ) and row[ind_10_name] == "yes":
+            df.at[index, "risk_timber"] = "high"
+        # Primary or naturally regenerating forest AND planted/plantation after 2020 (ind 8): "high"
+        elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
+            ind_8_name
+        ] == "yes":
+            df.at[index, "risk_timber"] = "high"
+        # Disabled rule, kept for reference: its condition duplicates the agriculture rule above
+        # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
+        #    df.at[index, 'EUDR_risk_timber'] = "high"
+        # Primary or naturally regenerating forest AND (tree cover after 2020 (ind 9) OR logging concession (ind 11)): "low"
+        elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
+            row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
+        ):
+            df.at[index, "risk_timber"] = "low"
+        # Primary or naturally regenerating forest and no other information: "more_info_needed"
+        elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
+            df.at[index, "risk_timber"] = "more_info_needed"
         # If none of the above conditions are met, set EUDR_risk to "high"
         else:
-            df.at[index, "EUDR_risk"] = "high"
+            df.at[index, "risk_timber"] = "high"
     return df
@@ -154,6 +444,7 @@ def add_indicators(
     names: list[str],
     low_name: str = "no",
     high_name: str = "yes",
+    unit_type: str = None,
 ) -> data_lookup_type:
     for input_col, threshold, name in zip(input_cols, thresholds, names):
         df = add_indicator_column(
@@ -163,19 +454,22 @@ def add_indicators(
             new_column_name=name,
             low_name=low_name,
             high_name=high_name,
+            sum_comparison=False,
+            unit_type=unit_type,  # Pass the unit type
         )
     return df
+# Update add_indicator_column to use the unit_type parameter
 def add_indicator_column(
     df: data_lookup_type,
     input_columns: list[str],
     threshold: float,
     new_column_name: str,
-    low_name: str = "yes",
-    high_name: str = "no",
+    low_name: str = "no",
+    high_name: str = "yes",
     sum_comparison: bool = False,
+    unit_type: str = None,  # unit_type parameter
 ) -> data_lookup_type:
     """
     Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
@@ -190,6 +484,7 @@ def add_indicator_column(
         low_name (str): The name for the value when below or equal to threshold (default is 'no').
         high_name (str): The name for the value when above threshold (default is 'yes').
         sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
+        unit_type (str): Whether values are in "ha" or "percent".
     Returns:
         data_lookup_type: The DataFrame with the new column added.
@@ -207,7 +502,10 @@ def add_indicator_column(
         for col in input_columns:
             # So that threshold is always in percent, if outputs are in ha, the code converts to percent (based on dividing by the geometry_area_column column.
             # Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
-            if percent_or_ha == "ha":
+            if unit_type == "ha":
+                df[geometry_area_column] = pd.to_numeric(
+                    df[geometry_area_column], errors="coerce"
+                )
                 val_to_check = clamp(
                     ((df[col] / df[geometry_area_column]) * 100), 0, 100
                 )
@@ -220,7 +518,7 @@ def add_indicator_column(
     return df
-def get_cols_ind_1_treecover(lookup_gee_datasets_df):
+def get_cols_ind_01_treecover(lookup_gee_datasets_df):
     """
     Generate a list of dataset names for the treecover theme, excluding those marked for exclusion.
@@ -241,7 +539,7 @@ def get_cols_ind_1_treecover(lookup_gee_datasets_df):
     )
-def get_cols_ind_2_commodities(lookup_gee_datasets_df):
+def get_cols_ind_02_commodities(lookup_gee_datasets_df):
     """
     Generate a list of dataset names for the commodities theme, excluding those marked for exclusion.
@@ -262,7 +560,7 @@ def get_cols_ind_2_commodities(lookup_gee_datasets_df):
     )
-def get_cols_ind_3_dist_before_2020(lookup_gee_datasets_df):
+def get_cols_ind_03_dist_before_2020(lookup_gee_datasets_df):
     """
     Generate a list of dataset names for the disturbance before 2020 theme, excluding those marked for exclusion.
@@ -283,7 +581,7 @@ def get_cols_ind_3_dist_before_2020(lookup_gee_datasets_df):
     )
-def get_cols_ind_4_dist_after_2020(lookup_gee_datasets_df):
+def get_cols_ind_04_dist_after_2020(lookup_gee_datasets_df):
     """
     Generate a list of dataset names for the disturbance after 2020 theme, excluding those marked for exclusion.
@@ -304,6 +602,156 @@ def get_cols_ind_4_dist_after_2020(lookup_gee_datasets_df):
     )
+def get_cols_ind_05_primary_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the primary forest in 2020 timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "primary".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "primary")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
+def get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the naturally regenerating forest in 2020 timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "naturally_reg_2020".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "naturally_reg_2020")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
+def get_cols_ind_07_planted_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the planted and plantation forest in 2020 timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "planted_plantation_2020".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "planted_plantation_2020")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
+def get_cols_ind_08_planted_after_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the planted and plantation forest post 2020 timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "planted_plantation_after_2020".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "planted_plantation_after_2020")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
+def get_cols_ind_09_treecover_after_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the treecover post 2020 timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "treecover_post2020".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "treecover_post2020")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
+def get_cols_ind_10_agri_after_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the croplands post 2020 timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "agri_after_2020".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "agri_after_2020")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
+def get_cols_ind_11_logging_before_2020(lookup_gee_datasets_df):
+    """
+    List the dataset names used for the logging concessions timber indicator.
+    Args:
+    lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
+    Returns:
+    list: Values of "name" for rows where "exclude_from_output" is not 1,
+    "use_for_risk_timber" is 1 and "theme_timber" is "logging_concession".
+    """
+    lookup = lookup_gee_datasets_df
+    # Single combined row mask: not excluded, flagged for timber risk, matching theme
+    wanted_rows = (
+        (lookup["exclude_from_output"] != 1)
+        & (lookup["use_for_risk_timber"] == 1)
+        & (lookup["theme_timber"] == "logging_concession")
+    )
+    return list(lookup.loc[wanted_rows, "name"])
 def clamp(
     value: float | pd.Series, min_val: float, max_val: float
 ) -> float | pd.Series:

openforis-whisp 0.1.0a8__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

openforis-whisp 0.1.0a8__py3-none-any.whl → 2.0.0a1__py3-none-any.whl