openforis-whisp 2.0.0a3__py3-none-any.whl → 2.0.0a5__py3-none-any.whl

openforis_whisp/data_conversion.py CHANGED
@@ -12,55 +12,32 @@ import geopandas as gpd
 import ee
 
 
-def convert_ee_to_geojson(ee_object, filename=None, indent=2, **kwargs):
-    """Converts Earth Engine object to geojson.
-
-    Args:
-        ee_object (object): An Earth Engine object.
-        filename (str, optional): The file path to save the geojson. Defaults to None.
-
-    Returns:
-        object: GeoJSON object.
-    """
-
-    try:
-        if (
-            isinstance(ee_object, ee.Geometry)
-            or isinstance(ee_object, ee.Feature)
-            or isinstance(ee_object, ee.FeatureCollection)
-        ):
-            json_object = ee_object.getInfo()
-            if filename is not None:
-                filename = os.path.abspath(filename)
-                if not os.path.exists(os.path.dirname(filename)):
-                    os.makedirs(os.path.dirname(filename))
-                with open(filename, "w") as f:
-                    f.write(json.dumps(json_object, indent=indent, **kwargs) + "\n")
-            else:
-                return json_object
-        else:
-            print("Could not convert the Earth Engine object to geojson")
-    except Exception as e:
-        raise Exception(e)
-
-
 def convert_geojson_to_ee(
-    geojson_filepath: Any, enforce_wgs84: bool = True
+    geojson_filepath: Any, enforce_wgs84: bool = True, strip_z_coords: bool = True
 ) -> ee.FeatureCollection:
     """
     Reads a GeoJSON file from the given path and converts it to an Earth Engine FeatureCollection.
     Optionally checks and converts the CRS to WGS 84 (EPSG:4326) if needed.
+    Automatically handles 3D coordinates by stripping Z values when necessary.
 
     Args:
         geojson_filepath (Any): The filepath to the GeoJSON file.
         enforce_wgs84 (bool): Whether to enforce WGS 84 projection (EPSG:4326). Defaults to True.
+        strip_z_coords (bool): Whether to automatically strip Z coordinates from 3D geometries. Defaults to True.
 
     Returns:
         ee.FeatureCollection: Earth Engine FeatureCollection created from the GeoJSON.
     """
     if isinstance(geojson_filepath, (str, Path)):
         file_path = os.path.abspath(geojson_filepath)
-        print(f"Reading GeoJSON file from: {file_path}")
+
+        # Apply print_once deduplication for file reading message
+        if not hasattr(convert_geojson_to_ee, "_printed_file_messages"):
+            convert_geojson_to_ee._printed_file_messages = set()
+
+        if file_path not in convert_geojson_to_ee._printed_file_messages:
+            print(f"Reading GeoJSON file from: {file_path}")
+            convert_geojson_to_ee._printed_file_messages.add(file_path)
 
         # Use GeoPandas to read the file and handle CRS
         gdf = gpd.read_file(file_path)
@@ -82,9 +59,133 @@ def convert_geojson_to_ee(
     if validation_errors:
         raise ValueError(f"GeoJSON validation errors: {validation_errors}")
 
-    feature_collection = ee.FeatureCollection(create_feature_collection(geojson_data))
+    # Try to create the feature collection, handle 3D coordinate issues automatically
+    try:
+        feature_collection = ee.FeatureCollection(
+            create_feature_collection(geojson_data)
+        )
+        return feature_collection
+    except ee.EEException as e:
+        if "Invalid GeoJSON geometry" in str(e) and strip_z_coords:
+            # Apply print_once deduplication for Z-coordinate stripping messages
+            if not hasattr(convert_geojson_to_ee, "_printed_z_messages"):
+                convert_geojson_to_ee._printed_z_messages = set()
+
+            z_message_key = f"z_coords_{file_path}"
+            if z_message_key not in convert_geojson_to_ee._printed_z_messages:
+                print(
+                    "Warning: Invalid GeoJSON geometry detected, likely due to 3D coordinates."
+                )
+                print("Attempting to fix by stripping Z coordinates...")
+                convert_geojson_to_ee._printed_z_messages.add(z_message_key)
+
+            # Apply Z-coordinate stripping
+            geojson_data_fixed = _strip_z_coordinates_from_geojson(geojson_data)
+
+            # Try again with the fixed data
+            try:
+                feature_collection = ee.FeatureCollection(
+                    create_feature_collection(geojson_data_fixed)
+                )
+
+                success_message_key = f"z_coords_success_{file_path}"
+                if success_message_key not in convert_geojson_to_ee._printed_z_messages:
+                    print("✓ Successfully converted after stripping Z coordinates")
+                    convert_geojson_to_ee._printed_z_messages.add(success_message_key)
+
+                return feature_collection
+            except Exception as retry_error:
+                raise ee.EEException(
+                    f"Failed to convert GeoJSON even after stripping Z coordinates: {retry_error}"
+                )
+        else:
+            raise e
 
-    return feature_collection
+
+def _strip_z_coordinates_from_geojson(geojson_data: dict) -> dict:
+    """
+    Helper function to strip Z coordinates from GeoJSON data.
+    Converts 3D coordinates to 2D by removing Z values.
+
+    Args:
+        geojson_data (dict): GeoJSON data dictionary
+
+    Returns:
+        dict: GeoJSON data with Z coordinates stripped
+    """
+
+    def strip_z(geometry):
+        """Remove Z coordinates from geometry to make it 2D"""
+        if geometry["type"] == "MultiPolygon":
+            geometry["coordinates"] = [
+                [[[lon, lat] for lon, lat, *_ in ring] for ring in polygon]
+                for polygon in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "Polygon":
+            geometry["coordinates"] = [
+                [[lon, lat] for lon, lat, *_ in ring]
+                for ring in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "Point":
+            if len(geometry["coordinates"]) > 2:
+                geometry["coordinates"] = geometry["coordinates"][:2]
+        elif geometry["type"] == "MultiPoint":
+            geometry["coordinates"] = [coord[:2] for coord in geometry["coordinates"]]
+        elif geometry["type"] == "LineString":
+            geometry["coordinates"] = [
+                [lon, lat] for lon, lat, *_ in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "MultiLineString":
+            geometry["coordinates"] = [
+                [[lon, lat] for lon, lat, *_ in line]
+                for line in geometry["coordinates"]
+            ]
+        return geometry
+
+    # Create a deep copy to avoid modifying the original
+    import copy
+
+    geojson_copy = copy.deepcopy(geojson_data)
+
+    # Process all features
+    if "features" in geojson_copy:
+        for feature in geojson_copy["features"]:
+            if "geometry" in feature and feature["geometry"]:
+                feature["geometry"] = strip_z(feature["geometry"])
+
+    return geojson_copy
+
+
+def convert_ee_to_geojson(ee_object, filename=None, indent=2, **kwargs):
+    """Converts Earth Engine object to geojson.
+
+    Args:
+        ee_object (object): An Earth Engine object.
+        filename (str, optional): The file path to save the geojson. Defaults to None.
+
+    Returns:
+        object: GeoJSON object.
+    """
+
+    try:
+        if (
+            isinstance(ee_object, ee.Geometry)
+            or isinstance(ee_object, ee.Feature)
+            or isinstance(ee_object, ee.FeatureCollection)
+        ):
+            json_object = ee_object.getInfo()
+            if filename is not None:
+                filename = os.path.abspath(filename)
+                if not os.path.exists(os.path.dirname(filename)):
+                    os.makedirs(os.path.dirname(filename))
+                with open(filename, "w") as f:
+                    f.write(json.dumps(json_object, indent=indent, **kwargs) + "\n")
+            else:
+                return json_object
+        else:
+            print("Could not convert the Earth Engine object to geojson")
+    except Exception as e:
+        raise Exception(e)
 
 
 def convert_geojson_to_shapefile(geojson_path, shapefile_output_path):
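A minimal usage sketch of the new `strip_z_coords` handling above (the file path is hypothetical; an authenticated, initialized Earth Engine session and the module path shown in the file header are assumed):

```python
# Minimal sketch, assuming ee.Initialize() succeeds and the GeoJSON below exists.
# A file exported with 3D coordinates such as [lon, lat, 0.0] previously failed
# with "Invalid GeoJSON geometry"; with strip_z_coords=True (the default) the
# Z values are stripped and the conversion is retried.
import ee
from openforis_whisp.data_conversion import convert_geojson_to_ee

ee.Initialize()
fc = convert_geojson_to_ee("plots_with_z_values.geojson", strip_z_coords=True)
print(fc.size().getInfo())
```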
@@ -252,28 +353,49 @@ def validate_geojson(input_data: Any) -> List[str]:
     return errors
 
 
-def extract_features(geometry: Any, features: List[Feature]) -> None:
+def extract_features(geojson_obj: Any, features: List[Feature]) -> None:
     """
-    Recursively extracts features from a geometry and adds them to the feature list.
+    Recursively extracts features from a GeoJSON object and adds them to the feature list.
 
-    :param geometry: GeoJSON geometry
+    :param geojson_obj: GeoJSON object (could be geometry, feature, or feature collection)
     :param features: List of extracted features
     """
-    if geometry["type"] == "Polygon":
-        features.append(Feature(geometry=Polygon(geometry["coordinates"])))
-    elif geometry["type"] == "Point":
-        features.append(Feature(geometry=Point(geometry["coordinates"])))
-    elif geometry["type"] == "MultiPolygon":
-        for polygon in geometry["coordinates"]:
-            features.append(Feature(geometry=Polygon(polygon)))
-    elif geometry["type"] == "GeometryCollection":
-        for geom in geometry["geometries"]:
-            extract_features(geom, features)
-    elif geometry["type"] == "Feature":
-        extract_features(geometry["geometry"], features)
-    elif geometry["type"] == "FeatureCollection":
-        for feature in geometry["features"]:
-            extract_features(feature, features)
+    if isinstance(geojson_obj, dict):
+        obj_type = geojson_obj.get("type")
+
+        if obj_type == "Feature":
+            # Extract the actual Feature with properties
+            geometry = geojson_obj.get("geometry", {})
+            properties = geojson_obj.get("properties", {})
+
+            if geometry and geometry.get("type"):
+                features.append(Feature(geometry=geometry, properties=properties))
+
+        elif obj_type == "FeatureCollection":
+            # Process each feature in the collection
+            for feature in geojson_obj.get("features", []):
+                extract_features(feature, features)
+
+        elif obj_type in [
+            "Polygon",
+            "Point",
+            "MultiPolygon",
+            "LineString",
+            "MultiPoint",
+            "MultiLineString",
+        ]:
+            # This is a raw geometry - create feature with empty properties
+            features.append(Feature(geometry=geojson_obj, properties={}))
+
+        elif obj_type == "GeometryCollection":
+            # Handle geometry collections
+            for geom in geojson_obj.get("geometries", []):
+                extract_features(geom, features)
+
+    elif isinstance(geojson_obj, list):
+        # Handle lists of features/geometries
+        for item in geojson_obj:
+            extract_features(item, features)
 
 
 def create_feature_collection(geojson_obj: Any) -> FeatureCollection:
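The reworked `extract_features` above now preserves feature `properties` instead of rebuilding bare geometries. A small sketch of the difference, assuming `Feature` is the `geojson` class the module's type hints refer to:

```python
# Sketch: properties such as external_id now survive extraction.
from geojson import Feature  # assumed source of Feature, per the module's type hints

features: list = []
fc = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [12.49, 41.89]},
            "properties": {"external_id": "plot-001"},
        }
    ],
}
extract_features(fc, features)  # function from the hunk above
assert features[0]["properties"] == {"external_id": "plot-001"}
```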
openforis_whisp/datasets.py CHANGED
@@ -58,19 +58,6 @@ def g_jrc_gfc_2020_prep():
     return jrc_gfc2020_raw.mosaic().rename("EUFO_2020")
 
 
-## removing JAXA product due to repeat errors of commission being noted by users, compared to other datasets
-
-# # JAXA_FNF_2020
-# def g_jaxa_forest_prep():
-#     jaxa_forest_non_forest_raw = ee.ImageCollection("JAXA/ALOS/PALSAR/YEARLY/FNF4")
-#     jaxa_forest_non_forest_2020 = (
-#         jaxa_forest_non_forest_raw.filterDate("2020-01-01", "2020-12-31")
-#         .select("fnf")
-#         .mosaic()
-#     )
-#     return jaxa_forest_non_forest_2020.lte(2).rename("JAXA_FNF_2020")
-
-
 # GFC_TC_2020
 def g_glad_gfc_10pc_prep():
     gfc = ee.Image("UMD/hansen/global_forest_change_2024_v1_12")
@@ -285,7 +272,7 @@ def g_fdap_rubber_2023_prep():
     fdap_rubber = (
         fdap_rubber2020_model_raw.filterDate("2023-01-01", "2023-12-31")
         .mosaic()
-        .gt(0.93)  # Threshold for Rubber
+        .gt(0.59)  # Threshold for Rubber
     )
     return fdap_rubber.rename("Rubber_2023_FDaP")
 
@@ -896,9 +883,9 @@ def nbr_terraclass_amz20_secondary_prep():
 
 # Cerrado - filtered with QGIS because the original geodatabase is too large to export as a shapefile (GEE accepted format)
 def nbr_bfs_cer_f20_prep():
-    bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_pmp_2020")
+    bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_cerr_2020")
     bfs_fcer20_binary = ee.Image().paint(bfs_fcer20, 1)
-    return bfs_fcer20_binary.rename("nBR_BFS_primary&secondary_forest_Cerrado_2020")
+    return bfs_fcer20_binary.rename("nBR_BFS_primary_and_secondary_forest_Cerrado_2020")
 
 
 # %%
@@ -1277,7 +1264,9 @@ def combine_datasets(national_codes=None):
 
     try:
         # Attempt to print band names to check for errors
-        print(img_combined.bandNames().getInfo())
+        # print(img_combined.bandNames().getInfo())
+        img_combined.bandNames().getInfo()
+
     except ee.EEException as e:
         # logger.error(f"Error printing band names: {e}")
         # logger.info("Running code for filtering to only valid datasets due to error in input")
@@ -1294,6 +1283,7 @@ def combine_datasets(national_codes=None):
         img_combined = img_combined.addBands(img)
 
     img_combined = img_combined.multiply(ee.Image.pixelArea())
+    print("Whisp multiband image compiled")
 
     return img_combined
 
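The `combine_datasets` change above keeps the `bandNames().getInfo()` call but drops the print: the round trip exists purely to force server-side evaluation, so an invalid input dataset raises `ee.EEException` and triggers the fallback filtering. The pattern in isolation, as a sketch:

```python
# Sketch of the validate-by-evaluation pattern used in combine_datasets():
# .getInfo() forces Earth Engine to evaluate the expression, so a broken
# band list surfaces immediately as ee.EEException rather than later.
import ee

def bands_are_valid(img: ee.Image) -> bool:
    try:
        img.bandNames().getInfo()  # raises ee.EEException on invalid inputs
        return True
    except ee.EEException:
        return False
```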
openforis_whisp/parameters/config_runtime.py CHANGED
@@ -16,7 +16,7 @@ centroid_x_coord_column = "Centroid_lon"
 
 centroid_y_coord_column = "Centroid_lat"
 
-geo_id_column = "external_id"
+external_id_column = "external_id"
 
 geometry_type_column = "Geometry_type"
 
openforis_whisp/parameters/lookup_context_and_metadata.csv CHANGED
@@ -1,6 +1,6 @@
 name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude_from_output,col_type,is_nullable,is_required,corresponding_variable
 plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,plot_id_column
-external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,geo_id_column
+external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,external_id_column
 Area,-8,,context_and_metadata,context_and_metadata,NA,NA,0,float32,1,1,geometry_area_column
 Geometry_type,-7,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_type_column
 Country,-6,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso3_country_column
openforis_whisp/parameters/lookup_gee_datasets.csv CHANGED
@@ -2,7 +2,6 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
 EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
 GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
 TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
-JAXA_FNF_2020,40,,treecover,NA,1,0,1,float32,1,0,g_jaxa_forest_prep
 GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
@@ -173,14 +172,14 @@ ESRI_2023_crop,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2023_crop_prep
 GLC_FCS30D_crop_2022,2140,,NA,agri_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_crop_2022_prep
 GFW_logging_before_2020,2200,,NA,logging_concession,0,1,0,float32,1,0,g_logging_concessions_prep
 nCO_ideam_forest_2020,2310,CO,treecover,NA,1,1,0,float32,1,0,nco_ideam_forest_2020_prep
-nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,0,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
+nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,1,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
 nBR_INPE_TC_primary_forest_Amazon_2020,2400,BR,treecover,primary,1,1,0,float32,1,0,nbr_terraclass_amz20_primary_prep
 nBR_INPE_TC_secondary_forest_Amazon_2020,2401,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_terraclass_amz20_secondary_prep
 nBR_BFS_primary_forest_Pantanal_2020,2402,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_ptn_f20_prep
 nBR_BFS_primary_forest_Caatinga_2020,2403,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_caat_f20_prep
 nBR_BFS_primary_forest_AtlanticForest_2020,2404,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_atlf_f20_prep
 nBR_BFS_primary_forest_Pampa_2020,2405,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_pmp_f20_prep
-nBR_BFS_primary&secondary_forest_Cerrado_2020,2406,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_bfs_cer_f20_prep
+nBR_BFS_primary_and_secondary_forest_Cerrado_2020,2406,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_bfs_cer_f20_prep
 nBR_MapBiomas_col9_forest_Brazil_2020,2407,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_mapbiomasc9_f20_prep
 nBR_INPE_TCsilviculture_Amazon_2020,2408,BR,treecover,planted_plantation_2020,1,1,0,float32,1,0,nbr_terraclass_amz20_silv_prep
 nBR_INPE_TCsilviculture_Cerrado_2020,2409,BR,treecover,planted_plantation_2020,1,1,0,float32,1,0,nbr_terraclass_silv_cer20_prep
openforis_whisp/risk.py CHANGED
@@ -272,16 +272,12 @@ def whisp_risk(
         df=df_w_indicators,
         ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
-        ind_3_name=ind_3_name,
         ind_4_name=ind_4_name,
     )
 
     df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
         df=df_w_indicators,
-        ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
-        ind_3_name=ind_3_name,
-        ind_4_name=ind_4_name,
         ind_5_name=ind_5_name,
         ind_6_name=ind_6_name,
         ind_7_name=ind_7_name,
@@ -306,10 +302,10 @@ def add_eudr_risk_pcrop_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str): Name of first indicator column.
-        ind_2_name (str): Name of second indicator column.
-        ind_3_name (str): Name of third indicator column.
-        ind_4_name (str): Name of fourth indicator column.
+        ind_1_name (str, optional): Name of first indicator column. Defaults to "Ind_01_treecover".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_3_name (str, optional): Name of third indicator column. Defaults to "Ind_03_disturbance_before_2020".
+        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
@@ -337,7 +333,6 @@ def add_eudr_risk_acrop_col(
     df: data_lookup_type,
     ind_1_name: str,
     ind_2_name: str,
-    ind_3_name: str,
     ind_4_name: str,
 ) -> data_lookup_type:
     """
@@ -345,10 +340,9 @@ def add_eudr_risk_acrop_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str, optional): Name of first indicator column. Defaults to "Indicator_1_treecover".
-        ind_2_name (str, optional): Name of second indicator column. Defaults to "Indicator_2_commodities".
-        ind_3_name (str, optional): Name of third indicator column. Defaults to "Indicator_3_disturbance_before_2020".
-        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
+        ind_1_name (str, optional): Name of first indicator column. Defaults to "Ind_01_treecover".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
@@ -371,10 +365,7 @@ def add_eudr_risk_acrop_col(
 
 def add_eudr_risk_timber_col(
     df: data_lookup_type,
-    ind_1_name: str,
     ind_2_name: str,
-    ind_3_name: str,
-    ind_4_name: str,
     ind_5_name: str,
     ind_6_name: str,
     ind_7_name: str,
@@ -388,51 +379,54 @@ def add_eudr_risk_timber_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str, optional): Name of first indicator column. Defaults to "Indicator_1_treecover".
-        ind_2_name (str, optional): Name of second indicator column. Defaults to "Indicator_2_commodities".
-        ind_3_name (str, optional): Name of third indicator column. Defaults to "Indicator_3_disturbance_before_2020".
-        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_5_name (str, optional): Name of fifth indicator column. Defaults to "Ind_05_primary_2020".
+        ind_6_name (str, optional): Name of sixth indicator column. Defaults to "Ind_06_nat_reg_forest_2020".
+        ind_7_name (str, optional): Name of seventh indicator column. Defaults to "Ind_07_planted_plantations_2020".
+        ind_8_name (str, optional): Name of eighth indicator column. Defaults to "Ind_08_planted_plantations_after_2020".
+        ind_9_name (str, optional): Name of ninth indicator column. Defaults to "Ind_09_treecover_after_2020".
+        ind_10_name (str, optional): Name of tenth indicator column. Defaults to "Ind_10_agri_after_2020".
+        ind_11_name (str, optional): Name of eleventh indicator column. Defaults to "Ind_11_logging_concession_before_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
     """
 
     for index, row in df.iterrows():
-        # If there is a commodity in 2020 OR if there is planted-plantation in 2020 AND no agriculture in 2023, set EUDR_risk_degrad to "low"
+        # If there is a commodity in 2020 (ind_2_name)
+        # OR if there is planted-plantation in 2020 (ind_7_name) AND no agriculture in 2023 (ind_10_name), set EUDR_risk_timber to "low"
         if row[ind_2_name] == "yes" or (
             row[ind_7_name] == "yes" and row[ind_10_name] == "no"
         ):
            df.at[index, "risk_timber"] = "low"
-        # If there is no tree cover, set EUDR_risk_degrad to "low"? no because of unstocked forests
-        # if row[ind_1_name] == "no" or row[ind_3_name] == "yes" or row[ind_7_name] == "yes":
-        #     df.at[index, 'EUDR_risk_degrad'] = "low"
-        # If primary or naturally regenerating or planted forest in 2020 AND agricultural use in 2023, set EUDR_risk to high
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) or planted forest (ind_7_name) in 2020 AND agricultural after 2020 (ind_10_name), set EUDR_timber to high
         elif (
            row[ind_5_name] == "yes"
            or row[ind_6_name] == "yes"
            or row[ind_7_name] == "yes"
        ) and row[ind_10_name] == "yes":
            df.at[index, "risk_timber"] = "high"
-        # If primary or naturally regenerating AND planted post 2020, set EUDR_risk to "high"
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) AND planted after 2020 (ind_8_name), set EUDR_risk to "high"
         elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
            ind_8_name
        ] == "yes":
            df.at[index, "risk_timber"] = "high"
+        # No data yet on OWL conversion
         # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
         # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
         #     df.at[index, 'EUDR_risk_timber'] = "high"
 
-        # If primary forest OR naturally regenerating AND an information on management practice OR tree cover post 2020, set EUDR_risk_degrad to "low"
+        # If there is a natural primary forest (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) AND an information on management practice any time (ind_11_name) OR tree cover or regrowth post 2020 (ind_9_name), set EUDR_risk_timber to "low"
         elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
            row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
        ):
            df.at[index, "risk_timber"] = "low"
-        # If primary or naturally regenerating and no other info, set EUDR_risk to "more_info_needed"
+        # If primary (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) and no other info, set EUDR_risk to "more_info_needed"
         elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
            df.at[index, "risk_timber"] = "more_info_needed"
-        # If none of the above conditions are met, set EUDR_risk to "high"
+        # If none of the above conditions are met, set EUDR_risk to "low"
         else:
-            df.at[index, "risk_timber"] = "high"
+            df.at[index, "risk_timber"] = "low"
 
     return df
 
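A toy run of the slimmed-down timber decision tree, using the default column names documented in the hunk above; the `ind_8_name` through `ind_11_name` parameters are assumed to follow the same pattern as the ones visible in the signature diff. Note the fall-through case now yields "low" rather than "high".

```python
# Toy example: primary forest in 2020 and no other evidence
# -> risk_timber should be "more_info_needed".
import pandas as pd
from openforis_whisp.risk import add_eudr_risk_timber_col

row = {
    "Ind_02_commodities": "no",
    "Ind_05_primary_2020": "yes",
    "Ind_06_nat_reg_forest_2020": "no",
    "Ind_07_planted_plantations_2020": "no",
    "Ind_08_planted_plantations_after_2020": "no",
    "Ind_09_treecover_after_2020": "no",
    "Ind_10_agri_after_2020": "no",
    "Ind_11_logging_concession_before_2020": "no",
}
df = add_eudr_risk_timber_col(
    df=pd.DataFrame([row]),
    ind_2_name="Ind_02_commodities",
    ind_5_name="Ind_05_primary_2020",
    ind_6_name="Ind_06_nat_reg_forest_2020",
    ind_7_name="Ind_07_planted_plantations_2020",
    ind_8_name="Ind_08_planted_plantations_after_2020",
    ind_9_name="Ind_09_treecover_after_2020",
    ind_10_name="Ind_10_agri_after_2020",
    ind_11_name="Ind_11_logging_concession_before_2020",
)
print(df["risk_timber"].iloc[0])  # "more_info_needed"
```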
openforis_whisp/stats.py CHANGED
@@ -6,7 +6,7 @@ import json
 import country_converter as coco
 from openforis_whisp.parameters.config_runtime import (
     plot_id_column,
-    geo_id_column,
+    external_id_column,
     geometry_type_column,
     geometry_area_column,
     geometry_area_column_formatting,
@@ -57,6 +57,8 @@ def whisp_formatted_stats_geojson_to_df(
         The filepath to the GeoJSON of the ROI to analyze.
     external_id_column : str, optional
         The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
+        This column must exist as a property in ALL features of the GeoJSON file.
+        Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
     remove_geom : bool, default=False
         If True, the geometry of the GeoJSON is removed from the output DataFrame.
     national_codes : list, optional
@@ -369,7 +371,11 @@ def whisp_stats_geojson_to_drive(
 
 
 def whisp_stats_ee_to_ee(
-    feature_collection, external_id_column, national_codes=None, unit_type="ha"
+    feature_collection,
+    external_id_column,
+    national_codes=None,
+    unit_type="ha",
+    keep_properties=None,
 ):
     """
     Process a feature collection to get statistics for each feature.
@@ -379,46 +385,68 @@
         external_id_column (str): The name of the external ID column to check.
         national_codes (list, optional): List of ISO2 country codes to include national datasets.
         unit_type (str): Whether to use hectares ("ha") or percentage ("percent"), default "ha".
+        keep_properties (None, bool, or list, optional): Properties to keep from the input features.
+            - None: Remove all properties (default behavior)
+            - True: Keep all properties
+            - list: Keep only the specified properties
 
     Returns:
         ee.FeatureCollection: The output feature collection with statistics.
     """
     if external_id_column is not None:
         try:
-            # Check if external_id_column is a property in feature_collection (server-side)
-            def check_column_exists(feature):
-                return ee.Algorithms.If(
-                    feature.propertyNames().contains(external_id_column),
-                    feature,
-                    ee.Feature(
-                        None
-                    ),  # Return an empty feature if the column does not exist
-                )
-
-            feature_collection_with_check = feature_collection.map(check_column_exists)
-            size_fc = feature_collection.size()
-            valid_feature_count = feature_collection_with_check.filter(
-                ee.Filter.notNull([external_id_column])
-            ).size()
+            # Validate that the external_id_column exists in all features
+            validation_result = validate_external_id_column(
+                feature_collection, external_id_column
+            )
 
-            # Raise an error if the column does not exist in any feature
-            if valid_feature_count.neq(size_fc).getInfo():
-                raise ValueError(
-                    f"The column '{external_id_column}' is not a property throughout the feature collection."
+            if not validation_result["is_valid"]:
+                raise ValueError(validation_result["error_message"])
+
+            # First handle property selection, but preserve the external_id_column
+            if keep_properties is not None:
+                if keep_properties == True:
+                    # Keep all properties including external_id_column
+                    pass  # No need to modify feature_collection
+                elif isinstance(keep_properties, list):
+                    # Ensure external_id_column is included in the list
+                    if external_id_column not in keep_properties:
+                        keep_properties = keep_properties + [external_id_column]
+                    feature_collection = feature_collection.select(keep_properties)
+                else:
+                    raise ValueError(
+                        "keep_properties must be None, True, or a list of property names."
+                    )
+
+            # Set the external_id with robust null handling
+            def set_external_id_safely_and_clean(feature):
+                external_id_value = feature.get(external_id_column)
+                # Use server-side null checking and string conversion
+                external_id_value = ee.Algorithms.If(
+                    ee.Algorithms.IsEqual(external_id_value, None),
+                    "unknown",
+                    ee.String(external_id_value),
                 )
+                # Create a new feature with the standardized external_id column
+                # Note: we use "external_id" as the standardized column name, not the original external_id_column name
+                return ee.Feature(feature.set("external_id", external_id_value))
 
-            # Set the geo_id_column
             feature_collection = feature_collection.map(
-                lambda feature: feature.set(
-                    geo_id_column, ee.String(feature.get(external_id_column))
-                )
+                set_external_id_safely_and_clean
             )
 
+            # Finally, clean up to keep only geometry and external_id if keep_properties is None
+            if keep_properties is None:
+                feature_collection = feature_collection.select(["external_id"])
+
         except Exception as e:
             # Handle the exception and provide a helpful error message
             print(
                 f"An error occurred when trying to set the external_id_column: {external_id_column}. Error: {e}"
             )
+            raise e  # Re-raise the exception to stop execution
+    else:
+        feature_collection = _keep_fc_properties(feature_collection, keep_properties)
 
     fc = get_stats(
         feature_collection, national_codes=national_codes, unit_type=unit_type
@@ -427,6 +455,23 @@
     return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
 
 
+def _keep_fc_properties(feature_collection, keep_properties):
+    # If keep_properties is specified, select only those properties
+    if keep_properties is None:
+        feature_collection = feature_collection.select([])
+    elif keep_properties == True:
+        # If keep_properties is true, select all properties
+        first_feature_props = feature_collection.first().propertyNames().getInfo()
+        feature_collection = feature_collection.select(first_feature_props)
+    elif isinstance(keep_properties, list):
+        feature_collection = feature_collection.select(keep_properties)
+    else:
+        raise ValueError(
+            "keep_properties must be None, True, or a list of property names."
+        )
+    return feature_collection
+
+
 def whisp_stats_ee_to_df(
     feature_collection: ee.FeatureCollection,
     external_id_column=None,
@@ -951,3 +996,139 @@ def convert_iso3_to_iso2(df, iso3_column, iso2_column):
     )
 
     return df
+
+
+def validate_external_id_column(feature_collection, external_id_column):
+    """
+    Validates that the external_id_column exists in all features of the collection.
+
+    Parameters
+    ----------
+    feature_collection : ee.FeatureCollection
+        The feature collection to validate
+    external_id_column : str
+        The name of the external ID column to check
+
+    Returns
+    -------
+    dict
+        Dictionary with validation results including:
+        - 'is_valid': bool indicating if column exists in all features
+        - 'total_features': int total number of features
+        - 'features_with_column': int number of features that have the column
+        - 'available_properties': list of properties available in first feature
+        - 'error_message': str error message if validation fails
+    """
+    try:
+        # Get total number of features
+        total_features = feature_collection.size().getInfo()
+
+        if total_features == 0:
+            return {
+                "is_valid": False,
+                "total_features": 0,
+                "features_with_column": 0,
+                "available_properties": [],
+                "error_message": "Feature collection is empty",
+            }
+
+        # Get available properties from first feature
+        first_feature_props = feature_collection.first().propertyNames().getInfo()
+
+        # Check if external_id_column exists in all features
+        def check_column_exists(feature):
+            has_column = feature.propertyNames().contains(external_id_column)
+            return feature.set("_has_external_id", has_column)
+
+        features_with_check = feature_collection.map(check_column_exists)
+        features_with_column = (
+            features_with_check.filter(ee.Filter.eq("_has_external_id", True))
+            .size()
+            .getInfo()
+        )
+
+        is_valid = features_with_column == total_features
+
+        error_message = None
+        if not is_valid:
+            missing_count = total_features - features_with_column
+            error_message = (
+                f"The column '{external_id_column}' is missing from {missing_count} "
+                f"out of {total_features} features in the collection. "
+                f"Available properties in first feature: {first_feature_props}"
+            )
+
+        return {
+            "is_valid": is_valid,
+            "total_features": total_features,
+            "features_with_column": features_with_column,
+            "available_properties": first_feature_props,
+            "error_message": error_message,
+        }
+
+    except Exception as e:
+        return {
+            "is_valid": False,
+            "total_features": 0,
+            "features_with_column": 0,
+            "available_properties": [],
+            "error_message": f"Error during validation: {str(e)}",
+        }
+
+
+def debug_feature_collection_properties(feature_collection, max_features=5):
+    """
+    Debug helper function to inspect the properties of features in a collection.
+
+    Parameters
+    ----------
+    feature_collection : ee.FeatureCollection
+        The feature collection to inspect
+    max_features : int, optional
+        Maximum number of features to inspect, by default 5
+
+    Returns
+    -------
+    dict
+        Dictionary with debugging information about the feature collection
+    """
+    try:
+        total_features = feature_collection.size().getInfo()
+
+        if total_features == 0:
+            return {"total_features": 0, "error": "Feature collection is empty"}
+
+        # Limit the number of features to inspect
+        features_to_check = min(max_features, total_features)
+        limited_fc = feature_collection.limit(features_to_check)
+
+        # Get properties for each feature
+        def get_feature_properties(feature):
+            return ee.Dictionary(
+                {
+                    "properties": feature.propertyNames(),
+                    "geometry_type": feature.geometry().type(),
+                }
+            )
+
+        feature_info = limited_fc.map(get_feature_properties).getInfo()
+
+        return {
+            "total_features": total_features,
+            "inspected_features": features_to_check,
+            "feature_details": [
+                {
+                    "feature_index": i,
+                    "properties": feature_info["features"][i]["properties"][
+                        "properties"
+                    ],
+                    "geometry_type": feature_info["features"][i]["properties"][
+                        "geometry_type"
+                    ],
+                }
+                for i in range(len(feature_info["features"]))
+            ],
+        }
+
+    except Exception as e:
+        return {"error": f"Error during debugging: {str(e)}"}
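A hedged sketch combining the new helpers with `keep_properties` (the asset path and the extra property name are hypothetical; module-level imports from `openforis_whisp.stats` are assumed):

```python
import ee
from openforis_whisp.stats import (
    debug_feature_collection_properties,
    validate_external_id_column,
    whisp_stats_ee_to_ee,
)

ee.Initialize()
plots = ee.FeatureCollection("projects/my-project/assets/my_plots")  # hypothetical

report = validate_external_id_column(plots, "external_id")
if not report["is_valid"]:
    # Inspect what properties the features actually carry
    print(report["error_message"])
    print(debug_feature_collection_properties(plots, max_features=3))
else:
    stats = whisp_stats_ee_to_ee(
        plots,
        external_id_column="external_id",
        keep_properties=["farm_name"],  # hypothetical extra property to keep
    )
```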
openforis_whisp/utils.py CHANGED
@@ -113,9 +113,9 @@ def remove_geometry_from_feature_collection(feature_collection):
     return feature_collection_no_geometry
 
 
-# Compute centroids of each polygon
-def get_centroid(feature, geo_id_column="Geo_id"):
-    keepProperties = [geo_id_column]
+# Compute centroids of each polygon including the external_id_column
+def get_centroid(feature, external_id_column="external_id"):
+    keepProperties = [external_id_column]
     # Get the centroid of the feature's geometry.
     centroid = feature.geometry().centroid(1)
     # Return a new Feature, copying properties from the old Feature.
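With the rename, `get_centroid` defaults to carrying the standardized `external_id` property onto each centroid. A usage sketch (asset path hypothetical):

```python
import ee
from openforis_whisp.utils import get_centroid

ee.Initialize()
plots = ee.FeatureCollection("projects/my-project/assets/my_plots")  # hypothetical
centroids = plots.map(lambda f: get_centroid(f, external_id_column="external_id"))
```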
openforis_whisp-2.0.0a5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 2.0.0a3
+Version: 2.0.0a5
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
@@ -59,7 +59,6 @@ Description-Content-Type: text/markdown
 - [Whisp pathways](#whisp_pathways)
 - [Whisp datasets](#whisp_datasets)
 - [Whisp notebooks](#whisp_notebooks)
-- [System setup](#whisp_setup)
 - [Add data layers](#whisp_add_data)
 - [Contribute to the code](#whisp_contribute)
 - [Code of conduct](#whisp_conduct)
@@ -78,8 +77,6 @@ Description-Content-Type: text/markdown
 
 
 ## Whisp datasets <a name="whisp_datasets"></a>
-All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
-
 ***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
 
 1) Tree and forest cover (at the end of 2020);
@@ -87,27 +84,39 @@ Description-Content-Type: text/markdown
 3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
 4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
 
+Additional categories are specific for the timber commodity, considering a harvesting date in 2023:
+
+5) Primary forests in 2020;
+6) Naturally regenerating forests in 2020;
+7) Planted and plantation forests in 2020;
+8) Planted and plantation forests in 2023;
+9) Treecover in 2023;
+10) Commodities or croplands in 2023.
+11) Logging concessions;
+
 There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
-Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
+
+### Whisp risk assessment <a name="whisp_risk"></a>
+
+Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
 
 1) Was there tree cover in 2020?
 2) Were there commodity plantations or other agricultural uses in 2020?
 3) Were there disturbances until 2020-12-31?
 4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
 
-If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
-
-If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
-
-If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
-
-Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
-However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
+And specifically for the timber commodity, considering a harvesting date in 2023:
 
+5) Were there primary forests in 2020?
+6) Were there naturally regenerating forests in 2020?
+7) Were there planted and plantation forests in 2020?
+8) Were there planted and plantation forests in 2023?
+9) Was there treecover in 2023?
+10) Were there commodity plantations or other agricultural uses in 2023?
+11) Is it part of a logging concession?
 
-*The Whisp algorithm for **Perennial Crops** visualized:*
-![CoE_Graphic 5](https://github.com/user-attachments/assets/007b5f50-3939-4707-95fa-98be4d56745f)
 The Whisp algorithm outputs multiple statistical columns with disaggregated data from the input datasets, followed by aggregated indicator columns, and the final risk assessment columns.
+All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
 
 The **relevant risk assessment column depends on the commodity** in question:
 
@@ -142,47 +151,28 @@ The **relevant risk assessment column depends on the commodity** in question:
 </tr>
 </table>
 
-The decision tree for the timber risk assessment slightly differs from the above. For more information see below.
-
-
+*The Whisp algorithm for **Perennial Crops** visualized:*
+![CoE_Graphic 5](https://github.com/user-attachments/assets/007b5f50-3939-4707-95fa-98be4d56745f)
+
+If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
+If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
-## Whisp datasets for timber <a name="whisp_datasets_timber"></a>
-***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
-1) Tree and forest cover (at the end of 2020);
-2) Commodities (i.e., crop plantations and other agricultural uses at the end of 2020);
-3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
-4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
-5) Primary forests in 2020;
-6) Naturally regenerating forests in 2020;
-7) Planted and plantation forests in 2020;
-8) Planted and plantation forests in 2023;
-9) Treecover in 2023;
-10) Commodities or croplands in 2023.
-11) Logging concessions;
+If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
 
-There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
-Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
+Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
+However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
 
-1) Was there tree cover in 2020?
-2) Were there commodity plantations or other agricultural uses in 2020?
-3) Were there disturbances until 2020-12-31?
-4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
-5) Were there primary forests in 2020?
-6) Were there naturally regenerating forests in 2020?
-7) Were there planted and plantation forests in 2020?
-8) Were there planted and plantation forests in 2023?
-9) Was there treecover in 2023?
-10) Were there commodity plantations or other agricultural uses in 2023?
-11) Were there logging concessions?
 
-# Run Whisp python package from a notebook
+## Run Whisp python package from a notebook <a name="whisp_notebooks"></a>
 
 For most users we suggest using the Whisp App to process their plot data. But for some, using the python package directly will fit their workflow.
 
 A simple example of the package functionality can be seen in this [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
 
-## Requirements for running the package
+For an example notebook adapted for running locally (or in Sepal), see: [whisp_geojson_to_csv.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_csv.ipynb) or if datasets are very large, see [whisp_geojson_to_drive.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_drive.ipynb)
+
+### Requirements for running the package
 
 - A Google Earth Engine (GEE) account.
 - A registered cloud GEE project.
@@ -190,7 +180,8 @@ The **relevant risk assessment column depends on the commodity** in question:
 
 More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
 
-## Python package installation
+
+### Python package installation
 
 The Whisp package is available on pip
 https://pypi.org/project/openforis-whisp/
@@ -202,15 +193,15 @@ The **relevant risk assessment column depends on the commodity** in question:
 pip install --pre openforis-whisp
 ```
 
-If running locally we recommend a [virtual environment](https://docs.python.org/3/library/venv.html) to keep your main python installation clean.
+If running the package locally we recommend a [virtual environment](https://docs.python.org/3/library/venv.html) to keep your main python installation clean. For users running the package in Sepal see [here](https://docs.sepal.io/en/latest/cli/python.html#virtual-environment).
 
 The package relies upon the google earth engine api being setup correctly using a registered cloud project.
 
-More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
+More info on Whisp can be found [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
 
 
 
-## How to add data layers to Whisp
+## How to add data layers to Whisp <a name="whisp_add_data"></a>
 
 
 
@@ -253,12 +244,24 @@ Before submitting a request, consider the following:
 ### Adding your own data directly
 
 
-
 To add your own data you will need some coding experience as well as familiarity with GitHub and Google Earth Engine.
 
+This approach is for those who want to run a bespoke analysis combining their own data with those already in Whisp.
 
-Firstly follow the steps to install the package in editable mode (as detailed below in Contributing to the Whisp code base). Once in editable mode you are running the Whisp package locally based on a cloned version of the code. This approach is for those who want to run a bespoke analysis combining their own data with those already in Whisp. If, however, you think the datasets are of use to the wider community and you have the code running smoothly, you can make a pull request from a forked repository.
+Firstly follow the steps below to install the package in editable mode.
 
+As with the regular pip installation, we recommend a separate [virtual environment](https://docs.python.org/3/library/venv.html) for running in editable mode. For Sepal users see [here](https://docs.sepal.io/en/latest/cli/python.html#virtual-environment).
+
+```bash
+
+git clone https://github.com/forestdatapartnership/whisp.git
+
+cd whisp/
+
+pip install -e .[dev]
+
+```
+Once in editable mode you are running the Whisp package locally based on a cloned version of the code.
 
 
 
@@ -294,7 +297,7 @@ For example, if it is a dataset for tree cover in 2000, then add `'treecover'` u
 
 ```python
 
-def nBR_my_custom_dataset_prep():
+def my_custom_dataset_prep():
 
 image = ee.Image("MY/GEE/DATASET")
 
@@ -309,7 +312,6 @@ return binary.rename("My_custom_dataset")
 ---
 
 
-
 We are working on ways to make this process smoother. However, in the meantime do contact us through the [issues page on GitHub](https://github.com/forestdatapartnership/whisp/issues), or via the Open Foris email, if this functionality is useful to you or you need help.
 
 
@@ -318,28 +320,14 @@ We are working on ways to make this process smoother. However, in the meantime d
 
 
 
-## Contributing to the Whisp code base
-
-
-
-Contributions to the Whisp code in GitHub are welcome. They can be made by forking the repository, making and pushing the required changes, then making a pull request to the Whisp repository. After briefly reviewing the request, we can make a branch for which to make a new pull request to. After final checks, we can then incorporate the code into the main branch. If in doubt, get in contact first or log as an issue [here](https://github.com/forestdatapartnership/whisp/issues/).
-
+## Contributing to the Whisp code base <a name="whisp_contribute"></a>
 
+Contributions to the Whisp code in GitHub are welcome. These could be additional functionality, datasets or just cleaner code! Contributions can be made by forking the repository, making and pushing the required changes, then making a pull request to the Whisp repository. After briefly reviewing the request, we can make a branch for which to make a new pull request to. After final checks, we can then incorporate the code into the main branch. If in doubt, get in contact first or log as an issue [here](https://github.com/forestdatapartnership/whisp/issues/).
 
-Install the package in editable mode:
 
+Install the package in editable mode (see Adding your own data directly above):
 
-```bash
-
-git clone https://github.com/forestdatapartnership/whisp.git
-
-cd whisp/
-
-pip install -e .[dev]
-
-```
-
-Add additional dependencies required for testing and running pre-commit hooks:
+Then add additional dependencies required for testing and running pre-commit hooks:
 
 
 ```bash
@@ -352,7 +340,6 @@ pre-commit install
 You should be able to run the Pytest suite by simply running the `pytest` command from the repo's root folder.
 
 
-
 Please read the [contributing guidelines](contributing_guidelines.md) for good practice recommendations
 
 
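The notebooks referenced in the README follow roughly this flow; an end-to-end sketch using functions visible in this diff (input path hypothetical; the top-level re-exports and the `whisp_risk` call signature are assumptions):

```python
import ee
import openforis_whisp as whisp  # top-level re-exports assumed, as in the notebooks

ee.Initialize()
df = whisp.whisp_formatted_stats_geojson_to_df(
    "my_plots.geojson",  # hypothetical input
    external_id_column="external_id",
)
df_risk = whisp.whisp_risk(df)  # adds the EUDR risk columns from risk.py above
df_risk.to_csv("whisp_output.csv", index=False)
```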
openforis_whisp-2.0.0a5.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+openforis_whisp/__init__.py,sha256=xfXNzskPfnlQkmf3QZHEydhte3U9_uLdoYM04eowNqw,2403
+openforis_whisp/data_conversion.py,sha256=_HSjYozNO1xAOAk-uGmzTVCTOc3W7x3GDlvEUgrnj_Q,16909
+openforis_whisp/datasets.py,sha256=9Ofxyy2ignnN6mSXfXDP9n6SsQ8QPQQWivuolS_i8LY,52013
+openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
+openforis_whisp/parameters/__init__.py,sha256=KL7iORJVjSpZatYjoyWckcmQJnE89_DBC8R6_0_eR6o,349
+openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
+openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
+openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=5K1LQyuvwvG1vOdlyCknv_foDtRUKHPU3VvOU_zsoWQ,17626
+openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
+openforis_whisp/reformat.py,sha256=o3TpeuddR1UlP1C3uFeI957kIZYMQqEW1pXsjKbAtiY,17922
+openforis_whisp/risk.py,sha256=FNWH84xhSjVZW3yTnTWZF3MxiZtNA5jb154vu-C2kJ0,31951
+openforis_whisp/stats.py,sha256=_l2V8BWdbJ2GoK7N5Zswg0Gvs1I5RRT-JGgl9fyl2AY,40882
+openforis_whisp/utils.py,sha256=YqFYK1fH2WpuWolXa-gCeSGYiHdJ0_xQUIo15dQ9Sh8,5378
+openforis_whisp-2.0.0a5.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+openforis_whisp-2.0.0a5.dist-info/METADATA,sha256=4ii5-gyxRZZmWyAhorNo9phcbpQoLRcmhagxxCCKHeA,16681
+openforis_whisp-2.0.0a5.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openforis_whisp-2.0.0a5.dist-info/RECORD,,
openforis_whisp-2.0.0a3.dist-info/RECORD REMOVED
@@ -1,17 +0,0 @@
-openforis_whisp/__init__.py,sha256=xfXNzskPfnlQkmf3QZHEydhte3U9_uLdoYM04eowNqw,2403
-openforis_whisp/data_conversion.py,sha256=Ean2SBxhGr1YwzhbrHQD9kDdRYdNTJZLBiAmYZtBIM8,11812
-openforis_whisp/datasets.py,sha256=EOiNwTaMUMc0hYXBwUVzP-5q0Vq2jqzdNQF0Y6GQCSQ,52411
-openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
-openforis_whisp/parameters/__init__.py,sha256=KL7iORJVjSpZatYjoyWckcmQJnE89_DBC8R6_0_eR6o,349
-openforis_whisp/parameters/config_runtime.py,sha256=aH00CFV09f7JQnZQzpCFR5BIlvsovVfM4K_KUjMl0N8,1416
-openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=54uZ4oqfsiHgj2I39pAcsCr4SeSUqgIRboDhlxIAdik,1293
-openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=r1s_eUneFOVk7RALukaJj7Rj374XWuZTDkE2dAZAeu0,17691
-openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
-openforis_whisp/reformat.py,sha256=o3TpeuddR1UlP1C3uFeI957kIZYMQqEW1pXsjKbAtiY,17922
-openforis_whisp/risk.py,sha256=E9yZJ2wCinYrOydKK7EB0O5Imk5quG9Cs1uNkcv8AlM,31531
-openforis_whisp/stats.py,sha256=yAa6j3RpkPIjAM06IKQ7XGaFrwXhxfzIXn37aTOEwP4,33562
-openforis_whisp/utils.py,sha256=hpeY9aA3BND2m9c15PZ6_nClemsfiVNUEzA4pQXfztA,5330
-openforis_whisp-2.0.0a3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
-openforis_whisp-2.0.0a3.dist-info/METADATA,sha256=yjXgVT5Max81548KdmoHRCe1SDap2o1cKkVUYiUUA3Q,16933
-openforis_whisp-2.0.0a3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-openforis_whisp-2.0.0a3.dist-info/RECORD,,