openforis-whisp 2.0.0a3__py3-none-any.whl → 2.0.0a5__py3-none-any.whl

openforis_whisp/data_conversion.py CHANGED
@@ -12,55 +12,32 @@ import geopandas as gpd
 import ee
 
 
-def convert_ee_to_geojson(ee_object, filename=None, indent=2, **kwargs):
-    """Converts Earth Engine object to geojson.
-
-    Args:
-        ee_object (object): An Earth Engine object.
-        filename (str, optional): The file path to save the geojson. Defaults to None.
-
-    Returns:
-        object: GeoJSON object.
-    """
-
-    try:
-        if (
-            isinstance(ee_object, ee.Geometry)
-            or isinstance(ee_object, ee.Feature)
-            or isinstance(ee_object, ee.FeatureCollection)
-        ):
-            json_object = ee_object.getInfo()
-            if filename is not None:
-                filename = os.path.abspath(filename)
-                if not os.path.exists(os.path.dirname(filename)):
-                    os.makedirs(os.path.dirname(filename))
-                with open(filename, "w") as f:
-                    f.write(json.dumps(json_object, indent=indent, **kwargs) + "\n")
-            else:
-                return json_object
-        else:
-            print("Could not convert the Earth Engine object to geojson")
-    except Exception as e:
-        raise Exception(e)
-
-
 def convert_geojson_to_ee(
-    geojson_filepath: Any, enforce_wgs84: bool = True
+    geojson_filepath: Any, enforce_wgs84: bool = True, strip_z_coords: bool = True
 ) -> ee.FeatureCollection:
     """
     Reads a GeoJSON file from the given path and converts it to an Earth Engine FeatureCollection.
     Optionally checks and converts the CRS to WGS 84 (EPSG:4326) if needed.
+    Automatically handles 3D coordinates by stripping Z values when necessary.
 
     Args:
         geojson_filepath (Any): The filepath to the GeoJSON file.
         enforce_wgs84 (bool): Whether to enforce WGS 84 projection (EPSG:4326). Defaults to True.
+        strip_z_coords (bool): Whether to automatically strip Z coordinates from 3D geometries. Defaults to True.
 
     Returns:
         ee.FeatureCollection: Earth Engine FeatureCollection created from the GeoJSON.
     """
     if isinstance(geojson_filepath, (str, Path)):
         file_path = os.path.abspath(geojson_filepath)
-        print(f"Reading GeoJSON file from: {file_path}")
+
+        # Apply print_once deduplication for file reading message
+        if not hasattr(convert_geojson_to_ee, "_printed_file_messages"):
+            convert_geojson_to_ee._printed_file_messages = set()
+
+        if file_path not in convert_geojson_to_ee._printed_file_messages:
+            print(f"Reading GeoJSON file from: {file_path}")
+            convert_geojson_to_ee._printed_file_messages.add(file_path)
 
         # Use GeoPandas to read the file and handle CRS
         gdf = gpd.read_file(file_path)
@@ -82,9 +59,133 @@ def convert_geojson_to_ee(
     if validation_errors:
         raise ValueError(f"GeoJSON validation errors: {validation_errors}")
 
-    feature_collection = ee.FeatureCollection(create_feature_collection(geojson_data))
+    # Try to create the feature collection, handle 3D coordinate issues automatically
+    try:
+        feature_collection = ee.FeatureCollection(
+            create_feature_collection(geojson_data)
+        )
+        return feature_collection
+    except ee.EEException as e:
+        if "Invalid GeoJSON geometry" in str(e) and strip_z_coords:
+            # Apply print_once deduplication for Z-coordinate stripping messages
+            if not hasattr(convert_geojson_to_ee, "_printed_z_messages"):
+                convert_geojson_to_ee._printed_z_messages = set()
+
+            z_message_key = f"z_coords_{file_path}"
+            if z_message_key not in convert_geojson_to_ee._printed_z_messages:
+                print(
+                    "Warning: Invalid GeoJSON geometry detected, likely due to 3D coordinates."
+                )
+                print("Attempting to fix by stripping Z coordinates...")
+                convert_geojson_to_ee._printed_z_messages.add(z_message_key)
+
+            # Apply Z-coordinate stripping
+            geojson_data_fixed = _strip_z_coordinates_from_geojson(geojson_data)
+
+            # Try again with the fixed data
+            try:
+                feature_collection = ee.FeatureCollection(
+                    create_feature_collection(geojson_data_fixed)
+                )
+
+                success_message_key = f"z_coords_success_{file_path}"
+                if success_message_key not in convert_geojson_to_ee._printed_z_messages:
+                    print("✓ Successfully converted after stripping Z coordinates")
+                    convert_geojson_to_ee._printed_z_messages.add(success_message_key)
+
+                return feature_collection
+            except Exception as retry_error:
+                raise ee.EEException(
+                    f"Failed to convert GeoJSON even after stripping Z coordinates: {retry_error}"
+                )
+        else:
+            raise e
 
-    return feature_collection
+
+def _strip_z_coordinates_from_geojson(geojson_data: dict) -> dict:
+    """
+    Helper function to strip Z coordinates from GeoJSON data.
+    Converts 3D coordinates to 2D by removing Z values.
+
+    Args:
+        geojson_data (dict): GeoJSON data dictionary
+
+    Returns:
+        dict: GeoJSON data with Z coordinates stripped
+    """
+
+    def strip_z(geometry):
+        """Remove Z coordinates from geometry to make it 2D"""
+        if geometry["type"] == "MultiPolygon":
+            geometry["coordinates"] = [
+                [[[lon, lat] for lon, lat, *_ in ring] for ring in polygon]
+                for polygon in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "Polygon":
+            geometry["coordinates"] = [
+                [[lon, lat] for lon, lat, *_ in ring]
+                for ring in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "Point":
+            if len(geometry["coordinates"]) > 2:
+                geometry["coordinates"] = geometry["coordinates"][:2]
+        elif geometry["type"] == "MultiPoint":
+            geometry["coordinates"] = [coord[:2] for coord in geometry["coordinates"]]
+        elif geometry["type"] == "LineString":
+            geometry["coordinates"] = [
+                [lon, lat] for lon, lat, *_ in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "MultiLineString":
+            geometry["coordinates"] = [
+                [[lon, lat] for lon, lat, *_ in line]
+                for line in geometry["coordinates"]
+            ]
+        return geometry
+
+    # Create a deep copy to avoid modifying the original
+    import copy
+
+    geojson_copy = copy.deepcopy(geojson_data)
+
+    # Process all features
+    if "features" in geojson_copy:
+        for feature in geojson_copy["features"]:
+            if "geometry" in feature and feature["geometry"]:
+                feature["geometry"] = strip_z(feature["geometry"])
+
+    return geojson_copy
+
+
+def convert_ee_to_geojson(ee_object, filename=None, indent=2, **kwargs):
+    """Converts Earth Engine object to geojson.
+
+    Args:
+        ee_object (object): An Earth Engine object.
+        filename (str, optional): The file path to save the geojson. Defaults to None.
+
+    Returns:
+        object: GeoJSON object.
+    """
+
+    try:
+        if (
+            isinstance(ee_object, ee.Geometry)
+            or isinstance(ee_object, ee.Feature)
+            or isinstance(ee_object, ee.FeatureCollection)
+        ):
+            json_object = ee_object.getInfo()
+            if filename is not None:
+                filename = os.path.abspath(filename)
+                if not os.path.exists(os.path.dirname(filename)):
+                    os.makedirs(os.path.dirname(filename))
+                with open(filename, "w") as f:
+                    f.write(json.dumps(json_object, indent=indent, **kwargs) + "\n")
+            else:
+                return json_object
+        else:
+            print("Could not convert the Earth Engine object to geojson")
+    except Exception as e:
+        raise Exception(e)
 
 
 def convert_geojson_to_shapefile(geojson_path, shapefile_output_path):
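A minimal usage sketch of the new `strip_z_coords` handling above (the file path is hypothetical; an authenticated, initialized Earth Engine session and the module path shown in the file header are assumed):

```python
# Minimal sketch, assuming ee.Initialize() succeeds and the GeoJSON below exists.
# A file exported with 3D coordinates such as [lon, lat, 0.0] previously failed
# with "Invalid GeoJSON geometry"; with strip_z_coords=True (the default) the
# Z values are stripped and the conversion is retried.
import ee
from openforis_whisp.data_conversion import convert_geojson_to_ee

ee.Initialize()
fc = convert_geojson_to_ee("plots_with_z_values.geojson", strip_z_coords=True)
print(fc.size().getInfo())
```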
@@ -252,28 +353,49 @@ def validate_geojson(input_data: Any) -> List[str]:
     return errors
 
 
-def extract_features(geometry: Any, features: List[Feature]) -> None:
+def extract_features(geojson_obj: Any, features: List[Feature]) -> None:
     """
-    Recursively extracts features from a geometry and adds them to the feature list.
+    Recursively extracts features from a GeoJSON object and adds them to the feature list.
 
-    :param geometry: GeoJSON geometry
+    :param geojson_obj: GeoJSON object (could be geometry, feature, or feature collection)
     :param features: List of extracted features
     """
-    if geometry["type"] == "Polygon":
-        features.append(Feature(geometry=Polygon(geometry["coordinates"])))
-    elif geometry["type"] == "Point":
-        features.append(Feature(geometry=Point(geometry["coordinates"])))
-    elif geometry["type"] == "MultiPolygon":
-        for polygon in geometry["coordinates"]:
-            features.append(Feature(geometry=Polygon(polygon)))
-    elif geometry["type"] == "GeometryCollection":
-        for geom in geometry["geometries"]:
-            extract_features(geom, features)
-    elif geometry["type"] == "Feature":
-        extract_features(geometry["geometry"], features)
-    elif geometry["type"] == "FeatureCollection":
-        for feature in geometry["features"]:
-            extract_features(feature, features)
+    if isinstance(geojson_obj, dict):
+        obj_type = geojson_obj.get("type")
+
+        if obj_type == "Feature":
+            # Extract the actual Feature with properties
+            geometry = geojson_obj.get("geometry", {})
+            properties = geojson_obj.get("properties", {})
+
+            if geometry and geometry.get("type"):
+                features.append(Feature(geometry=geometry, properties=properties))
+
+        elif obj_type == "FeatureCollection":
+            # Process each feature in the collection
+            for feature in geojson_obj.get("features", []):
+                extract_features(feature, features)
+
+        elif obj_type in [
+            "Polygon",
+            "Point",
+            "MultiPolygon",
+            "LineString",
+            "MultiPoint",
+            "MultiLineString",
+        ]:
+            # This is a raw geometry - create feature with empty properties
+            features.append(Feature(geometry=geojson_obj, properties={}))
+
+        elif obj_type == "GeometryCollection":
+            # Handle geometry collections
+            for geom in geojson_obj.get("geometries", []):
+                extract_features(geom, features)
+
+    elif isinstance(geojson_obj, list):
+        # Handle lists of features/geometries
+        for item in geojson_obj:
+            extract_features(item, features)
 
 
 def create_feature_collection(geojson_obj: Any) -> FeatureCollection:
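The reworked `extract_features` above now preserves feature `properties` instead of rebuilding bare geometries. A small sketch of the difference, assuming `Feature` is the `geojson` class the module's type hints refer to:

```python
# Sketch: properties such as external_id now survive extraction.
from geojson import Feature  # assumed source of Feature, per the module's type hints

features: list = []
fc = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [12.49, 41.89]},
            "properties": {"external_id": "plot-001"},
        }
    ],
}
extract_features(fc, features)  # function from the hunk above
assert features[0]["properties"] == {"external_id": "plot-001"}
```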
openforis_whisp/datasets.py CHANGED
@@ -58,19 +58,6 @@ def g_jrc_gfc_2020_prep():
     return jrc_gfc2020_raw.mosaic().rename("EUFO_2020")
 
 
-## removing JAXA product due to repeat errors of commission being noted by users, compared to other datasets
-
-# # JAXA_FNF_2020
-# def g_jaxa_forest_prep():
-#     jaxa_forest_non_forest_raw = ee.ImageCollection("JAXA/ALOS/PALSAR/YEARLY/FNF4")
-#     jaxa_forest_non_forest_2020 = (
-#         jaxa_forest_non_forest_raw.filterDate("2020-01-01", "2020-12-31")
-#         .select("fnf")
-#         .mosaic()
-#     )
-#     return jaxa_forest_non_forest_2020.lte(2).rename("JAXA_FNF_2020")
-
-
 # GFC_TC_2020
 def g_glad_gfc_10pc_prep():
     gfc = ee.Image("UMD/hansen/global_forest_change_2024_v1_12")
@@ -285,7 +272,7 @@ def g_fdap_rubber_2023_prep():
     fdap_rubber = (
         fdap_rubber2020_model_raw.filterDate("2023-01-01", "2023-12-31")
         .mosaic()
-        .gt(0.93)  # Threshold for Rubber
+        .gt(0.59)  # Threshold for Rubber
     )
     return fdap_rubber.rename("Rubber_2023_FDaP")
 
@@ -896,9 +883,9 @@ def nbr_terraclass_amz20_secondary_prep():
 
 # Cerrado - filtered with QGIS because the original geodatabase is too large to export as a shapefile (GEE accepted format)
 def nbr_bfs_cer_f20_prep():
-    bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_pmp_2020")
+    bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_cerr_2020")
     bfs_fcer20_binary = ee.Image().paint(bfs_fcer20, 1)
-    return bfs_fcer20_binary.rename("nBR_BFS_primary&secondary_forest_Cerrado_2020")
+    return bfs_fcer20_binary.rename("nBR_BFS_primary_and_secondary_forest_Cerrado_2020")
 
 
 # %%
@@ -1277,7 +1264,9 @@ def combine_datasets(national_codes=None):
 
     try:
         # Attempt to print band names to check for errors
-        print(img_combined.bandNames().getInfo())
+        # print(img_combined.bandNames().getInfo())
+        img_combined.bandNames().getInfo()
+
     except ee.EEException as e:
         # logger.error(f"Error printing band names: {e}")
         # logger.info("Running code for filtering to only valid datasets due to error in input")
@@ -1294,6 +1283,7 @@ def combine_datasets(national_codes=None):
         img_combined = img_combined.addBands(img)
 
     img_combined = img_combined.multiply(ee.Image.pixelArea())
+    print("Whisp multiband image compiled")
 
     return img_combined
 
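The `combine_datasets` change above keeps the `bandNames().getInfo()` call but drops the print: the round trip exists purely to force server-side evaluation, so an invalid input dataset raises `ee.EEException` and triggers the fallback filtering. The pattern in isolation, as a sketch:

```python
# Sketch of the validate-by-evaluation pattern used in combine_datasets():
# .getInfo() forces Earth Engine to evaluate the expression, so a broken
# band list surfaces immediately as ee.EEException rather than later.
import ee

def bands_are_valid(img: ee.Image) -> bool:
    try:
        img.bandNames().getInfo()  # raises ee.EEException on invalid inputs
        return True
    except ee.EEException:
        return False
```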
openforis_whisp/parameters/config_runtime.py CHANGED
@@ -16,7 +16,7 @@ centroid_x_coord_column = "Centroid_lon"
 
 centroid_y_coord_column = "Centroid_lat"
 
-geo_id_column = "external_id"
+external_id_column = "external_id"
 
 geometry_type_column = "Geometry_type"
 
openforis_whisp/parameters/lookup_context_and_metadata.csv CHANGED
@@ -1,6 +1,6 @@
 name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude_from_output,col_type,is_nullable,is_required,corresponding_variable
 plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,plot_id_column
-external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,geo_id_column
+external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,external_id_column
 Area,-8,,context_and_metadata,context_and_metadata,NA,NA,0,float32,1,1,geometry_area_column
 Geometry_type,-7,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_type_column
 Country,-6,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso3_country_column
openforis_whisp/parameters/lookup_gee_datasets.csv CHANGED
@@ -2,7 +2,6 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
 EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
 GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
 TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
-JAXA_FNF_2020,40,,treecover,NA,1,0,1,float32,1,0,g_jaxa_forest_prep
 GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
@@ -173,14 +172,14 @@ ESRI_2023_crop,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2023_crop_prep
 GLC_FCS30D_crop_2022,2140,,NA,agri_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_crop_2022_prep
 GFW_logging_before_2020,2200,,NA,logging_concession,0,1,0,float32,1,0,g_logging_concessions_prep
 nCO_ideam_forest_2020,2310,CO,treecover,NA,1,1,0,float32,1,0,nco_ideam_forest_2020_prep
-nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,0,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
+nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,1,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
 nBR_INPE_TC_primary_forest_Amazon_2020,2400,BR,treecover,primary,1,1,0,float32,1,0,nbr_terraclass_amz20_primary_prep
 nBR_INPE_TC_secondary_forest_Amazon_2020,2401,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_terraclass_amz20_secondary_prep
 nBR_BFS_primary_forest_Pantanal_2020,2402,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_ptn_f20_prep
 nBR_BFS_primary_forest_Caatinga_2020,2403,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_caat_f20_prep
 nBR_BFS_primary_forest_AtlanticForest_2020,2404,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_atlf_f20_prep
 nBR_BFS_primary_forest_Pampa_2020,2405,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_pmp_f20_prep
-nBR_BFS_primary&secondary_forest_Cerrado_2020,2406,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_bfs_cer_f20_prep
+nBR_BFS_primary_and_secondary_forest_Cerrado_2020,2406,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_bfs_cer_f20_prep
 nBR_MapBiomas_col9_forest_Brazil_2020,2407,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_mapbiomasc9_f20_prep
 nBR_INPE_TCsilviculture_Amazon_2020,2408,BR,treecover,planted_plantation_2020,1,1,0,float32,1,0,nbr_terraclass_amz20_silv_prep
 nBR_INPE_TCsilviculture_Cerrado_2020,2409,BR,treecover,planted_plantation_2020,1,1,0,float32,1,0,nbr_terraclass_silv_cer20_prep
openforis_whisp/risk.py CHANGED
@@ -272,16 +272,12 @@ def whisp_risk(
         df=df_w_indicators,
         ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
-        ind_3_name=ind_3_name,
         ind_4_name=ind_4_name,
     )
 
     df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
         df=df_w_indicators,
-        ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
-        ind_3_name=ind_3_name,
-        ind_4_name=ind_4_name,
         ind_5_name=ind_5_name,
         ind_6_name=ind_6_name,
         ind_7_name=ind_7_name,
@@ -306,10 +302,10 @@ def add_eudr_risk_pcrop_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str): Name of first indicator column.
-        ind_2_name (str): Name of second indicator column.
-        ind_3_name (str): Name of third indicator column.
-        ind_4_name (str): Name of fourth indicator column.
+        ind_1_name (str, optional): Name of first indicator column. Defaults to "Ind_01_treecover".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_3_name (str, optional): Name of third indicator column. Defaults to "Ind_03_disturbance_before_2020".
+        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
@@ -337,7 +333,6 @@ def add_eudr_risk_acrop_col(
     df: data_lookup_type,
     ind_1_name: str,
     ind_2_name: str,
-    ind_3_name: str,
     ind_4_name: str,
 ) -> data_lookup_type:
     """
@@ -345,10 +340,9 @@ def add_eudr_risk_acrop_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str, optional): Name of first indicator column. Defaults to "Indicator_1_treecover".
-        ind_2_name (str, optional): Name of second indicator column. Defaults to "Indicator_2_commodities".
-        ind_3_name (str, optional): Name of third indicator column. Defaults to "Indicator_3_disturbance_before_2020".
-        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
+        ind_1_name (str, optional): Name of first indicator column. Defaults to "Ind_01_treecover".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
@@ -371,10 +365,7 @@ def add_eudr_risk_acrop_col(
 
 def add_eudr_risk_timber_col(
     df: data_lookup_type,
-    ind_1_name: str,
     ind_2_name: str,
-    ind_3_name: str,
-    ind_4_name: str,
     ind_5_name: str,
     ind_6_name: str,
     ind_7_name: str,
@@ -388,51 +379,54 @@ def add_eudr_risk_timber_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str, optional): Name of first indicator column. Defaults to "Indicator_1_treecover".
-        ind_2_name (str, optional): Name of second indicator column. Defaults to "Indicator_2_commodities".
-        ind_3_name (str, optional): Name of third indicator column. Defaults to "Indicator_3_disturbance_before_2020".
-        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_5_name (str, optional): Name of fifth indicator column. Defaults to "Ind_05_primary_2020".
+        ind_6_name (str, optional): Name of sixth indicator column. Defaults to "Ind_06_nat_reg_forest_2020".
+        ind_7_name (str, optional): Name of seventh indicator column. Defaults to "Ind_07_planted_plantations_2020".
+        ind_8_name (str, optional): Name of eighth indicator column. Defaults to "Ind_08_planted_plantations_after_2020".
+        ind_9_name (str, optional): Name of ninth indicator column. Defaults to "Ind_09_treecover_after_2020".
+        ind_10_name (str, optional): Name of tenth indicator column. Defaults to "Ind_10_agri_after_2020".
+        ind_11_name (str, optional): Name of eleventh indicator column. Defaults to "Ind_11_logging_concession_before_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
     """
 
     for index, row in df.iterrows():
-        # If there is a commodity in 2020 OR if there is planted-plantation in 2020 AND no agriculture in 2023, set EUDR_risk_degrad to "low"
+        # If there is a commodity in 2020 (ind_2_name)
+        # OR if there is planted-plantation in 2020 (ind_7_name) AND no agriculture in 2023 (ind_10_name), set EUDR_risk_timber to "low"
         if row[ind_2_name] == "yes" or (
             row[ind_7_name] == "yes" and row[ind_10_name] == "no"
         ):
            df.at[index, "risk_timber"] = "low"
-        # If there is no tree cover, set EUDR_risk_degrad to "low"? no because of unstocked forests
-        # if row[ind_1_name] == "no" or row[ind_3_name] == "yes" or row[ind_7_name] == "yes":
-        #     df.at[index, 'EUDR_risk_degrad'] = "low"
-        # If primary or naturally regenerating or planted forest in 2020 AND agricultural use in 2023, set EUDR_risk to high
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) or planted forest (ind_7_name) in 2020 AND agricultural after 2020 (ind_10_name), set EUDR_timber to high
         elif (
            row[ind_5_name] == "yes"
            or row[ind_6_name] == "yes"
            or row[ind_7_name] == "yes"
        ) and row[ind_10_name] == "yes":
            df.at[index, "risk_timber"] = "high"
-        # If primary or naturally regenerating AND planted post 2020, set EUDR_risk to "high"
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) AND planted after 2020 (ind_8_name), set EUDR_risk to "high"
         elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
            ind_8_name
        ] == "yes":
            df.at[index, "risk_timber"] = "high"
+        # No data yet on OWL conversion
         # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
         # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
         #     df.at[index, 'EUDR_risk_timber'] = "high"
 
-        # If primary forest OR naturally regenerating AND an information on management practice OR tree cover post 2020, set EUDR_risk_degrad to "low"
+        # If there is a natural primary forest (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) AND an information on management practice any time (ind_11_name) OR tree cover or regrowth post 2020 (ind_9_name), set EUDR_risk_timber to "low"
         elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
            row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
        ):
            df.at[index, "risk_timber"] = "low"
-        # If primary or naturally regenerating and no other info, set EUDR_risk to "more_info_needed"
+        # If primary (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) and no other info, set EUDR_risk to "more_info_needed"
         elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
            df.at[index, "risk_timber"] = "more_info_needed"
-        # If none of the above conditions are met, set EUDR_risk to "high"
+        # If none of the above conditions are met, set EUDR_risk to "low"
         else:
-            df.at[index, "risk_timber"] = "high"
+            df.at[index, "risk_timber"] = "low"
 
     return df
 
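A toy run of the slimmed-down timber decision tree, using the default column names documented in the hunk above; the `ind_8_name` through `ind_11_name` parameters are assumed to follow the same pattern as the ones visible in the signature diff. Note the fall-through case now yields "low" rather than "high".

```python
# Toy example: primary forest in 2020 and no other evidence
# -> risk_timber should be "more_info_needed".
import pandas as pd
from openforis_whisp.risk import add_eudr_risk_timber_col

row = {
    "Ind_02_commodities": "no",
    "Ind_05_primary_2020": "yes",
    "Ind_06_nat_reg_forest_2020": "no",
    "Ind_07_planted_plantations_2020": "no",
    "Ind_08_planted_plantations_after_2020": "no",
    "Ind_09_treecover_after_2020": "no",
    "Ind_10_agri_after_2020": "no",
    "Ind_11_logging_concession_before_2020": "no",
}
df = add_eudr_risk_timber_col(
    df=pd.DataFrame([row]),
    ind_2_name="Ind_02_commodities",
    ind_5_name="Ind_05_primary_2020",
    ind_6_name="Ind_06_nat_reg_forest_2020",
    ind_7_name="Ind_07_planted_plantations_2020",
    ind_8_name="Ind_08_planted_plantations_after_2020",
    ind_9_name="Ind_09_treecover_after_2020",
    ind_10_name="Ind_10_agri_after_2020",
    ind_11_name="Ind_11_logging_concession_before_2020",
)
print(df["risk_timber"].iloc[0])  # "more_info_needed"
```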
openforis_whisp/stats.py CHANGED
@@ -6,7 +6,7 @@ import json
 import country_converter as coco
 from openforis_whisp.parameters.config_runtime import (
     plot_id_column,
-    geo_id_column,
+    external_id_column,
     geometry_type_column,
     geometry_area_column,
     geometry_area_column_formatting,
@@ -57,6 +57,8 @@ def whisp_formatted_stats_geojson_to_df(
         The filepath to the GeoJSON of the ROI to analyze.
     external_id_column : str, optional
         The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
+        This column must exist as a property in ALL features of the GeoJSON file.
+        Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
     remove_geom : bool, default=False
         If True, the geometry of the GeoJSON is removed from the output DataFrame.
     national_codes : list, optional
@@ -369,7 +371,11 @@ def whisp_stats_geojson_to_drive(
 
 
 def whisp_stats_ee_to_ee(
-    feature_collection, external_id_column, national_codes=None, unit_type="ha"
+    feature_collection,
+    external_id_column,
+    national_codes=None,
+    unit_type="ha",
+    keep_properties=None,
 ):
     """
     Process a feature collection to get statistics for each feature.
@@ -379,46 +385,68 @@
         external_id_column (str): The name of the external ID column to check.
         national_codes (list, optional): List of ISO2 country codes to include national datasets.
         unit_type (str): Whether to use hectares ("ha") or percentage ("percent"), default "ha".
+        keep_properties (None, bool, or list, optional): Properties to keep from the input features.
+            - None: Remove all properties (default behavior)
+            - True: Keep all properties
+            - list: Keep only the specified properties
 
     Returns:
         ee.FeatureCollection: The output feature collection with statistics.
     """
     if external_id_column is not None:
         try:
-            # Check if external_id_column is a property in feature_collection (server-side)
-            def check_column_exists(feature):
-                return ee.Algorithms.If(
-                    feature.propertyNames().contains(external_id_column),
-                    feature,
-                    ee.Feature(
-                        None
-                    ),  # Return an empty feature if the column does not exist
-                )
-
-            feature_collection_with_check = feature_collection.map(check_column_exists)
-            size_fc = feature_collection.size()
-            valid_feature_count = feature_collection_with_check.filter(
-                ee.Filter.notNull([external_id_column])
-            ).size()
+            # Validate that the external_id_column exists in all features
+            validation_result = validate_external_id_column(
+                feature_collection, external_id_column
+            )
 
-            # Raise an error if the column does not exist in any feature
-            if valid_feature_count.neq(size_fc).getInfo():
-                raise ValueError(
-                    f"The column '{external_id_column}' is not a property throughout the feature collection."
+            if not validation_result["is_valid"]:
+                raise ValueError(validation_result["error_message"])
+
+            # First handle property selection, but preserve the external_id_column
+            if keep_properties is not None:
+                if keep_properties == True:
+                    # Keep all properties including external_id_column
+                    pass  # No need to modify feature_collection
+                elif isinstance(keep_properties, list):
+                    # Ensure external_id_column is included in the list
+                    if external_id_column not in keep_properties:
+                        keep_properties = keep_properties + [external_id_column]
+                    feature_collection = feature_collection.select(keep_properties)
+                else:
+                    raise ValueError(
+                        "keep_properties must be None, True, or a list of property names."
+                    )
+
+            # Set the external_id with robust null handling
+            def set_external_id_safely_and_clean(feature):
+                external_id_value = feature.get(external_id_column)
+                # Use server-side null checking and string conversion
+                external_id_value = ee.Algorithms.If(
+                    ee.Algorithms.IsEqual(external_id_value, None),
+                    "unknown",
+                    ee.String(external_id_value),
                 )
+                # Create a new feature with the standardized external_id column
+                # Note: we use "external_id" as the standardized column name, not the original external_id_column name
+                return ee.Feature(feature.set("external_id", external_id_value))
 
-            # Set the geo_id_column
             feature_collection = feature_collection.map(
-                lambda feature: feature.set(
-                    geo_id_column, ee.String(feature.get(external_id_column))
-                )
+                set_external_id_safely_and_clean
             )
 
+            # Finally, clean up to keep only geometry and external_id if keep_properties is None
+            if keep_properties is None:
+                feature_collection = feature_collection.select(["external_id"])
+
         except Exception as e:
             # Handle the exception and provide a helpful error message
             print(
                 f"An error occurred when trying to set the external_id_column: {external_id_column}. Error: {e}"
             )
+            raise e  # Re-raise the exception to stop execution
+    else:
+        feature_collection = _keep_fc_properties(feature_collection, keep_properties)
 
     fc = get_stats(
         feature_collection, national_codes=national_codes, unit_type=unit_type
@@ -427,6 +455,23 @@
     return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
 
 
+def _keep_fc_properties(feature_collection, keep_properties):
+    # If keep_properties is specified, select only those properties
+    if keep_properties is None:
+        feature_collection = feature_collection.select([])
+    elif keep_properties == True:
+        # If keep_properties is true, select all properties
+        first_feature_props = feature_collection.first().propertyNames().getInfo()
+        feature_collection = feature_collection.select(first_feature_props)
+    elif isinstance(keep_properties, list):
+        feature_collection = feature_collection.select(keep_properties)
+    else:
+        raise ValueError(
+            "keep_properties must be None, True, or a list of property names."
+        )
+    return feature_collection
+
+
 def whisp_stats_ee_to_df(
     feature_collection: ee.FeatureCollection,
     external_id_column=None,
@@ -951,3 +996,139 @@ def convert_iso3_to_iso2(df, iso3_column, iso2_column):
     )
 
     return df
+
+
+def validate_external_id_column(feature_collection, external_id_column):
+    """
+    Validates that the external_id_column exists in all features of the collection.
+
+    Parameters
+    ----------
+    feature_collection : ee.FeatureCollection
+        The feature collection to validate
+    external_id_column : str
+        The name of the external ID column to check
+
+    Returns
+    -------
+    dict
+        Dictionary with validation results including:
+        - 'is_valid': bool indicating if column exists in all features
+        - 'total_features': int total number of features
+        - 'features_with_column': int number of features that have the column
+        - 'available_properties': list of properties available in first feature
+        - 'error_message': str error message if validation fails
+    """
+    try:
+        # Get total number of features
+        total_features = feature_collection.size().getInfo()
+
+        if total_features == 0:
+            return {
+                "is_valid": False,
+                "total_features": 0,
+                "features_with_column": 0,
+                "available_properties": [],
+                "error_message": "Feature collection is empty",
+            }
+
+        # Get available properties from first feature
+        first_feature_props = feature_collection.first().propertyNames().getInfo()
+
+        # Check if external_id_column exists in all features
+        def check_column_exists(feature):
+            has_column = feature.propertyNames().contains(external_id_column)
+            return feature.set("_has_external_id", has_column)
+
+        features_with_check = feature_collection.map(check_column_exists)
+        features_with_column = (
+            features_with_check.filter(ee.Filter.eq("_has_external_id", True))
+            .size()
+            .getInfo()
+        )
+
+        is_valid = features_with_column == total_features
+
+        error_message = None
+        if not is_valid:
+            missing_count = total_features - features_with_column
+            error_message = (
+                f"The column '{external_id_column}' is missing from {missing_count} "
+                f"out of {total_features} features in the collection. "
+                f"Available properties in first feature: {first_feature_props}"
+            )
+
+        return {
+            "is_valid": is_valid,
+            "total_features": total_features,
+            "features_with_column": features_with_column,
+            "available_properties": first_feature_props,
+            "error_message": error_message,
+        }
+
+    except Exception as e:
+        return {
+            "is_valid": False,
+            "total_features": 0,
+            "features_with_column": 0,
+            "available_properties": [],
+            "error_message": f"Error during validation: {str(e)}",
+        }
+
+
+def debug_feature_collection_properties(feature_collection, max_features=5):
+    """
+    Debug helper function to inspect the properties of features in a collection.
+
+    Parameters
+    ----------
+    feature_collection : ee.FeatureCollection
+        The feature collection to inspect
+    max_features : int, optional
+        Maximum number of features to inspect, by default 5
+
+    Returns
+    -------
+    dict
+        Dictionary with debugging information about the feature collection
+    """
+    try:
+        total_features = feature_collection.size().getInfo()
+
+        if total_features == 0:
+            return {"total_features": 0, "error": "Feature collection is empty"}
+
+        # Limit the number of features to inspect
+        features_to_check = min(max_features, total_features)
+        limited_fc = feature_collection.limit(features_to_check)
+
+        # Get properties for each feature
+        def get_feature_properties(feature):
+            return ee.Dictionary(
+                {
+                    "properties": feature.propertyNames(),
+                    "geometry_type": feature.geometry().type(),
+                }
+            )
+
+        feature_info = limited_fc.map(get_feature_properties).getInfo()
+
+        return {
+            "total_features": total_features,
+            "inspected_features": features_to_check,
+            "feature_details": [
+                {
+                    "feature_index": i,
+                    "properties": feature_info["features"][i]["properties"][
+                        "properties"
+                    ],
+                    "geometry_type": feature_info["features"][i]["properties"][
+                        "geometry_type"
+                    ],
+                }
+                for i in range(len(feature_info["features"]))
+            ],
+        }
+
+    except Exception as e:
+        return {"error": f"Error during debugging: {str(e)}"}
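A hedged sketch combining the new helpers with `keep_properties` (the asset path and the extra property name are hypothetical; module-level imports from `openforis_whisp.stats` are assumed):

```python
import ee
from openforis_whisp.stats import (
    debug_feature_collection_properties,
    validate_external_id_column,
    whisp_stats_ee_to_ee,
)

ee.Initialize()
plots = ee.FeatureCollection("projects/my-project/assets/my_plots")  # hypothetical

report = validate_external_id_column(plots, "external_id")
if not report["is_valid"]:
    # Inspect what properties the features actually carry
    print(report["error_message"])
    print(debug_feature_collection_properties(plots, max_features=3))
else:
    stats = whisp_stats_ee_to_ee(
        plots,
        external_id_column="external_id",
        keep_properties=["farm_name"],  # hypothetical extra property to keep
    )
```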
openforis_whisp/utils.py CHANGED
@@ -113,9 +113,9 @@ def remove_geometry_from_feature_collection(feature_collection):
     return feature_collection_no_geometry
 
 
-# Compute centroids of each polygon
-def get_centroid(feature, geo_id_column="Geo_id"):
-    keepProperties = [geo_id_column]
+# Compute centroids of each polygon including the external_id_column
+def get_centroid(feature, external_id_column="external_id"):
+    keepProperties = [external_id_column]
     # Get the centroid of the feature's geometry.
     centroid = feature.geometry().centroid(1)
     # Return a new Feature, copying properties from the old Feature.
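With the rename, `get_centroid` defaults to carrying the standardized `external_id` property onto each centroid. A usage sketch (asset path hypothetical):

```python
import ee
from openforis_whisp.utils import get_centroid

ee.Initialize()
plots = ee.FeatureCollection("projects/my-project/assets/my_plots")  # hypothetical
centroids = plots.map(lambda f: get_centroid(f, external_id_column="external_id"))
```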
openforis_whisp-2.0.0a5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 2.0.0a3
+Version: 2.0.0a5
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
@@ -59,7 +59,6 @@ Description-Content-Type: text/markdown
 - [Whisp pathways](#whisp_pathways)
 - [Whisp datasets](#whisp_datasets)
 - [Whisp notebooks](#whisp_notebooks)
-- [System setup](#whisp_setup)
 - [Add data layers](#whisp_add_data)
 - [Contribute to the code](#whisp_contribute)
 - [Code of conduct](#whisp_conduct)
@@ -78,8 +77,6 @@ Description-Content-Type: text/markdown
 
 
 ## Whisp datasets <a name="whisp_datasets"></a>
-All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
-
 ***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
 
 1) Tree and forest cover (at the end of 2020);
@@ -87,27 +84,39 @@ Description-Content-Type: text/markdown
 3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
 4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
 
+Additional categories are specific for the timber commodity, considering a harvesting date in 2023:
+
+5) Primary forests in 2020;
+6) Naturally regenerating forests in 2020;
+7) Planted and plantation forests in 2020;
+8) Planted and plantation forests in 2023;
+9) Treecover in 2023;
+10) Commodities or croplands in 2023.
+11) Logging concessions;
+
 There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
-Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
+
+### Whisp risk assessment <a name="whisp_risk"></a>
+
+Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
 
 1) Was there tree cover in 2020?
 2) Were there commodity plantations or other agricultural uses in 2020?
 3) Were there disturbances until 2020-12-31?
 4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
 
-If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
-
-If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
-
-If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
-
-Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
-However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
+And specifically for the timber commodity, considering a harvesting date in 2023:
 
+5) Were there primary forests in 2020?
+6) Were there naturally regenerating forests in 2020?
+7) Were there planted and plantation forests in 2020?
+8) Were there planted and plantation forests in 2023?
+9) Was there treecover in 2023?
+10) Were there commodity plantations or other agricultural uses in 2023?
+11) Is it part of a logging concession?
 
-*The Whisp algorithm for **Perennial Crops** visualized:*
-![CoE_Graphic 5](https://github.com/user-attachments/assets/007b5f50-3939-4707-95fa-98be4d56745f)
 The Whisp algorithm outputs multiple statistical columns with disaggregated data from the input datasets, followed by aggregated indicator columns, and the final risk assessment columns.
+All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
 
 The **relevant risk assessment column depends on the commodity** in question:
 
@@ -142,47 +151,28 @@ The **relevant risk assessment column depends on the commodity** in question:
 </tr>
 </table>
 
-The decision tree for the timber risk assessment slightly differs from the above. For more information see below.
-
-
+*The Whisp algorithm for **Perennial Crops** visualized:*
+![CoE_Graphic 5](https://github.com/user-attachments/assets/007b5f50-3939-4707-95fa-98be4d56745f)
+
+If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
+If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
-## Whisp datasets for timber <a name="whisp_datasets_timber"></a>
-***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
-1) Tree and forest cover (at the end of 2020);
-2) Commodities (i.e., crop plantations and other agricultural uses at the end of 2020);
-3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
-4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
-5) Primary forests in 2020;
-6) Naturally regenerating forests in 2020;
-7) Planted and plantation forests in 2020;
-8) Planted and plantation forests in 2023;
-9) Treecover in 2023;
-10) Commodities or croplands in 2023.
-11) Logging concessions;
+If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
 
-There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
-Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
+Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
+However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
 
-1) Was there tree cover in 2020?
-2) Were there commodity plantations or other agricultural uses in 2020?
-3) Were there disturbances until 2020-12-31?
-4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
-5) Were there primary forests in 2020?
-6) Were there naturally regenerating forests in 2020?
-7) Were there planted and plantation forests in 2020?
-8) Were there planted and plantation forests in 2023?
-9) Was there treecover in 2023?
-10) Were there commodity plantations or other agricultural uses in 2023?
-11) Were there logging concessions?
 
-# Run Whisp python package from a notebook
+## Run Whisp python package from a notebook <a name="whisp_notebooks"></a>
 
 For most users we suggest using the Whisp App to process their plot data. But for some, using the python package directly will fit their workflow.
 
 A simple example of the package functionality can be seen in this [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
 
-## Requirements for running the package
+For an example notebook adapted for running locally (or in Sepal), see: [whisp_geojson_to_csv.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_csv.ipynb) or if datasets are very large, see [whisp_geojson_to_drive.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_drive.ipynb)
+
+### Requirements for running the package
 
 - A Google Earth Engine (GEE) account.
 - A registered cloud GEE project.
@@ -190,7 +180,8 @@ The **relevant risk assessment column depends on the commodity** in question:
 
 More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
 
-## Python package installation
+
+### Python package installation
 
 The Whisp package is available on pip
 https://pypi.org/project/openforis-whisp/
@@ -202,15 +193,15 @@ The **relevant risk assessment column depends on the commodity** in question:
 pip install --pre openforis-whisp
 ```
 
-If running locally we recommend a [virtual environment](https://docs.python.org/3/library/venv.html) to keep your main python installation clean.
+If running the package locally we recommend a [virtual environment](https://docs.python.org/3/library/venv.html) to keep your main python installation clean. For users running the package in Sepal see [here](https://docs.sepal.io/en/latest/cli/python.html#virtual-environment).
 
 The package relies upon the google earth engine api being setup correctly using a registered cloud project.
 
-More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
+More info on Whisp can be found [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
 
 
 
-## How to add data layers to Whisp
+## How to add data layers to Whisp <a name="whisp_add_data"></a>
 
 
 
@@ -253,12 +244,24 @@ Before submitting a request, consider the following:
 ### Adding your own data directly
 
 
-
 To add your own data you will need some coding experience as well as familiarity with GitHub and Google Earth Engine.
 
+This approach is for those who want to run a bespoke analysis combining their own data with those already in Whisp.
 
-Firstly follow the steps to install the package in editable mode (as detailed below in Contributing to the Whisp code base). Once in editable mode you are running the Whisp package locally based on a cloned version of the code. This approach is for those who want to run a bespoke analysis combining their own data with those already in Whisp. If, however, you think the datasets are of use to the wider community and you have the code running smoothly, you can make a pull request from a forked repository.
+Firstly follow the steps below to install the package in editable mode.
 
+As with the regular pip installation, we recommend a separate [virtual environment](https://docs.python.org/3/library/venv.html) for running in editable mode. For Sepal users see [here](https://docs.sepal.io/en/latest/cli/python.html#virtual-environment).
+
+```bash
+
+git clone https://github.com/forestdatapartnership/whisp.git
+
+cd whisp/
+
+pip install -e .[dev]
+
+```
+Once in editable mode you are running the Whisp package locally based on a cloned version of the code.
 
 
 
@@ -294,7 +297,7 @@ For example, if it is a dataset for tree cover in 2000, then add `'treecover'` u
 
 ```python
 
-def nBR_my_custom_dataset_prep():
+def my_custom_dataset_prep():
 
 image = ee.Image("MY/GEE/DATASET")
 
@@ -309,7 +312,6 @@ return binary.rename("My_custom_dataset")
 ---
 
 
-
 We are working on ways to make this process smoother. However, in the meantime do contact us through the [issues page on GitHub](https://github.com/forestdatapartnership/whisp/issues), or via the Open Foris email, if this functionality is useful to you or you need help.
 
 
@@ -318,28 +320,14 @@ We are working on ways to make this process smoother. However, in the meantime d
 
 
 
-## Contributing to the Whisp code base
-
-
-
-Contributions to the Whisp code in GitHub are welcome. They can be made by forking the repository, making and pushing the required changes, then making a pull request to the Whisp repository. After briefly reviewing the request, we can make a branch for which to make a new pull request to. After final checks, we can then incorporate the code into the main branch. If in doubt, get in contact first or log as an issue [here](https://github.com/forestdatapartnership/whisp/issues/).
-
+## Contributing to the Whisp code base <a name="whisp_contribute"></a>
 
+Contributions to the Whisp code in GitHub are welcome. These could be additional functionality, datasets or just cleaner code! Contributions can be made by forking the repository, making and pushing the required changes, then making a pull request to the Whisp repository. After briefly reviewing the request, we can make a branch for which to make a new pull request to. After final checks, we can then incorporate the code into the main branch. If in doubt, get in contact first or log as an issue [here](https://github.com/forestdatapartnership/whisp/issues/).
 
-Install the package in editable mode:
 
+Install the package in editable mode (see Adding your own data directly above):
 
-```bash
-
-git clone https://github.com/forestdatapartnership/whisp.git
-
-cd whisp/
-
-pip install -e .[dev]
-
-```
-
-Add additional dependencies required for testing and running pre-commit hooks:
+Then add additional dependencies required for testing and running pre-commit hooks:
 
 
 ```bash
@@ -352,7 +340,6 @@ pre-commit install
 You should be able to run the Pytest suite by simply running the `pytest` command from the repo's root folder.
 
 
-
 Please read the [contributing guidelines](contributing_guidelines.md) for good practice recommendations
 
 
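The notebooks referenced in the README follow roughly this flow; an end-to-end sketch using functions visible in this diff (input path hypothetical; the top-level re-exports and the `whisp_risk` call signature are assumptions):

```python
import ee
import openforis_whisp as whisp  # top-level re-exports assumed, as in the notebooks

ee.Initialize()
df = whisp.whisp_formatted_stats_geojson_to_df(
    "my_plots.geojson",  # hypothetical input
    external_id_column="external_id",
)
df_risk = whisp.whisp_risk(df)  # adds the EUDR risk columns from risk.py above
df_risk.to_csv("whisp_output.csv", index=False)
```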
openforis_whisp-2.0.0a5.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+openforis_whisp/__init__.py,sha256=xfXNzskPfnlQkmf3QZHEydhte3U9_uLdoYM04eowNqw,2403
+openforis_whisp/data_conversion.py,sha256=_HSjYozNO1xAOAk-uGmzTVCTOc3W7x3GDlvEUgrnj_Q,16909
+openforis_whisp/datasets.py,sha256=9Ofxyy2ignnN6mSXfXDP9n6SsQ8QPQQWivuolS_i8LY,52013
+openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
+openforis_whisp/parameters/__init__.py,sha256=KL7iORJVjSpZatYjoyWckcmQJnE89_DBC8R6_0_eR6o,349
+openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
+openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
+openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=5K1LQyuvwvG1vOdlyCknv_foDtRUKHPU3VvOU_zsoWQ,17626
+openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
+openforis_whisp/reformat.py,sha256=o3TpeuddR1UlP1C3uFeI957kIZYMQqEW1pXsjKbAtiY,17922
+openforis_whisp/risk.py,sha256=FNWH84xhSjVZW3yTnTWZF3MxiZtNA5jb154vu-C2kJ0,31951
+openforis_whisp/stats.py,sha256=_l2V8BWdbJ2GoK7N5Zswg0Gvs1I5RRT-JGgl9fyl2AY,40882
+openforis_whisp/utils.py,sha256=YqFYK1fH2WpuWolXa-gCeSGYiHdJ0_xQUIo15dQ9Sh8,5378
+openforis_whisp-2.0.0a5.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+openforis_whisp-2.0.0a5.dist-info/METADATA,sha256=4ii5-gyxRZZmWyAhorNo9phcbpQoLRcmhagxxCCKHeA,16681
+openforis_whisp-2.0.0a5.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openforis_whisp-2.0.0a5.dist-info/RECORD,,
openforis_whisp-2.0.0a3.dist-info/RECORD REMOVED
@@ -1,17 +0,0 @@
-openforis_whisp/__init__.py,sha256=xfXNzskPfnlQkmf3QZHEydhte3U9_uLdoYM04eowNqw,2403
-openforis_whisp/data_conversion.py,sha256=Ean2SBxhGr1YwzhbrHQD9kDdRYdNTJZLBiAmYZtBIM8,11812
-openforis_whisp/datasets.py,sha256=EOiNwTaMUMc0hYXBwUVzP-5q0Vq2jqzdNQF0Y6GQCSQ,52411
-openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
-openforis_whisp/parameters/__init__.py,sha256=KL7iORJVjSpZatYjoyWckcmQJnE89_DBC8R6_0_eR6o,349
-openforis_whisp/parameters/config_runtime.py,sha256=aH00CFV09f7JQnZQzpCFR5BIlvsovVfM4K_KUjMl0N8,1416
-openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=54uZ4oqfsiHgj2I39pAcsCr4SeSUqgIRboDhlxIAdik,1293
-openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=r1s_eUneFOVk7RALukaJj7Rj374XWuZTDkE2dAZAeu0,17691
-openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
-openforis_whisp/reformat.py,sha256=o3TpeuddR1UlP1C3uFeI957kIZYMQqEW1pXsjKbAtiY,17922
-openforis_whisp/risk.py,sha256=E9yZJ2wCinYrOydKK7EB0O5Imk5quG9Cs1uNkcv8AlM,31531
-openforis_whisp/stats.py,sha256=yAa6j3RpkPIjAM06IKQ7XGaFrwXhxfzIXn37aTOEwP4,33562
-openforis_whisp/utils.py,sha256=hpeY9aA3BND2m9c15PZ6_nClemsfiVNUEzA4pQXfztA,5330
-openforis_whisp-2.0.0a3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
-openforis_whisp-2.0.0a3.dist-info/METADATA,sha256=yjXgVT5Max81548KdmoHRCe1SDap2o1cKkVUYiUUA3Q,16933
-openforis_whisp-2.0.0a3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-openforis_whisp-2.0.0a3.dist-info/RECORD,,