openforis-whisp 2.0.0b3__py3-none-any.whl → 3.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openforis_whisp/stats.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
  from pathlib import Path
  from .datasets import combine_datasets
  import json
+ import logging
  import country_converter as coco
  from openforis_whisp.parameters.config_runtime import (
  plot_id_column,
@@ -41,7 +42,7 @@ from .reformat import (
  # to avoid repeated expensive operations. This saves 7-15 seconds per analysis.

  _WATER_FLAG_IMAGE = None
- _GEOBOUNDARIES_FC = None
+ _admin_boundaries_FC = None


  def get_water_flag_image():
@@ -63,26 +64,28 @@ def get_water_flag_image():
  return _WATER_FLAG_IMAGE


- def get_geoboundaries_fc():
+ def get_admin_boundaries_fc():
  """
- Get cached geoboundaries feature collection.
+ Get cached GAUL 2024 L1 administrative boundary feature collection.

- OPTIMIZATION: Geoboundaries collection is loaded once and reused for all features.
+ OPTIMIZATION: GAUL 2024 L1 collection is loaded once and reused for all features.
  This avoids loading the large FeatureCollection for every feature (previously
- called in get_geoboundaries_info for each feature).
+ called in get_admin_boundaries_info for each feature).

  Returns
  -------
  ee.FeatureCollection
- Cached geoboundaries feature collection
+ Cached GAUL 2024 L1 administrative boundary feature collection
  """
- global _GEOBOUNDARIES_FC
- if _GEOBOUNDARIES_FC is None:
- _GEOBOUNDARIES_FC = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM1")
- return _GEOBOUNDARIES_FC
+ global _admin_boundaries_FC
+ if _admin_boundaries_FC is None:
+ _admin_boundaries_FC = ee.FeatureCollection(
+ "projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1"
+ )
+ return _admin_boundaries_FC


- def whisp_formatted_stats_geojson_to_df(
+ def whisp_formatted_stats_geojson_to_df_legacy(
  input_geojson_filepath: Path | str,
  external_id_column=None,
  remove_geom=False,
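The replacement loader keeps the GAUL 2024 L1 collection in a module-level variable, so the asset handle is built once per session instead of once per feature. Restated as a standalone sketch with normal indentation (it assumes an authenticated Earth Engine session; the asset ID is the one used in the diff):

```python
import ee

ee.Initialize()  # assumes prior Earth Engine authentication

_admin_boundaries_FC = None


def get_admin_boundaries_fc():
    """Return the cached GAUL 2024 L1 collection, loading it only on first use."""
    global _admin_boundaries_FC
    if _admin_boundaries_FC is None:
        _admin_boundaries_FC = ee.FeatureCollection(
            "projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1"
        )
    return _admin_boundaries_FC
```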
@@ -90,9 +93,15 @@ def whisp_formatted_stats_geojson_to_df(
  unit_type="ha",
  whisp_image=None,
  custom_bands=None, # New parameter
+ validate_geometries: bool = False,
  ) -> pd.DataFrame:
  """
- Main function for most users.
+ Legacy function for basic Whisp stats extraction.
+
+ DEPRECATED: This is the original implementation maintained for backward compatibility.
+ Use whisp_formatted_stats_geojson_to_df() for new code, which provides automatic
+ optimization, formatting, and schema validation.
+
  Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
  Output df is validated against a pandera schema (created on the fly from the two lookup CSVs).

@@ -126,13 +135,51 @@ def whisp_formatted_stats_geojson_to_df(
  - List of band names: ['Aa_test', 'elevation']
  - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
  - None: preserves all extra columns automatically
+ validate_geometries : bool, optional
+ Whether to validate and fix invalid geometries, by default False.
+ Set to True to automatically fix invalid/self-intersecting polygons.

  Returns
  -------
  df_stats : pd.DataFrame
  The DataFrame containing the Whisp stats for the input ROI.
  """
- feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
+ # Load GeoJSON and validate geometries if requested
+ if validate_geometries:
+ import json
+ import geopandas as gpd
+ from shapely.validation import make_valid
+ import logging as py_logging
+
+ logger = py_logging.getLogger("whisp")
+
+ # Load GeoJSON file
+ with open(input_geojson_filepath, "r") as f:
+ geojson_data = json.load(f)
+
+ # Convert to GeoDataFrame
+ gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
+
+ # Validate and fix invalid geometries
+ valid_count = gdf.geometry.is_valid.sum()
+ invalid_count = len(gdf) - valid_count
+ if invalid_count > 0:
+ logger.warning(f"Fixing {invalid_count} invalid geometries")
+ gdf["geometry"] = gdf["geometry"].apply(
+ lambda g: make_valid(g) if g and not g.is_valid else g
+ )
+
+ # Pass GeoDataFrame directly to preserve CRS metadata
+ # convert_geojson_to_ee will handle:
+ # - CRS detection and conversion to WGS84 if needed
+ # - Data type sanitization (datetime, object columns)
+ # - Geometry validation and Z-coordinate stripping
+ feature_collection = convert_geojson_to_ee(
+ gdf, enforce_wgs84=True, strip_z_coords=True
+ )
+ else:
+ # Original path - no validation
+ feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))

  return whisp_formatted_stats_ee_to_df(
  feature_collection,
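The `validate_geometries=True` branch added above repairs broken inputs with Shapely's `make_valid` before handing them to Earth Engine. As a standalone illustration of that repair step (the bow-tie polygon below is a made-up example, not package data):

```python
from shapely.geometry import Polygon
from shapely.validation import explain_validity, make_valid

# A self-intersecting "bow-tie" ring is invalid as a simple polygon.
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2)])
print(bowtie.is_valid)           # False
print(explain_validity(bowtie))  # reports the self-intersection point

# make_valid splits the ring at the crossing instead of discarding the feature,
# typically yielding a MultiPolygon of two triangles here.
repaired = make_valid(bowtie)
print(repaired.is_valid)         # True
print(repaired.geom_type)
```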
@@ -145,6 +192,181 @@ def whisp_formatted_stats_geojson_to_df(
  )


+ def whisp_formatted_stats_geojson_to_df(
+ input_geojson_filepath: Path | str,
+ external_id_column=None,
+ remove_geom=False,
+ national_codes=None,
+ unit_type="ha",
+ whisp_image=None,
+ custom_bands=None,
+ mode: str = "sequential",
+ batch_size: int = 10,
+ max_concurrent: int = 20,
+ validate_geometries: bool = False,
+ include_geometry_audit_trail: bool = False,
+ ) -> pd.DataFrame:
+ """
+ Main entry point for converting GeoJSON to Whisp statistics.
+
+ Routes to the appropriate processing mode with automatic formatting and validation.
+
+ Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
+ Output DataFrame is validated against a Pandera schema (created from lookup CSVs).
+ Results are automatically formatted and unit-converted (ha or percent).
+
+ If `external_id_column` is provided, it will be used to link external identifiers
+ from the input GeoJSON to the output DataFrame.
+
+ Parameters
+ ----------
+ input_geojson_filepath : Path | str
+ The filepath to the GeoJSON of the ROI to analyze.
+ external_id_column : str, optional
+ The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
+ This column must exist as a property in ALL features of the GeoJSON file.
+ Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
+ remove_geom : bool, default=False
+ If True, the geometry of the GeoJSON is removed from the output DataFrame.
+ national_codes : list, optional
+ List of ISO2 country codes to include national datasets.
+ unit_type: str, optional
+ Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+ whisp_image : ee.Image, optional
+ Pre-combined multiband Earth Engine Image containing all Whisp datasets.
+ If provided, this image will be used instead of combining datasets based on national_codes.
+ If None, datasets will be combined automatically using national_codes parameter.
+ custom_bands : list or dict, optional
+ Custom band information for extra columns. Can be:
+ - List of band names: ['Aa_test', 'elevation']
+ - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
+ - None: preserves all extra columns automatically
+ mode : str, optional
+ Processing mode, by default "sequential":
+ - "concurrent": Uses high-volume endpoint with concurrent batching (recommended for large files)
+ - "sequential": Uses standard endpoint for sequential processing (more stable)
+ - "legacy": Uses original implementation (basic stats extraction only, no formatting)
+ batch_size : int, optional
+ Features per batch for concurrent/sequential modes, by default 10.
+ Only applicable for "concurrent" and "sequential" modes.
+ max_concurrent : int, optional
+ Maximum concurrent EE calls for concurrent mode, by default 20.
+ Only applicable for "concurrent" mode.
+ validate_geometries : bool, optional
+ Whether to validate and fix invalid geometries, by default False.
+ Set to True to automatically fix invalid/self-intersecting polygons.
+ For production workflows, it's recommended to use geometry validation and
+ cleaning tools BEFORE processing with this function.
+ include_geometry_audit_trail : bool, default=False
+ If True, includes audit trail columns:
+ - geo_original: Original input geometry
+ - geometry_type_original: Original geometry type
+ - geometry_type: Processed geometry type (from EE)
+ - geometry_type_changed: Boolean flag if geometry changed
+ - geometry_degradation_type: Description of how it changed
+
+ Processing metadata stored in df.attrs['processing_metadata'].
+ These columns enable full transparency for geometry modifications during processing.
+
+ Returns
+ -------
+ df_stats : pd.DataFrame
+ The DataFrame containing the Whisp stats for the input ROI,
+ automatically formatted and validated.
+
+ Examples
+ --------
+ >>> # Use concurrent processing (recommended for large datasets)
+ >>> df = whisp_formatted_stats_geojson_to_df("data.geojson")
+
+ >>> # Use sequential processing for more stable/predictable results
+ >>> df = whisp_formatted_stats_geojson_to_df(
+ ... "data.geojson",
+ ... mode="sequential"
+ ... )
+
+ >>> # Adjust concurrency parameters
+ >>> df = whisp_formatted_stats_geojson_to_df(
+ ... "large_data.geojson",
+ ... mode="concurrent",
+ ... max_concurrent=30,
+ ... batch_size=15
+ ... )
+
+ >>> # Use legacy mode for backward compatibility (basic extraction only)
+ >>> df = whisp_formatted_stats_geojson_to_df(
+ ... "data.geojson",
+ ... mode="legacy"
+ ... )
+ """
+ # Import here to avoid circular imports
+ try:
+ from openforis_whisp.advanced_stats import (
+ whisp_formatted_stats_geojson_to_df_fast,
+ )
+ except ImportError:
+ # Fallback to legacy if advanced_stats not available
+ mode = "legacy"
+
+ logger = logging.getLogger("whisp")
+
+ if mode == "legacy":
+ # Log info if batch_size or max_concurrent were passed but won't be used
+ if batch_size != 10 or max_concurrent != 20:
+ unused = []
+ if batch_size != 10:
+ unused.append(f"batch_size={batch_size}")
+ if max_concurrent != 20:
+ unused.append(f"max_concurrent={max_concurrent}")
+ logger.info(
+ f"Mode is 'legacy': {', '.join(unused)}\n"
+ "parameter(s) are not used in legacy mode."
+ )
+ # Use original implementation (basic stats extraction only)
+ return whisp_formatted_stats_geojson_to_df_legacy(
+ input_geojson_filepath=input_geojson_filepath,
+ external_id_column=external_id_column,
+ remove_geom=remove_geom,
+ national_codes=national_codes,
+ unit_type=unit_type,
+ whisp_image=whisp_image,
+ custom_bands=custom_bands,
+ validate_geometries=validate_geometries,
+ )
+ elif mode in ("concurrent", "sequential"):
+ # Log info if batch_size or max_concurrent are not used in sequential mode
+ if mode == "sequential":
+ unused = []
+ if batch_size != 10:
+ unused.append(f"batch_size={batch_size}")
+ if max_concurrent != 20:
+ unused.append(f"max_concurrent={max_concurrent}")
+ if unused:
+ logger.info(
+ f"Mode is 'sequential': {', '.join(unused)}\n"
+ "parameter(s) are not used in sequential (single-threaded) mode."
+ )
+ # Route to fast function with explicit mode (skip auto-detection)
+ return whisp_formatted_stats_geojson_to_df_fast(
+ input_geojson_filepath=input_geojson_filepath,
+ external_id_column=external_id_column,
+ remove_geom=remove_geom,
+ national_codes=national_codes,
+ unit_type=unit_type,
+ whisp_image=whisp_image,
+ custom_bands=custom_bands,
+ mode=mode, # Pass mode directly (concurrent or sequential)
+ batch_size=batch_size,
+ max_concurrent=max_concurrent,
+ validate_geometries=validate_geometries,
+ include_geometry_audit_trail=include_geometry_audit_trail,
+ )
+ else:
+ raise ValueError(
+ f"Invalid mode '{mode}'. Must be 'concurrent', 'sequential', or 'legacy'."
+ )
+
+
  def whisp_formatted_stats_geojson_to_geojson(
  input_geojson_filepath,
  output_geojson_filepath,
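The new router above decides between the concurrent, sequential and legacy paths and, when requested, carries a geometry audit trail through to the result. A hedged usage sketch, assuming the function is re-exported at the package top level and that the input file and audit-trail column names match the docstring above:

```python
import openforis_whisp as whisp  # assumes the function is exposed in __init__.py

df = whisp.whisp_formatted_stats_geojson_to_df(
    "plots.geojson",               # hypothetical input file
    mode="concurrent",             # or "sequential" / "legacy"
    include_geometry_audit_trail=True,
)

# Rows whose geometry was altered during processing, per the audit-trail
# columns documented in the docstring above.
changed = df.loc[df["geometry_type_changed"] == True]
print(changed[["geometry_type_original", "geometry_type", "geometry_degradation_type"]])

# Run-level metadata travels on DataFrame attrs.
print(df.attrs.get("processing_metadata"))
```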
@@ -188,7 +410,8 @@ def whisp_formatted_stats_geojson_to_geojson(
  # Convert the df to GeoJSON
  convert_df_to_geojson(df, output_geojson_filepath, geo_column)

- print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")
+ # Suppress verbose output
+ # print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")


  def whisp_formatted_stats_ee_to_geojson(
@@ -688,6 +911,13 @@ def whisp_stats_ee_to_df(
  print(f"An error occurred during point geometry area adjustment: {e}")
  # Continue without the adjustment rather than failing completely

+ # Reformat geometry types (MultiPolygon -> Polygon)
+ try:
+ df_stats = reformat_geometry_type(df_stats)
+ except Exception as e:
+ print(f"An error occurred during geometry type reformatting: {e}")
+ # Continue without the adjustment rather than failing completely
+
  return df_stats


@@ -727,6 +957,43 @@ def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
  return df_modified


+ def reformat_geometry_type(df: pd.DataFrame) -> pd.DataFrame:
+ """
+ Reformat geometry type classification in the DataFrame output.
+ Standardizes MultiPolygon geometry type to Polygon for consistent output.
+
+ Parameters
+ ----------
+ df : pd.DataFrame
+ DataFrame containing geometry type column
+
+ Returns
+ -------
+ pd.DataFrame
+ DataFrame with standardized geometry types
+ """
+ # Check if required columns exist
+ if geometry_type_column not in df.columns:
+ print(
+ f"Warning: {geometry_type_column} column not found. Skipping geometry type reformatting."
+ )
+ return df
+
+ # Create a copy to avoid modifying the original
+ df_modified = df.copy()
+
+ # Reformat MultiPolygon to Polygon
+ multipolygon_mask = df_modified[geometry_type_column] == "MultiPolygon"
+ df_modified.loc[multipolygon_mask, geometry_type_column] = "Polygon"
+
+ # Log the changes
+ num_reformatted = multipolygon_mask.sum()
+ # if num_reformatted > 0:
+ # print(f"Reformatted {num_reformatted} MultiPolygon geometries to Polygon")
+
+ return df_modified
+
+
  def whisp_stats_ee_to_drive(
  feature_collection: ee.FeatureCollection,
  external_id_column=None,
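`reformat_geometry_type` above is a plain pandas relabelling of the geometry-type column. The same masking step on a toy frame (the column name here is illustrative; the real one comes from `config_runtime`):

```python
import pandas as pd

df = pd.DataFrame({"geometry_type": ["Polygon", "MultiPolygon", "Point", "MultiPolygon"]})

mask = df["geometry_type"] == "MultiPolygon"
df.loc[mask, "geometry_type"] = "Polygon"

print(int(mask.sum()))               # 2 rows relabelled
print(df["geometry_type"].tolist())  # ['Polygon', 'Polygon', 'Point', 'Polygon']
```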
@@ -813,7 +1080,9 @@ def get_stats(
  print("Using provided whisp_image")
  else:
  img_combined = combine_datasets(
- national_codes=national_codes, validate_bands=validate_bands
+ national_codes=national_codes,
+ validate_bands=validate_bands,
+ include_context_bands=False,
  )
  print(f"Combining datasets with national_codes: {national_codes}")

@@ -822,7 +1091,7 @@ def get_stats(
  print("Processing single feature")
  # OPTIMIZATION: Create cached images for single feature processing
  water_all = get_water_flag_image()
- gbounds_ADM0 = get_geoboundaries_fc()
+ bounds_ADM1 = get_admin_boundaries_fc()
  output = ee.FeatureCollection(
  [
  get_stats_feature(
@@ -830,7 +1099,7 @@ def get_stats(
  img_combined,
  unit_type=unit_type,
  water_all=water_all,
- gbounds_ADM0=gbounds_ADM0,
+ bounds_ADM1=bounds_ADM1,
  )
  ]
  )
@@ -852,7 +1121,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
  """
  Calculate statistics for a feature collection using Whisp datasets.

- OPTIMIZATION: Creates water flag and geoboundaries images once and reuses
+ OPTIMIZATION: Creates water flag and admin_boundaries images once and reuses
  them for all features instead of recreating them for each feature.
  This saves 7-15 seconds per analysis.

@@ -878,7 +1147,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
  # OPTIMIZATION: Create cached images once before processing features
  # These will be reused for all features instead of being recreated each time
  water_all = get_water_flag_image()
- gbounds_ADM0 = get_geoboundaries_fc()
+ bounds_ADM1 = get_admin_boundaries_fc()

  out_feature_col = ee.FeatureCollection(
  feature_col.map(
@@ -887,7 +1156,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
  img_combined,
  unit_type=unit_type,
  water_all=water_all,
- gbounds_ADM0=gbounds_ADM0,
+ bounds_ADM1=bounds_ADM1,
  )
  )
  )
@@ -901,12 +1170,12 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=


  def get_stats_feature(
- feature, img_combined, unit_type="ha", water_all=None, gbounds_ADM0=None
+ feature, img_combined, unit_type="ha", water_all=None, bounds_ADM1=None
  ):
  """
  Get statistics for a single feature using a pre-combined image.

- OPTIMIZATION: Accepts cached water/geoboundaries images to avoid recreating
+ OPTIMIZATION: Accepts cached water/admin_boundaries images to avoid recreating
  them for every feature.

  Parameters
@@ -919,8 +1188,8 @@ def get_stats_feature(
  Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
  water_all : ee.Image, optional
  Cached water flag image
- gbounds_ADM0 : ee.FeatureCollection, optional
- Cached geoboundaries feature collection
+ bounds_ADM1 : ee.FeatureCollection, optional
+ Cached admin_boundaries feature collection

  Returns
  -------
@@ -936,7 +1205,7 @@ def get_stats_feature(
  )

  # Get basic feature information with cached images
- feature_info = get_type_and_location(feature, water_all, gbounds_ADM0)
+ feature_info = get_type_and_location(feature, water_all, bounds_ADM1)

  # add statistics unit type (e.g., percentage or hectares) to dictionary
  stats_unit_type = ee.Dictionary({stats_unit_type_column: unit_type})
@@ -985,11 +1254,11 @@ def get_stats_feature(


  # Get basic feature information - uses admin and water datasets in gee.
- def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
+ def get_type_and_location(feature, water_all=None, bounds_ADM1=None):
  """
  Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags.

- OPTIMIZATION: Accepts cached water flag image and geoboundaries collection
+ OPTIMIZATION: Accepts cached water flag image and admin_boundaries collection
  to avoid recreating them for every feature (saves 7-15 seconds per analysis).

  Parameters
@@ -998,8 +1267,8 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
  The feature to extract information from
  water_all : ee.Image, optional
  Cached water flag image. If None, creates it.
- gbounds_ADM0 : ee.FeatureCollection, optional
- Cached geoboundaries feature collection. If None, loads it.
+ bounds_ADM1 : ee.FeatureCollection, optional
+ Cached admin_boundaries feature collection. If None, loads it.

  Returns
  -------
@@ -1007,19 +1276,23 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
  Dictionary with feature information
  """
  # Get centroid of the feature's geometry
- centroid = feature.geometry().centroid(1)
+ centroid = feature.geometry().centroid(0.1)

- # OPTIMIZATION: Use cached geoboundaries
- if gbounds_ADM0 is None:
- gbounds_ADM0 = get_geoboundaries_fc()
+ # OPTIMIZATION: Use cached admin_boundaries
+ if bounds_ADM1 is None:
+ bounds_ADM1 = get_admin_boundaries_fc()

- # Fetch location info from geoboundaries (country, admin)
- location = ee.Dictionary(get_geoboundaries_info(centroid, gbounds_ADM0))
- country = ee.Dictionary({iso3_country_column: location.get("shapeGroup")})
+ # Fetch location info from GAUL 2024 L1 (country, admin)
+ location = ee.Dictionary(get_admin_boundaries_info(centroid, bounds_ADM1))
+ country = ee.Dictionary({iso3_country_column: location.get("iso3_code")})

  admin_1 = ee.Dictionary(
- {admin_1_column: location.get("shapeName")}
- ) # Administrative level 1 (if available)
+ {admin_1_column: location.get("gaul1_name")}
+ ) # Administrative level 1 (from GAUL 2024 L1)
+
+ # OPTIMIZATION: Use cached water flag image
+ if water_all is None:
+ water_all = get_water_flag_image()

  # OPTIMIZATION: Use cached water flag image
  if water_all is None:
@@ -1037,8 +1310,12 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
  coords_list = centroid.coordinates()
  coords_dict = ee.Dictionary(
  {
- centroid_x_coord_column: coords_list.get(0), # Longitude
- centroid_y_coord_column: coords_list.get(1), # Latitude
+ centroid_x_coord_column: ee.Number(coords_list.get(0)).format(
+ "%.6f"
+ ), # Longitude (6 dp)
+ centroid_y_coord_column: ee.Number(coords_list.get(1)).format(
+ "%.6f"
+ ), # Latitude (6 dp)
  }
  )
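The centroid change above swaps raw numeric coordinates for strings rounded with printf-style formatting; six decimal degrees is roughly 0.1 m of longitude at the equator. A minimal sketch of the same call (assumes an initialized Earth Engine session):

```python
import ee

ee.Initialize()  # assumes prior Earth Engine authentication

lon = ee.Number(12.4923908471)
# Same printf-style pattern as in the diff: keep six decimal places.
print(lon.format("%.6f").getInfo())  # "12.492391"
```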
@@ -1076,36 +1353,36 @@ def percent_and_format(val, area_ha):
  return ee.Number(formatted_value)


- # geoboundaries - admin units from a freqently updated database, allows commercial use (CC BY 4.0 DEED) (disputed territories may need checking)
- def get_geoboundaries_info(geometry, gbounds_ADM0=None):
+ # GAUL 2024 L1 - admin units from FAO, allows commercial use
+ def get_admin_boundaries_info(geometry, bounds_ADM1=None):
  """
- Get geoboundaries info for a geometry.
+ Get GAUL 2024 L1 info for a geometry (country ISO3 code and admin boundary name).

- OPTIMIZATION: Accepts cached geoboundaries FeatureCollection to avoid
+ OPTIMIZATION: Accepts cached GAUL 2024 L1 FeatureCollection to avoid
  reloading it for every feature (saves 2-5 seconds per analysis).

  Parameters
  ----------
  geometry : ee.Geometry
  The geometry to query
- gbounds_ADM0 : ee.FeatureCollection, optional
- Cached geoboundaries feature collection. If None, loads it.
+ bounds_ADM1 : ee.FeatureCollection, optional
+ Cached GAUL 2024 L1 feature collection. If None, loads it.

  Returns
  -------
  ee.Dictionary
- Dictionary with shapeGroup and shapeName
+ Dictionary with iso3_code (country) and gaul1_name (admin boundary name)
  """
- if gbounds_ADM0 is None:
- gbounds_ADM0 = get_geoboundaries_fc()
+ if bounds_ADM1 is None:
+ bounds_ADM1 = get_admin_boundaries_fc()

- polygonsIntersectPoint = gbounds_ADM0.filterBounds(geometry)
- backup_dict = ee.Dictionary({"shapeGroup": "Unknown", "shapeName": "Unknown"})
+ polygonsIntersectPoint = bounds_ADM1.filterBounds(geometry)
+ backup_dict = ee.Dictionary({"iso3_code": "Unknown", "gaul1_name": "Unknown"})
  return ee.Algorithms.If(
  polygonsIntersectPoint.size().gt(0),
  polygonsIntersectPoint.first()
  .toDictionary()
- .select(["shapeGroup", "shapeName"]),
+ .select(["iso3_code", "gaul1_name"]),
  backup_dict,
  )
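`get_admin_boundaries_info` above resolves a point to a country/admin-1 pair with a server-side filter and a fallback dictionary. A standalone sketch of the same pattern (assumes an initialized Earth Engine session with access to the community GAUL 2024 asset; the coordinates are illustrative and the property names are those used in the diff):

```python
import ee

ee.Initialize()  # assumes prior Earth Engine authentication

gaul_l1 = ee.FeatureCollection("projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1")
point = ee.Geometry.Point([12.49, 41.89])  # illustrative lon/lat

matches = gaul_l1.filterBounds(point)
backup = ee.Dictionary({"iso3_code": "Unknown", "gaul1_name": "Unknown"})

# Server-side branch: use the first intersecting admin polygon, or the backup
# dictionary when the point hits nothing (e.g. open ocean).
info = ee.Algorithms.If(
    matches.size().gt(0),
    matches.first().toDictionary().select(["iso3_code", "gaul1_name"]),
    backup,
)
print(ee.Dictionary(info).getInfo())
```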
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: openforis-whisp
- Version: 2.0.0b3
+ Version: 3.0.0a2
  Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
  License: MIT
  Keywords: whisp,geospatial,data-processing
@@ -0,0 +1,20 @@
+ openforis_whisp/__init__.py,sha256=-r_9LFxbV6d-o4s0_huhaXxve6GIzCwl3pXKuJo6ixE,3663
+ openforis_whisp/advanced_stats.py,sha256=xrwKHG-c44_UkFha7TFgf71mo9UMw5ZZL3XQTPF5luM,92681
+ openforis_whisp/data_checks.py,sha256=KwgD72FA_n7joiJadGRpzntd2sLo0aqGNbOjRkB8iQI,32293
+ openforis_whisp/data_conversion.py,sha256=L2IsiUyQUt3aHgSYGbIhgPGwM7eyS3nLVEoNO9YqQeM,21888
+ openforis_whisp/datasets.py,sha256=aGJy0OYN4d0nsH3_IOYlHl-WCB7KFwZwMJ-dBi5Hc5Y,53470
+ openforis_whisp/logger.py,sha256=9M6_3mdpoiWfC-pDwM9vKmB2l5Gul6Rb5rNTNh-_nzs,3054
+ openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
+ openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
+ openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
+ openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
+ openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
+ openforis_whisp/reformat.py,sha256=mIooJ3zfSTDY3_Mx3OAW4jpfQ72q3zasG9tl58PdfN4,33729
+ openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
+ openforis_whisp/stats.py,sha256=dCQXx6KKEV99owqyPURk6CL97kQQARjetFrIz1ZbIvs,65725
+ openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
+ openforis_whisp-3.0.0a2.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+ openforis_whisp-3.0.0a2.dist-info/METADATA,sha256=wG4vc7B-f0JXmNkTUh4wJ-H0KPpbgyU9OfMwGewZq_A,16684
+ openforis_whisp-3.0.0a2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+ openforis_whisp-3.0.0a2.dist-info/RECORD,,
@@ -1,16 +0,0 @@
- openforis_whisp/__init__.py,sha256=bnEZ4_X-mJInltSKVI0STfvrb09Df-z21buIVFDif5w,2524
- openforis_whisp/data_conversion.py,sha256=Mc6dXbvoHBeRzl3o83pyKeI5_sPC8Yc90Tj4bN6_Bv8,17519
- openforis_whisp/datasets.py,sha256=hb8Y35vTcQQNUH_z2_l8Pu6Sjn_E8BzSow1-qAfs9bQ,50194
- openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
- openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
- openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
- openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
- openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
- openforis_whisp/reformat.py,sha256=rtkKs8z1mJd5JD9rXuMk1tbbbTvQxCCh68tA4hIQAv8,25445
- openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
- openforis_whisp/stats.py,sha256=1ikeV8UYpL8O5HZJY8lPXrhQwZ9D1IglbOsagZHCYdA,54000
- openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
- openforis_whisp-2.0.0b3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
- openforis_whisp-2.0.0b3.dist-info/METADATA,sha256=Opn73PWlsOQWTiwZ-HYvLkrPh4jYQELtSIIqDf4MsoQ,16684
- openforis_whisp-2.0.0b3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
- openforis_whisp-2.0.0b3.dist-info/RECORD,,