openforis-whisp 2.0.0b3__py3-none-any.whl → 3.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openforis_whisp/stats.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
3
  from pathlib import Path
4
4
  from .datasets import combine_datasets
5
5
  import json
6
+ import logging
6
7
  import country_converter as coco
7
8
  from openforis_whisp.parameters.config_runtime import (
8
9
  plot_id_column,
@@ -41,7 +42,7 @@ from .reformat import (
41
42
  # to avoid repeated expensive operations. This saves 7-15 seconds per analysis.
42
43
 
43
44
  _WATER_FLAG_IMAGE = None
44
- _GEOBOUNDARIES_FC = None
45
+ _admin_boundaries_FC = None
45
46
 
46
47
 
47
48
  def get_water_flag_image():
@@ -63,26 +64,28 @@ def get_water_flag_image():
63
64
  return _WATER_FLAG_IMAGE
64
65
 
65
66
 
66
- def get_geoboundaries_fc():
67
+ def get_admin_boundaries_fc():
67
68
  """
68
- Get cached geoboundaries feature collection.
69
+ Get cached GAUL 2024 L1 administrative boundary feature collection.
69
70
 
70
- OPTIMIZATION: Geoboundaries collection is loaded once and reused for all features.
71
+ OPTIMIZATION: GAUL 2024 L1 collection is loaded once and reused for all features.
71
72
  This avoids loading the large FeatureCollection for every feature (previously
72
- called in get_geoboundaries_info for each feature).
73
+ called in get_admin_boundaries_info for each feature).
73
74
 
74
75
  Returns
75
76
  -------
76
77
  ee.FeatureCollection
77
- Cached geoboundaries feature collection
78
+ Cached GAUL 2024 L1 administrative boundary feature collection
78
79
  """
79
- global _GEOBOUNDARIES_FC
80
- if _GEOBOUNDARIES_FC is None:
81
- _GEOBOUNDARIES_FC = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM1")
82
- return _GEOBOUNDARIES_FC
80
+ global _admin_boundaries_FC
81
+ if _admin_boundaries_FC is None:
82
+ _admin_boundaries_FC = ee.FeatureCollection(
83
+ "projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1"
84
+ )
85
+ return _admin_boundaries_FC
83
86
 
84
87
 
85
- def whisp_formatted_stats_geojson_to_df(
88
+ def whisp_formatted_stats_geojson_to_df_legacy(
86
89
  input_geojson_filepath: Path | str,
87
90
  external_id_column=None,
88
91
  remove_geom=False,
@@ -90,9 +93,15 @@ def whisp_formatted_stats_geojson_to_df(
90
93
  unit_type="ha",
91
94
  whisp_image=None,
92
95
  custom_bands=None, # New parameter
96
+ validate_geometries: bool = False,
93
97
  ) -> pd.DataFrame:
94
98
  """
95
- Main function for most users.
99
+ Legacy function for basic Whisp stats extraction.
100
+
101
+ DEPRECATED: This is the original implementation maintained for backward compatibility.
102
+ Use whisp_formatted_stats_geojson_to_df() for new code, which provides automatic
103
+ optimization, formatting, and schema validation.
104
+
96
105
  Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
97
106
  Output df is validated against a pandera schema (created on the fly from the two lookup CSVs).
98
107
 
@@ -126,13 +135,48 @@ def whisp_formatted_stats_geojson_to_df(
126
135
  - List of band names: ['Aa_test', 'elevation']
127
136
  - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
128
137
  - None: preserves all extra columns automatically
138
+ validate_geometries : bool, optional
139
+ Whether to validate and fix invalid geometries, by default False.
140
+ Set to True to automatically fix invalid/self-intersecting polygons.
129
141
 
130
142
  Returns
131
143
  -------
132
144
  df_stats : pd.DataFrame
133
145
  The DataFrame containing the Whisp stats for the input ROI.
134
146
  """
135
- feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
147
+ # Load GeoJSON and validate geometries if requested
148
+ if validate_geometries:
149
+ import json
150
+ import geopandas as gpd
151
+ from shapely.validation import make_valid
152
+ import logging as py_logging
153
+
154
+ logger = py_logging.getLogger("whisp-legacy")
155
+
156
+ # Load GeoJSON file
157
+ with open(input_geojson_filepath, "r") as f:
158
+ geojson_data = json.load(f)
159
+
160
+ # Convert to GeoDataFrame
161
+ gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
162
+
163
+ # Validate and fix invalid geometries
164
+ valid_count = gdf.geometry.is_valid.sum()
165
+ invalid_count = len(gdf) - valid_count
166
+ if invalid_count > 0:
167
+ logger.warning(f"Fixing {invalid_count} invalid geometries")
168
+ gdf["geometry"] = gdf["geometry"].apply(
169
+ lambda g: make_valid(g) if g and not g.is_valid else g
170
+ )
171
+
172
+ # Convert back to GeoJSON dict (stays in memory - no temp files!)
173
+ geojson_cleaned = json.loads(gdf.to_json())
174
+
175
+ # OPTIMIZATION: Pass GeoJSON dict directly - eliminates file I/O overhead
176
+ feature_collection = convert_geojson_to_ee(geojson_cleaned)
177
+ else:
178
+ # Original path - no validation
179
+ feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
136
180
 
137
181
  return whisp_formatted_stats_ee_to_df(
138
182
  feature_collection,
@@ -145,6 +189,169 @@ def whisp_formatted_stats_geojson_to_df(
145
189
  )
146
190
 
147
191
 
192
+ def whisp_formatted_stats_geojson_to_df(
193
+ input_geojson_filepath: Path | str,
194
+ external_id_column=None,
195
+ remove_geom=False,
196
+ national_codes=None,
197
+ unit_type="ha",
198
+ whisp_image=None,
199
+ custom_bands=None,
200
+ mode: str = "sequential",
201
+ batch_size: int = 10,
202
+ max_concurrent: int = 20,
203
+ validate_geometries: bool = False,
204
+ ) -> pd.DataFrame:
205
+ """
206
+ Main entry point for converting GeoJSON to Whisp statistics.
207
+
208
+ Routes to the appropriate processing mode with automatic formatting and validation.
209
+
210
+ Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
211
+ Output DataFrame is validated against a Pandera schema (created from lookup CSVs).
212
+ Results are automatically formatted and unit-converted (ha or percent).
213
+
214
+ If `external_id_column` is provided, it will be used to link external identifiers
215
+ from the input GeoJSON to the output DataFrame.
216
+
217
+ Parameters
218
+ ----------
219
+ input_geojson_filepath : Path | str
220
+ The filepath to the GeoJSON of the ROI to analyze.
221
+ external_id_column : str, optional
222
+ The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
223
+ This column must exist as a property in ALL features of the GeoJSON file.
224
+ Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
225
+ remove_geom : bool, default=False
226
+ If True, the geometry of the GeoJSON is removed from the output DataFrame.
227
+ national_codes : list, optional
228
+ List of ISO2 country codes to include national datasets.
229
+ unit_type: str, optional
230
+ Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
231
+ whisp_image : ee.Image, optional
232
+ Pre-combined multiband Earth Engine Image containing all Whisp datasets.
233
+ If provided, this image will be used instead of combining datasets based on national_codes.
234
+ If None, datasets will be combined automatically using national_codes parameter.
235
+ custom_bands : list or dict, optional
236
+ Custom band information for extra columns. Can be:
237
+ - List of band names: ['Aa_test', 'elevation']
238
+ - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
239
+ - None: preserves all extra columns automatically
240
+ mode : str, optional
241
+ Processing mode, by default "sequential":
242
+ - "concurrent": Uses high-volume endpoint with concurrent batching (recommended for large files)
243
+ - "sequential": Uses standard endpoint for sequential processing (more stable)
244
+ - "legacy": Uses original implementation (basic stats extraction only, no formatting)
245
+ batch_size : int, optional
246
+ Features per batch for concurrent/sequential modes, by default 10.
247
+ Only applicable for "concurrent" and "sequential" modes.
248
+ max_concurrent : int, optional
249
+ Maximum concurrent EE calls for concurrent mode, by default 20.
250
+ Only applicable for "concurrent" mode.
251
+ validate_geometries : bool, optional
252
+ Whether to validate and fix invalid geometries, by default False.
253
+ Set to True to automatically fix invalid/self-intersecting polygons.
254
+ For production workflows, it's recommended to use geometry validation and
255
+ cleaning tools BEFORE processing with this function.
256
+
257
+ Returns
258
+ -------
259
+ df_stats : pd.DataFrame
260
+ The DataFrame containing the Whisp stats for the input ROI,
261
+ automatically formatted and validated.
262
+
263
+ Examples
264
+ --------
265
+ >>> # Use concurrent processing (recommended for large datasets)
266
+ >>> df = whisp_formatted_stats_geojson_to_df("data.geojson")
267
+
268
+ >>> # Use sequential processing for more stable/predictable results
269
+ >>> df = whisp_formatted_stats_geojson_to_df(
270
+ ... "data.geojson",
271
+ ... mode="sequential"
272
+ ... )
273
+
274
+ >>> # Adjust concurrency parameters
275
+ >>> df = whisp_formatted_stats_geojson_to_df(
276
+ ... "large_data.geojson",
277
+ ... mode="concurrent",
278
+ ... max_concurrent=30,
279
+ ... batch_size=15
280
+ ... )
281
+
282
+ >>> # Use legacy mode for backward compatibility (basic extraction only)
283
+ >>> df = whisp_formatted_stats_geojson_to_df(
284
+ ... "data.geojson",
285
+ ... mode="legacy"
286
+ ... )
287
+ """
288
+ # Import here to avoid circular imports
289
+ try:
290
+ from openforis_whisp.advanced_stats import (
291
+ whisp_formatted_stats_geojson_to_df_fast,
292
+ )
293
+ except ImportError:
294
+ # Fallback to legacy if advanced_stats not available
295
+ mode = "legacy"
296
+
297
+ logger = logging.getLogger("whisp")
298
+
299
+ if mode == "legacy":
300
+ # Log info if batch_size or max_concurrent were passed but won't be used
301
+ if batch_size != 10 or max_concurrent != 20:
302
+ unused = []
303
+ if batch_size != 10:
304
+ unused.append(f"batch_size={batch_size}")
305
+ if max_concurrent != 20:
306
+ unused.append(f"max_concurrent={max_concurrent}")
307
+ logger.info(
308
+ f"Mode is 'legacy': {', '.join(unused)}\n"
309
+ "parameter(s) are not used in legacy mode."
310
+ )
311
+ # Use original implementation (basic stats extraction only)
312
+ return whisp_formatted_stats_geojson_to_df_legacy(
313
+ input_geojson_filepath=input_geojson_filepath,
314
+ external_id_column=external_id_column,
315
+ remove_geom=remove_geom,
316
+ national_codes=national_codes,
317
+ unit_type=unit_type,
318
+ whisp_image=whisp_image,
319
+ custom_bands=custom_bands,
320
+ validate_geometries=validate_geometries,
321
+ )
322
+ elif mode in ("concurrent", "sequential"):
323
+ # Log info if batch_size or max_concurrent are not used in sequential mode
324
+ if mode == "sequential":
325
+ unused = []
326
+ if batch_size != 10:
327
+ unused.append(f"batch_size={batch_size}")
328
+ if max_concurrent != 20:
329
+ unused.append(f"max_concurrent={max_concurrent}")
330
+ if unused:
331
+ logger.info(
332
+ f"Mode is 'sequential': {', '.join(unused)}\n"
333
+ "parameter(s) are not used in sequential (single-threaded) mode."
334
+ )
335
+ # Route to fast function with explicit mode (skip auto-detection)
336
+ return whisp_formatted_stats_geojson_to_df_fast(
337
+ input_geojson_filepath=input_geojson_filepath,
338
+ external_id_column=external_id_column,
339
+ remove_geom=remove_geom,
340
+ national_codes=national_codes,
341
+ unit_type=unit_type,
342
+ whisp_image=whisp_image,
343
+ custom_bands=custom_bands,
344
+ mode=mode, # Pass mode directly (concurrent or sequential)
345
+ batch_size=batch_size,
346
+ max_concurrent=max_concurrent,
347
+ validate_geometries=validate_geometries,
348
+ )
349
+ else:
350
+ raise ValueError(
351
+ f"Invalid mode '{mode}'. Must be 'concurrent', 'sequential', or 'legacy'."
352
+ )
353
+
354
+
148
355
  def whisp_formatted_stats_geojson_to_geojson(
149
356
  input_geojson_filepath,
150
357
  output_geojson_filepath,
@@ -188,7 +395,8 @@ def whisp_formatted_stats_geojson_to_geojson(
188
395
  # Convert the df to GeoJSON
189
396
  convert_df_to_geojson(df, output_geojson_filepath, geo_column)
190
397
 
191
- print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")
398
+ # Suppress verbose output
399
+ # print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")
192
400
 
193
401
 
194
402
  def whisp_formatted_stats_ee_to_geojson(
@@ -688,6 +896,13 @@ def whisp_stats_ee_to_df(
688
896
  print(f"An error occurred during point geometry area adjustment: {e}")
689
897
  # Continue without the adjustment rather than failing completely
690
898
 
899
+ # Reformat geometry types (MultiPolygon -> Polygon)
900
+ try:
901
+ df_stats = reformat_geometry_type(df_stats)
902
+ except Exception as e:
903
+ print(f"An error occurred during geometry type reformatting: {e}")
904
+ # Continue without the adjustment rather than failing completely
905
+
691
906
  return df_stats
692
907
 
693
908
 
@@ -727,6 +942,43 @@ def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
727
942
  return df_modified
728
943
 
729
944
 
945
+ def reformat_geometry_type(df: pd.DataFrame) -> pd.DataFrame:
946
+ """
947
+ Reformat geometry type classification in the DataFrame output.
948
+ Standardizes MultiPolygon geometry type to Polygon for consistent output.
949
+
950
+ Parameters
951
+ ----------
952
+ df : pd.DataFrame
953
+ DataFrame containing geometry type column
954
+
955
+ Returns
956
+ -------
957
+ pd.DataFrame
958
+ DataFrame with standardized geometry types
959
+ """
960
+ # Check if required columns exist
961
+ if geometry_type_column not in df.columns:
962
+ print(
963
+ f"Warning: {geometry_type_column} column not found. Skipping geometry type reformatting."
964
+ )
965
+ return df
966
+
967
+ # Create a copy to avoid modifying the original
968
+ df_modified = df.copy()
969
+
970
+ # Reformat MultiPolygon to Polygon
971
+ multipolygon_mask = df_modified[geometry_type_column] == "MultiPolygon"
972
+ df_modified.loc[multipolygon_mask, geometry_type_column] = "Polygon"
973
+
974
+ # Log the changes
975
+ num_reformatted = multipolygon_mask.sum()
976
+ # if num_reformatted > 0:
977
+ # print(f"Reformatted {num_reformatted} MultiPolygon geometries to Polygon")
978
+
979
+ return df_modified
980
+
981
+
730
982
  def whisp_stats_ee_to_drive(
731
983
  feature_collection: ee.FeatureCollection,
732
984
  external_id_column=None,
@@ -813,7 +1065,9 @@ def get_stats(
813
1065
  print("Using provided whisp_image")
814
1066
  else:
815
1067
  img_combined = combine_datasets(
816
- national_codes=national_codes, validate_bands=validate_bands
1068
+ national_codes=national_codes,
1069
+ validate_bands=validate_bands,
1070
+ include_context_bands=False,
817
1071
  )
818
1072
  print(f"Combining datasets with national_codes: {national_codes}")
819
1073
 
@@ -822,7 +1076,7 @@ def get_stats(
822
1076
  print("Processing single feature")
823
1077
  # OPTIMIZATION: Create cached images for single feature processing
824
1078
  water_all = get_water_flag_image()
825
- gbounds_ADM0 = get_geoboundaries_fc()
1079
+ bounds_ADM1 = get_admin_boundaries_fc()
826
1080
  output = ee.FeatureCollection(
827
1081
  [
828
1082
  get_stats_feature(
@@ -830,7 +1084,7 @@ def get_stats(
830
1084
  img_combined,
831
1085
  unit_type=unit_type,
832
1086
  water_all=water_all,
833
- gbounds_ADM0=gbounds_ADM0,
1087
+ bounds_ADM1=bounds_ADM1,
834
1088
  )
835
1089
  ]
836
1090
  )
@@ -852,7 +1106,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
852
1106
  """
853
1107
  Calculate statistics for a feature collection using Whisp datasets.
854
1108
 
855
- OPTIMIZATION: Creates water flag and geoboundaries images once and reuses
1109
+ OPTIMIZATION: Creates water flag and admin_boundaries images once and reuses
856
1110
  them for all features instead of recreating them for each feature.
857
1111
  This saves 7-15 seconds per analysis.
858
1112
 
@@ -878,7 +1132,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
878
1132
  # OPTIMIZATION: Create cached images once before processing features
879
1133
  # These will be reused for all features instead of being recreated each time
880
1134
  water_all = get_water_flag_image()
881
- gbounds_ADM0 = get_geoboundaries_fc()
1135
+ bounds_ADM1 = get_admin_boundaries_fc()
882
1136
 
883
1137
  out_feature_col = ee.FeatureCollection(
884
1138
  feature_col.map(
@@ -887,7 +1141,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
887
1141
  img_combined,
888
1142
  unit_type=unit_type,
889
1143
  water_all=water_all,
890
- gbounds_ADM0=gbounds_ADM0,
1144
+ bounds_ADM1=bounds_ADM1,
891
1145
  )
892
1146
  )
893
1147
  )
@@ -901,12 +1155,12 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
901
1155
 
902
1156
 
903
1157
  def get_stats_feature(
904
- feature, img_combined, unit_type="ha", water_all=None, gbounds_ADM0=None
1158
+ feature, img_combined, unit_type="ha", water_all=None, bounds_ADM1=None
905
1159
  ):
906
1160
  """
907
1161
  Get statistics for a single feature using a pre-combined image.
908
1162
 
909
- OPTIMIZATION: Accepts cached water/geoboundaries images to avoid recreating
1163
+ OPTIMIZATION: Accepts cached water/admin_boundaries images to avoid recreating
910
1164
  them for every feature.
911
1165
 
912
1166
  Parameters
@@ -919,8 +1173,8 @@ def get_stats_feature(
919
1173
  Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
920
1174
  water_all : ee.Image, optional
921
1175
  Cached water flag image
922
- gbounds_ADM0 : ee.FeatureCollection, optional
923
- Cached geoboundaries feature collection
1176
+ bounds_ADM1 : ee.FeatureCollection, optional
1177
+ Cached admin_boundaries feature collection
924
1178
 
925
1179
  Returns
926
1180
  -------
@@ -936,7 +1190,7 @@ def get_stats_feature(
936
1190
  )
937
1191
 
938
1192
  # Get basic feature information with cached images
939
- feature_info = get_type_and_location(feature, water_all, gbounds_ADM0)
1193
+ feature_info = get_type_and_location(feature, water_all, bounds_ADM1)
940
1194
 
941
1195
  # add statistics unit type (e.g., percentage or hectares) to dictionary
942
1196
  stats_unit_type = ee.Dictionary({stats_unit_type_column: unit_type})
@@ -985,11 +1239,11 @@ def get_stats_feature(
985
1239
 
986
1240
 
987
1241
  # Get basic feature information - uses admin and water datasets in gee.
988
- def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
1242
+ def get_type_and_location(feature, water_all=None, bounds_ADM1=None):
989
1243
  """
990
1244
  Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags.
991
1245
 
992
- OPTIMIZATION: Accepts cached water flag image and geoboundaries collection
1246
+ OPTIMIZATION: Accepts cached water flag image and admin_boundaries collection
993
1247
  to avoid recreating them for every feature (saves 7-15 seconds per analysis).
994
1248
 
995
1249
  Parameters
@@ -998,8 +1252,8 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
998
1252
  The feature to extract information from
999
1253
  water_all : ee.Image, optional
1000
1254
  Cached water flag image. If None, creates it.
1001
- gbounds_ADM0 : ee.FeatureCollection, optional
1002
- Cached geoboundaries feature collection. If None, loads it.
1255
+ bounds_ADM1 : ee.FeatureCollection, optional
1256
+ Cached admin_boundaries feature collection. If None, loads it.
1003
1257
 
1004
1258
  Returns
1005
1259
  -------
@@ -1007,19 +1261,23 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
1007
1261
  Dictionary with feature information
1008
1262
  """
1009
1263
  # Get centroid of the feature's geometry
1010
- centroid = feature.geometry().centroid(1)
1264
+ centroid = feature.geometry().centroid(0.1)
1011
1265
 
1012
- # OPTIMIZATION: Use cached geoboundaries
1013
- if gbounds_ADM0 is None:
1014
- gbounds_ADM0 = get_geoboundaries_fc()
1266
+ # OPTIMIZATION: Use cached admin_boundaries
1267
+ if bounds_ADM1 is None:
1268
+ bounds_ADM1 = get_admin_boundaries_fc()
1015
1269
 
1016
- # Fetch location info from geoboundaries (country, admin)
1017
- location = ee.Dictionary(get_geoboundaries_info(centroid, gbounds_ADM0))
1018
- country = ee.Dictionary({iso3_country_column: location.get("shapeGroup")})
1270
+ # Fetch location info from GAUL 2024 L1 (country, admin)
1271
+ location = ee.Dictionary(get_admin_boundaries_info(centroid, bounds_ADM1))
1272
+ country = ee.Dictionary({iso3_country_column: location.get("iso3_code")})
1019
1273
 
1020
1274
  admin_1 = ee.Dictionary(
1021
- {admin_1_column: location.get("shapeName")}
1022
- ) # Administrative level 1 (if available)
1275
+ {admin_1_column: location.get("gaul1_name")}
1276
+ ) # Administrative level 1 (from GAUL 2024 L1)
1277
+
1278
+ # OPTIMIZATION: Use cached water flag image
1279
+ if water_all is None:
1280
+ water_all = get_water_flag_image()
1023
1281
 
1024
1282
  # OPTIMIZATION: Use cached water flag image
1025
1283
  if water_all is None:
@@ -1037,8 +1295,12 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
1037
1295
  coords_list = centroid.coordinates()
1038
1296
  coords_dict = ee.Dictionary(
1039
1297
  {
1040
- centroid_x_coord_column: coords_list.get(0), # Longitude
1041
- centroid_y_coord_column: coords_list.get(1), # Latitude
1298
+ centroid_x_coord_column: ee.Number(coords_list.get(0)).format(
1299
+ "%.6f"
1300
+ ), # Longitude (6 dp)
1301
+ centroid_y_coord_column: ee.Number(coords_list.get(1)).format(
1302
+ "%.6f"
1303
+ ), # Latitude (6 dp)
1042
1304
  }
1043
1305
  )
1044
1306
 
@@ -1076,36 +1338,36 @@ def percent_and_format(val, area_ha):
1076
1338
  return ee.Number(formatted_value)
1077
1339
 
1078
1340
 
1079
- # geoboundaries - admin units from a freqently updated database, allows commercial use (CC BY 4.0 DEED) (disputed territories may need checking)
1080
- def get_geoboundaries_info(geometry, gbounds_ADM0=None):
1341
+ # GAUL 2024 L1 - admin units from FAO, allows commercial use
1342
+ def get_admin_boundaries_info(geometry, bounds_ADM1=None):
1081
1343
  """
1082
- Get geoboundaries info for a geometry.
1344
+ Get GAUL 2024 L1 info for a geometry (country ISO3 code and admin boundary name).
1083
1345
 
1084
- OPTIMIZATION: Accepts cached geoboundaries FeatureCollection to avoid
1346
+ OPTIMIZATION: Accepts cached GAUL 2024 L1 FeatureCollection to avoid
1085
1347
  reloading it for every feature (saves 2-5 seconds per analysis).
1086
1348
 
1087
1349
  Parameters
1088
1350
  ----------
1089
1351
  geometry : ee.Geometry
1090
1352
  The geometry to query
1091
- gbounds_ADM0 : ee.FeatureCollection, optional
1092
- Cached geoboundaries feature collection. If None, loads it.
1353
+ bounds_ADM1 : ee.FeatureCollection, optional
1354
+ Cached GAUL 2024 L1 feature collection. If None, loads it.
1093
1355
 
1094
1356
  Returns
1095
1357
  -------
1096
1358
  ee.Dictionary
1097
- Dictionary with shapeGroup and shapeName
1359
+ Dictionary with iso3_code (country) and gaul1_name (admin boundary name)
1098
1360
  """
1099
- if gbounds_ADM0 is None:
1100
- gbounds_ADM0 = get_geoboundaries_fc()
1361
+ if bounds_ADM1 is None:
1362
+ bounds_ADM1 = get_admin_boundaries_fc()
1101
1363
 
1102
- polygonsIntersectPoint = gbounds_ADM0.filterBounds(geometry)
1103
- backup_dict = ee.Dictionary({"shapeGroup": "Unknown", "shapeName": "Unknown"})
1364
+ polygonsIntersectPoint = bounds_ADM1.filterBounds(geometry)
1365
+ backup_dict = ee.Dictionary({"iso3_code": "Unknown", "gaul1_name": "Unknown"})
1104
1366
  return ee.Algorithms.If(
1105
1367
  polygonsIntersectPoint.size().gt(0),
1106
1368
  polygonsIntersectPoint.first()
1107
1369
  .toDictionary()
1108
- .select(["shapeGroup", "shapeName"]),
1370
+ .select(["iso3_code", "gaul1_name"]),
1109
1371
  backup_dict,
1110
1372
  )
1111
1373
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: openforis-whisp
3
- Version: 2.0.0b3
3
+ Version: 3.0.0a1
4
4
  Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
5
5
  License: MIT
6
6
  Keywords: whisp,geospatial,data-processing
@@ -0,0 +1,20 @@
1
+ openforis_whisp/__init__.py,sha256=-r_9LFxbV6d-o4s0_huhaXxve6GIzCwl3pXKuJo6ixE,3663
2
+ openforis_whisp/advanced_stats.py,sha256=_bP_ApeaAdOF41WvabOhUNGL9Tt35AesUjNjqnHs8wo,76730
3
+ openforis_whisp/data_checks.py,sha256=WiYhoTedPs1MqSv4T978nDF3_WDYyg8YmHRi9mQXXqI,25203
4
+ openforis_whisp/data_conversion.py,sha256=sr2j_q6YjxVTicytimOMO8-RYohD1oyWWrgbg6WsLSw,18796
5
+ openforis_whisp/datasets.py,sha256=aGJy0OYN4d0nsH3_IOYlHl-WCB7KFwZwMJ-dBi5Hc5Y,53470
6
+ openforis_whisp/logger.py,sha256=9M6_3mdpoiWfC-pDwM9vKmB2l5Gul6Rb5rNTNh-_nzs,3054
7
+ openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
9
+ openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
10
+ openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
11
+ openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
12
+ openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
13
+ openforis_whisp/reformat.py,sha256=mIooJ3zfSTDY3_Mx3OAW4jpfQ72q3zasG9tl58PdfN4,33729
14
+ openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
15
+ openforis_whisp/stats.py,sha256=mzzd3oU3RnJQPfeWoUBuMDTIw2FCAzWXHCt53ZuQ__A,64895
16
+ openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
17
+ openforis_whisp-3.0.0a1.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
18
+ openforis_whisp-3.0.0a1.dist-info/METADATA,sha256=dAlxg3DFtpIw5fCbZyrO2hOhtJSg7DYxtbN71ez3S2Y,16684
19
+ openforis_whisp-3.0.0a1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
20
+ openforis_whisp-3.0.0a1.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- openforis_whisp/__init__.py,sha256=bnEZ4_X-mJInltSKVI0STfvrb09Df-z21buIVFDif5w,2524
2
- openforis_whisp/data_conversion.py,sha256=Mc6dXbvoHBeRzl3o83pyKeI5_sPC8Yc90Tj4bN6_Bv8,17519
3
- openforis_whisp/datasets.py,sha256=hb8Y35vTcQQNUH_z2_l8Pu6Sjn_E8BzSow1-qAfs9bQ,50194
4
- openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
5
- openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
6
- openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
7
- openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
8
- openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
9
- openforis_whisp/reformat.py,sha256=rtkKs8z1mJd5JD9rXuMk1tbbbTvQxCCh68tA4hIQAv8,25445
10
- openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
11
- openforis_whisp/stats.py,sha256=1ikeV8UYpL8O5HZJY8lPXrhQwZ9D1IglbOsagZHCYdA,54000
12
- openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
13
- openforis_whisp-2.0.0b3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
14
- openforis_whisp-2.0.0b3.dist-info/METADATA,sha256=Opn73PWlsOQWTiwZ-HYvLkrPh4jYQELtSIIqDf4MsoQ,16684
15
- openforis_whisp-2.0.0b3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
16
- openforis_whisp-2.0.0b3.dist-info/RECORD,,