openforis-whisp 2.0.0b3__py3-none-any.whl → 3.0.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +35 -4
- openforis_whisp/advanced_stats.py +2428 -0
- openforis_whisp/data_checks.py +805 -0
- openforis_whisp/data_conversion.py +221 -84
- openforis_whisp/datasets.py +124 -36
- openforis_whisp/logger.py +26 -0
- openforis_whisp/parameters/__init__.py +0 -0
- openforis_whisp/parameters/lookup_gaul1_admin.py +18663 -0
- openforis_whisp/reformat.py +198 -2
- openforis_whisp/stats.py +329 -52
- {openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a2.dist-info}/METADATA +1 -1
- openforis_whisp-3.0.0a2.dist-info/RECORD +20 -0
- openforis_whisp-2.0.0b3.dist-info/RECORD +0 -16
- {openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a2.dist-info}/LICENSE +0 -0
- {openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a2.dist-info}/WHEEL +0 -0
openforis_whisp/stats.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 from pathlib import Path
 from .datasets import combine_datasets
 import json
+import logging
 import country_converter as coco
 from openforis_whisp.parameters.config_runtime import (
     plot_id_column,
@@ -41,7 +42,7 @@ from .reformat import (
 # to avoid repeated expensive operations. This saves 7-15 seconds per analysis.

 _WATER_FLAG_IMAGE = None
-
+_admin_boundaries_FC = None


 def get_water_flag_image():
@@ -63,26 +64,28 @@ def get_water_flag_image():
     return _WATER_FLAG_IMAGE


-def
+def get_admin_boundaries_fc():
     """
-    Get cached
+    Get cached GAUL 2024 L1 administrative boundary feature collection.

-    OPTIMIZATION:
+    OPTIMIZATION: GAUL 2024 L1 collection is loaded once and reused for all features.
     This avoids loading the large FeatureCollection for every feature (previously
-    called in
+    called in get_admin_boundaries_info for each feature).

     Returns
     -------
     ee.FeatureCollection
-        Cached
+        Cached GAUL 2024 L1 administrative boundary feature collection
     """
-    global
-    if
-
-
+    global _admin_boundaries_FC
+    if _admin_boundaries_FC is None:
+        _admin_boundaries_FC = ee.FeatureCollection(
+            "projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1"
+        )
+    return _admin_boundaries_FC


-def
+def whisp_formatted_stats_geojson_to_df_legacy(
     input_geojson_filepath: Path | str,
     external_id_column=None,
     remove_geom=False,
@@ -90,9 +93,15 @@ def whisp_formatted_stats_geojson_to_df(
     unit_type="ha",
     whisp_image=None,
     custom_bands=None,  # New parameter
+    validate_geometries: bool = False,
 ) -> pd.DataFrame:
     """
-
+    Legacy function for basic Whisp stats extraction.
+
+    DEPRECATED: This is the original implementation maintained for backward compatibility.
+    Use whisp_formatted_stats_geojson_to_df() for new code, which provides automatic
+    optimization, formatting, and schema validation.
+
     Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
     Output df is validated against a panderas schema (created on the fly from the two lookup CSVs).

@@ -126,13 +135,51 @@ def whisp_formatted_stats_geojson_to_df(
         - List of band names: ['Aa_test', 'elevation']
         - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
         - None: preserves all extra columns automatically
+    validate_geometries : bool, optional
+        Whether to validate and fix invalid geometries, by default False.
+        Set to True to automatically fix invalid/self-intersecting polygons.

     Returns
     -------
     df_stats : pd.DataFrame
         The DataFrame containing the Whisp stats for the input ROI.
     """
-
+    # Load GeoJSON and validate geometries if requested
+    if validate_geometries:
+        import json
+        import geopandas as gpd
+        from shapely.validation import make_valid
+        import logging as py_logging
+
+        logger = py_logging.getLogger("whisp")
+
+        # Load GeoJSON file
+        with open(input_geojson_filepath, "r") as f:
+            geojson_data = json.load(f)
+
+        # Convert to GeoDataFrame
+        gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
+
+        # Validate and fix invalid geometries
+        valid_count = gdf.geometry.is_valid.sum()
+        invalid_count = len(gdf) - valid_count
+        if invalid_count > 0:
+            logger.warning(f"Fixing {invalid_count} invalid geometries")
+            gdf["geometry"] = gdf["geometry"].apply(
+                lambda g: make_valid(g) if g and not g.is_valid else g
+            )
+
+        # Pass GeoDataFrame directly to preserve CRS metadata
+        # convert_geojson_to_ee will handle:
+        # - CRS detection and conversion to WGS84 if needed
+        # - Data type sanitization (datetime, object columns)
+        # - Geometry validation and Z-coordinate stripping
+        feature_collection = convert_geojson_to_ee(
+            gdf, enforce_wgs84=True, strip_z_coords=True
+        )
+    else:
+        # Original path - no validation
+        feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))

     return whisp_formatted_stats_ee_to_df(
         feature_collection,
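The validate_geometries branch relies on shapely's make_valid to repair self-intersecting polygons before conversion to Earth Engine. A standalone sketch of that repair step, not taken from the package; the bow-tie polygon and column names are illustrative only:

import geopandas as gpd
from shapely.geometry import Polygon
from shapely.validation import make_valid

# A self-intersecting "bow-tie" polygon: invalid until repaired.
bowtie = Polygon([(0, 0), (2, 2), (2, 0), (0, 2), (0, 0)])
gdf = gpd.GeoDataFrame({"plot": [1]}, geometry=[bowtie], crs="EPSG:4326")

print(gdf.geometry.is_valid.tolist())  # [False]
gdf["geometry"] = gdf["geometry"].apply(
    lambda g: make_valid(g) if g is not None and not g.is_valid else g
)
print(gdf.geometry.is_valid.tolist())  # [True] - repaired (here as a MultiPolygon)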
@@ -145,6 +192,181 @@ def whisp_formatted_stats_geojson_to_df(
     )


+def whisp_formatted_stats_geojson_to_df(
+    input_geojson_filepath: Path | str,
+    external_id_column=None,
+    remove_geom=False,
+    national_codes=None,
+    unit_type="ha",
+    whisp_image=None,
+    custom_bands=None,
+    mode: str = "sequential",
+    batch_size: int = 10,
+    max_concurrent: int = 20,
+    validate_geometries: bool = False,
+    include_geometry_audit_trail: bool = False,
+) -> pd.DataFrame:
+    """
+    Main entry point for converting GeoJSON to Whisp statistics.
+
+    Routes to the appropriate processing mode with automatic formatting and validation.
+
+    Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
+    Output DataFrame is validated against a Panderas schema (created from lookup CSVs).
+    Results are automatically formatted and unit-converted (ha or percent).
+
+    If `external_id_column` is provided, it will be used to link external identifiers
+    from the input GeoJSON to the output DataFrame.
+
+    Parameters
+    ----------
+    input_geojson_filepath : Path | str
+        The filepath to the GeoJSON of the ROI to analyze.
+    external_id_column : str, optional
+        The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
+        This column must exist as a property in ALL features of the GeoJSON file.
+        Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
+    remove_geom : bool, default=False
+        If True, the geometry of the GeoJSON is removed from the output DataFrame.
+    national_codes : list, optional
+        List of ISO2 country codes to include national datasets.
+    unit_type: str, optional
+        Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
+    whisp_image : ee.Image, optional
+        Pre-combined multiband Earth Engine Image containing all Whisp datasets.
+        If provided, this image will be used instead of combining datasets based on national_codes.
+        If None, datasets will be combined automatically using national_codes parameter.
+    custom_bands : list or dict, optional
+        Custom band information for extra columns. Can be:
+        - List of band names: ['Aa_test', 'elevation']
+        - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
+        - None: preserves all extra columns automatically
+    mode : str, optional
+        Processing mode, by default "concurrent":
+        - "concurrent": Uses high-volume endpoint with concurrent batching (recommended for large files)
+        - "sequential": Uses standard endpoint for sequential processing (more stable)
+        - "legacy": Uses original implementation (basic stats extraction only, no formatting)
+    batch_size : int, optional
+        Features per batch for concurrent/sequential modes, by default 10.
+        Only applicable for "concurrent" and "sequential" modes.
+    max_concurrent : int, optional
+        Maximum concurrent EE calls for concurrent mode, by default 20.
+        Only applicable for "concurrent" mode.
+    validate_geometries : bool, optional
+        Whether to validate and fix invalid geometries, by default False.
+        Set to True to automatically fix invalid/self-intersecting polygons.
+        For production workflows, it's recommended to use geometry validation and
+        cleaning tools BEFORE processing with this function.
+    include_geometry_audit_trail : bool, default True
+        If True (default), includes audit trail columns:
+        - geo_original: Original input geometry
+        - geometry_type_original: Original geometry type
+        - geometry_type: Processed geometry type (from EE)
+        - geometry_type_changed: Boolean flag if geometry changed
+        - geometry_degradation_type: Description of how it changed
+
+        Processing metadata stored in df.attrs['processing_metadata'].
+        These columns enable full transparency for geometry modifications during processing.
+
+    Returns
+    -------
+    df_stats : pd.DataFrame
+        The DataFrame containing the Whisp stats for the input ROI,
+        automatically formatted and validated.
+
+    Examples
+    --------
+    >>> # Use concurrent processing (default, recommended for large datasets)
+    >>> df = whisp_formatted_stats_geojson_to_df("data.geojson")
+
+    >>> # Use sequential processing for more stable/predictable results
+    >>> df = whisp_formatted_stats_geojson_to_df(
+    ...     "data.geojson",
+    ...     mode="sequential"
+    ... )
+
+    >>> # Adjust concurrency parameters
+    >>> df = whisp_formatted_stats_geojson_to_df(
+    ...     "large_data.geojson",
+    ...     mode="concurrent",
+    ...     max_concurrent=30,
+    ...     batch_size=15
+    ... )
+
+    >>> # Use legacy mode for backward compatibility (basic extraction only)
+    >>> df = whisp_formatted_stats_geojson_to_df(
+    ...     "data.geojson",
+    ...     mode="legacy"
+    ... )
+    """
+    # Import here to avoid circular imports
+    try:
+        from openforis_whisp.advanced_stats import (
+            whisp_formatted_stats_geojson_to_df_fast,
+        )
+    except ImportError:
+        # Fallback to legacy if advanced_stats not available
+        mode = "legacy"
+
+    logger = logging.getLogger("whisp")
+
+    if mode == "legacy":
+        # Log info if batch_size or max_concurrent were passed but won't be used
+        if batch_size != 10 or max_concurrent != 20:
+            unused = []
+            if batch_size != 10:
+                unused.append(f"batch_size={batch_size}")
+            if max_concurrent != 20:
+                unused.append(f"max_concurrent={max_concurrent}")
+            logger.info(
+                f"Mode is 'legacy': {', '.join(unused)}\n"
+                "parameter(s) are not used in legacy mode."
+            )
+        # Use original implementation (basic stats extraction only)
+        return whisp_formatted_stats_geojson_to_df_legacy(
+            input_geojson_filepath=input_geojson_filepath,
+            external_id_column=external_id_column,
+            remove_geom=remove_geom,
+            national_codes=national_codes,
+            unit_type=unit_type,
+            whisp_image=whisp_image,
+            custom_bands=custom_bands,
+            validate_geometries=validate_geometries,
+        )
+    elif mode in ("concurrent", "sequential"):
+        # Log info if batch_size or max_concurrent are not used in sequential mode
+        if mode == "sequential":
+            unused = []
+            if batch_size != 10:
+                unused.append(f"batch_size={batch_size}")
+            if max_concurrent != 20:
+                unused.append(f"max_concurrent={max_concurrent}")
+            if unused:
+                logger.info(
+                    f"Mode is 'sequential': {', '.join(unused)}\n"
+                    "parameter(s) are not used in sequential (single-threaded) mode."
+                )
+        # Route to fast function with explicit mode (skip auto-detection)
+        return whisp_formatted_stats_geojson_to_df_fast(
+            input_geojson_filepath=input_geojson_filepath,
+            external_id_column=external_id_column,
+            remove_geom=remove_geom,
+            national_codes=national_codes,
+            unit_type=unit_type,
+            whisp_image=whisp_image,
+            custom_bands=custom_bands,
+            mode=mode,  # Pass mode directly (concurrent or sequential)
+            batch_size=batch_size,
+            max_concurrent=max_concurrent,
+            validate_geometries=validate_geometries,
+            include_geometry_audit_trail=include_geometry_audit_trail,
+        )
+    else:
+        raise ValueError(
+            f"Invalid mode '{mode}'. Must be 'concurrent', 'sequential', or 'legacy'."
+        )
+
+
 def whisp_formatted_stats_geojson_to_geojson(
     input_geojson_filepath,
     output_geojson_filepath,
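The new entry point above dispatches on mode and optionally exposes a geometry audit trail. A hedged usage sketch, not part of the diff; the file path is a placeholder and the audit-trail column names and df.attrs key are those documented in the docstring above:

from openforis_whisp.stats import whisp_formatted_stats_geojson_to_df

# Placeholder path; any Whisp-compatible GeoJSON of plots would do.
df = whisp_formatted_stats_geojson_to_df(
    "plots.geojson",
    mode="concurrent",          # or "sequential" / "legacy"
    batch_size=15,
    max_concurrent=30,
    validate_geometries=True,   # repair invalid polygons before processing
    include_geometry_audit_trail=True,
)

# Audit-trail columns as documented in the docstring above.
audit_cols = [
    "geo_original",
    "geometry_type_original",
    "geometry_type",
    "geometry_type_changed",
    "geometry_degradation_type",
]
print(df[[c for c in audit_cols if c in df.columns]].head())

# Processing metadata is attached to the DataFrame rather than stored as columns.
print(df.attrs.get("processing_metadata"))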
@@ -188,7 +410,8 @@ def whisp_formatted_stats_geojson_to_geojson(
     # Convert the df to GeoJSON
     convert_df_to_geojson(df, output_geojson_filepath, geo_column)

-
+    # Suppress verbose output
+    # print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")


 def whisp_formatted_stats_ee_to_geojson(
@@ -688,6 +911,13 @@ def whisp_stats_ee_to_df(
         print(f"An error occurred during point geometry area adjustment: {e}")
         # Continue without the adjustment rather than failing completely

+    # Reformat geometry types (MultiPolygon -> Polygon)
+    try:
+        df_stats = reformat_geometry_type(df_stats)
+    except Exception as e:
+        print(f"An error occurred during geometry type reformatting: {e}")
+        # Continue without the adjustment rather than failing completely
+
     return df_stats


@@ -727,6 +957,43 @@ def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
     return df_modified


+def reformat_geometry_type(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Reformat geometry type classification in the DataFrame output.
+    Standardizes MultiPolygon geometry type to Polygon for consistent output.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing geometry type column
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with standardized geometry types
+    """
+    # Check if required columns exist
+    if geometry_type_column not in df.columns:
+        print(
+            f"Warning: {geometry_type_column} column not found. Skipping geometry type reformatting."
+        )
+        return df
+
+    # Create a copy to avoid modifying the original
+    df_modified = df.copy()
+
+    # Reformat MultiPolygon to Polygon
+    multipolygon_mask = df_modified[geometry_type_column] == "MultiPolygon"
+    df_modified.loc[multipolygon_mask, geometry_type_column] = "Polygon"
+
+    # Log the changes
+    num_reformatted = multipolygon_mask.sum()
+    # if num_reformatted > 0:
+    #     print(f"Reformatted {num_reformatted} MultiPolygon geometries to Polygon")
+
+    return df_modified
+
+
 def whisp_stats_ee_to_drive(
     feature_collection: ee.FeatureCollection,
     external_id_column=None,
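reformat_geometry_type is a plain pandas transformation, so its effect is easy to see in isolation. A toy illustration, not from the package; it assumes geometry_type_column resolves to "geometry_type", which is only an assumption about the config_runtime constant:

import pandas as pd

geometry_type_column = "geometry_type"  # assumed value of the config_runtime constant

df = pd.DataFrame(
    {
        "plot_id": [1, 2, 3],
        geometry_type_column: ["Polygon", "MultiPolygon", "Point"],
    }
)

# Same logic as the function above: relabel MultiPolygon rows as Polygon.
out = df.copy()
mask = out[geometry_type_column] == "MultiPolygon"
out.loc[mask, geometry_type_column] = "Polygon"

print(out[geometry_type_column].tolist())  # ['Polygon', 'Polygon', 'Point']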
@@ -813,7 +1080,9 @@ def get_stats(
         print("Using provided whisp_image")
     else:
         img_combined = combine_datasets(
-            national_codes=national_codes,
+            national_codes=national_codes,
+            validate_bands=validate_bands,
+            include_context_bands=False,
         )
         print(f"Combining datasets with national_codes: {national_codes}")

@@ -822,7 +1091,7 @@ def get_stats(
         print("Processing single feature")
         # OPTIMIZATION: Create cached images for single feature processing
         water_all = get_water_flag_image()
-
+        bounds_ADM1 = get_admin_boundaries_fc()
         output = ee.FeatureCollection(
             [
                 get_stats_feature(
@@ -830,7 +1099,7 @@
                     img_combined,
                     unit_type=unit_type,
                     water_all=water_all,
-
+                    bounds_ADM1=bounds_ADM1,
                 )
             ]
         )
|
|
|
852
1121
|
"""
|
|
853
1122
|
Calculate statistics for a feature collection using Whisp datasets.
|
|
854
1123
|
|
|
855
|
-
OPTIMIZATION: Creates water flag and
|
|
1124
|
+
OPTIMIZATION: Creates water flag and admin_boundaries images once and reuses
|
|
856
1125
|
them for all features instead of recreating them for each feature.
|
|
857
1126
|
This saves 7-15 seconds per analysis.
|
|
858
1127
|
|
|
@@ -878,7 +1147,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
878
1147
|
# OPTIMIZATION: Create cached images once before processing features
|
|
879
1148
|
# These will be reused for all features instead of being recreated each time
|
|
880
1149
|
water_all = get_water_flag_image()
|
|
881
|
-
|
|
1150
|
+
bounds_ADM1 = get_admin_boundaries_fc()
|
|
882
1151
|
|
|
883
1152
|
out_feature_col = ee.FeatureCollection(
|
|
884
1153
|
feature_col.map(
|
|
@@ -887,7 +1156,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
887
1156
|
img_combined,
|
|
888
1157
|
unit_type=unit_type,
|
|
889
1158
|
water_all=water_all,
|
|
890
|
-
|
|
1159
|
+
bounds_ADM1=bounds_ADM1,
|
|
891
1160
|
)
|
|
892
1161
|
)
|
|
893
1162
|
)
|
|
@@ -901,12 +1170,12 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
901
1170
|
|
|
902
1171
|
|
|
903
1172
|
def get_stats_feature(
|
|
904
|
-
feature, img_combined, unit_type="ha", water_all=None,
|
|
1173
|
+
feature, img_combined, unit_type="ha", water_all=None, bounds_ADM1=None
|
|
905
1174
|
):
|
|
906
1175
|
"""
|
|
907
1176
|
Get statistics for a single feature using a pre-combined image.
|
|
908
1177
|
|
|
909
|
-
OPTIMIZATION: Accepts cached water/
|
|
1178
|
+
OPTIMIZATION: Accepts cached water/admin_boundaries images to avoid recreating
|
|
910
1179
|
them for every feature.
|
|
911
1180
|
|
|
912
1181
|
Parameters
|
|
@@ -919,8 +1188,8 @@ def get_stats_feature(
|
|
|
919
1188
|
Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
|
|
920
1189
|
water_all : ee.Image, optional
|
|
921
1190
|
Cached water flag image
|
|
922
|
-
|
|
923
|
-
Cached
|
|
1191
|
+
bounds_ADM1 : ee.FeatureCollection, optional
|
|
1192
|
+
Cached admin_boundaries feature collection
|
|
924
1193
|
|
|
925
1194
|
Returns
|
|
926
1195
|
-------
|
|
@@ -936,7 +1205,7 @@ def get_stats_feature(
|
|
|
936
1205
|
)
|
|
937
1206
|
|
|
938
1207
|
# Get basic feature information with cached images
|
|
939
|
-
feature_info = get_type_and_location(feature, water_all,
|
|
1208
|
+
feature_info = get_type_and_location(feature, water_all, bounds_ADM1)
|
|
940
1209
|
|
|
941
1210
|
# add statistics unit type (e.g., percentage or hectares) to dictionary
|
|
942
1211
|
stats_unit_type = ee.Dictionary({stats_unit_type_column: unit_type})
|
|
@@ -985,11 +1254,11 @@ def get_stats_feature(
|
|
|
985
1254
|
|
|
986
1255
|
|
|
987
1256
|
# Get basic feature information - uses admin and water datasets in gee.
|
|
988
|
-
def get_type_and_location(feature, water_all=None,
|
|
1257
|
+
def get_type_and_location(feature, water_all=None, bounds_ADM1=None):
|
|
989
1258
|
"""
|
|
990
1259
|
Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags.
|
|
991
1260
|
|
|
992
|
-
OPTIMIZATION: Accepts cached water flag image and
|
|
1261
|
+
OPTIMIZATION: Accepts cached water flag image and admin_boundaries collection
|
|
993
1262
|
to avoid recreating them for every feature (saves 7-15 seconds per analysis).
|
|
994
1263
|
|
|
995
1264
|
Parameters
|
|
@@ -998,8 +1267,8 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
|
998
1267
|
The feature to extract information from
|
|
999
1268
|
water_all : ee.Image, optional
|
|
1000
1269
|
Cached water flag image. If None, creates it.
|
|
1001
|
-
|
|
1002
|
-
Cached
|
|
1270
|
+
bounds_ADM1 : ee.FeatureCollection, optional
|
|
1271
|
+
Cached admin_boundaries feature collection. If None, loads it.
|
|
1003
1272
|
|
|
1004
1273
|
Returns
|
|
1005
1274
|
-------
|
|
@@ -1007,19 +1276,23 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
|
1007
1276
|
Dictionary with feature information
|
|
1008
1277
|
"""
|
|
1009
1278
|
# Get centroid of the feature's geometry
|
|
1010
|
-
centroid = feature.geometry().centroid(1)
|
|
1279
|
+
centroid = feature.geometry().centroid(0.1)
|
|
1011
1280
|
|
|
1012
|
-
# OPTIMIZATION: Use cached
|
|
1013
|
-
if
|
|
1014
|
-
|
|
1281
|
+
# OPTIMIZATION: Use cached admin_boundaries
|
|
1282
|
+
if bounds_ADM1 is None:
|
|
1283
|
+
bounds_ADM1 = get_admin_boundaries_fc()
|
|
1015
1284
|
|
|
1016
|
-
# Fetch location info from
|
|
1017
|
-
location = ee.Dictionary(
|
|
1018
|
-
country = ee.Dictionary({iso3_country_column: location.get("
|
|
1285
|
+
# Fetch location info from GAUL 2024 L1 (country, admin)
|
|
1286
|
+
location = ee.Dictionary(get_admin_boundaries_info(centroid, bounds_ADM1))
|
|
1287
|
+
country = ee.Dictionary({iso3_country_column: location.get("iso3_code")})
|
|
1019
1288
|
|
|
1020
1289
|
admin_1 = ee.Dictionary(
|
|
1021
|
-
{admin_1_column: location.get("
|
|
1022
|
-
) # Administrative level 1 (
|
|
1290
|
+
{admin_1_column: location.get("gaul1_name")}
|
|
1291
|
+
) # Administrative level 1 (from GAUL 2024 L1)
|
|
1292
|
+
|
|
1293
|
+
# OPTIMIZATION: Use cached water flag image
|
|
1294
|
+
if water_all is None:
|
|
1295
|
+
water_all = get_water_flag_image()
|
|
1023
1296
|
|
|
1024
1297
|
# OPTIMIZATION: Use cached water flag image
|
|
1025
1298
|
if water_all is None:
|
|
@@ -1037,8 +1310,12 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
|
1037
1310
|
coords_list = centroid.coordinates()
|
|
1038
1311
|
coords_dict = ee.Dictionary(
|
|
1039
1312
|
{
|
|
1040
|
-
centroid_x_coord_column: coords_list.get(0)
|
|
1041
|
-
|
|
1313
|
+
centroid_x_coord_column: ee.Number(coords_list.get(0)).format(
|
|
1314
|
+
"%.6f"
|
|
1315
|
+
), # Longitude (6 dp)
|
|
1316
|
+
centroid_y_coord_column: ee.Number(coords_list.get(1)).format(
|
|
1317
|
+
"%.6f"
|
|
1318
|
+
), # Latitude (6 dp)
|
|
1042
1319
|
}
|
|
1043
1320
|
)
|
|
1044
1321
|
|
|
@@ -1076,36 +1353,36 @@ def percent_and_format(val, area_ha):
|
|
|
1076
1353
|
return ee.Number(formatted_value)
|
|
1077
1354
|
|
|
1078
1355
|
|
|
1079
|
-
#
|
|
1080
|
-
def
|
|
1356
|
+
# GAUL 2024 L1 - admin units from FAO, allows commercial use
|
|
1357
|
+
def get_admin_boundaries_info(geometry, bounds_ADM1=None):
|
|
1081
1358
|
"""
|
|
1082
|
-
Get
|
|
1359
|
+
Get GAUL 2024 L1 info for a geometry (country ISO3 code and admin boundary name).
|
|
1083
1360
|
|
|
1084
|
-
OPTIMIZATION: Accepts cached
|
|
1361
|
+
OPTIMIZATION: Accepts cached GAUL 2024 L1 FeatureCollection to avoid
|
|
1085
1362
|
reloading it for every feature (saves 2-5 seconds per analysis).
|
|
1086
1363
|
|
|
1087
1364
|
Parameters
|
|
1088
1365
|
----------
|
|
1089
1366
|
geometry : ee.Geometry
|
|
1090
1367
|
The geometry to query
|
|
1091
|
-
|
|
1092
|
-
Cached
|
|
1368
|
+
bounds_ADM1 : ee.FeatureCollection, optional
|
|
1369
|
+
Cached GAUL 2024 L1 feature collection. If None, loads it.
|
|
1093
1370
|
|
|
1094
1371
|
Returns
|
|
1095
1372
|
-------
|
|
1096
1373
|
ee.Dictionary
|
|
1097
|
-
Dictionary with
|
|
1374
|
+
Dictionary with iso3_code (country) and gaul1_name (admin boundary name)
|
|
1098
1375
|
"""
|
|
1099
|
-
if
|
|
1100
|
-
|
|
1376
|
+
if bounds_ADM1 is None:
|
|
1377
|
+
bounds_ADM1 = get_admin_boundaries_fc()
|
|
1101
1378
|
|
|
1102
|
-
polygonsIntersectPoint =
|
|
1103
|
-
backup_dict = ee.Dictionary({"
|
|
1379
|
+
polygonsIntersectPoint = bounds_ADM1.filterBounds(geometry)
|
|
1380
|
+
backup_dict = ee.Dictionary({"iso3_code": "Unknown", "gaul1_name": "Unknown"})
|
|
1104
1381
|
return ee.Algorithms.If(
|
|
1105
1382
|
polygonsIntersectPoint.size().gt(0),
|
|
1106
1383
|
polygonsIntersectPoint.first()
|
|
1107
1384
|
.toDictionary()
|
|
1108
|
-
.select(["
|
|
1385
|
+
.select(["iso3_code", "gaul1_name"]),
|
|
1109
1386
|
backup_dict,
|
|
1110
1387
|
)
|
|
1111
1388
|
|
|
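get_admin_boundaries_info is a server-side point-in-polygon lookup: it filters the cached GAUL 2024 L1 collection to features intersecting the query geometry and falls back to "Unknown" values when nothing matches. A minimal client-side sketch, not part of the diff; it assumes an authenticated Earth Engine session and uses arbitrary test points:

import ee
from openforis_whisp.stats import get_admin_boundaries_fc, get_admin_boundaries_info

ee.Initialize()  # assumes Earth Engine credentials/project are already configured

adm1 = get_admin_boundaries_fc()

# On land: expect a dictionary with "iso3_code" and "gaul1_name" keys.
land = ee.Dictionary(get_admin_boundaries_info(ee.Geometry.Point([12.49, 41.90]), adm1))
print(land.getInfo())

# In the open ocean: falls back to {"iso3_code": "Unknown", "gaul1_name": "Unknown"}.
ocean = ee.Dictionary(get_admin_boundaries_info(ee.Geometry.Point([-140.0, -30.0]), adm1))
print(ocean.getInfo())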
{openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 2.0.0b3
+Version: 3.0.0a2
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
openforis_whisp-3.0.0a2.dist-info/RECORD
@@ -0,0 +1,20 @@
+openforis_whisp/__init__.py,sha256=-r_9LFxbV6d-o4s0_huhaXxve6GIzCwl3pXKuJo6ixE,3663
+openforis_whisp/advanced_stats.py,sha256=xrwKHG-c44_UkFha7TFgf71mo9UMw5ZZL3XQTPF5luM,92681
+openforis_whisp/data_checks.py,sha256=KwgD72FA_n7joiJadGRpzntd2sLo0aqGNbOjRkB8iQI,32293
+openforis_whisp/data_conversion.py,sha256=L2IsiUyQUt3aHgSYGbIhgPGwM7eyS3nLVEoNO9YqQeM,21888
+openforis_whisp/datasets.py,sha256=aGJy0OYN4d0nsH3_IOYlHl-WCB7KFwZwMJ-dBi5Hc5Y,53470
+openforis_whisp/logger.py,sha256=9M6_3mdpoiWfC-pDwM9vKmB2l5Gul6Rb5rNTNh-_nzs,3054
+openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
+openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
+openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
+openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
+openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
+openforis_whisp/reformat.py,sha256=mIooJ3zfSTDY3_Mx3OAW4jpfQ72q3zasG9tl58PdfN4,33729
+openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
+openforis_whisp/stats.py,sha256=dCQXx6KKEV99owqyPURk6CL97kQQARjetFrIz1ZbIvs,65725
+openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
+openforis_whisp-3.0.0a2.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+openforis_whisp-3.0.0a2.dist-info/METADATA,sha256=wG4vc7B-f0JXmNkTUh4wJ-H0KPpbgyU9OfMwGewZq_A,16684
+openforis_whisp-3.0.0a2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openforis_whisp-3.0.0a2.dist-info/RECORD,,
openforis_whisp-2.0.0b3.dist-info/RECORD
@@ -1,16 +0,0 @@
-openforis_whisp/__init__.py,sha256=bnEZ4_X-mJInltSKVI0STfvrb09Df-z21buIVFDif5w,2524
-openforis_whisp/data_conversion.py,sha256=Mc6dXbvoHBeRzl3o83pyKeI5_sPC8Yc90Tj4bN6_Bv8,17519
-openforis_whisp/datasets.py,sha256=hb8Y35vTcQQNUH_z2_l8Pu6Sjn_E8BzSow1-qAfs9bQ,50194
-openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
-openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
-openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
-openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
-openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
-openforis_whisp/reformat.py,sha256=rtkKs8z1mJd5JD9rXuMk1tbbbTvQxCCh68tA4hIQAv8,25445
-openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
-openforis_whisp/stats.py,sha256=1ikeV8UYpL8O5HZJY8lPXrhQwZ9D1IglbOsagZHCYdA,54000
-openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
-openforis_whisp-2.0.0b3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
-openforis_whisp-2.0.0b3.dist-info/METADATA,sha256=Opn73PWlsOQWTiwZ-HYvLkrPh4jYQELtSIIqDf4MsoQ,16684
-openforis_whisp-2.0.0b3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-openforis_whisp-2.0.0b3.dist-info/RECORD,,
{openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a2.dist-info}/LICENSE
File without changes
{openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a2.dist-info}/WHEEL
File without changes