openforis-whisp 2.0.0b3__py3-none-any.whl → 3.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +35 -4
- openforis_whisp/advanced_stats.py +2070 -0
- openforis_whisp/data_checks.py +642 -0
- openforis_whisp/data_conversion.py +86 -44
- openforis_whisp/datasets.py +124 -36
- openforis_whisp/logger.py +26 -0
- openforis_whisp/parameters/__init__.py +0 -0
- openforis_whisp/parameters/lookup_gaul1_admin.py +18663 -0
- openforis_whisp/reformat.py +198 -2
- openforis_whisp/stats.py +314 -52
- {openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a1.dist-info}/METADATA +1 -1
- openforis_whisp-3.0.0a1.dist-info/RECORD +20 -0
- openforis_whisp-2.0.0b3.dist-info/RECORD +0 -16
- {openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a1.dist-info}/LICENSE +0 -0
- {openforis_whisp-2.0.0b3.dist-info → openforis_whisp-3.0.0a1.dist-info}/WHEEL +0 -0
openforis_whisp/stats.py
CHANGED
|
@@ -3,6 +3,7 @@ import pandas as pd
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from .datasets import combine_datasets
|
|
5
5
|
import json
|
|
6
|
+
import logging
|
|
6
7
|
import country_converter as coco
|
|
7
8
|
from openforis_whisp.parameters.config_runtime import (
|
|
8
9
|
plot_id_column,
|
|
@@ -41,7 +42,7 @@ from .reformat import (
|
|
|
41
42
|
# to avoid repeated expensive operations. This saves 7-15 seconds per analysis.
|
|
42
43
|
|
|
43
44
|
_WATER_FLAG_IMAGE = None
|
|
44
|
-
|
|
45
|
+
_admin_boundaries_FC = None
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
def get_water_flag_image():
|
|
@@ -63,26 +64,28 @@ def get_water_flag_image():
|
|
|
63
64
|
return _WATER_FLAG_IMAGE
|
|
64
65
|
|
|
65
66
|
|
|
66
|
-
def
|
|
67
|
+
def get_admin_boundaries_fc():
    """
    Return the GAUL 2024 L1 administrative boundary feature collection.

    The collection handle is stored in the module-level ``_admin_boundaries_FC``
    variable the first time this function runs; later calls hand back that same
    object instead of constructing a new ``ee.FeatureCollection``.

    Returns
    -------
    ee.FeatureCollection
        The (cached) GAUL 2024 L1 administrative boundary feature collection
    """
    global _admin_boundaries_FC

    # Fast path: the collection was already created by an earlier call.
    if _admin_boundaries_FC is not None:
        return _admin_boundaries_FC

    # First call: build the collection handle and remember it for reuse.
    _admin_boundaries_FC = ee.FeatureCollection(
        "projects/sat-io/open-datasets/FAO/GAUL/GAUL_2024_L1"
    )
    return _admin_boundaries_FC
|
|
83
86
|
|
|
84
87
|
|
|
85
|
-
def
|
|
88
|
+
def whisp_formatted_stats_geojson_to_df_legacy(
|
|
86
89
|
input_geojson_filepath: Path | str,
|
|
87
90
|
external_id_column=None,
|
|
88
91
|
remove_geom=False,
|
|
@@ -90,9 +93,15 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
90
93
|
unit_type="ha",
|
|
91
94
|
whisp_image=None,
|
|
92
95
|
custom_bands=None, # New parameter
|
|
96
|
+
validate_geometries: bool = False,
|
|
93
97
|
) -> pd.DataFrame:
|
|
94
98
|
"""
|
|
95
|
-
|
|
99
|
+
Legacy function for basic Whisp stats extraction.
|
|
100
|
+
|
|
101
|
+
DEPRECATED: This is the original implementation maintained for backward compatibility.
|
|
102
|
+
Use whisp_formatted_stats_geojson_to_df() for new code, which provides automatic
|
|
103
|
+
optimization, formatting, and schema validation.
|
|
104
|
+
|
|
96
105
|
Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
|
|
97
106
|
Output df is validated against a Pandera schema (created on the fly from the two lookup CSVs).
|
|
98
107
|
|
|
@@ -126,13 +135,48 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
126
135
|
- List of band names: ['Aa_test', 'elevation']
|
|
127
136
|
- Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
|
|
128
137
|
- None: preserves all extra columns automatically
|
|
138
|
+
validate_geometries : bool, optional
|
|
139
|
+
Whether to validate and fix invalid geometries, by default False.
|
|
140
|
+
Set to True to automatically fix invalid/self-intersecting polygons.
|
|
129
141
|
|
|
130
142
|
Returns
|
|
131
143
|
-------
|
|
132
144
|
df_stats : pd.DataFrame
|
|
133
145
|
The DataFrame containing the Whisp stats for the input ROI.
|
|
134
146
|
"""
|
|
135
|
-
|
|
147
|
+
# Load GeoJSON and validate geometries if requested
|
|
148
|
+
if validate_geometries:
|
|
149
|
+
import json
|
|
150
|
+
import geopandas as gpd
|
|
151
|
+
from shapely.validation import make_valid
|
|
152
|
+
import logging as py_logging
|
|
153
|
+
|
|
154
|
+
logger = py_logging.getLogger("whisp-legacy")
|
|
155
|
+
|
|
156
|
+
# Load GeoJSON file
|
|
157
|
+
with open(input_geojson_filepath, "r") as f:
|
|
158
|
+
geojson_data = json.load(f)
|
|
159
|
+
|
|
160
|
+
# Convert to GeoDataFrame
|
|
161
|
+
gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
|
|
162
|
+
|
|
163
|
+
# Validate and fix invalid geometries
|
|
164
|
+
valid_count = gdf.geometry.is_valid.sum()
|
|
165
|
+
invalid_count = len(gdf) - valid_count
|
|
166
|
+
if invalid_count > 0:
|
|
167
|
+
logger.warning(f"Fixing {invalid_count} invalid geometries")
|
|
168
|
+
gdf["geometry"] = gdf["geometry"].apply(
|
|
169
|
+
lambda g: make_valid(g) if g and not g.is_valid else g
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# Convert back to GeoJSON dict (stays in memory - no temp files!)
|
|
173
|
+
geojson_cleaned = json.loads(gdf.to_json())
|
|
174
|
+
|
|
175
|
+
# OPTIMIZATION: Pass GeoJSON dict directly - eliminates file I/O overhead
|
|
176
|
+
feature_collection = convert_geojson_to_ee(geojson_cleaned)
|
|
177
|
+
else:
|
|
178
|
+
# Original path - no validation
|
|
179
|
+
feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
|
|
136
180
|
|
|
137
181
|
return whisp_formatted_stats_ee_to_df(
|
|
138
182
|
feature_collection,
|
|
@@ -145,6 +189,169 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
145
189
|
)
|
|
146
190
|
|
|
147
191
|
|
|
192
|
+
def whisp_formatted_stats_geojson_to_df(
|
|
193
|
+
input_geojson_filepath: Path | str,
|
|
194
|
+
external_id_column=None,
|
|
195
|
+
remove_geom=False,
|
|
196
|
+
national_codes=None,
|
|
197
|
+
unit_type="ha",
|
|
198
|
+
whisp_image=None,
|
|
199
|
+
custom_bands=None,
|
|
200
|
+
mode: str = "sequential",
|
|
201
|
+
batch_size: int = 10,
|
|
202
|
+
max_concurrent: int = 20,
|
|
203
|
+
validate_geometries: bool = False,
|
|
204
|
+
) -> pd.DataFrame:
|
|
205
|
+
"""
|
|
206
|
+
Main entry point for converting GeoJSON to Whisp statistics.
|
|
207
|
+
|
|
208
|
+
Routes to the appropriate processing mode with automatic formatting and validation.
|
|
209
|
+
|
|
210
|
+
Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
|
|
211
|
+
Output DataFrame is validated against a Panderas schema (created from lookup CSVs).
|
|
212
|
+
Results are automatically formatted and unit-converted (ha or percent).
|
|
213
|
+
|
|
214
|
+
If `external_id_column` is provided, it will be used to link external identifiers
|
|
215
|
+
from the input GeoJSON to the output DataFrame.
|
|
216
|
+
|
|
217
|
+
Parameters
|
|
218
|
+
----------
|
|
219
|
+
input_geojson_filepath : Path | str
|
|
220
|
+
The filepath to the GeoJSON of the ROI to analyze.
|
|
221
|
+
external_id_column : str, optional
|
|
222
|
+
The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
|
|
223
|
+
This column must exist as a property in ALL features of the GeoJSON file.
|
|
224
|
+
Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
|
|
225
|
+
remove_geom : bool, default=False
|
|
226
|
+
If True, the geometry of the GeoJSON is removed from the output DataFrame.
|
|
227
|
+
national_codes : list, optional
|
|
228
|
+
List of ISO2 country codes to include national datasets.
|
|
229
|
+
unit_type: str, optional
|
|
230
|
+
Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
|
|
231
|
+
whisp_image : ee.Image, optional
|
|
232
|
+
Pre-combined multiband Earth Engine Image containing all Whisp datasets.
|
|
233
|
+
If provided, this image will be used instead of combining datasets based on national_codes.
|
|
234
|
+
If None, datasets will be combined automatically using national_codes parameter.
|
|
235
|
+
custom_bands : list or dict, optional
|
|
236
|
+
Custom band information for extra columns. Can be:
|
|
237
|
+
- List of band names: ['Aa_test', 'elevation']
|
|
238
|
+
- Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
|
|
239
|
+
- None: preserves all extra columns automatically
|
|
240
|
+
mode : str, optional
|
|
241
|
+
Processing mode, by default "concurrent":
|
|
242
|
+
- "concurrent": Uses high-volume endpoint with concurrent batching (recommended for large files)
|
|
243
|
+
- "sequential": Uses standard endpoint for sequential processing (more stable)
|
|
244
|
+
- "legacy": Uses original implementation (basic stats extraction only, no formatting)
|
|
245
|
+
batch_size : int, optional
|
|
246
|
+
Features per batch for concurrent/sequential modes, by default 10.
|
|
247
|
+
Only applicable for "concurrent" and "sequential" modes.
|
|
248
|
+
max_concurrent : int, optional
|
|
249
|
+
Maximum concurrent EE calls for concurrent mode, by default 20.
|
|
250
|
+
Only applicable for "concurrent" mode.
|
|
251
|
+
validate_geometries : bool, optional
|
|
252
|
+
Whether to validate and fix invalid geometries, by default False.
|
|
253
|
+
Set to True to automatically fix invalid/self-intersecting polygons.
|
|
254
|
+
For production workflows, it's recommended to use geometry validation and
|
|
255
|
+
cleaning tools BEFORE processing with this function.
|
|
256
|
+
|
|
257
|
+
Returns
|
|
258
|
+
-------
|
|
259
|
+
df_stats : pd.DataFrame
|
|
260
|
+
The DataFrame containing the Whisp stats for the input ROI,
|
|
261
|
+
automatically formatted and validated.
|
|
262
|
+
|
|
263
|
+
Examples
|
|
264
|
+
--------
|
|
265
|
+
>>> # Use concurrent processing (default, recommended for large datasets)
|
|
266
|
+
>>> df = whisp_formatted_stats_geojson_to_df("data.geojson")
|
|
267
|
+
|
|
268
|
+
>>> # Use sequential processing for more stable/predictable results
|
|
269
|
+
>>> df = whisp_formatted_stats_geojson_to_df(
|
|
270
|
+
... "data.geojson",
|
|
271
|
+
... mode="sequential"
|
|
272
|
+
... )
|
|
273
|
+
|
|
274
|
+
>>> # Adjust concurrency parameters
|
|
275
|
+
>>> df = whisp_formatted_stats_geojson_to_df(
|
|
276
|
+
... "large_data.geojson",
|
|
277
|
+
... mode="concurrent",
|
|
278
|
+
... max_concurrent=30,
|
|
279
|
+
... batch_size=15
|
|
280
|
+
... )
|
|
281
|
+
|
|
282
|
+
>>> # Use legacy mode for backward compatibility (basic extraction only)
|
|
283
|
+
>>> df = whisp_formatted_stats_geojson_to_df(
|
|
284
|
+
... "data.geojson",
|
|
285
|
+
... mode="legacy"
|
|
286
|
+
... )
|
|
287
|
+
"""
|
|
288
|
+
# Import here to avoid circular imports
|
|
289
|
+
try:
|
|
290
|
+
from openforis_whisp.advanced_stats import (
|
|
291
|
+
whisp_formatted_stats_geojson_to_df_fast,
|
|
292
|
+
)
|
|
293
|
+
except ImportError:
|
|
294
|
+
# Fallback to legacy if advanced_stats not available
|
|
295
|
+
mode = "legacy"
|
|
296
|
+
|
|
297
|
+
logger = logging.getLogger("whisp")
|
|
298
|
+
|
|
299
|
+
if mode == "legacy":
|
|
300
|
+
# Log info if batch_size or max_concurrent were passed but won't be used
|
|
301
|
+
if batch_size != 10 or max_concurrent != 20:
|
|
302
|
+
unused = []
|
|
303
|
+
if batch_size != 10:
|
|
304
|
+
unused.append(f"batch_size={batch_size}")
|
|
305
|
+
if max_concurrent != 20:
|
|
306
|
+
unused.append(f"max_concurrent={max_concurrent}")
|
|
307
|
+
logger.info(
|
|
308
|
+
f"Mode is 'legacy': {', '.join(unused)}\n"
|
|
309
|
+
"parameter(s) are not used in legacy mode."
|
|
310
|
+
)
|
|
311
|
+
# Use original implementation (basic stats extraction only)
|
|
312
|
+
return whisp_formatted_stats_geojson_to_df_legacy(
|
|
313
|
+
input_geojson_filepath=input_geojson_filepath,
|
|
314
|
+
external_id_column=external_id_column,
|
|
315
|
+
remove_geom=remove_geom,
|
|
316
|
+
national_codes=national_codes,
|
|
317
|
+
unit_type=unit_type,
|
|
318
|
+
whisp_image=whisp_image,
|
|
319
|
+
custom_bands=custom_bands,
|
|
320
|
+
validate_geometries=validate_geometries,
|
|
321
|
+
)
|
|
322
|
+
elif mode in ("concurrent", "sequential"):
|
|
323
|
+
# Log info if batch_size or max_concurrent are not used in sequential mode
|
|
324
|
+
if mode == "sequential":
|
|
325
|
+
unused = []
|
|
326
|
+
if batch_size != 10:
|
|
327
|
+
unused.append(f"batch_size={batch_size}")
|
|
328
|
+
if max_concurrent != 20:
|
|
329
|
+
unused.append(f"max_concurrent={max_concurrent}")
|
|
330
|
+
if unused:
|
|
331
|
+
logger.info(
|
|
332
|
+
f"Mode is 'sequential': {', '.join(unused)}\n"
|
|
333
|
+
"parameter(s) are not used in sequential (single-threaded) mode."
|
|
334
|
+
)
|
|
335
|
+
# Route to fast function with explicit mode (skip auto-detection)
|
|
336
|
+
return whisp_formatted_stats_geojson_to_df_fast(
|
|
337
|
+
input_geojson_filepath=input_geojson_filepath,
|
|
338
|
+
external_id_column=external_id_column,
|
|
339
|
+
remove_geom=remove_geom,
|
|
340
|
+
national_codes=national_codes,
|
|
341
|
+
unit_type=unit_type,
|
|
342
|
+
whisp_image=whisp_image,
|
|
343
|
+
custom_bands=custom_bands,
|
|
344
|
+
mode=mode, # Pass mode directly (concurrent or sequential)
|
|
345
|
+
batch_size=batch_size,
|
|
346
|
+
max_concurrent=max_concurrent,
|
|
347
|
+
validate_geometries=validate_geometries,
|
|
348
|
+
)
|
|
349
|
+
else:
|
|
350
|
+
raise ValueError(
|
|
351
|
+
f"Invalid mode '{mode}'. Must be 'concurrent', 'sequential', or 'legacy'."
|
|
352
|
+
)
|
|
353
|
+
|
|
354
|
+
|
|
148
355
|
def whisp_formatted_stats_geojson_to_geojson(
|
|
149
356
|
input_geojson_filepath,
|
|
150
357
|
output_geojson_filepath,
|
|
@@ -188,7 +395,8 @@ def whisp_formatted_stats_geojson_to_geojson(
|
|
|
188
395
|
# Convert the df to GeoJSON
|
|
189
396
|
convert_df_to_geojson(df, output_geojson_filepath, geo_column)
|
|
190
397
|
|
|
191
|
-
|
|
398
|
+
# Suppress verbose output
|
|
399
|
+
# print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")
|
|
192
400
|
|
|
193
401
|
|
|
194
402
|
def whisp_formatted_stats_ee_to_geojson(
|
|
@@ -688,6 +896,13 @@ def whisp_stats_ee_to_df(
|
|
|
688
896
|
print(f"An error occurred during point geometry area adjustment: {e}")
|
|
689
897
|
# Continue without the adjustment rather than failing completely
|
|
690
898
|
|
|
899
|
+
# Reformat geometry types (MultiPolygon -> Polygon)
|
|
900
|
+
try:
|
|
901
|
+
df_stats = reformat_geometry_type(df_stats)
|
|
902
|
+
except Exception as e:
|
|
903
|
+
print(f"An error occurred during geometry type reformatting: {e}")
|
|
904
|
+
# Continue without the adjustment rather than failing completely
|
|
905
|
+
|
|
691
906
|
return df_stats
|
|
692
907
|
|
|
693
908
|
|
|
@@ -727,6 +942,43 @@ def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
727
942
|
return df_modified
|
|
728
943
|
|
|
729
944
|
|
|
945
|
+
def reformat_geometry_type(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reformat geometry type classification in the DataFrame output.

    Rewrites every "MultiPolygon" entry in the geometry type column
    (``geometry_type_column``) to "Polygon" so the output uses a single label.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing geometry type column

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with standardized geometry types. If the geometry
        type column is missing, a warning is printed and the input DataFrame
        itself (not a copy) is returned unchanged.
    """
    # Nothing to standardize when the geometry type column is absent.
    if geometry_type_column not in df.columns:
        print(
            f"Warning: {geometry_type_column} column not found. Skipping geometry type reformatting."
        )
        return df

    # Work on a copy to avoid modifying the caller's DataFrame.
    df_modified = df.copy()

    # Relabel MultiPolygon rows as Polygon; all other values are left as-is.
    multipolygon_mask = df_modified[geometry_type_column] == "MultiPolygon"
    df_modified.loc[multipolygon_mask, geometry_type_column] = "Polygon"

    return df_modified
|
|
980
|
+
|
|
981
|
+
|
|
730
982
|
def whisp_stats_ee_to_drive(
|
|
731
983
|
feature_collection: ee.FeatureCollection,
|
|
732
984
|
external_id_column=None,
|
|
@@ -813,7 +1065,9 @@ def get_stats(
|
|
|
813
1065
|
print("Using provided whisp_image")
|
|
814
1066
|
else:
|
|
815
1067
|
img_combined = combine_datasets(
|
|
816
|
-
national_codes=national_codes,
|
|
1068
|
+
national_codes=national_codes,
|
|
1069
|
+
validate_bands=validate_bands,
|
|
1070
|
+
include_context_bands=False,
|
|
817
1071
|
)
|
|
818
1072
|
print(f"Combining datasets with national_codes: {national_codes}")
|
|
819
1073
|
|
|
@@ -822,7 +1076,7 @@ def get_stats(
|
|
|
822
1076
|
print("Processing single feature")
|
|
823
1077
|
# OPTIMIZATION: Create cached images for single feature processing
|
|
824
1078
|
water_all = get_water_flag_image()
|
|
825
|
-
|
|
1079
|
+
bounds_ADM1 = get_admin_boundaries_fc()
|
|
826
1080
|
output = ee.FeatureCollection(
|
|
827
1081
|
[
|
|
828
1082
|
get_stats_feature(
|
|
@@ -830,7 +1084,7 @@ def get_stats(
|
|
|
830
1084
|
img_combined,
|
|
831
1085
|
unit_type=unit_type,
|
|
832
1086
|
water_all=water_all,
|
|
833
|
-
|
|
1087
|
+
bounds_ADM1=bounds_ADM1,
|
|
834
1088
|
)
|
|
835
1089
|
]
|
|
836
1090
|
)
|
|
@@ -852,7 +1106,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
852
1106
|
"""
|
|
853
1107
|
Calculate statistics for a feature collection using Whisp datasets.
|
|
854
1108
|
|
|
855
|
-
OPTIMIZATION: Creates water flag and
|
|
1109
|
+
OPTIMIZATION: Creates the water flag image and admin_boundaries collection once and reuses
|
|
856
1110
|
them for all features instead of recreating them for each feature.
|
|
857
1111
|
This saves 7-15 seconds per analysis.
|
|
858
1112
|
|
|
@@ -878,7 +1132,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
878
1132
|
# OPTIMIZATION: Create cached images once before processing features
|
|
879
1133
|
# These will be reused for all features instead of being recreated each time
|
|
880
1134
|
water_all = get_water_flag_image()
|
|
881
|
-
|
|
1135
|
+
bounds_ADM1 = get_admin_boundaries_fc()
|
|
882
1136
|
|
|
883
1137
|
out_feature_col = ee.FeatureCollection(
|
|
884
1138
|
feature_col.map(
|
|
@@ -887,7 +1141,7 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
887
1141
|
img_combined,
|
|
888
1142
|
unit_type=unit_type,
|
|
889
1143
|
water_all=water_all,
|
|
890
|
-
|
|
1144
|
+
bounds_ADM1=bounds_ADM1,
|
|
891
1145
|
)
|
|
892
1146
|
)
|
|
893
1147
|
)
|
|
@@ -901,12 +1155,12 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
901
1155
|
|
|
902
1156
|
|
|
903
1157
|
def get_stats_feature(
|
|
904
|
-
feature, img_combined, unit_type="ha", water_all=None,
|
|
1158
|
+
feature, img_combined, unit_type="ha", water_all=None, bounds_ADM1=None
|
|
905
1159
|
):
|
|
906
1160
|
"""
|
|
907
1161
|
Get statistics for a single feature using a pre-combined image.
|
|
908
1162
|
|
|
909
|
-
OPTIMIZATION: Accepts cached water/
|
|
1163
|
+
OPTIMIZATION: Accepts cached water/admin_boundaries images to avoid recreating
|
|
910
1164
|
them for every feature.
|
|
911
1165
|
|
|
912
1166
|
Parameters
|
|
@@ -919,8 +1173,8 @@ def get_stats_feature(
|
|
|
919
1173
|
Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
|
|
920
1174
|
water_all : ee.Image, optional
|
|
921
1175
|
Cached water flag image
|
|
922
|
-
|
|
923
|
-
Cached
|
|
1176
|
+
bounds_ADM1 : ee.FeatureCollection, optional
|
|
1177
|
+
Cached admin_boundaries feature collection
|
|
924
1178
|
|
|
925
1179
|
Returns
|
|
926
1180
|
-------
|
|
@@ -936,7 +1190,7 @@ def get_stats_feature(
|
|
|
936
1190
|
)
|
|
937
1191
|
|
|
938
1192
|
# Get basic feature information with cached images
|
|
939
|
-
feature_info = get_type_and_location(feature, water_all,
|
|
1193
|
+
feature_info = get_type_and_location(feature, water_all, bounds_ADM1)
|
|
940
1194
|
|
|
941
1195
|
# add statistics unit type (e.g., percentage or hectares) to dictionary
|
|
942
1196
|
stats_unit_type = ee.Dictionary({stats_unit_type_column: unit_type})
|
|
@@ -985,11 +1239,11 @@ def get_stats_feature(
|
|
|
985
1239
|
|
|
986
1240
|
|
|
987
1241
|
# Get basic feature information - uses admin and water datasets in gee.
|
|
988
|
-
def get_type_and_location(feature, water_all=None,
|
|
1242
|
+
def get_type_and_location(feature, water_all=None, bounds_ADM1=None):
|
|
989
1243
|
"""
|
|
990
1244
|
Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags.
|
|
991
1245
|
|
|
992
|
-
OPTIMIZATION: Accepts cached water flag image and
|
|
1246
|
+
OPTIMIZATION: Accepts cached water flag image and admin_boundaries collection
|
|
993
1247
|
to avoid recreating them for every feature (saves 7-15 seconds per analysis).
|
|
994
1248
|
|
|
995
1249
|
Parameters
|
|
@@ -998,8 +1252,8 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
|
998
1252
|
The feature to extract information from
|
|
999
1253
|
water_all : ee.Image, optional
|
|
1000
1254
|
Cached water flag image. If None, creates it.
|
|
1001
|
-
|
|
1002
|
-
Cached
|
|
1255
|
+
bounds_ADM1 : ee.FeatureCollection, optional
|
|
1256
|
+
Cached admin_boundaries feature collection. If None, loads it.
|
|
1003
1257
|
|
|
1004
1258
|
Returns
|
|
1005
1259
|
-------
|
|
@@ -1007,19 +1261,23 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
|
1007
1261
|
Dictionary with feature information
|
|
1008
1262
|
"""
|
|
1009
1263
|
# Get centroid of the feature's geometry
|
|
1010
|
-
centroid = feature.geometry().centroid(1)
|
|
1264
|
+
centroid = feature.geometry().centroid(0.1)
|
|
1011
1265
|
|
|
1012
|
-
# OPTIMIZATION: Use cached
|
|
1013
|
-
if
|
|
1014
|
-
|
|
1266
|
+
# OPTIMIZATION: Use cached admin_boundaries
|
|
1267
|
+
if bounds_ADM1 is None:
|
|
1268
|
+
bounds_ADM1 = get_admin_boundaries_fc()
|
|
1015
1269
|
|
|
1016
|
-
# Fetch location info from
|
|
1017
|
-
location = ee.Dictionary(
|
|
1018
|
-
country = ee.Dictionary({iso3_country_column: location.get("
|
|
1270
|
+
# Fetch location info from GAUL 2024 L1 (country, admin)
|
|
1271
|
+
location = ee.Dictionary(get_admin_boundaries_info(centroid, bounds_ADM1))
|
|
1272
|
+
country = ee.Dictionary({iso3_country_column: location.get("iso3_code")})
|
|
1019
1273
|
|
|
1020
1274
|
admin_1 = ee.Dictionary(
|
|
1021
|
-
{admin_1_column: location.get("
|
|
1022
|
-
) # Administrative level 1 (
|
|
1275
|
+
{admin_1_column: location.get("gaul1_name")}
|
|
1276
|
+
) # Administrative level 1 (from GAUL 2024 L1)
|
|
1277
|
+
|
|
1278
|
+
# OPTIMIZATION: Use cached water flag image
|
|
1279
|
+
if water_all is None:
|
|
1280
|
+
water_all = get_water_flag_image()
|
|
1023
1281
|
|
|
1024
1282
|
# OPTIMIZATION: Use cached water flag image
|
|
1025
1283
|
if water_all is None:
|
|
@@ -1037,8 +1295,12 @@ def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
|
1037
1295
|
coords_list = centroid.coordinates()
|
|
1038
1296
|
coords_dict = ee.Dictionary(
|
|
1039
1297
|
{
|
|
1040
|
-
centroid_x_coord_column: coords_list.get(0)
|
|
1041
|
-
|
|
1298
|
+
centroid_x_coord_column: ee.Number(coords_list.get(0)).format(
|
|
1299
|
+
"%.6f"
|
|
1300
|
+
), # Longitude (6 dp)
|
|
1301
|
+
centroid_y_coord_column: ee.Number(coords_list.get(1)).format(
|
|
1302
|
+
"%.6f"
|
|
1303
|
+
), # Latitude (6 dp)
|
|
1042
1304
|
}
|
|
1043
1305
|
)
|
|
1044
1306
|
|
|
@@ -1076,36 +1338,36 @@ def percent_and_format(val, area_ha):
|
|
|
1076
1338
|
return ee.Number(formatted_value)
|
|
1077
1339
|
|
|
1078
1340
|
|
|
1079
|
-
#
|
|
1080
|
-
def
|
|
1341
|
+
# GAUL 2024 L1 - admin units from FAO, allows commercial use
|
|
1342
|
+
def get_admin_boundaries_info(geometry, bounds_ADM1=None):
    """
    Look up the GAUL 2024 L1 country code and admin boundary name for a geometry.

    The boundary collection is filtered to the features whose bounds intersect
    ``geometry``; the ``iso3_code`` and ``gaul1_name`` properties of the first
    match are returned. When nothing matches, both keys are set to "Unknown".

    Parameters
    ----------
    geometry : ee.Geometry
        The geometry to query
    bounds_ADM1 : ee.FeatureCollection, optional
        GAUL 2024 L1 feature collection to search. If None, it is obtained
        from get_admin_boundaries_fc().

    Returns
    -------
    ee.ComputedObject
        Result of ``ee.Algorithms.If`` that evaluates to a dictionary with
        iso3_code (country) and gaul1_name (admin boundary name)
    """
    admin_fc = get_admin_boundaries_fc() if bounds_ADM1 is None else bounds_ADM1

    # Admin units whose bounds intersect the query geometry.
    matches = admin_fc.filterBounds(geometry)

    # Used when the geometry falls outside every admin unit.
    fallback = ee.Dictionary({"iso3_code": "Unknown", "gaul1_name": "Unknown"})

    has_match = matches.size().gt(0)
    first_match_props = (
        matches.first().toDictionary().select(["iso3_code", "gaul1_name"])
    )
    return ee.Algorithms.If(has_match, first_match_props, fallback)
|
|
1111
1373
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: openforis-whisp
|
|
3
|
-
Version:
|
|
3
|
+
Version: 3.0.0a1
|
|
4
4
|
Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: whisp,geospatial,data-processing
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
openforis_whisp/__init__.py,sha256=-r_9LFxbV6d-o4s0_huhaXxve6GIzCwl3pXKuJo6ixE,3663
|
|
2
|
+
openforis_whisp/advanced_stats.py,sha256=_bP_ApeaAdOF41WvabOhUNGL9Tt35AesUjNjqnHs8wo,76730
|
|
3
|
+
openforis_whisp/data_checks.py,sha256=WiYhoTedPs1MqSv4T978nDF3_WDYyg8YmHRi9mQXXqI,25203
|
|
4
|
+
openforis_whisp/data_conversion.py,sha256=sr2j_q6YjxVTicytimOMO8-RYohD1oyWWrgbg6WsLSw,18796
|
|
5
|
+
openforis_whisp/datasets.py,sha256=aGJy0OYN4d0nsH3_IOYlHl-WCB7KFwZwMJ-dBi5Hc5Y,53470
|
|
6
|
+
openforis_whisp/logger.py,sha256=9M6_3mdpoiWfC-pDwM9vKmB2l5Gul6Rb5rNTNh-_nzs,3054
|
|
7
|
+
openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
|
|
9
|
+
openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
|
|
10
|
+
openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
|
|
11
|
+
openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
|
|
12
|
+
openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
|
|
13
|
+
openforis_whisp/reformat.py,sha256=mIooJ3zfSTDY3_Mx3OAW4jpfQ72q3zasG9tl58PdfN4,33729
|
|
14
|
+
openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
|
|
15
|
+
openforis_whisp/stats.py,sha256=mzzd3oU3RnJQPfeWoUBuMDTIw2FCAzWXHCt53ZuQ__A,64895
|
|
16
|
+
openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
|
|
17
|
+
openforis_whisp-3.0.0a1.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
|
|
18
|
+
openforis_whisp-3.0.0a1.dist-info/METADATA,sha256=dAlxg3DFtpIw5fCbZyrO2hOhtJSg7DYxtbN71ez3S2Y,16684
|
|
19
|
+
openforis_whisp-3.0.0a1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
20
|
+
openforis_whisp-3.0.0a1.dist-info/RECORD,,
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
openforis_whisp/__init__.py,sha256=bnEZ4_X-mJInltSKVI0STfvrb09Df-z21buIVFDif5w,2524
|
|
2
|
-
openforis_whisp/data_conversion.py,sha256=Mc6dXbvoHBeRzl3o83pyKeI5_sPC8Yc90Tj4bN6_Bv8,17519
|
|
3
|
-
openforis_whisp/datasets.py,sha256=hb8Y35vTcQQNUH_z2_l8Pu6Sjn_E8BzSow1-qAfs9bQ,50194
|
|
4
|
-
openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
|
|
5
|
-
openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
|
|
6
|
-
openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
|
|
7
|
-
openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
|
|
8
|
-
openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
|
|
9
|
-
openforis_whisp/reformat.py,sha256=rtkKs8z1mJd5JD9rXuMk1tbbbTvQxCCh68tA4hIQAv8,25445
|
|
10
|
-
openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
|
|
11
|
-
openforis_whisp/stats.py,sha256=1ikeV8UYpL8O5HZJY8lPXrhQwZ9D1IglbOsagZHCYdA,54000
|
|
12
|
-
openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
|
|
13
|
-
openforis_whisp-2.0.0b3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
|
|
14
|
-
openforis_whisp-2.0.0b3.dist-info/METADATA,sha256=Opn73PWlsOQWTiwZ-HYvLkrPh4jYQELtSIIqDf4MsoQ,16684
|
|
15
|
-
openforis_whisp-2.0.0b3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
16
|
-
openforis_whisp-2.0.0b3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|