openforis-whisp 2.0.0b1__py3-none-any.whl → 2.0.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +2 -1
- openforis_whisp/data_conversion.py +11 -0
- openforis_whisp/datasets.py +207 -247
- openforis_whisp/parameters/lookup_gee_datasets.csv +2 -5
- openforis_whisp/risk.py +29 -29
- openforis_whisp/stats.py +297 -47
- openforis_whisp/utils.py +298 -5
- {openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b3.dist-info}/METADATA +1 -1
- openforis_whisp-2.0.0b3.dist-info/RECORD +16 -0
- openforis_whisp/parameters/__init__.py +0 -15
- openforis_whisp-2.0.0b1.dist-info/RECORD +0 -17
- {openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b3.dist-info}/LICENSE +0 -0
- {openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b3.dist-info}/WHEEL +0 -0
|
@@ -2,7 +2,7 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
|
|
|
2
2
|
EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
|
|
3
3
|
GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
|
|
4
4
|
TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
|
|
5
|
-
GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,
|
|
5
|
+
GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
|
|
6
6
|
Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
|
|
7
7
|
ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
|
|
8
8
|
TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
|
|
@@ -163,13 +163,11 @@ GFT_planted_plantation,1900,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_gft_
|
|
|
163
163
|
IIASA_planted_plantation,1910,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_iiasa_planted_prep
|
|
164
164
|
TMF_regrowth_2023,2000,,NA,treecover_after_2020,0,1,0,float32,1,0,g_tmf_regrowth_prep
|
|
165
165
|
ESRI_2023_TC,2010,,NA,treecover_after_2020,0,1,0,float32,1,0,g_esri_2023_tc_prep
|
|
166
|
-
GLC_FCS30D_TC_2022,2020,,NA,treecover_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_tc_2022_prep
|
|
167
166
|
Oil_palm_2023_FDaP,2100,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_palm_2023_prep
|
|
168
167
|
Rubber_2023_FDaP,2110,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_rubber_2023_prep
|
|
169
168
|
Coffee_FDaP_2023,2111,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_coffee_2023_prep
|
|
170
169
|
Cocoa_2023_FDaP,2120,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_cocoa_2023_prep
|
|
171
|
-
|
|
172
|
-
GLC_FCS30D_crop_2022,2140,,NA,agri_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_crop_2022_prep
|
|
170
|
+
ESRI_crop_gain_2020_2023,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2020_2023_crop_prep
|
|
173
171
|
GFW_logging_before_2020,2200,,NA,logging_concession,0,1,0,float32,1,0,g_logging_concessions_prep
|
|
174
172
|
nCO_ideam_forest_2020,2310,CO,treecover,NA,1,1,0,float32,1,0,nco_ideam_forest_2020_prep
|
|
175
173
|
nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,1,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
|
|
@@ -199,4 +197,3 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
|
|
|
199
197
|
nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
|
|
200
198
|
nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
|
|
201
199
|
nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep
|
|
202
|
-
|
openforis_whisp/risk.py
CHANGED
|
@@ -161,9 +161,8 @@ def whisp_risk(
|
|
|
161
161
|
lookup_df_copy, custom_bands_info, df.columns
|
|
162
162
|
)
|
|
163
163
|
print(f"Including custom bands: {list(custom_bands_info.keys())}")
|
|
164
|
-
# print(f"appended custom bands info to lookup table")
|
|
165
164
|
if national_codes:
|
|
166
|
-
print(f"
|
|
165
|
+
print(f"Including additional national data for: {national_codes}")
|
|
167
166
|
# Filter by national codes
|
|
168
167
|
filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
|
|
169
168
|
lookup_df=lookup_df_copy,
|
|
@@ -473,7 +472,6 @@ def add_indicators(
|
|
|
473
472
|
return df
|
|
474
473
|
|
|
475
474
|
|
|
476
|
-
# Update add_indicator_column to use the unit_type parameter
|
|
477
475
|
def add_indicator_column(
|
|
478
476
|
df: data_lookup_type,
|
|
479
477
|
input_columns: list[str],
|
|
@@ -482,49 +480,51 @@ def add_indicator_column(
|
|
|
482
480
|
low_name: str = "no",
|
|
483
481
|
high_name: str = "yes",
|
|
484
482
|
sum_comparison: bool = False,
|
|
485
|
-
unit_type: str = None,
|
|
483
|
+
unit_type: str = None,
|
|
486
484
|
) -> data_lookup_type:
|
|
487
|
-
"""
|
|
488
|
-
Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
|
|
485
|
+
"""Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign."""
|
|
489
486
|
|
|
490
|
-
Parameters:
|
|
491
|
-
df (data_lookup_type): The pandas DataFrame to which the column will be added.
|
|
492
|
-
input_columns (list): List of column names to check for threshold.
|
|
493
|
-
threshold (float): The threshold value to compare against.
|
|
494
|
-
new_column_name (str): The name of the new column to be added.
|
|
495
|
-
The '>' sign is used for comparisons.
|
|
496
|
-
When 'sum comparison' == True, then the threshold is compared to the sum of all those listed in 'input_columns', as opposed to when Flalse, when each column in the list is compared to the threshold individually
|
|
497
|
-
low_name (str): The name for the value when below or equal to threshold (default is 'no').
|
|
498
|
-
high_name (str): The name for the value when above threshold (default is 'yes').
|
|
499
|
-
sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
|
|
500
|
-
unit_type (str): Whether values are in "ha" or "percent".
|
|
501
|
-
|
|
502
|
-
Returns:
|
|
503
|
-
data_lookup_type: The DataFrame with the new column added.
|
|
504
|
-
"""
|
|
505
487
|
# Create a new column and initialize with low_name
|
|
506
488
|
new_column = pd.Series(low_name, index=df.index, name=new_column_name)
|
|
507
489
|
|
|
508
|
-
# Default behavior: use '>' for single column comparison
|
|
509
490
|
if sum_comparison:
|
|
510
491
|
# Sum all values in specified columns and compare to threshold
|
|
511
492
|
sum_values = df[input_columns].sum(axis=1)
|
|
512
493
|
new_column[sum_values > threshold] = high_name
|
|
513
494
|
else:
|
|
514
|
-
# Check if any values in specified columns are above the threshold
|
|
495
|
+
# Check if any values in specified columns are above the threshold
|
|
515
496
|
for col in input_columns:
|
|
516
|
-
# So that threshold is always in percent, if outputs are in ha, the code converts to percent (based on dividing by the geometry_area_column column.
|
|
517
|
-
# Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
|
|
518
497
|
if unit_type == "ha":
|
|
519
498
|
df[geometry_area_column] = pd.to_numeric(
|
|
520
499
|
df[geometry_area_column], errors="coerce"
|
|
521
500
|
)
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
501
|
+
|
|
502
|
+
# Handle points (Area = 0) separately
|
|
503
|
+
is_point = df[geometry_area_column] == 0
|
|
504
|
+
|
|
505
|
+
# For points: any value > 0 exceeds threshold
|
|
506
|
+
point_mask = is_point & (df[col] > 0)
|
|
507
|
+
new_column[point_mask] = high_name
|
|
508
|
+
|
|
509
|
+
# For polygons: convert to percentage and check threshold
|
|
510
|
+
polygon_mask = ~is_point
|
|
511
|
+
if polygon_mask.any():
|
|
512
|
+
val_to_check = clamp(
|
|
513
|
+
(
|
|
514
|
+
(
|
|
515
|
+
df.loc[polygon_mask, col]
|
|
516
|
+
/ df.loc[polygon_mask, geometry_area_column]
|
|
517
|
+
)
|
|
518
|
+
* 100
|
|
519
|
+
),
|
|
520
|
+
0,
|
|
521
|
+
100,
|
|
522
|
+
)
|
|
523
|
+
new_column[polygon_mask & (val_to_check > threshold)] = high_name
|
|
525
524
|
else:
|
|
525
|
+
# For percentage values, use direct comparison
|
|
526
526
|
val_to_check = df[col]
|
|
527
|
-
|
|
527
|
+
new_column[val_to_check > threshold] = high_name
|
|
528
528
|
|
|
529
529
|
# Concatenate the new column to the DataFrame
|
|
530
530
|
df = pd.concat([df, new_column], axis=1)
|
openforis_whisp/stats.py
CHANGED
|
@@ -34,6 +34,53 @@ from .reformat import (
|
|
|
34
34
|
|
|
35
35
|
# NB: functions that include "formatted" in their name apply a schema for validation and reformatting of the output dataframe. The schema is created from lookup tables.
|
|
36
36
|
|
|
37
|
+
# ============================================================================
|
|
38
|
+
# PERFORMANCE OPTIMIZATION: Cache expensive Earth Engine datasets
|
|
39
|
+
# ============================================================================
|
|
40
|
+
# These images/collections are loaded once and reused across all features
|
|
41
|
+
# to avoid repeated expensive operations. This saves 7-15 seconds per analysis.
|
|
42
|
+
|
|
43
|
+
_WATER_FLAG_IMAGE = None
|
|
44
|
+
_GEOBOUNDARIES_FC = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_water_flag_image():
    """
    Return the module-wide water flag image, building it on first use.

    The first call invokes ``water_flag_all_prep()`` and stores the result in
    the module-level ``_WATER_FLAG_IMAGE`` variable. Every later call returns
    that stored object without calling ``water_flag_all_prep()`` again, so a
    single image object is shared by all callers.

    Returns
    -------
    ee.Image
        The cached water flag image.
    """
    global _WATER_FLAG_IMAGE

    # Fast path: an earlier call already built the image.
    if _WATER_FLAG_IMAGE is not None:
        return _WATER_FLAG_IMAGE

    _WATER_FLAG_IMAGE = water_flag_all_prep()
    return _WATER_FLAG_IMAGE
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_geoboundaries_fc():
    """
    Return the module-wide geoboundaries collection, creating it on first use.

    The first call wraps the Earth Engine asset
    ``WM/geoLab/geoBoundaries/600/ADM1`` in an ``ee.FeatureCollection`` and
    stores it in the module-level ``_GEOBOUNDARIES_FC`` variable. Every later
    call returns that same stored object.

    Returns
    -------
    ee.FeatureCollection
        The cached geoboundaries feature collection.
    """
    global _GEOBOUNDARIES_FC

    # Fast path: an earlier call already created the collection handle.
    if _GEOBOUNDARIES_FC is not None:
        return _GEOBOUNDARIES_FC

    _GEOBOUNDARIES_FC = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM1")
    return _GEOBOUNDARIES_FC
|
|
83
|
+
|
|
37
84
|
|
|
38
85
|
def whisp_formatted_stats_geojson_to_df(
|
|
39
86
|
input_geojson_filepath: Path | str,
|
|
@@ -425,7 +472,9 @@ def whisp_stats_ee_to_ee(
|
|
|
425
472
|
national_codes=None,
|
|
426
473
|
unit_type="ha",
|
|
427
474
|
keep_properties=None,
|
|
428
|
-
whisp_image=None,
|
|
475
|
+
whisp_image=None,
|
|
476
|
+
validate_external_id=True,
|
|
477
|
+
validate_bands=False, # New parameter
|
|
429
478
|
):
|
|
430
479
|
"""
|
|
431
480
|
Process a feature collection to get statistics for each feature.
|
|
@@ -442,19 +491,25 @@ def whisp_stats_ee_to_ee(
|
|
|
442
491
|
whisp_image (ee.Image, optional): Pre-combined multiband Earth Engine Image containing
|
|
443
492
|
all Whisp datasets. If provided, this image will be used instead of combining
|
|
444
493
|
datasets based on national_codes.
|
|
494
|
+
validate_external_id (bool, optional): If True, validates that external_id_column exists
|
|
495
|
+
in all features (default: True). Set to False to skip validation and save 2-4 seconds.
|
|
496
|
+
Only disable if you're confident the column exists in all features.
|
|
445
497
|
|
|
446
498
|
Returns:
|
|
447
499
|
ee.FeatureCollection: The output feature collection with statistics.
|
|
448
500
|
"""
|
|
449
501
|
if external_id_column is not None:
|
|
450
502
|
try:
|
|
451
|
-
#
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
503
|
+
# OPTIMIZATION: Make validation optional to save 2-4 seconds
|
|
504
|
+
# Validation includes multiple .getInfo() calls which are slow
|
|
505
|
+
if validate_external_id:
|
|
506
|
+
# Validate that the external_id_column exists in all features
|
|
507
|
+
validation_result = validate_external_id_column(
|
|
508
|
+
feature_collection, external_id_column
|
|
509
|
+
)
|
|
455
510
|
|
|
456
|
-
|
|
457
|
-
|
|
511
|
+
if not validation_result["is_valid"]:
|
|
512
|
+
raise ValueError(validation_result["error_message"])
|
|
458
513
|
|
|
459
514
|
# First handle property selection, but preserve the external_id_column
|
|
460
515
|
if keep_properties is not None:
|
|
@@ -506,19 +561,27 @@ def whisp_stats_ee_to_ee(
|
|
|
506
561
|
national_codes=national_codes,
|
|
507
562
|
unit_type=unit_type,
|
|
508
563
|
whisp_image=whisp_image, # Pass through
|
|
564
|
+
validate_bands=validate_bands,
|
|
509
565
|
)
|
|
510
566
|
|
|
511
567
|
return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
|
|
512
568
|
|
|
513
569
|
|
|
514
570
|
def _keep_fc_properties(feature_collection, keep_properties):
|
|
571
|
+
"""
|
|
572
|
+
Filter feature collection properties based on keep_properties parameter.
|
|
573
|
+
|
|
574
|
+
OPTIMIZATION: When keep_properties is True, we no longer call .getInfo()
|
|
575
|
+
to get property names. Instead, we simply return the collection as-is,
|
|
576
|
+
since True means "keep all properties". This saves 1-2 seconds.
|
|
577
|
+
"""
|
|
515
578
|
# If keep_properties is specified, select only those properties
|
|
516
579
|
if keep_properties is None:
|
|
517
580
|
feature_collection = feature_collection.select([])
|
|
518
581
|
elif keep_properties == True:
|
|
519
|
-
# If keep_properties is true,
|
|
520
|
-
|
|
521
|
-
|
|
582
|
+
# If keep_properties is true, keep all properties
|
|
583
|
+
# No need to call .select() or .getInfo() - just return as-is
|
|
584
|
+
pass
|
|
522
585
|
elif isinstance(keep_properties, list):
|
|
523
586
|
feature_collection = feature_collection.select(keep_properties)
|
|
524
587
|
else:
|
|
@@ -534,7 +597,8 @@ def whisp_stats_ee_to_df(
|
|
|
534
597
|
remove_geom=False,
|
|
535
598
|
national_codes=None,
|
|
536
599
|
unit_type="ha",
|
|
537
|
-
whisp_image=None,
|
|
600
|
+
whisp_image=None,
|
|
601
|
+
validate_bands=False, # New parameter
|
|
538
602
|
) -> pd.DataFrame:
|
|
539
603
|
"""
|
|
540
604
|
Convert a Google Earth Engine FeatureCollection to a pandas DataFrame and convert ISO3 to ISO2 country codes.
|
|
@@ -561,27 +625,52 @@ def whisp_stats_ee_to_df(
|
|
|
561
625
|
"""
|
|
562
626
|
# First, do the whisp processing to get the EE feature collection with stats
|
|
563
627
|
try:
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
628
|
+
try:
|
|
629
|
+
stats_feature_collection = whisp_stats_ee_to_ee(
|
|
630
|
+
feature_collection,
|
|
631
|
+
external_id_column,
|
|
632
|
+
national_codes=national_codes,
|
|
633
|
+
unit_type=unit_type,
|
|
634
|
+
whisp_image=whisp_image, # Pass through
|
|
635
|
+
validate_bands=False, # try without validation first
|
|
636
|
+
)
|
|
637
|
+
except Exception as e:
|
|
638
|
+
print(f"An error occurred during Whisp stats processing: {e}")
|
|
639
|
+
raise e
|
|
574
640
|
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
641
|
+
# Then, convert the EE feature collection to DataFrame
|
|
642
|
+
try:
|
|
643
|
+
df_stats = convert_ee_to_df(
|
|
644
|
+
ee_object=stats_feature_collection,
|
|
645
|
+
remove_geom=remove_geom,
|
|
646
|
+
)
|
|
647
|
+
except Exception as e:
|
|
648
|
+
print(f"An error occurred during the conversion from EE to DataFrame: {e}")
|
|
649
|
+
raise e
|
|
650
|
+
|
|
651
|
+
except: # retry with validation of whisp input datasets
|
|
652
|
+
try:
|
|
653
|
+
stats_feature_collection = whisp_stats_ee_to_ee(
|
|
654
|
+
feature_collection,
|
|
655
|
+
external_id_column,
|
|
656
|
+
national_codes=national_codes,
|
|
657
|
+
unit_type=unit_type,
|
|
658
|
+
whisp_image=whisp_image,
|
|
659
|
+
validate_bands=True, # If error, try with validation
|
|
660
|
+
)
|
|
661
|
+
except Exception as e:
|
|
662
|
+
print(f"An error occurred during Whisp stats processing: {e}")
|
|
663
|
+
raise e
|
|
584
664
|
|
|
665
|
+
# Then, convert the EE feature collection to DataFrame
|
|
666
|
+
try:
|
|
667
|
+
df_stats = convert_ee_to_df(
|
|
668
|
+
ee_object=stats_feature_collection,
|
|
669
|
+
remove_geom=remove_geom,
|
|
670
|
+
)
|
|
671
|
+
except Exception as e:
|
|
672
|
+
print(f"An error occurred during the conversion from EE to DataFrame: {e}")
|
|
673
|
+
raise e
|
|
585
674
|
try:
|
|
586
675
|
df_stats = convert_iso3_to_iso2(
|
|
587
676
|
df=df_stats,
|
|
@@ -592,9 +681,52 @@ def whisp_stats_ee_to_df(
|
|
|
592
681
|
print(f"An error occurred during the ISO3 to ISO2 conversion: {e}")
|
|
593
682
|
return pd.DataFrame() # Return an empty DataFrame in case of error
|
|
594
683
|
|
|
684
|
+
# NEW: Set area to 0 for point geometries
|
|
685
|
+
try:
|
|
686
|
+
df_stats = set_point_geometry_area_to_zero(df_stats)
|
|
687
|
+
except Exception as e:
|
|
688
|
+
print(f"An error occurred during point geometry area adjustment: {e}")
|
|
689
|
+
# Continue without the adjustment rather than failing completely
|
|
690
|
+
|
|
595
691
|
return df_stats
|
|
596
692
|
|
|
597
693
|
|
|
694
|
+
def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
    """
    Set the geometry area column to 0 for features with Point geometry type.

    NOTE(review): a second function with this exact name is defined further
    down in this module; being later, that definition is the one bound to the
    name once the module has finished importing. Consider keeping only one.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing geometry type and area columns

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` in which the area is 0.0 on every row whose geometry
        type equals "Point". If either required column is missing, a warning
        is printed and the input object itself is returned unchanged.
    """
    # Both columns must exist. Without the area-column check, the .loc
    # assignment below would silently create a new, mostly-NaN column
    # instead of adjusting an existing one.
    for required_column in (geometry_type_column, geometry_area_column):
        if required_column not in df.columns:
            print(
                f"Warning: {required_column} column not found. Skipping area adjustment for points."
            )
            return df

    # Work on a copy so the caller's DataFrame is not modified.
    df_modified = df.copy()

    # Set area to 0 where geometry type is Point
    point_mask = df_modified[geometry_type_column] == "Point"
    df_modified.loc[point_mask, geometry_area_column] = 0.0

    # Report how many rows were adjusted (silent when there are none).
    num_points = point_mask.sum()
    if num_points > 0:
        print(f"Set area to 0 for {num_points} Point geometries")

    return df_modified
|
|
728
|
+
|
|
729
|
+
|
|
598
730
|
def whisp_stats_ee_to_drive(
|
|
599
731
|
feature_collection: ee.FeatureCollection,
|
|
600
732
|
external_id_column=None,
|
|
@@ -647,7 +779,11 @@ def whisp_stats_ee_to_drive(
|
|
|
647
779
|
|
|
648
780
|
# Get stats for a feature or feature collection
|
|
649
781
|
def get_stats(
|
|
650
|
-
feature_or_feature_col,
|
|
782
|
+
feature_or_feature_col,
|
|
783
|
+
national_codes=None,
|
|
784
|
+
unit_type="ha",
|
|
785
|
+
whisp_image=None,
|
|
786
|
+
validate_bands=False,
|
|
651
787
|
):
|
|
652
788
|
"""
|
|
653
789
|
Get stats for a feature or feature collection with optional pre-combined image.
|
|
@@ -676,16 +812,25 @@ def get_stats(
|
|
|
676
812
|
img_combined = whisp_image
|
|
677
813
|
print("Using provided whisp_image")
|
|
678
814
|
else:
|
|
679
|
-
img_combined = combine_datasets(
|
|
815
|
+
img_combined = combine_datasets(
|
|
816
|
+
national_codes=national_codes, validate_bands=validate_bands
|
|
817
|
+
)
|
|
680
818
|
print(f"Combining datasets with national_codes: {national_codes}")
|
|
681
819
|
|
|
682
820
|
# Check if the input is a Feature or a FeatureCollection
|
|
683
821
|
if isinstance(feature_or_feature_col, ee.Feature):
|
|
684
822
|
print("Processing single feature")
|
|
823
|
+
# OPTIMIZATION: Create cached images for single feature processing
|
|
824
|
+
water_all = get_water_flag_image()
|
|
825
|
+
gbounds_ADM0 = get_geoboundaries_fc()
|
|
685
826
|
output = ee.FeatureCollection(
|
|
686
827
|
[
|
|
687
828
|
get_stats_feature(
|
|
688
|
-
feature_or_feature_col,
|
|
829
|
+
feature_or_feature_col,
|
|
830
|
+
img_combined,
|
|
831
|
+
unit_type=unit_type,
|
|
832
|
+
water_all=water_all,
|
|
833
|
+
gbounds_ADM0=gbounds_ADM0,
|
|
689
834
|
)
|
|
690
835
|
]
|
|
691
836
|
)
|
|
@@ -707,6 +852,10 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
707
852
|
"""
|
|
708
853
|
Calculate statistics for a feature collection using Whisp datasets.
|
|
709
854
|
|
|
855
|
+
OPTIMIZATION: Creates water flag and geoboundaries images once and reuses
|
|
856
|
+
them for all features instead of recreating them for each feature.
|
|
857
|
+
This saves 7-15 seconds per analysis.
|
|
858
|
+
|
|
710
859
|
Parameters
|
|
711
860
|
----------
|
|
712
861
|
feature_col : ee.FeatureCollection
|
|
@@ -726,15 +875,19 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
726
875
|
ee.FeatureCollection
|
|
727
876
|
Feature collection with calculated statistics
|
|
728
877
|
"""
|
|
729
|
-
|
|
730
|
-
#
|
|
731
|
-
|
|
732
|
-
|
|
878
|
+
# OPTIMIZATION: Create cached images once before processing features
|
|
879
|
+
# These will be reused for all features instead of being recreated each time
|
|
880
|
+
water_all = get_water_flag_image()
|
|
881
|
+
gbounds_ADM0 = get_geoboundaries_fc()
|
|
733
882
|
|
|
734
883
|
out_feature_col = ee.FeatureCollection(
|
|
735
884
|
feature_col.map(
|
|
736
885
|
lambda feature: get_stats_feature(
|
|
737
|
-
feature,
|
|
886
|
+
feature,
|
|
887
|
+
img_combined,
|
|
888
|
+
unit_type=unit_type,
|
|
889
|
+
water_all=water_all,
|
|
890
|
+
gbounds_ADM0=gbounds_ADM0,
|
|
738
891
|
)
|
|
739
892
|
)
|
|
740
893
|
)
|
|
@@ -747,10 +900,15 @@ def get_stats_fc(feature_col, national_codes=None, unit_type="ha", img_combined=
|
|
|
747
900
|
# Note: This function doesn't need whisp_image parameter since it already accepts img_combined directly
|
|
748
901
|
|
|
749
902
|
|
|
750
|
-
def get_stats_feature(
|
|
903
|
+
def get_stats_feature(
|
|
904
|
+
feature, img_combined, unit_type="ha", water_all=None, gbounds_ADM0=None
|
|
905
|
+
):
|
|
751
906
|
"""
|
|
752
907
|
Get statistics for a single feature using a pre-combined image.
|
|
753
908
|
|
|
909
|
+
OPTIMIZATION: Accepts cached water/geoboundaries images to avoid recreating
|
|
910
|
+
them for every feature.
|
|
911
|
+
|
|
754
912
|
Parameters
|
|
755
913
|
----------
|
|
756
914
|
feature : ee.Feature
|
|
@@ -759,6 +917,10 @@ def get_stats_feature(feature, img_combined, unit_type="ha"):
|
|
|
759
917
|
Pre-combined image with all the datasets
|
|
760
918
|
unit_type : str, optional
|
|
761
919
|
Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
|
|
920
|
+
water_all : ee.Image, optional
|
|
921
|
+
Cached water flag image
|
|
922
|
+
gbounds_ADM0 : ee.FeatureCollection, optional
|
|
923
|
+
Cached geoboundaries feature collection
|
|
762
924
|
|
|
763
925
|
Returns
|
|
764
926
|
-------
|
|
@@ -773,8 +935,8 @@ def get_stats_feature(feature, img_combined, unit_type="ha"):
|
|
|
773
935
|
tileScale=8,
|
|
774
936
|
)
|
|
775
937
|
|
|
776
|
-
# Get basic feature information
|
|
777
|
-
feature_info = get_type_and_location(feature)
|
|
938
|
+
# Get basic feature information with cached images
|
|
939
|
+
feature_info = get_type_and_location(feature, water_all, gbounds_ADM0)
|
|
778
940
|
|
|
779
941
|
# add statistics unit type (e.g., percentage or hectares) to dictionary
|
|
780
942
|
stats_unit_type = ee.Dictionary({stats_unit_type_column: unit_type})
|
|
@@ -823,22 +985,47 @@ def get_stats_feature(feature, img_combined, unit_type="ha"):
|
|
|
823
985
|
|
|
824
986
|
|
|
825
987
|
# Get basic feature information - uses admin and water datasets in gee.
|
|
826
|
-
def get_type_and_location(feature):
|
|
827
|
-
"""
|
|
988
|
+
def get_type_and_location(feature, water_all=None, gbounds_ADM0=None):
|
|
989
|
+
"""
|
|
990
|
+
Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags.
|
|
991
|
+
|
|
992
|
+
OPTIMIZATION: Accepts cached water flag image and geoboundaries collection
|
|
993
|
+
to avoid recreating them for every feature (saves 7-15 seconds per analysis).
|
|
994
|
+
|
|
995
|
+
Parameters
|
|
996
|
+
----------
|
|
997
|
+
feature : ee.Feature
|
|
998
|
+
The feature to extract information from
|
|
999
|
+
water_all : ee.Image, optional
|
|
1000
|
+
Cached water flag image. If None, creates it.
|
|
1001
|
+
gbounds_ADM0 : ee.FeatureCollection, optional
|
|
1002
|
+
Cached geoboundaries feature collection. If None, loads it.
|
|
828
1003
|
|
|
1004
|
+
Returns
|
|
1005
|
+
-------
|
|
1006
|
+
ee.Dictionary
|
|
1007
|
+
Dictionary with feature information
|
|
1008
|
+
"""
|
|
829
1009
|
# Get centroid of the feature's geometry
|
|
830
1010
|
centroid = feature.geometry().centroid(1)
|
|
831
1011
|
|
|
1012
|
+
# OPTIMIZATION: Use cached geoboundaries
|
|
1013
|
+
if gbounds_ADM0 is None:
|
|
1014
|
+
gbounds_ADM0 = get_geoboundaries_fc()
|
|
1015
|
+
|
|
832
1016
|
# Fetch location info from geoboundaries (country, admin)
|
|
833
|
-
location = ee.Dictionary(get_geoboundaries_info(centroid))
|
|
1017
|
+
location = ee.Dictionary(get_geoboundaries_info(centroid, gbounds_ADM0))
|
|
834
1018
|
country = ee.Dictionary({iso3_country_column: location.get("shapeGroup")})
|
|
835
1019
|
|
|
836
1020
|
admin_1 = ee.Dictionary(
|
|
837
1021
|
{admin_1_column: location.get("shapeName")}
|
|
838
1022
|
) # Administrative level 1 (if available)
|
|
839
1023
|
|
|
1024
|
+
# OPTIMIZATION: Use cached water flag image
|
|
1025
|
+
if water_all is None:
|
|
1026
|
+
water_all = get_water_flag_image()
|
|
1027
|
+
|
|
840
1028
|
# Prepare the water flag information
|
|
841
|
-
water_all = water_flag_all_prep()
|
|
842
1029
|
water_flag_dict = value_at_point_flag(
|
|
843
1030
|
point=centroid, image=water_all, band_name=water_flag, output_name=water_flag
|
|
844
1031
|
)
|
|
@@ -890,8 +1077,28 @@ def percent_and_format(val, area_ha):
|
|
|
890
1077
|
|
|
891
1078
|
|
|
892
1079
|
# geoboundaries - admin units from a frequently updated database, allows commercial use (CC BY 4.0 DEED) (disputed territories may need checking)
|
|
893
|
-
def get_geoboundaries_info(geometry):
|
|
894
|
-
|
|
1080
|
+
def get_geoboundaries_info(geometry, gbounds_ADM0=None):
|
|
1081
|
+
"""
|
|
1082
|
+
Get geoboundaries info for a geometry.
|
|
1083
|
+
|
|
1084
|
+
OPTIMIZATION: Accepts cached geoboundaries FeatureCollection to avoid
|
|
1085
|
+
reloading it for every feature (saves 2-5 seconds per analysis).
|
|
1086
|
+
|
|
1087
|
+
Parameters
|
|
1088
|
+
----------
|
|
1089
|
+
geometry : ee.Geometry
|
|
1090
|
+
The geometry to query
|
|
1091
|
+
gbounds_ADM0 : ee.FeatureCollection, optional
|
|
1092
|
+
Cached geoboundaries feature collection. If None, loads it.
|
|
1093
|
+
|
|
1094
|
+
Returns
|
|
1095
|
+
-------
|
|
1096
|
+
ee.Dictionary
|
|
1097
|
+
Dictionary with shapeGroup and shapeName
|
|
1098
|
+
"""
|
|
1099
|
+
if gbounds_ADM0 is None:
|
|
1100
|
+
gbounds_ADM0 = get_geoboundaries_fc()
|
|
1101
|
+
|
|
895
1102
|
polygonsIntersectPoint = gbounds_ADM0.filterBounds(geometry)
|
|
896
1103
|
backup_dict = ee.Dictionary({"shapeGroup": "Unknown", "shapeName": "Unknown"})
|
|
897
1104
|
return ee.Algorithms.If(
|
|
@@ -1226,3 +1433,46 @@ def debug_feature_collection_properties(feature_collection, max_features=5):
|
|
|
1226
1433
|
|
|
1227
1434
|
except Exception as e:
|
|
1228
1435
|
return {"error": f"Error during debugging: {str(e)}"}
|
|
1436
|
+
|
|
1437
|
+
|
|
1438
|
+
# helper function to set area to 0 for point geometries
|
|
1439
|
+
def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
    """
    Set the geometry area column to 0 for features with Point geometry type.

    NOTE(review): an earlier function with this exact name exists higher up
    in this module; this later definition replaces it once the module has
    finished importing. Consider keeping only one.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing geometry type and area columns

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` in which the area is 0.0 on every row whose geometry
        type equals "Point". If either required column is missing, a warning
        is printed and the input object itself is returned unchanged.
    """
    # Both columns must exist; the type column is checked first so the
    # warning names the same column the original two checks would have named.
    for required_column in (geometry_type_column, geometry_area_column):
        if required_column not in df.columns:
            print(
                f"Warning: {required_column} column not found. Skipping area adjustment for points."
            )
            return df

    # Work on a copy so the caller's DataFrame is not modified.
    df_modified = df.copy()

    # Set area to 0 where geometry type is Point
    point_mask = df_modified[geometry_type_column] == "Point"
    df_modified.loc[point_mask, geometry_area_column] = 0.0

    return df_modified
|