openforis-whisp 3.0.0a2__py3-none-any.whl → 3.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +8 -8
- openforis_whisp/advanced_stats.py +476 -312
- openforis_whisp/data_checks.py +80 -28
- openforis_whisp/datasets.py +14 -0
- openforis_whisp/logger.py +15 -3
- openforis_whisp/parameters/lookup_gee_datasets.csv +3 -2
- openforis_whisp/pd_schemas.py +7 -2
- openforis_whisp/reformat.py +8 -30
- openforis_whisp/stats.py +16 -62
- openforis_whisp/utils.py +468 -80
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a4.dist-info}/METADATA +1 -1
- openforis_whisp-3.0.0a4.dist-info/RECORD +20 -0
- openforis_whisp-3.0.0a2.dist-info/RECORD +0 -20
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a4.dist-info}/LICENSE +0 -0
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a4.dist-info}/WHEEL +0 -0
openforis_whisp/data_checks.py
CHANGED

@@ -750,23 +750,43 @@ def validate_geojson_constraints(
     return results


-def suggest_method(polygon_count, mean_area_ha, mean_vertices=None, verbose=True):
+def suggest_processing_mode(
+    feature_count,
+    mean_area_ha=None,
+    mean_vertices=None,
+    feature_type="polygon",
+    verbose=True,
+):
     """
-    Suggest processing
+    Suggest processing mode based on feature characteristics.
+
+    Decision thresholds from comprehensive benchmark data (Nov 2025):

-
-    -
-    -
-    -
+    POINTS:
+    - Break-even: 750-1000 features
+    - Sequential faster: < 750 features
+    - Concurrent faster: >= 750 features
+
+    POLYGONS (area-based thresholds):
+    - Tiny (< 1 ha): break-even ~500 features
+    - Small (1-5 ha, simple): break-even ~500 features
+    - Small (1-5 ha, complex 20-50v): break-even ~500 features
+    - Medium (5-20 ha): break-even ~250 features
+    - Large (20-100 ha): break-even ~250 features
+    - Very large (50-200 ha): break-even ~250 features
+
+    Vertex complexity adjustment: High vertex counts (>50) favor concurrent at lower thresholds

     Parameters:
     -----------
-    polygon_count : int
-        Number of polygons
-    mean_area_ha : float
-        Mean area per polygon in hectares
+    feature_count : int
+        Number of features (polygons or points)
+    mean_area_ha : float, optional
+        Mean area per polygon in hectares (required for polygons, ignored for points)
     mean_vertices : float, optional
-        Mean number of vertices per polygon (
+        Mean number of vertices per polygon (influences decision for complex geometries)
+    feature_type : str
+        'polygon', 'multipolygon', or 'point' (default: 'polygon')
     verbose : bool
         Print recommendation explanation

@@ -775,31 +795,63 @@ def suggest_method(polygon_count, mean_area_ha, mean_vertices=None, verbose=True
     str: 'concurrent' or 'sequential'
     """

-    #
-    if
-    breakeven =
-    method = "concurrent" if
-
-
-
-
+    # Points: simple threshold-based decision
+    if feature_type == "point":
+        breakeven = 750
+        method = "concurrent" if feature_count >= breakeven else "sequential"
+
+        if verbose:
+            print(f"\nMETHOD RECOMMENDATION (Points)")
+            print(f"  Features: {feature_count} points")
+            print(f"  Break-even: {breakeven} features | Method: {method.upper()}")
+
+        return method
+
+    # Polygons and MultiPolygons: area and complexity-based decision
+    # MultiPolygons use same breakpoints as Polygons
+    if mean_area_ha is None:
+        # Default to conservative threshold if area unknown
+        breakeven = 500
+        method = "concurrent" if feature_count >= breakeven else "sequential"
+
+        if verbose:
+            print(f"\nMETHOD RECOMMENDATION (Polygons - area unknown)")
+            print(f"  Features: {feature_count} polygons")
+            print(
+                f"  Break-even: {breakeven} (conservative) | Method: {method.upper()}"
+            )
+
+        return method
+
+    # Area-based thresholds from benchmark data
+    if mean_area_ha >= 20:  # Large to very large polygons
+        breakeven = 250
+    elif mean_area_ha >= 5:  # Medium polygons
         breakeven = 250
-
+    elif mean_area_ha >= 1:  # Small polygons
+        # Vertex complexity matters more for small polygons
+        if mean_vertices is not None and mean_vertices >= 30:
+            breakeven = 500  # Complex small polygons
+        else:
+            breakeven = 500  # Simple small polygons
+    else:  # Tiny polygons (< 1 ha)
+        breakeven = 500
+
+    # Vertex complexity adjustment for high-complexity geometries
+    if mean_vertices is not None and mean_vertices >= 50:
+        # High complexity: reduce breakeven by 20% (concurrent beneficial sooner)
+        breakeven = int(breakeven * 0.8)

-
-    if mean_vertices is not None and mean_vertices > 500:
-        # Reduce breakeven by 25% for very complex geometries
-        adjusted_breakeven = int(breakeven * 0.75)
-        method = "concurrent" if polygon_count >= adjusted_breakeven else "sequential"
+    method = "concurrent" if feature_count >= breakeven else "sequential"

     if verbose:
-        print(f"\nMETHOD RECOMMENDATION")
+        print(f"\nMETHOD RECOMMENDATION (Polygons)")
         print(
-            f"
+            f"  Features: {feature_count} | Mean Area: {mean_area_ha:.1f} ha", end=""
         )
         if mean_vertices is not None:
             print(f" | Mean Vertices: {mean_vertices:.1f}", end="")
         print()
-        print(f"
+        print(f"  Break-even: {breakeven} features | Method: {method.upper()}")

     return method
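A minimal usage sketch of the renamed helper, assuming it is imported from the module shown above (the feature counts and geometry statistics below are illustrative, not taken from the package):

    from openforis_whisp.data_checks import suggest_processing_mode

    # 2,000 medium polygons (~8 ha, ~40 vertices): the >= 5 ha band applies,
    # so the break-even is 250 features and concurrent mode is suggested.
    mode = suggest_processing_mode(
        feature_count=2000,
        mean_area_ha=8.0,
        mean_vertices=40,
        feature_type="polygon",
        verbose=False,
    )
    assert mode == "concurrent"

    # 300 points sit below the 750-feature break-even, so sequential wins.
    assert suggest_processing_mode(300, feature_type="point", verbose=False) == "sequential"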
openforis_whisp/datasets.py
CHANGED

@@ -1160,6 +1160,20 @@ def nci_ocs2020_prep():
     ).selfMask()  # cocoa from national land cover map for Côte d'Ivoire


+# nCM - Cameroon
+# data from Aurelie Shapiro (FAO) working directly with country experts - info on methods and accuracy assessment to follow
+
+
+def ncm_treecover_2020_prep():
+    return (
+        ee.Image("projects/ee-cocoacmr/assets/land_cover/CMR_TNTMMU_2020")
+        .select("FNF_2020")
+        .eq(1)
+        .rename("nCM_Treecover_2020")
+        .selfMask()
+    )
+
+
 # ============================================================================
 # CONTEXT BANDS (Administrative boundaries and water mask)
 # ============================================================================
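The new Cameroon layer follows the prep-function pattern used throughout datasets.py: select a band, binarize it, rename it to the name registered in the lookup table, and selfMask so only positive pixels remain. A sketch of that pattern with placeholder asset and band names (not real assets; requires an initialized Earth Engine session):

    import ee

    def example_binary_layer_prep():
        # Asset path and band name below are hypothetical, for illustration only.
        return (
            ee.Image("projects/your-project/assets/SOME_LANDCOVER_2020")
            .select("CLASS_BAND")
            .eq(1)  # pixels of the target class become 1, everything else 0
            .rename("Example_Layer_2020")  # must match 'name' in lookup_gee_datasets.csv
            .selfMask()  # mask out the zeros so only the class remains
        )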
openforis_whisp/logger.py
CHANGED

@@ -8,9 +8,21 @@ BASE_MSG_FORMAT = (

 class StdoutLogger:
     def __init__(self, name: str, msg_format: str = BASE_MSG_FORMAT) -> None:
-
-
-
+        # Create handler that auto-flushes for Colab/notebook visibility
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(logging.Formatter(msg_format))
+        handler.setLevel(logging.DEBUG)
+
+        # Override emit to force flush after each message
+        original_emit = handler.emit
+
+        def emit_with_flush(record):
+            original_emit(record)
+            sys.stdout.flush()
+
+        handler.emit = emit_with_flush
+
+        self.handler = handler
         self.logger = logging.getLogger(name)
         self.logger.addHandler(self.handler)
         self.logger.propagate = False
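The flush-on-emit change is a small monkey-patch: each record goes through the handler's original emit, then stdout is flushed so notebook front-ends render the line immediately instead of buffering it. A standalone sketch of the same technique, independent of whisp's StdoutLogger:

    import logging
    import sys

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter("%(name)s - %(levelname)s - %(message)s"))

    _original_emit = handler.emit  # keep a reference to the bound method

    def _emit_with_flush(record):
        _original_emit(record)
        sys.stdout.flush()  # force the buffered stream to render now

    handler.emit = _emit_with_flush

    logger = logging.getLogger("demo")
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.propagate = False
    logger.info("appears immediately, even under buffered notebook output")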
openforis_whisp/parameters/lookup_gee_datasets.csv
CHANGED

@@ -2,9 +2,9 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
 EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
 GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
 TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
-GFC_TC_2020,50,,treecover,naturally_reg_2020,
+GFC_TC_2020,50,,treecover,naturally_reg_2020,0,0,0,float32,1,0,g_glad_gfc_10pc_prep
 Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
-ESA_TC_2020,70,,treecover,naturally_reg_2020,
+ESA_TC_2020,70,,treecover,naturally_reg_2020,0,0,0,float32,1,0,g_esa_worldcover_trees_prep
 TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
 Oil_palm_Descals,90,,commodities,NA,1,1,0,float32,1,0,g_creaf_descals_palm_prep
 Oil_palm_FDaP,100,,commodities,NA,1,1,0,float32,1,0,g_fdap_palm_prep

@@ -197,3 +197,4 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
 nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
 nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
 nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep
+nCM_Treecover_2020,3100,CM,treecover,NA,1,0,0,float32,1,0,ncm_treecover_2020_prep
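The net effect of these rows: GFC_TC_2020 and ESA_TC_2020 no longer feed the risk calculation (use_for_risk and use_for_risk_timber flip to 0), while the new Cameroon layer enters with use_for_risk=1 only. A quick way to inspect the active risk layers, assuming a local checkout of the lookup file:

    import pandas as pd

    lookup = pd.read_csv("openforis_whisp/parameters/lookup_gee_datasets.csv")

    # Names of layers contributing to the standard risk output in 3.0.0a4
    risk_layers = lookup.loc[lookup["use_for_risk"] == 1, "name"].tolist()
    print(risk_layers)  # GFC_TC_2020 and ESA_TC_2020 should now be absent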
openforis_whisp/pd_schemas.py
CHANGED

@@ -1,5 +1,10 @@
-import pandera as pa
-from pandera.typing import DataFrame, Series
+# Support both old and new pandera import paths
+try:
+    import pandera.pandas as pa
+    from pandera.typing.pandas import DataFrame, Series
+except (ImportError, ModuleNotFoundError):
+    import pandera as pa
+    from pandera.typing import DataFrame, Series

 # Define a schema for validating a DataFrame related to GEE (Google Earth Engine) datasets.
 class DataLookupSchema(pa.DataFrameModel):
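Newer pandera releases expose the pandas-specific API under pandera.pandas, so the shim tries the new path first and falls back to the classic top-level import. Schema code written against the alias works unchanged under either version; a minimal sketch (the schema and columns here are illustrative, not whisp's real ones):

    try:
        import pandera.pandas as pa
        from pandera.typing.pandas import Series
    except (ImportError, ModuleNotFoundError):
        import pandera as pa
        from pandera.typing import Series

    import pandas as pd

    class ExampleSchema(pa.DataFrameModel):
        name: Series[str]
        order: Series[int]

    # Validation behaves identically under either import path.
    ExampleSchema.validate(pd.DataFrame({"name": ["EUFO_2020"], "order": [10]}))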
openforis_whisp/reformat.py
CHANGED

@@ -1,5 +1,10 @@
 # !pip install pandera[io] # special version used
-import pandera as pa
+# Support both old and new pandera import paths
+try:
+    import pandera.pandas as pa
+except (ImportError, ModuleNotFoundError):
+    import pandera as pa
+
 import pandas as pd
 import os
 import logging

@@ -125,7 +130,7 @@ def validate_dataframe(
     Returns:
         pd.DataFrame: The validated DataFrame with columns ordered according to the schema, or None if validation fails.
     """
-    log_missing_columns(df_stats, schema)
+    _log_missing_columns(df_stats, schema)

     # df_stats = df_stats.reindex(schema.columns.keys(), axis=1)

@@ -251,7 +256,7 @@ def create_schema_from_dataframe(schema_df: pd.DataFrame) -> pa.DataFrameSchema:
     # return logger


-def log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
+def _log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
     # Initialize the logger
     logger = setup_logger(__name__)

@@ -675,33 +680,6 @@ def _process_custom_bands(df_extra: pd.DataFrame, custom_bands) -> pd.DataFrame:


 # Fix the duplicate logging issue
-def log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
-    # Remove the duplicate logger creation line
-    # logger = setup_logger(__name__)  # DELETE THIS LINE
-
-    # Use the existing module-level logger (line 18: logger = StdoutLogger(__name__))
-
-    # Extract the expected columns from the DataFrameSchema
-    template_columns = list(template_schema.columns.keys())
-    df_stats_columns = df_stats.columns.tolist()
-
-    # Find missing and extra columns
-    missing_in_df = [col for col in template_columns if col not in df_stats_columns]
-    extra_in_df = [col for col in df_stats_columns if col not in template_columns]
-
-    # Log missing schema columns
-    if missing_in_df:
-        logger.warning(f"Missing expected schema columns: {missing_in_df}")
-    else:
-        logger.info("All expected schema columns found in DataFrame.")
-
-    # Log extra columns (will be preserved)
-    if extra_in_df:
-        logger.info(f"Extra columns found (will be preserved): {extra_in_df}")
-    else:
-        logger.info("No extra columns found in DataFrame.")
-
-
 def format_stats_dataframe(
     df,
     area_col="Area_sum",
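The surviving helper, now private as _log_missing_columns, keeps the logic of the removed duplicate: compare the DataFrame's columns against the schema's expected columns and log both directions. A self-contained sketch of that comparison (schema and column names are illustrative):

    import logging

    import pandas as pd
    import pandera as pa

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("whisp.reformat.demo")

    schema = pa.DataFrameSchema({"plotId": pa.Column(str), "Area": pa.Column(float)})
    df = pd.DataFrame({"plotId": ["a1"], "extra_band": [0.3]})

    template_columns = list(schema.columns.keys())
    missing_in_df = [c for c in template_columns if c not in df.columns]
    extra_in_df = [c for c in df.columns if c not in template_columns]

    if missing_in_df:
        logger.warning(f"Missing expected schema columns: {missing_in_df}")
    if extra_in_df:
        logger.info(f"Extra columns found (will be preserved): {extra_in_df}")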
openforis_whisp/stats.py
CHANGED

@@ -88,12 +88,10 @@ def get_admin_boundaries_fc():
 def whisp_formatted_stats_geojson_to_df_legacy(
     input_geojson_filepath: Path | str,
     external_id_column=None,
-    remove_geom=False,
     national_codes=None,
     unit_type="ha",
     whisp_image=None,
     custom_bands=None,  # New parameter
-    validate_geometries: bool = False,
 ) -> pd.DataFrame:
     """
     Legacy function for basic Whisp stats extraction.

@@ -135,56 +133,19 @@ def whisp_formatted_stats_geojson_to_df_legacy(
         - List of band names: ['Aa_test', 'elevation']
         - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
         - None: preserves all extra columns automatically
-    validate_geometries : bool, optional
-        Whether to validate and fix invalid geometries, by default False.
-        Set to True to automatically fix invalid/self-intersecting polygons.

     Returns
     -------
     df_stats : pd.DataFrame
         The DataFrame containing the Whisp stats for the input ROI.
     """
-    #
-
-
-    import geopandas as gpd
-    from shapely.validation import make_valid
-    import logging as py_logging
-
-    logger = py_logging.getLogger("whisp")
-
-    # Load GeoJSON file
-    with open(input_geojson_filepath, "r") as f:
-        geojson_data = json.load(f)
-
-    # Convert to GeoDataFrame
-    gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
-
-    # Validate and fix invalid geometries
-    valid_count = gdf.geometry.is_valid.sum()
-    invalid_count = len(gdf) - valid_count
-    if invalid_count > 0:
-        logger.warning(f"Fixing {invalid_count} invalid geometries")
-        gdf["geometry"] = gdf["geometry"].apply(
-            lambda g: make_valid(g) if g and not g.is_valid else g
-        )
-
-        # Pass GeoDataFrame directly to preserve CRS metadata
-        # convert_geojson_to_ee will handle:
-        # - CRS detection and conversion to WGS84 if needed
-        # - Data type sanitization (datetime, object columns)
-        # - Geometry validation and Z-coordinate stripping
-        feature_collection = convert_geojson_to_ee(
-            gdf, enforce_wgs84=True, strip_z_coords=True
-        )
-    else:
-        # Original path - no validation
-        feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
+    # Convert GeoJSON to Earth Engine FeatureCollection
+    # Note: Geometry validation/cleaning should be done before calling this function
+    feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))

     return whisp_formatted_stats_ee_to_df(
         feature_collection,
         external_id_column,
-        remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,

@@ -203,8 +164,8 @@ def whisp_formatted_stats_geojson_to_df(
     mode: str = "sequential",
     batch_size: int = 10,
     max_concurrent: int = 20,
-
-
+    geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Main entry point for converting GeoJSON to Whisp statistics.

@@ -226,11 +187,7 @@
         The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
         This column must exist as a property in ALL features of the GeoJSON file.
         Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
-    remove_geom : bool, default=False
-        If True, the geometry of the GeoJSON is removed from the output DataFrame.
     national_codes : list, optional
-        List of ISO2 country codes to include national datasets.
-    unit_type: str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
     whisp_image : ee.Image, optional
         Pre-combined multiband Earth Engine Image containing all Whisp datasets.

@@ -252,12 +209,7 @@
     max_concurrent : int, optional
         Maximum concurrent EE calls for concurrent mode, by default 20.
         Only applicable for "concurrent" mode.
-
-        Whether to validate and fix invalid geometries, by default False.
-        Set to True to automatically fix invalid/self-intersecting polygons.
-        For production workflows, it's recommended to use geometry validation and
-        cleaning tools BEFORE processing with this function.
-    include_geometry_audit_trail : bool, default True
+    geometry_audit_trail : bool, default True
         If True (default), includes audit trail columns:
         - geo_original: Original input geometry
         - geometry_type_original: Original geometry type

@@ -267,6 +219,13 @@

         Processing metadata stored in df.attrs['processing_metadata'].
         These columns enable full transparency for geometry modifications during processing.
+    status_file : str, optional
+        Path to JSON status file or directory for real-time progress tracking.
+        If a directory is provided, creates 'whisp_processing_status.json' in that directory.
+        Updates every 3 minutes and at progress milestones (5%, 10%, etc.).
+        Format: {"status": "processing", "progress": "450/1000", "percent": 45.0,
+                 "elapsed_sec": 120, "eta_sec": 145, "updated_at": "2025-11-13T14:23:45"}
+        Most useful for large concurrent jobs. Works in both concurrent and sequential modes.

     Returns
     -------

@@ -326,12 +285,10 @@
         return whisp_formatted_stats_geojson_to_df_legacy(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
             custom_bands=custom_bands,
-            validate_geometries=validate_geometries,
         )
     elif mode in ("concurrent", "sequential"):
         # Log info if batch_size or max_concurrent are not used in sequential mode

@@ -350,7 +307,6 @@
         return whisp_formatted_stats_geojson_to_df_fast(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,

@@ -358,8 +314,8 @@
             mode=mode,  # Pass mode directly (concurrent or sequential)
             batch_size=batch_size,
             max_concurrent=max_concurrent,
-
-
+            geometry_audit_trail=geometry_audit_trail,
+            status_file=status_file,
         )
     else:
         raise ValueError(

@@ -518,7 +474,6 @@
 def whisp_stats_geojson_to_df(
     input_geojson_filepath: Path | str,
     external_id_column=None,
-    remove_geom=False,
     national_codes=None,
     unit_type="ha",
     whisp_image=None,  # New parameter

@@ -551,7 +506,6 @@
     return whisp_stats_ee_to_df(
         feature_collection,
         external_id_column,
-        remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,  # Pass through

@@ -1035,7 +989,7 @@
         )
         task.start()
         print(
-            "Exporting to Google Drive: '
+            "Exporting to Google Drive: 'whisp_output_table.csv'. To track progress: https://code.earthengine.google.com/tasks"
         )
     except Exception as e:
         print(f"An error occurred during the export: {e}")