openforis-whisp 3.0.0a2__py3-none-any.whl → 3.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +7 -7
- openforis_whisp/advanced_stats.py +171 -222
- openforis_whisp/reformat.py +2 -29
- openforis_whisp/stats.py +6 -51
- openforis_whisp/utils.py +449 -80
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a3.dist-info}/METADATA +1 -1
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a3.dist-info}/RECORD +9 -9
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a3.dist-info}/LICENSE +0 -0
- {openforis_whisp-3.0.0a2.dist-info → openforis_whisp-3.0.0a3.dist-info}/WHEEL +0 -0
openforis_whisp/reformat.py
CHANGED
|
@@ -125,7 +125,7 @@ def validate_dataframe(
|
|
|
125
125
|
Returns:
|
|
126
126
|
pd.DataFrame: The validated DataFrame with columns ordered according to the schema, or None if validation fails.
|
|
127
127
|
"""
|
|
128
|
-
|
|
128
|
+
_log_missing_columns(df_stats, schema)
|
|
129
129
|
|
|
130
130
|
# df_stats = df_stats.reindex(schema.columns.keys(), axis=1)
|
|
131
131
|
|
|
@@ -251,7 +251,7 @@ def create_schema_from_dataframe(schema_df: pd.DataFrame) -> pa.DataFrameSchema:
|
|
|
251
251
|
# return logger
|
|
252
252
|
|
|
253
253
|
|
|
254
|
-
def
|
|
254
|
+
def _log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
|
|
255
255
|
# Initialize the logger
|
|
256
256
|
logger = setup_logger(__name__)
|
|
257
257
|
|
|
@@ -675,33 +675,6 @@ def _process_custom_bands(df_extra: pd.DataFrame, custom_bands) -> pd.DataFrame:
|
|
|
675
675
|
|
|
676
676
|
|
|
677
677
|
# Fix the duplicate logging issue
|
|
678
|
-
def log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
|
|
679
|
-
# Remove the duplicate logger creation line
|
|
680
|
-
# logger = setup_logger(__name__) # DELETE THIS LINE
|
|
681
|
-
|
|
682
|
-
# Use the existing module-level logger (line 18: logger = StdoutLogger(__name__))
|
|
683
|
-
|
|
684
|
-
# Extract the expected columns from the DataFrameSchema
|
|
685
|
-
template_columns = list(template_schema.columns.keys())
|
|
686
|
-
df_stats_columns = df_stats.columns.tolist()
|
|
687
|
-
|
|
688
|
-
# Find missing and extra columns
|
|
689
|
-
missing_in_df = [col for col in template_columns if col not in df_stats_columns]
|
|
690
|
-
extra_in_df = [col for col in df_stats_columns if col not in template_columns]
|
|
691
|
-
|
|
692
|
-
# Log missing schema columns
|
|
693
|
-
if missing_in_df:
|
|
694
|
-
logger.warning(f"Missing expected schema columns: {missing_in_df}")
|
|
695
|
-
else:
|
|
696
|
-
logger.info("All expected schema columns found in DataFrame.")
|
|
697
|
-
|
|
698
|
-
# Log extra columns (will be preserved)
|
|
699
|
-
if extra_in_df:
|
|
700
|
-
logger.info(f"Extra columns found (will be preserved): {extra_in_df}")
|
|
701
|
-
else:
|
|
702
|
-
logger.info("No extra columns found in DataFrame.")
|
|
703
|
-
|
|
704
|
-
|
|
705
678
|
def format_stats_dataframe(
|
|
706
679
|
df,
|
|
707
680
|
area_col="Area_sum",
|
openforis_whisp/stats.py
CHANGED
|
@@ -93,7 +93,6 @@ def whisp_formatted_stats_geojson_to_df_legacy(
|
|
|
93
93
|
unit_type="ha",
|
|
94
94
|
whisp_image=None,
|
|
95
95
|
custom_bands=None, # New parameter
|
|
96
|
-
validate_geometries: bool = False,
|
|
97
96
|
) -> pd.DataFrame:
|
|
98
97
|
"""
|
|
99
98
|
Legacy function for basic Whisp stats extraction.
|
|
@@ -135,51 +134,15 @@ def whisp_formatted_stats_geojson_to_df_legacy(
|
|
|
135
134
|
- List of band names: ['Aa_test', 'elevation']
|
|
136
135
|
- Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
|
|
137
136
|
- None: preserves all extra columns automatically
|
|
138
|
-
validate_geometries : bool, optional
|
|
139
|
-
Whether to validate and fix invalid geometries, by default False.
|
|
140
|
-
Set to True to automatically fix invalid/self-intersecting polygons.
|
|
141
137
|
|
|
142
138
|
Returns
|
|
143
139
|
-------
|
|
144
140
|
df_stats : pd.DataFrame
|
|
145
141
|
The DataFrame containing the Whisp stats for the input ROI.
|
|
146
142
|
"""
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
import geopandas as gpd
|
|
151
|
-
from shapely.validation import make_valid
|
|
152
|
-
import logging as py_logging
|
|
153
|
-
|
|
154
|
-
logger = py_logging.getLogger("whisp")
|
|
155
|
-
|
|
156
|
-
# Load GeoJSON file
|
|
157
|
-
with open(input_geojson_filepath, "r") as f:
|
|
158
|
-
geojson_data = json.load(f)
|
|
159
|
-
|
|
160
|
-
# Convert to GeoDataFrame
|
|
161
|
-
gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
|
|
162
|
-
|
|
163
|
-
# Validate and fix invalid geometries
|
|
164
|
-
valid_count = gdf.geometry.is_valid.sum()
|
|
165
|
-
invalid_count = len(gdf) - valid_count
|
|
166
|
-
if invalid_count > 0:
|
|
167
|
-
logger.warning(f"Fixing {invalid_count} invalid geometries")
|
|
168
|
-
gdf["geometry"] = gdf["geometry"].apply(
|
|
169
|
-
lambda g: make_valid(g) if g and not g.is_valid else g
|
|
170
|
-
)
|
|
171
|
-
|
|
172
|
-
# Pass GeoDataFrame directly to preserve CRS metadata
|
|
173
|
-
# convert_geojson_to_ee will handle:
|
|
174
|
-
# - CRS detection and conversion to WGS84 if needed
|
|
175
|
-
# - Data type sanitization (datetime, object columns)
|
|
176
|
-
# - Geometry validation and Z-coordinate stripping
|
|
177
|
-
feature_collection = convert_geojson_to_ee(
|
|
178
|
-
gdf, enforce_wgs84=True, strip_z_coords=True
|
|
179
|
-
)
|
|
180
|
-
else:
|
|
181
|
-
# Original path - no validation
|
|
182
|
-
feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
|
|
143
|
+
# Convert GeoJSON to Earth Engine FeatureCollection
|
|
144
|
+
# Note: Geometry validation/cleaning should be done before calling this function
|
|
145
|
+
feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
|
|
183
146
|
|
|
184
147
|
return whisp_formatted_stats_ee_to_df(
|
|
185
148
|
feature_collection,
|
|
@@ -203,8 +166,7 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
203
166
|
mode: str = "sequential",
|
|
204
167
|
batch_size: int = 10,
|
|
205
168
|
max_concurrent: int = 20,
|
|
206
|
-
|
|
207
|
-
include_geometry_audit_trail: bool = False,
|
|
169
|
+
geometry_audit_trail: bool = False,
|
|
208
170
|
) -> pd.DataFrame:
|
|
209
171
|
"""
|
|
210
172
|
Main entry point for converting GeoJSON to Whisp statistics.
|
|
@@ -252,12 +214,7 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
252
214
|
max_concurrent : int, optional
|
|
253
215
|
Maximum concurrent EE calls for concurrent mode, by default 20.
|
|
254
216
|
Only applicable for "concurrent" mode.
|
|
255
|
-
|
|
256
|
-
Whether to validate and fix invalid geometries, by default False.
|
|
257
|
-
Set to True to automatically fix invalid/self-intersecting polygons.
|
|
258
|
-
For production workflows, it's recommended to use geometry validation and
|
|
259
|
-
cleaning tools BEFORE processing with this function.
|
|
260
|
-
include_geometry_audit_trail : bool, default True
|
|
217
|
+
geometry_audit_trail : bool, default False
|
|
261
218
|
If True, includes audit trail columns (the default is False):
|
|
262
219
|
- geo_original: Original input geometry
|
|
263
220
|
- geometry_type_original: Original geometry type
|
|
@@ -331,7 +288,6 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
331
288
|
unit_type=unit_type,
|
|
332
289
|
whisp_image=whisp_image,
|
|
333
290
|
custom_bands=custom_bands,
|
|
334
|
-
validate_geometries=validate_geometries,
|
|
335
291
|
)
|
|
336
292
|
elif mode in ("concurrent", "sequential"):
|
|
337
293
|
# Log info if batch_size or max_concurrent are not used in sequential mode
|
|
@@ -358,8 +314,7 @@ def whisp_formatted_stats_geojson_to_df(
|
|
|
358
314
|
mode=mode, # Pass mode directly (concurrent or sequential)
|
|
359
315
|
batch_size=batch_size,
|
|
360
316
|
max_concurrent=max_concurrent,
|
|
361
|
-
|
|
362
|
-
include_geometry_audit_trail=include_geometry_audit_trail,
|
|
317
|
+
geometry_audit_trail=geometry_audit_trail,
|
|
363
318
|
)
|
|
364
319
|
else:
|
|
365
320
|
raise ValueError(
|