openforis-whisp 3.0.0a2__py3-none-any.whl → 3.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -125,7 +125,7 @@ def validate_dataframe(
     Returns:
         pd.DataFrame: The validated DataFrame with columns ordered according to the schema, or None if validation fails.
     """
-    log_missing_columns(df_stats, schema)
+    _log_missing_columns(df_stats, schema)
 
     # df_stats = df_stats.reindex(schema.columns.keys(), axis=1)
 
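The `validate_dataframe` path above orders and checks columns against a pandera `DataFrameSchema`. As a rough sketch of that pattern (the schema and column names below are illustrative, not whisp's actual schema, which is built by `create_schema_from_dataframe`):

```python
import pandas as pd
import pandera as pa

# Illustrative schema only; whisp builds its own via create_schema_from_dataframe()
schema = pa.DataFrameSchema(
    {
        "plotId": pa.Column(str),
        "Area_sum": pa.Column(float, nullable=True),
    }
)

df = pd.DataFrame({"Area_sum": [1.5, 2.0], "plotId": ["a", "b"]})

# Reorder columns to match the schema (mirrors the commented-out reindex above), then validate
df = df.reindex(list(schema.columns.keys()), axis=1)
validated = schema.validate(df)
```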
@@ -251,7 +251,7 @@ def create_schema_from_dataframe(schema_df: pd.DataFrame) -> pa.DataFrameSchema:
     # return logger
 
 
-def log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
+def _log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
     # Initialize the logger
     logger = setup_logger(__name__)
 
@@ -675,33 +675,6 @@ def _process_custom_bands(df_extra: pd.DataFrame, custom_bands) -> pd.DataFrame:
 
 
 # Fix the duplicate logging issue
-def log_missing_columns(df_stats: pd.DataFrame, template_schema: pa.DataFrameSchema):
-    # Remove the duplicate logger creation line
-    # logger = setup_logger(__name__)  # DELETE THIS LINE
-
-    # Use the existing module-level logger (line 18: logger = StdoutLogger(__name__))
-
-    # Extract the expected columns from the DataFrameSchema
-    template_columns = list(template_schema.columns.keys())
-    df_stats_columns = df_stats.columns.tolist()
-
-    # Find missing and extra columns
-    missing_in_df = [col for col in template_columns if col not in df_stats_columns]
-    extra_in_df = [col for col in df_stats_columns if col not in template_columns]
-
-    # Log missing schema columns
-    if missing_in_df:
-        logger.warning(f"Missing expected schema columns: {missing_in_df}")
-    else:
-        logger.info("All expected schema columns found in DataFrame.")
-
-    # Log extra columns (will be preserved)
-    if extra_in_df:
-        logger.info(f"Extra columns found (will be preserved): {extra_in_df}")
-    else:
-        logger.info("No extra columns found in DataFrame.")
-
-
 def format_stats_dataframe(
     df,
     area_col="Area_sum",
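With `log_missing_columns` now private (`_log_missing_columns`) and the duplicate public copy removed above, downstream code that relied on the old public name needs its own check. A minimal sketch of the same column comparison, assuming a pandera schema and a standard-library logger (the helper name is hypothetical):

```python
import logging

import pandas as pd
import pandera as pa

logger = logging.getLogger(__name__)


def report_missing_columns(df: pd.DataFrame, schema: pa.DataFrameSchema) -> None:
    """Log schema columns absent from df and extra columns it carries (hypothetical helper)."""
    expected = list(schema.columns.keys())
    present = df.columns.tolist()

    missing = [col for col in expected if col not in present]
    extra = [col for col in present if col not in expected]

    if missing:
        logger.warning(f"Missing expected schema columns: {missing}")
    else:
        logger.info("All expected schema columns found in DataFrame.")
    if extra:
        logger.info(f"Extra columns found (will be preserved): {extra}")
```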
openforis_whisp/stats.py CHANGED
@@ -93,7 +93,6 @@ def whisp_formatted_stats_geojson_to_df_legacy(
     unit_type="ha",
     whisp_image=None,
     custom_bands=None,  # New parameter
-    validate_geometries: bool = False,
 ) -> pd.DataFrame:
     """
     Legacy function for basic Whisp stats extraction.
@@ -135,51 +134,15 @@ def whisp_formatted_stats_geojson_to_df_legacy(
         - List of band names: ['Aa_test', 'elevation']
         - Dict with types: {'Aa_test': 'float64', 'elevation': 'float32'}
         - None: preserves all extra columns automatically
-    validate_geometries : bool, optional
-        Whether to validate and fix invalid geometries, by default False.
-        Set to True to automatically fix invalid/self-intersecting polygons.
 
     Returns
     -------
     df_stats : pd.DataFrame
         The DataFrame containing the Whisp stats for the input ROI.
     """
-    # Load GeoJSON and validate geometries if requested
-    if validate_geometries:
-        import json
-        import geopandas as gpd
-        from shapely.validation import make_valid
-        import logging as py_logging
-
-        logger = py_logging.getLogger("whisp")
-
-        # Load GeoJSON file
-        with open(input_geojson_filepath, "r") as f:
-            geojson_data = json.load(f)
-
-        # Convert to GeoDataFrame
-        gdf = gpd.GeoDataFrame.from_features(geojson_data["features"])
-
-        # Validate and fix invalid geometries
-        valid_count = gdf.geometry.is_valid.sum()
-        invalid_count = len(gdf) - valid_count
-        if invalid_count > 0:
-            logger.warning(f"Fixing {invalid_count} invalid geometries")
-            gdf["geometry"] = gdf["geometry"].apply(
-                lambda g: make_valid(g) if g and not g.is_valid else g
-            )
-
-        # Pass GeoDataFrame directly to preserve CRS metadata
-        # convert_geojson_to_ee will handle:
-        # - CRS detection and conversion to WGS84 if needed
-        # - Data type sanitization (datetime, object columns)
-        # - Geometry validation and Z-coordinate stripping
-        feature_collection = convert_geojson_to_ee(
-            gdf, enforce_wgs84=True, strip_z_coords=True
-        )
-    else:
-        # Original path - no validation
-        feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
+    # Convert GeoJSON to Earth Engine FeatureCollection
+    # Note: Geometry validation/cleaning should be done before calling this function
+    feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))
 
     return whisp_formatted_stats_ee_to_df(
         feature_collection,
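Because the in-function cleanup removed above is gone in 3.0.0a3, callers who still need it can repair geometries before handing the file to whisp. A minimal sketch with geopandas and shapely, mirroring the removed code path (file paths are placeholders):

```python
import geopandas as gpd
from shapely.validation import make_valid

src = "plots.geojson"          # placeholder input path
dst = "plots_cleaned.geojson"  # placeholder output path

gdf = gpd.read_file(src)

# Repair invalid / self-intersecting polygons, as the removed validate_geometries path did
invalid = ~gdf.geometry.is_valid
if invalid.any():
    print(f"Fixing {invalid.sum()} invalid geometries")
    gdf.loc[invalid, "geometry"] = gdf.loc[invalid, "geometry"].apply(make_valid)

gdf.to_file(dst, driver="GeoJSON")

# Then pass the cleaned file to the whisp function, e.g.
# df = whisp_formatted_stats_geojson_to_df_legacy(dst, ...)
```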
@@ -203,8 +166,7 @@ def whisp_formatted_stats_geojson_to_df(
     mode: str = "sequential",
     batch_size: int = 10,
     max_concurrent: int = 20,
-    validate_geometries: bool = False,
-    include_geometry_audit_trail: bool = False,
+    geometry_audit_trail: bool = False,
 ) -> pd.DataFrame:
     """
     Main entry point for converting GeoJSON to Whisp statistics.
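For callers updating from 3.0.0a2, the keyword rename and the dropped flag translate roughly as below; the import path and positional GeoJSON argument are assumed from the package layout, so treat the exact call shape as illustrative:

```python
# Import path assumed from the package layout (openforis_whisp/stats.py)
from openforis_whisp.stats import whisp_formatted_stats_geojson_to_df

# 3.0.0a2 (old): validation happened inside the call
# df = whisp_formatted_stats_geojson_to_df(
#     "plots.geojson",
#     validate_geometries=True,
#     include_geometry_audit_trail=True,
# )

# 3.0.0a3 (new): clean geometries beforehand, then request the audit trail
df = whisp_formatted_stats_geojson_to_df(
    "plots_cleaned.geojson",
    geometry_audit_trail=True,
)
```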
@@ -252,12 +214,7 @@ def whisp_formatted_stats_geojson_to_df(
     max_concurrent : int, optional
         Maximum concurrent EE calls for concurrent mode, by default 20.
         Only applicable for "concurrent" mode.
-    validate_geometries : bool, optional
-        Whether to validate and fix invalid geometries, by default False.
-        Set to True to automatically fix invalid/self-intersecting polygons.
-        For production workflows, it's recommended to use geometry validation and
-        cleaning tools BEFORE processing with this function.
-    include_geometry_audit_trail : bool, default True
+    geometry_audit_trail : bool, default True
         If True (default), includes audit trail columns:
         - geo_original: Original input geometry
         - geometry_type_original: Original geometry type
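When the audit trail is enabled, the returned DataFrame carries the columns documented above, so the original inputs can be reviewed after processing. A small sketch of inspecting them (only the two documented column names are taken from the docstring; everything else is illustrative):

```python
import pandas as pd


def audit_summary(df: pd.DataFrame) -> pd.Series:
    """Count features by their original geometry type, using the documented audit column."""
    return df["geometry_type_original"].value_counts()


# Usage, assuming `df` came from whisp_formatted_stats_geojson_to_df(..., geometry_audit_trail=True):
# print(audit_summary(df))
# non_polygons = df[df["geometry_type_original"] != "Polygon"]
```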
@@ -331,7 +288,6 @@ def whisp_formatted_stats_geojson_to_df(
             unit_type=unit_type,
             whisp_image=whisp_image,
             custom_bands=custom_bands,
-            validate_geometries=validate_geometries,
         )
     elif mode in ("concurrent", "sequential"):
         # Log info if batch_size or max_concurrent are not used in sequential mode
@@ -358,8 +314,7 @@ def whisp_formatted_stats_geojson_to_df(
             mode=mode,  # Pass mode directly (concurrent or sequential)
             batch_size=batch_size,
             max_concurrent=max_concurrent,
-            validate_geometries=validate_geometries,
-            include_geometry_audit_trail=include_geometry_audit_trail,
+            geometry_audit_trail=geometry_audit_trail,
         )
     else:
         raise ValueError(