PyPI - openforis-whisp - Versions diffs - 3.0.0a3__py3-none-any.whl → 3.0.0a5__py3-none-any.whl - Mend

openforis-whisp 3.0.0a3__py3-none-any.whl → 3.0.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

openforis_whisp/__init__.py +1 -1
openforis_whisp/advanced_stats.py +523 -218
openforis_whisp/data_checks.py +80 -28
openforis_whisp/datasets.py +14 -0
openforis_whisp/logger.py +15 -3
openforis_whisp/parameters/lookup_context_and_metadata.csv +1 -1
openforis_whisp/parameters/lookup_gee_datasets.csv +3 -2
openforis_whisp/pd_schemas.py +7 -2
openforis_whisp/reformat.py +6 -1
openforis_whisp/stats.py +10 -11
openforis_whisp/utils.py +19 -0
{openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a5.dist-info}/METADATA +1 -1
openforis_whisp-3.0.0a5.dist-info/RECORD +20 -0
openforis_whisp-3.0.0a3.dist-info/RECORD +0 -20
{openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a5.dist-info}/LICENSE +0 -0
{openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a5.dist-info}/WHEEL +0 -0

openforis_whisp/advanced_stats.py CHANGED Viewed

@@ -36,6 +36,24 @@ from typing import Optional, List, Dict, Any, Tuple, Union
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import tempfile
+# Configure the "whisp" logger with auto-flush handler for Colab visibility
+_whisp_logger = logging.getLogger("whisp")
+if not _whisp_logger.handlers:
+    _handler = logging.StreamHandler(sys.stdout)
+    _handler.setLevel(logging.DEBUG)
+    _handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
+    # Override emit to force flush after each message for Colab
+    _original_emit = _handler.emit
+    def _emit_with_flush(record):
+        _original_emit(record)
+        sys.stdout.flush()
+    _handler.emit = _emit_with_flush
+    _whisp_logger.addHandler(_handler)
+    _whisp_logger.setLevel(logging.INFO)
+    _whisp_logger.propagate = False  # Don't propagate to root to avoid duplicates
 # ============================================================================
 # STDOUT/STDERR SUPPRESSION CONTEXT MANAGER (for C-level output)
 # ============================================================================
@@ -163,8 +181,25 @@ def _suppress_verbose_output(max_concurrent: int = None):
     reformat_logger.setLevel(logging.ERROR)
-def _load_geojson_silently(filepath: str) -> gpd.GeoDataFrame:
-    """Load GeoJSON file with all output suppressed."""
+def _load_and_prepare_geojson(
+    filepath: str, external_id_column: Optional[str] = None
+) -> gpd.GeoDataFrame:
+    """Load GeoJSON file and prepare for processing.
+    Suppresses logging output and optionally renames external_id column.
+    Parameters
+    ----------
+    filepath : str
+        Path to GeoJSON file
+    external_id_column : str, optional
+        If provided, rename this column to 'external_id' immediately after loading
+    Returns
+    -------
+    gpd.GeoDataFrame
+        Loaded GeoDataFrame with external_id renamed if specified
+    """
     fiona_logger = logging.getLogger("fiona")
     pyogrio_logger = logging.getLogger("pyogrio._io")
     old_fiona_level = fiona_logger.level
@@ -175,6 +210,16 @@ def _load_geojson_silently(filepath: str) -> gpd.GeoDataFrame:
     try:
         with redirect_stdout(io.StringIO()):
             gdf = gpd.read_file(filepath)
+        # Rename external_id column early and convert to string
+        if external_id_column and external_id_column in gdf.columns:
+            if external_id_column != "external_id":
+                gdf = gdf.rename(
+                    columns={external_id_column: "external_id"}
+                )  # hard coding here to avoid confusion later
+            # Convert to string to ensure consistent type throughout pipeline
+            gdf["external_id"] = gdf["external_id"].astype(str)
         return gdf
     finally:
         fiona_logger.setLevel(old_fiona_level)
@@ -445,6 +490,16 @@ def join_admin_codes(
             columns=["admin_code_for_join", "gaul1_code"], errors="ignore"
         )
+        # Fill NaN values with "Unknown" and "not found" for features outside admin boundaries
+        # (e.g., points in the ocean or international waters)
+        df_joined[iso3_country_column] = df_joined[iso3_country_column].fillna(
+            "Unknown"
+        )
+        df_joined[iso2_country_column] = df_joined[iso2_country_column].fillna(
+            "not found"
+        )
+        df_joined[admin_1_column] = df_joined[admin_1_column].fillna("Unknown")
         logger.debug(
             f"Admin codes joined: {iso3_country_column}, {iso2_country_column}, {admin_1_column}"
         )
@@ -461,10 +516,16 @@ class ProgressTracker:
     Shows progress at adaptive milestones (more frequent for small datasets,
     less frequent for large datasets) with estimated time remaining based on
-    processing speed.
+    processing speed. Includes time-based heartbeat to prevent long silences.
     """
-    def __init__(self, total: int, logger: logging.Logger = None):
+    def __init__(
+        self,
+        total: int,
+        logger: logging.Logger = None,
+        heartbeat_interval: int = 180,
+        status_file: str = None,
+    ):
         """
         Initialize progress tracker.
@@ -474,26 +535,147 @@ class ProgressTracker:
             Total number of items to process
         logger : logging.Logger, optional
             Logger for output
+        heartbeat_interval : int, optional
+            Seconds between heartbeat messages (default: 180 = 3 minutes)
+        status_file : str, optional
+            Path to JSON status file for API/web app consumption.
+            Checkpoints auto-save to same directory as status_file.
         """
         self.total = total
         self.completed = 0
         self.lock = threading.Lock()
         self.logger = logger or logging.getLogger("whisp")
+        self.heartbeat_interval = heartbeat_interval
+        # Handle status_file: if directory passed, auto-generate filename
+        if status_file:
+            import os
+            if os.path.isdir(status_file):
+                self.status_file = os.path.join(
+                    status_file, "whisp_processing_status.json"
+                )
+            else:
+                # Validate that parent directory exists
+                parent_dir = os.path.dirname(status_file)
+                if parent_dir and not os.path.isdir(parent_dir):
+                    self.logger.warning(
+                        f"Status file directory does not exist: {parent_dir}"
+                    )
+                    self.status_file = None
+                else:
+                    self.status_file = status_file
+        else:
+            self.status_file = None
         # Adaptive milestones based on dataset size
         # Small datasets (< 50): show every 25% (not too spammy)
         # Medium (50-500): show every 20%
-        # Large (500+): show every 10% (more frequent feedback on long runs)
+        # Large (500-1000): show every 10%
+        # Very large (1000+): show every 5% (cleaner for long jobs)
         if total < 50:
             self.milestones = {25, 50, 75, 100}
         elif total < 500:
             self.milestones = {20, 40, 60, 80, 100}
-        else:
+        elif total < 1000:
             self.milestones = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}
+        else:
+            self.milestones = {
+                5,
+                10,
+                15,
+                20,
+                25,
+                30,
+                35,
+                40,
+                45,
+                50,
+                55,
+                60,
+                65,
+                70,
+                75,
+                80,
+                85,
+                90,
+                95,
+                100,
+            }
         self.shown_milestones = set()
         self.start_time = time.time()
         self.last_update_time = self.start_time
+        self.heartbeat_stop = threading.Event()
+        self.heartbeat_thread = None
+    def _write_status_file(self, status: str = "processing") -> None:
+        """Write current progress to JSON status file using atomic write."""
+        if not self.status_file:
+            return
+        try:
+            import json
+            import os
+            elapsed = time.time() - self.start_time
+            percent = (self.completed / self.total * 100) if self.total > 0 else 0
+            rate = self.completed / elapsed if elapsed > 0 else 0
+            eta = (
+                (self.total - self.completed) / rate * 1.15
+                if rate > 0 and percent >= 5
+                else None
+            )
+            # Write to temp file then atomic rename to prevent partial reads
+            from datetime import datetime
+            temp_file = self.status_file + ".tmp"
+            with open(temp_file, "w") as f:
+                json.dump(
+                    {
+                        "status": status,
+                        "progress": f"{self.completed}/{self.total}",
+                        "percent": round(percent, 1),
+                        "elapsed_sec": round(elapsed),
+                        "eta_sec": round(eta) if eta else None,
+                        "updated_at": datetime.now().isoformat(),
+                    },
+                    f,
+                )
+            os.replace(temp_file, self.status_file)
+        except Exception:
+            pass
+    def start_heartbeat(self) -> None:
+        """Start background heartbeat thread for time-based progress updates."""
+        if self.heartbeat_thread is None or not self.heartbeat_thread.is_alive():
+            self.heartbeat_stop.clear()
+            self.heartbeat_thread = threading.Thread(
+                target=self._heartbeat_loop, daemon=True
+            )
+            self.heartbeat_thread.start()
+            # Write initial status
+            self._write_status_file(status="processing")
+    def _heartbeat_loop(self) -> None:
+        """Background loop that logs progress at time intervals."""
+        while not self.heartbeat_stop.wait(self.heartbeat_interval):
+            with self.lock:
+                # Only log if we haven't shown a milestone recently
+                time_since_update = time.time() - self.last_update_time
+                if (
+                    time_since_update >= self.heartbeat_interval
+                    and self.completed < self.total
+                ):
+                    elapsed = time.time() - self.start_time
+                    percent = int((self.completed / self.total) * 100)
+                    elapsed_str = self._format_time(elapsed)
+                    self.logger.info(
+                        f"[Processing] {self.completed:,}/{self.total:,} batches ({percent}%) | "
+                        f"Elapsed: {elapsed_str}"
+                    )
+                    self.last_update_time = time.time()
     def update(self, n: int = 1) -> None:
         """
@@ -508,7 +690,7 @@ class ProgressTracker:
             self.completed += n
             percent = int((self.completed / self.total) * 100)
-            # Show milestone messages (25%, 50%, 75%, 100%)
+            # Show milestone messages (5%, 10%, 15%... for large datasets)
             for milestone in sorted(self.milestones):
                 if percent >= milestone and milestone not in self.shown_milestones:
                     self.shown_milestones.add(milestone)
@@ -517,20 +699,36 @@ class ProgressTracker:
                     elapsed = time.time() - self.start_time
                     rate = self.completed / elapsed if elapsed > 0 else 0
                     remaining_items = self.total - self.completed
-                    eta_seconds = remaining_items / rate if rate > 0 else 0
+                    # Calculate ETA with padding for overhead (loading, joins, etc.)
+                    # Don't show ETA until we have some samples (at least 5% complete)
+                    if rate > 0 and self.completed >= max(5, self.total * 0.05):
+                        eta_seconds = (
+                            remaining_items / rate
+                        ) * 1.15  # Add 15% padding for overhead
+                    else:
+                        eta_seconds = 0
                     # Format time strings
-                    eta_str = self._format_time(eta_seconds)
+                    eta_str = (
+                        self._format_time(eta_seconds)
+                        if eta_seconds > 0
+                        else "calculating..."
+                    )
                     elapsed_str = self._format_time(elapsed)
                     # Build progress message
-                    msg = f"Progress: {self.completed}/{self.total} ({percent}%)"
+                    msg = f"Progress: {self.completed:,}/{self.total:,} batches ({percent}%)"
                     if percent < 100:
                         msg += f" | Elapsed: {elapsed_str} | ETA: {eta_str}"
                     else:
                         msg += f" | Total time: {elapsed_str}"
                     self.logger.info(msg)
+                    self.last_update_time = time.time()
+        # Update status file for API consumption
+        self._write_status_file()
     @staticmethod
     def _format_time(seconds: float) -> str:
@@ -544,14 +742,21 @@ class ProgressTracker:
             hours = seconds / 3600
             return f"{hours:.1f}h"
-    def finish(self) -> None:
-        """Log completion."""
+    def finish(self, output_file: str = None) -> None:
+        """Stop heartbeat and log completion."""
+        # Stop heartbeat thread
+        self.heartbeat_stop.set()
+        if self.heartbeat_thread and self.heartbeat_thread.is_alive():
+            self.heartbeat_thread.join(timeout=1)
         with self.lock:
             total_time = time.time() - self.start_time
             time_str = self._format_time(total_time)
-            self.logger.info(
-                f"Processing complete: {self.completed}/{self.total} batches in {time_str}"
-            )
+            msg = f"Processing complete: {self.completed:,}/{self.total:,} batches in {time_str}"
+            self.logger.info(msg)
+        # Write final status
+        self._write_status_file(status="completed")
 # ============================================================================
@@ -602,19 +807,17 @@ def validate_ee_endpoint(endpoint_type: str = "high-volume", raise_error: bool =
     if not check_ee_endpoint(endpoint_type):
         if endpoint_type == "high-volume":
             msg = (
-                "Concurrent mode requires the HIGH-VOLUME endpoint. To change endpoint run:\n"
+                "# Concurrent mode requires the HIGH-VOLUME endpoint. To change endpoint run:\n"
                 "ee.Reset()\n"
-                "ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')\n"
-                "Or with project specified (e.g. when in Colab):\n"
-                "ee.Initialize(project='your_cloud_project_name', opt_url='https://earthengine-highvolume.googleapis.com')"
+                "ee.Initialize(project=gee_project_name, opt_url='https://earthengine-highvolume.googleapis.com')\n"
+                "# where gee_project_name is your GEE project (necessary in Colab)"
             )
         else:  # standard endpoint
             msg = (
                 "Sequential mode requires the STANDARD endpoint. To change endpoint run:\n"
                 "ee.Reset()\n"
-                "ee.Initialize()\n"
-                "Or with project specified (e.g. when in Colab):\n"
-                "ee.Initialize(project='your_cloud_project_name')"
+                "ee.Initialize(project=gee_project_name)\n"
+                "# where gee_project_name is your GEE project (necessary in Colab)"
             )
         if raise_error:
@@ -687,13 +890,13 @@ def extract_centroid_and_geomtype_client(
         if plot_id_column in gdf.columns:
             cols.append(plot_id_column)
-        # Include external_id_column if provided and exists
+        # Include external_id if it exists (already renamed during load)
         if (
             external_id_column
-            and external_id_column in gdf.columns
-            and external_id_column not in cols
+            and "external_id" in gdf.columns
+            and "external_id" not in cols
         ):
-            cols.append(external_id_column)
+            cols.append("external_id")
         # Always include metadata columns (centroid, geometry type)
         cols.extend([x_col, y_col, type_col])
@@ -787,6 +990,10 @@ def convert_batch_to_ee(batch_gdf: gpd.GeoDataFrame) -> ee.FeatureCollection:
     Preserves the __row_id__ column if present so it can be retrieved after processing.
+    IMPORTANT: Drops external_id column before sending to EE to enable query caching.
+    external_id is user metadata that's not needed for EE computation. Including it
+    breaks EE's caching mechanism since each unique external_id creates a different query.
     Parameters
     ----------
     batch_gdf : gpd.GeoDataFrame
@@ -795,15 +1002,21 @@ def convert_batch_to_ee(batch_gdf: gpd.GeoDataFrame) -> ee.FeatureCollection:
     Returns
     -------
     ee.FeatureCollection
-        EE FeatureCollection with __row_id__ as a feature property
+        EE FeatureCollection with __row_id__ as a feature property (no external_id)
     """
+    # Drop external_id before sending to EE to enable caching
+    # (external_id is preserved separately on client side for merging)
+    batch_for_ee = batch_gdf.copy()
+    if "external_id" in batch_for_ee.columns:
+        batch_for_ee = batch_for_ee.drop(columns=["external_id"])
     # Pass GeoDataFrame directly to preserve CRS metadata
     # convert_geojson_to_ee will handle:
     # - CRS detection and conversion to WGS84 if needed
     # - Data type sanitization (datetime, object columns)
     # - Geometry validation and Z-coordinate stripping
-    fc = convert_geojson_to_ee(batch_gdf, enforce_wgs84=True, strip_z_coords=True)
+    fc = convert_geojson_to_ee(batch_for_ee, enforce_wgs84=True, strip_z_coords=True)
     # If __row_id__ is in the original GeoDataFrame, it will be preserved
     # as a feature property in the GeoJSON and thus in the EE FeatureCollection
@@ -929,7 +1142,19 @@ def process_ee_batch(
             # Ensure plot_id_column is present for merging
             # It should come from the feature properties (added before EE processing)
             if plot_id_column not in df.columns:
-                df[plot_id_column] = range(len(df))
+                logger.warning(
+                    f"Batch {batch_idx + 1}: plotId column DROPPED by EE. "
+                    f"Regenerating with 1-indexed range. "
+                    f"Columns from EE: {list(df.columns)}"
+                )
+                # Use 1-indexed range to match client-side assignment
+                df[plot_id_column] = range(1, len(df) + 1)
+            # Ensure plotId is integer type (EE may return as string)
+            if plot_id_column in df.columns:
+                df[plot_id_column] = pd.to_numeric(
+                    df[plot_id_column], errors="coerce"
+                ).astype("Int64")
             # Ensure all column names are strings (fixes pandas .str accessor issues)
             df.columns = df.columns.astype(str)
@@ -983,7 +1208,6 @@ def process_ee_batch(
 def whisp_stats_geojson_to_df_concurrent(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -996,6 +1220,7 @@ def whisp_stats_geojson_to_df_concurrent(
     logger: logging.Logger = None,
     # Format parameters (auto-detect from config if not provided)
     decimal_places: int = None,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON concurrently to compute Whisp statistics with automatic formatting.
@@ -1010,8 +1235,6 @@ def whisp_stats_geojson_to_df_concurrent(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry column from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -1055,10 +1278,32 @@ def whisp_stats_geojson_to_df_concurrent(
     # Validate endpoint
     validate_ee_endpoint("high-volume", raise_error=True)
-    # Load GeoJSON with output suppressed
-    gdf = _load_geojson_silently(input_geojson_filepath)
+    # Load GeoJSON with output suppressed (external_id_column renamed to 'external_id' if provided)
+    gdf = _load_and_prepare_geojson(
+        input_geojson_filepath, external_id_column=external_id_column
+    )
     logger.info(f"Loaded {len(gdf):,} features")
+    # Validate external_id if provided (lightweight client-side check)
+    # Note: external_id_column already renamed to 'external_id' during load
+    if external_id_column and "external_id" not in gdf.columns:
+        # Exclude geometry column from available columns list
+        available_cols = [c for c in gdf.columns if c != gdf.geometry.name]
+        raise ValueError(
+            f"Column '{external_id_column}' not found in GeoJSON properties. "
+            f"Available columns: {available_cols}"
+        )
+    # Check completeness of external_id (warn if nulls exist)
+    if external_id_column and "external_id" in gdf.columns:
+        null_count = gdf["external_id"].isna().sum()
+        if null_count > 0:
+            null_pct = (null_count / len(gdf)) * 100
+            logger.warning(
+                f"Column 'external_id' (from '{external_id_column}') has {null_count:,} null values ({null_pct:.1f}% of {len(gdf):,} features). "
+                f"These features may have missing external IDs in output."
+            )
     if validate_geometries:
         gdf = clean_geodataframe(
             gdf, remove_nulls=False, repair_geometries=False, logger=logger
@@ -1068,13 +1313,21 @@ def whisp_stats_geojson_to_df_concurrent(
     gdf[plot_id_column] = range(1, len(gdf) + 1)
     # Strip unnecessary properties before sending to EE
-    # Keep only: geometry, plot_id_column, and external_id_column
+    # Keep only: geometry, plot_id_column, and external_id
     # This prevents duplication of GeoJSON properties in EE results
     keep_cols = ["geometry", plot_id_column]
-    if external_id_column and external_id_column in gdf.columns:
-        keep_cols.append(external_id_column)
+    if (
+        external_id_column and "external_id" in gdf.columns
+    ):  # Already renamed during load
+        keep_cols.append("external_id")
     gdf_for_ee = gdf[keep_cols].copy()
+    # CRITICAL: Convert external_id to string to prevent EE from confusing it with integer plotId
+    if external_id_column and "external_id" in gdf_for_ee.columns:
+        gdf_for_ee["external_id"] = gdf_for_ee["external_id"].astype(str)
+        logger.debug(f"Converted external_id column to string type")
     logger.debug(f"Stripped GeoJSON to essential columns: {keep_cols}")
     # Create image if not provided
@@ -1101,13 +1354,18 @@ def whisp_stats_geojson_to_df_concurrent(
     # Batch the data
     batches = batch_geodataframe(gdf_for_ee, batch_size)
-    logger.info(f"Processing {len(gdf_for_ee):,} features in {len(batches)} batches")
+    logger.info(
+        f"Processing {len(gdf_for_ee):,} features in {len(batches)} batches (concurrent mode)..."
+    )
     # Setup semaphore for EE concurrency control
     ee_semaphore = threading.BoundedSemaphore(max_concurrent)
-    # Progress tracker
-    progress = ProgressTracker(len(batches), logger=logger)
+    # Progress tracker with heartbeat for long-running jobs
+    progress = ProgressTracker(
+        len(batches), logger=logger, heartbeat_interval=180, status_file=status_file
+    )
+    progress.start_heartbeat()
     results = []
@@ -1148,73 +1406,97 @@ def whisp_stats_geojson_to_df_concurrent(
     pyogrio_logger.setLevel(logging.CRITICAL)
     try:
-        with redirect_stdout(io.StringIO()):
-            with ThreadPoolExecutor(max_workers=pool_workers) as executor:
-                futures = {
-                    executor.submit(process_batch, i, batch): i
-                    for i, batch in enumerate(batches)
-                }
-                # Track which batches failed for retry
-                batch_map = {i: batch for i, batch in enumerate(batches)}
-                batch_futures = {future: i for future, i in futures.items()}
+        # Don't suppress stdout here - we want progress messages to show in Colab
+        with ThreadPoolExecutor(max_workers=pool_workers) as executor:
+            futures = {
+                executor.submit(process_batch, i, batch): i
+                for i, batch in enumerate(batches)
+            }
-                for future in as_completed(futures):
-                    batch_idx = batch_futures[future]
-                    try:
-                        batch_idx, df_server, df_client = future.result()
-                        # Merge server and client results
-                        if plot_id_column not in df_server.columns:
-                            df_server[plot_id_column] = range(len(df_server))
-                        # Keep all EE statistics from server (all columns with _sum and _median suffixes)
-                        # These are the actual EE processing results
-                        df_server_clean = df_server.copy()
-                        # Keep external metadata: plot_id, external_id, geometry, geometry type, and centroids from client
-                        # (formatted wrapper handles keep_external_columns parameter)
-                        keep_external_columns = [plot_id_column]
-                        if (
-                            external_id_column
-                            and external_id_column in df_client.columns
-                        ):
-                            keep_external_columns.append(external_id_column)
-                        if "geometry" in df_client.columns:
-                            keep_external_columns.append("geometry")
-                        # Keep geometry type column (Geometry_type)
-                        if geometry_type_column in df_client.columns:
-                            keep_external_columns.append(geometry_type_column)
-                        # Also keep centroid columns (Centroid_lon, Centroid_lat)
-                        centroid_cols = [
-                            c for c in df_client.columns if c.startswith("Centroid_")
-                        ]
-                        keep_external_columns.extend(centroid_cols)
+            # Track which batches failed for retry
+            batch_map = {i: batch for i, batch in enumerate(batches)}
+            batch_futures = {future: i for future, i in futures.items()}
-                        df_client_clean = df_client[
-                            [c for c in keep_external_columns if c in df_client.columns]
-                        ].drop_duplicates()
+            for future in as_completed(futures):
+                batch_idx = batch_futures[future]
+                try:
+                    batch_idx, df_server, df_client = future.result()
-                        merged = df_server_clean.merge(
-                            df_client_clean,
-                            on=plot_id_column,
-                            how="left",
-                            suffixes=("_ee", "_client"),
+                    # Merge server and client results
+                    if plot_id_column not in df_server.columns:
+                        logger.warning(
+                            f"Batch {batch_idx + 1} (concurrent merge): plotId DROPPED by EE. "
+                            f"Regenerating. Columns from EE: {list(df_server.columns)}"
                         )
-                        results.append(merged)
-                        progress.update()
-                    except Exception as e:
-                        # Batch failed - fail fast with clear guidance
-                        error_msg = str(e)
-                        logger.error(f"Batch {batch_idx} failed: {error_msg[:100]}")
-                        logger.debug(f"Full error: {error_msg}")
-                        # Get original batch for error reporting
-                        original_batch = batch_map[batch_idx]
-                        # Add to batch errors for final reporting
-                        batch_errors.append((batch_idx, original_batch, error_msg))
+                        df_server[plot_id_column] = pd.array(
+                            range(1, len(df_server) + 1), dtype="Int64"
+                        )
+                    else:
+                        df_server[plot_id_column] = pd.to_numeric(
+                            df_server[plot_id_column], errors="coerce"
+                        ).astype("Int64")
+                    # Ensure plotId is Int64 in client data too
+                    if plot_id_column in df_client.columns:
+                        df_client[plot_id_column] = pd.to_numeric(
+                            df_client[plot_id_column], errors="coerce"
+                        ).astype("Int64")
+                    # Keep all EE statistics from server (all columns with _sum and _median suffixes)
+                    # These are the actual EE processing results
+                    df_server_clean = df_server.copy()
+                    # Drop external_id from df_server if it exists (already in df_client)
+                    if "external_id" in df_server_clean.columns:
+                        df_server_clean = df_server_clean.drop(columns=["external_id"])
+                    # Keep external metadata: plot_id, external_id, geometry, geometry type, and centroids from client
+                    # (formatted wrapper handles keep_external_columns parameter)
+                    keep_external_columns = [plot_id_column]
+                    if external_id_column and "external_id" in df_client.columns:
+                        keep_external_columns.append("external_id")
+                    if "geometry" in df_client.columns:
+                        keep_external_columns.append("geometry")
+                    # Keep geometry type column (Geometry_type)
+                    if geometry_type_column in df_client.columns:
+                        keep_external_columns.append(geometry_type_column)
+                    # Also keep centroid columns (Centroid_lon, Centroid_lat)
+                    centroid_cols = [
+                        c for c in df_client.columns if c.startswith("Centroid_")
+                    ]
+                    keep_external_columns.extend(centroid_cols)
+                    df_client_clean = df_client[
+                        [c for c in keep_external_columns if c in df_client.columns]
+                    ]
+                    # Don't drop duplicates - we need one row per feature (one per plot_id)
+                    # Each plot_id should have exactly one row with its metadata
+                    merged = df_server_clean.merge(
+                        df_client_clean,
+                        on=plot_id_column,
+                        how="left",
+                        suffixes=("_ee", "_client"),
+                    )
+                    results.append(merged)
+                    progress.update()
+                except Exception as e:
+                    # Batch failed - fail fast with clear guidance
+                    error_msg = str(e)
+                    logger.error(f"Batch {batch_idx} failed: {error_msg[:100]}")
+                    logger.debug(f"Full error: {error_msg}")
+                    # Get original batch for error reporting
+                    original_batch = batch_map[batch_idx]
+                    # Add to batch errors for final reporting
+                    batch_errors.append((batch_idx, original_batch, error_msg))
+    except (KeyboardInterrupt, SystemExit) as interrupt:
+        logger.warning("Processing interrupted by user")
+        # Update status file with interrupted state
+        progress._write_status_file(status="interrupted")
+        raise interrupt
     finally:
         # Restore logger levels
         fiona_logger.setLevel(old_fiona_level)
@@ -1318,7 +1600,10 @@ def whisp_stats_geojson_to_df_concurrent(
                             try:
                                 batch_idx, df_server, df_client = future.result()
                                 if plot_id_column not in df_server.columns:
-                                    df_server[plot_id_column] = range(len(df_server))
+                                    # Use 1-indexed range to match client-side assignment
+                                    df_server[plot_id_column] = range(
+                                        1, len(df_server) + 1
+                                    )
                                 merged = df_server.merge(
                                     df_client,
                                     on=plot_id_column,
@@ -1362,31 +1647,21 @@ def whisp_stats_geojson_to_df_concurrent(
         else:
             return pd.DataFrame()
-        # Clean up duplicate external_id columns created by merges
-        # Rename external_id_column to standardized 'external_id' for schema validation
-        if external_id_column:
-            # Find all columns related to external_id
-            external_id_variants = [
+        # Clean up duplicate external_id columns created by merges (if any exist)
+        # external_id was already renamed during load, so we just need to handle duplicates
+        if external_id_column and "external_id" in combined.columns:
+            # Find merge duplicates like external_id_x, external_id_y, external_id_ee, external_id_client
+            duplicate_variants = [
                 col
                 for col in combined.columns
-                if external_id_column.lower() in col.lower()
+                if col != "external_id" and col.startswith("external_id_")
             ]
-            if external_id_variants:
-                # Use the base column name if it exists, otherwise use first variant
-                base_col = (
-                    external_id_column
-                    if external_id_column in combined.columns
-                    else external_id_variants[0]
+            if duplicate_variants:
+                logger.debug(
+                    f"Dropping duplicate external_id columns: {duplicate_variants}"
                 )
-                # Rename to standardized 'external_id'
-                if base_col != "external_id":
-                    combined = combined.rename(columns={base_col: "external_id"})
-                # Drop all other variants
-                cols_to_drop = [c for c in external_id_variants if c != base_col]
-                combined = combined.drop(columns=cols_to_drop, errors="ignore")
+                combined = combined.drop(columns=duplicate_variants, errors="ignore")
         # plotId column is already present from batch processing
         # Just ensure it's at position 0
@@ -1469,14 +1744,26 @@ def whisp_stats_geojson_to_df_concurrent(
                         try:
                             batch_idx, df_server, df_client = future.result()
                             if plot_id_column not in df_server.columns:
-                                df_server[plot_id_column] = range(len(df_server))
-                            # Drop external_id_column from df_client if it exists (already in df_server)
-                            if (
-                                external_id_column
-                                and external_id_column in df_client.columns
-                            ):
-                                df_client = df_client.drop(columns=[external_id_column])
+                                logger.warning(
+                                    f"Batch {batch_idx + 1} (retry): plotId DROPPED by EE. "
+                                    f"Regenerating. Columns from EE: {list(df_server.columns)}"
+                                )
+                                # Use 1-indexed range to match client-side assignment
+                                df_server[plot_id_column] = range(1, len(df_server) + 1)
+                            # Ensure plotId is integer type (EE may return as string)
+                            if plot_id_column in df_server.columns:
+                                df_server[plot_id_column] = pd.to_numeric(
+                                    df_server[plot_id_column], errors="coerce"
+                                ).astype("Int64")
+                            if plot_id_column in df_client.columns:
+                                df_client[plot_id_column] = pd.to_numeric(
+                                    df_client[plot_id_column], errors="coerce"
+                                ).astype("Int64")
+                            # Drop external_id from df_server if it exists (already in df_client)
+                            if "external_id" in df_server.columns:
+                                df_server = df_server.drop(columns=["external_id"])
                             merged = df_server.merge(
                                 df_client,
@@ -1498,30 +1785,22 @@ def whisp_stats_geojson_to_df_concurrent(
                     # Ensure all column names are strings (fixes pandas .str accessor issues later)
                     combined.columns = combined.columns.astype(str)
-                    # Clean up duplicate external_id columns created by merges
-                    if external_id_column:
-                        external_id_variants = [
+                    # Clean up duplicate external_id columns created by merges (if any exist)
+                    # external_id was already renamed during load, so we just need to handle duplicates
+                    if external_id_column and "external_id" in combined.columns:
+                        # Find merge duplicates like external_id_x, external_id_y, external_id_ee, external_id_client
+                        duplicate_variants = [
                             col
                             for col in combined.columns
-                            if external_id_column.lower() in col.lower()
+                            if col != "external_id" and col.startswith("external_id_")
                         ]
-                        if external_id_variants:
-                            base_col = external_id_column
-                            if (
-                                base_col not in combined.columns
-                                and external_id_variants
-                            ):
-                                base_col = external_id_variants[0]
-                                combined = combined.rename(
-                                    columns={base_col: "external_id"}
-                                )
-                            cols_to_drop = [
-                                c for c in external_id_variants if c != base_col
-                            ]
+                        if duplicate_variants:
+                            logger.debug(
+                                f"Dropping duplicate external_id columns: {duplicate_variants}"
+                            )
                             combined = combined.drop(
-                                columns=cols_to_drop, errors="ignore"
+                                columns=duplicate_variants, errors="ignore"
                             )
                     # plotId column is already present, just ensure it's at position 0
@@ -1565,7 +1844,15 @@ def whisp_stats_geojson_to_df_concurrent(
                 )
                 raise retry_e
-        logger.info(f"Processed {len(formatted):,} features successfully")
+        # Ensure plot_id is present (should already be there from batch processing)
+        if plot_id_column not in formatted.columns:
+            logger.warning(f"{plot_id_column} column missing, regenerating...")
+            formatted.insert(0, plot_id_column, range(1, len(formatted) + 1))
+        # Sort by plot_id to ensure consistent output order
+        formatted = formatted.sort_values(by=plot_id_column).reset_index(drop=True)
+        logger.info(f"Processing complete: {len(formatted):,} features")
         return formatted
     else:
         logger.error(" No results produced")
@@ -1580,7 +1867,6 @@ def whisp_stats_geojson_to_df_concurrent(
 def whisp_stats_geojson_to_df_sequential(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -1605,8 +1891,6 @@ def whisp_stats_geojson_to_df_sequential(
         Path to input GeoJSON
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -1642,10 +1926,32 @@ def whisp_stats_geojson_to_df_sequential(
     # Validate endpoint
     validate_ee_endpoint("standard", raise_error=True)
-    # Load GeoJSON with output suppressed
-    gdf = _load_geojson_silently(input_geojson_filepath)
+    # Load GeoJSON with output suppressed (external_id_column renamed to 'external_id' if provided)
+    gdf = _load_and_prepare_geojson(
+        input_geojson_filepath, external_id_column=external_id_column
+    )
     logger.info(f"Loaded {len(gdf):,} features")
+    # Validate external_id if provided (lightweight client-side check)
+    # Note: external_id_column already renamed to 'external_id' during load
+    if external_id_column and "external_id" not in gdf.columns:
+        # Exclude geometry column from available columns list
+        available_cols = [c for c in gdf.columns if c != gdf.geometry.name]
+        raise ValueError(
+            f"Column '{external_id_column}' not found in GeoJSON properties. "
+            f"Available columns: {available_cols}"
+        )
+    # Check completeness of external_id (warn if nulls exist)
+    if external_id_column and "external_id" in gdf.columns:
+        null_count = gdf["external_id"].isna().sum()
+        if null_count > 0:
+            null_pct = (null_count / len(gdf)) * 100
+            logger.warning(
+                f"Column 'external_id' (from '{external_id_column}') has {null_count:,} null values ({null_pct:.1f}% of {len(gdf):,} features). "
+                f"These features may have missing external IDs in output."
+            )
     # Clean geometries (preserve both null and invalid geometries by default)
     gdf = clean_geodataframe(
         gdf, remove_nulls=False, repair_geometries=False, logger=logger
@@ -1654,18 +1960,22 @@ def whisp_stats_geojson_to_df_sequential(
     # Add stable plotIds for merging (starting from 1, not 0)
     gdf[plot_id_column] = range(1, len(gdf) + 1)
-    # Add stable row IDs
-    row_id_col = "__row_id__"
-    gdf[row_id_col] = range(len(gdf))
     # Strip unnecessary properties before sending to EE
-    # Keep only: geometry, plot_id_column, and external_id_column
+    # Keep only: geometry, plot_id_column, and external_id
     # This prevents duplication of GeoJSON properties in EE results
-    keep_cols = ["geometry", plot_id_column, row_id_col]
-    if external_id_column and external_id_column in gdf.columns:
-        keep_cols.append(external_id_column)
+    keep_cols = ["geometry", plot_id_column]
+    if (
+        external_id_column and "external_id" in gdf.columns
+    ):  # Already renamed during load
+        keep_cols.append("external_id")
     gdf_for_ee = gdf[keep_cols].copy()
+    # CRITICAL: Convert external_id to string to prevent EE from confusing it with integer plotId
+    if external_id_column and "external_id" in gdf_for_ee.columns:
+        gdf_for_ee["external_id"] = gdf_for_ee["external_id"].astype(str)
+        logger.debug(f"Converted external_id column to string type")
     logger.debug(f"Stripped GeoJSON to essential columns: {keep_cols}")
     # Create image if not provided
@@ -1687,16 +1997,27 @@ def whisp_stats_geojson_to_df_sequential(
                     national_codes=national_codes, validate_bands=True
                 )
+    # Drop external_id before sending to EE to enable caching
+    # (external_id is preserved separately in gdf for client-side merging)
+    gdf_for_ee_clean = gdf_for_ee.copy()
+    if "external_id" in gdf_for_ee_clean.columns:
+        gdf_for_ee_clean = gdf_for_ee_clean.drop(columns=["external_id"])
+        logger.debug("Dropped external_id from data sent to EE (enables caching)")
     # Convert to EE (suppress print statements from convert_geojson_to_ee)
     logger.debug("Converting to EE FeatureCollection...")
     with redirect_stdout(io.StringIO()):
-        fc = convert_geojson_to_ee(gdf_for_ee, enforce_wgs84=True, strip_z_coords=True)
+        fc = convert_geojson_to_ee(
+            gdf_for_ee_clean, enforce_wgs84=True, strip_z_coords=True
+        )
     # Create reducer
     reducer = ee.Reducer.sum().combine(ee.Reducer.median(), sharedInputs=True)
     # Process server-side with error handling for bad bands
-    logger.info("Processing with Earth Engine...")
+    logger.info(
+        f"Processing {len(gdf):,} features with Earth Engine (sequential mode)..."
+    )
     try:
         results_fc = whisp_image.reduceRegions(collection=fc, reducer=reducer, scale=10)
         df_server = convert_ee_to_df(results_fc)
@@ -1728,11 +2049,13 @@ def whisp_stats_geojson_to_df_sequential(
         else:
             raise
-    logger.debug("Server-side processing complete")
+    logger.info("Server-side processing complete")
-    # Add row_id if missing
-    if row_id_col not in df_server.columns:
-        df_server[row_id_col] = range(len(df_server))
+    # Ensure plotId is Int64 type for fast merges
+    if plot_id_column in df_server.columns:
+        df_server[plot_id_column] = pd.to_numeric(
+            df_server[plot_id_column], errors="coerce"
+        ).astype("Int64")
     # Add client-side metadata if requested
     if add_metadata_client_side:
@@ -1743,21 +2066,23 @@ def whisp_stats_geojson_to_df_sequential(
             return_attributes_only=True,
         )
-        # Drop external_id_column from df_client if it exists (already in df_server)
-        if external_id_column and external_id_column in df_client.columns:
-            df_client = df_client.drop(columns=[external_id_column])
+        # Ensure plotId is Int64 type for fast merges
+        if plot_id_column in df_client.columns:
+            df_client[plot_id_column] = pd.to_numeric(
+                df_client[plot_id_column], errors="coerce"
+            ).astype("Int64")
-        # Merge
+        # Drop external_id from df_server if it exists (keep from df_client - more reliable)
+        if "external_id" in df_server.columns:
+            df_server = df_server.drop(columns=["external_id"])
+        # Merge on plotId (same strategy as concurrent mode)
         result = df_server.merge(
-            df_client, on=row_id_col, how="left", suffixes=("", "_client")
+            df_client, on=plot_id_column, how="left", suffixes=("", "_client")
         )
     else:
         result = df_server
-    # Remove internal __row_id__ column if present
-    if row_id_col in result.columns:
-        result = result.drop(columns=[row_id_col])
     # Format the output
     # Add admin context (Country, ProducerCountry, Admin_Level_1) from admin_code
     # MUST be done BEFORE formatting (which removes _median columns)
@@ -1782,27 +2107,14 @@ def whisp_stats_geojson_to_df_sequential(
         convert_water_flag=True,
     )
-    logger.info(f"Processed {len(formatted):,} features")
-    # Consolidate external_id_column to standardized 'external_id'
-    if external_id_column:
-        variants = [
-            col
-            for col in formatted.columns
-            if external_id_column.lower() in col.lower()
-        ]
-        if variants:
-            base_col = (
-                external_id_column
-                if external_id_column in formatted.columns
-                else variants[0]
-            )
-            if base_col != "external_id":
-                formatted = formatted.rename(columns={base_col: "external_id"})
-            # Drop other variants
-            formatted = formatted.drop(
-                columns=[c for c in variants if c != base_col], errors="ignore"
-            )
+    # Ensure plot_id exists and sort by it
+    if plot_id_column not in formatted.columns:
+        formatted.insert(0, plot_id_column, range(1, len(formatted) + 1))
+    formatted = formatted.sort_values(by=plot_id_column).reset_index(drop=True)
+    logger.info(f"Processing complete: {len(formatted):,} features")
+    # external_id_column already renamed to 'external_id' during load - no action needed here
     return formatted
@@ -1815,7 +2127,6 @@ def whisp_stats_geojson_to_df_sequential(
 def whisp_formatted_stats_geojson_to_df_concurrent(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -1833,6 +2144,7 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON concurrently with automatic formatting and validation.
@@ -1848,8 +2160,6 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry column from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -1910,14 +2220,13 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
     gdf_original_geoms = None
     if geometry_audit_trail:
         logger.debug("Pre-loading GeoJSON for geometry audit trail...")
-        gdf_original_geoms = _load_geojson_silently(input_geojson_filepath)
+        gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
     # Step 1: Get raw stats
     logger.debug("Step 1/2: Extracting statistics (concurrent)...")
     df_raw = whisp_stats_geojson_to_df_concurrent(
         input_geojson_filepath=input_geojson_filepath,
         external_id_column=external_id_column,
-        remove_geom=remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,
@@ -1928,6 +2237,7 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         max_retries=max_retries,
         add_metadata_server=add_metadata_server,
         logger=logger,
+        status_file=status_file,
     )
     # Step 2: Format the output
@@ -1979,7 +2289,7 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
             # Use pre-loaded original geometries (loaded at wrapper start to avoid reloading)
             if gdf_original_geoms is None:
                 logger.warning("Original geometries not pre-loaded, loading now...")
-                gdf_original_geoms = _load_geojson_silently(input_geojson_filepath)
+                gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
             # Use plotId from df_validated to maintain mapping
             df_original_geom = pd.DataFrame(
@@ -2030,7 +2340,6 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
 def whisp_formatted_stats_geojson_to_df_sequential(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -2044,6 +2353,7 @@ def whisp_formatted_stats_geojson_to_df_sequential(
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON sequentially with automatic formatting and validation.
@@ -2059,8 +2369,6 @@ def whisp_formatted_stats_geojson_to_df_sequential(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -2113,14 +2421,13 @@ def whisp_formatted_stats_geojson_to_df_sequential(
     gdf_original_geoms = None
     if geometry_audit_trail:
         logger.debug("Pre-loading GeoJSON for geometry audit trail...")
-        gdf_original_geoms = _load_geojson_silently(input_geojson_filepath)
+        gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
     # Step 1: Get raw stats
     logger.debug("Step 1/2: Extracting statistics (sequential)...")
     df_raw = whisp_stats_geojson_to_df_sequential(
         input_geojson_filepath=input_geojson_filepath,
         external_id_column=external_id_column,
-        remove_geom=remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,
@@ -2178,7 +2485,7 @@ def whisp_formatted_stats_geojson_to_df_sequential(
             # Use pre-loaded original geometries (loaded at wrapper start to avoid reloading)
             if gdf_original_geoms is None:
                 logger.warning("Original geometries not pre-loaded, loading now...")
-                gdf_original_geoms = _load_geojson_silently(input_geojson_filepath)
+                gdf_original_geoms = _load_and_prepare_geojson(input_geojson_filepath)
             # Use plotId from df_validated to maintain mapping
             df_original_geom = pd.DataFrame(
@@ -2233,7 +2540,6 @@ def whisp_formatted_stats_geojson_to_df_sequential(
 def whisp_formatted_stats_geojson_to_df_fast(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -2252,6 +2558,7 @@ def whisp_formatted_stats_geojson_to_df_fast(
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON to Whisp statistics with optimized fast processing.
@@ -2267,8 +2574,6 @@ def whisp_formatted_stats_geojson_to_df_fast(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry column from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -2339,7 +2644,6 @@ def whisp_formatted_stats_geojson_to_df_fast(
         return whisp_formatted_stats_geojson_to_df_concurrent(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
@@ -2356,13 +2660,13 @@ def whisp_formatted_stats_geojson_to_df_fast(
             water_flag_threshold=water_flag_threshold,
             sort_column=sort_column,
             geometry_audit_trail=geometry_audit_trail,
+            status_file=status_file,
         )
     else:  # sequential
         logger.debug("Routing to sequential processing...")
         return whisp_formatted_stats_geojson_to_df_sequential(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
@@ -2374,4 +2678,5 @@ def whisp_formatted_stats_geojson_to_df_fast(
             water_flag_threshold=water_flag_threshold,
             sort_column=sort_column,
             geometry_audit_trail=geometry_audit_trail,
+            status_file=status_file,
         )

openforis-whisp 3.0.0a3__py3-none-any.whl → 3.0.0a5__py3-none-any.whl

openforis-whisp 3.0.0a3__py3-none-any.whl → 3.0.0a5__py3-none-any.whl