openforis-whisp 3.0.0a3__py3-none-any.whl → 3.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +1 -1
- openforis_whisp/advanced_stats.py +318 -103
- openforis_whisp/data_checks.py +80 -28
- openforis_whisp/datasets.py +14 -0
- openforis_whisp/logger.py +15 -3
- openforis_whisp/parameters/lookup_gee_datasets.csv +3 -2
- openforis_whisp/pd_schemas.py +7 -2
- openforis_whisp/reformat.py +6 -1
- openforis_whisp/stats.py +10 -11
- openforis_whisp/utils.py +19 -0
- {openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a4.dist-info}/METADATA +1 -1
- openforis_whisp-3.0.0a4.dist-info/RECORD +20 -0
- openforis_whisp-3.0.0a3.dist-info/RECORD +0 -20
- {openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a4.dist-info}/LICENSE +0 -0
- {openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a4.dist-info}/WHEEL +0 -0
openforis_whisp/advanced_stats.py
CHANGED
@@ -36,6 +36,24 @@ from typing import Optional, List, Dict, Any, Tuple, Union
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import tempfile
 
+# Configure the "whisp" logger with auto-flush handler for Colab visibility
+_whisp_logger = logging.getLogger("whisp")
+if not _whisp_logger.handlers:
+    _handler = logging.StreamHandler(sys.stdout)
+    _handler.setLevel(logging.DEBUG)
+    _handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
+    # Override emit to force flush after each message for Colab
+    _original_emit = _handler.emit
+
+    def _emit_with_flush(record):
+        _original_emit(record)
+        sys.stdout.flush()
+
+    _handler.emit = _emit_with_flush
+    _whisp_logger.addHandler(_handler)
+    _whisp_logger.setLevel(logging.INFO)
+    _whisp_logger.propagate = False  # Don't propagate to root to avoid duplicates
+
 # ============================================================================
 # STDOUT/STDERR SUPPRESSION CONTEXT MANAGER (for C-level output)
 # ============================================================================
@@ -445,6 +463,16 @@ def join_admin_codes(
         columns=["admin_code_for_join", "gaul1_code"], errors="ignore"
     )
 
+    # Fill NaN values with "Unknown" and "not found" for features outside admin boundaries
+    # (e.g., points in the ocean or international waters)
+    df_joined[iso3_country_column] = df_joined[iso3_country_column].fillna(
+        "Unknown"
+    )
+    df_joined[iso2_country_column] = df_joined[iso2_country_column].fillna(
+        "not found"
+    )
+    df_joined[admin_1_column] = df_joined[admin_1_column].fillna("Unknown")
+
     logger.debug(
         f"Admin codes joined: {iso3_country_column}, {iso2_country_column}, {admin_1_column}"
     )
@@ -461,10 +489,16 @@ class ProgressTracker:
 
     Shows progress at adaptive milestones (more frequent for small datasets,
     less frequent for large datasets) with estimated time remaining based on
-    processing speed.
+    processing speed. Includes time-based heartbeat to prevent long silences.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        total: int,
+        logger: logging.Logger = None,
+        heartbeat_interval: int = 180,
+        status_file: str = None,
+    ):
         """
         Initialize progress tracker.
 
@@ -474,26 +508,147 @@ class ProgressTracker:
             Total number of items to process
         logger : logging.Logger, optional
             Logger for output
+        heartbeat_interval : int, optional
+            Seconds between heartbeat messages (default: 180 = 3 minutes)
+        status_file : str, optional
+            Path to JSON status file for API/web app consumption.
+            Checkpoints auto-save to same directory as status_file.
         """
         self.total = total
         self.completed = 0
         self.lock = threading.Lock()
         self.logger = logger or logging.getLogger("whisp")
+        self.heartbeat_interval = heartbeat_interval
+
+        # Handle status_file: if directory passed, auto-generate filename
+        if status_file:
+            import os
+
+            if os.path.isdir(status_file):
+                self.status_file = os.path.join(
+                    status_file, "whisp_processing_status.json"
+                )
+            else:
+                # Validate that parent directory exists
+                parent_dir = os.path.dirname(status_file)
+                if parent_dir and not os.path.isdir(parent_dir):
+                    self.logger.warning(
+                        f"Status file directory does not exist: {parent_dir}"
+                    )
+                    self.status_file = None
+                else:
+                    self.status_file = status_file
+        else:
+            self.status_file = None
 
         # Adaptive milestones based on dataset size
         # Small datasets (< 50): show every 25% (not too spammy)
         # Medium (50-500): show every 20%
-        # Large (500
+        # Large (500-1000): show every 10%
+        # Very large (1000+): show every 5% (cleaner for long jobs)
         if total < 50:
             self.milestones = {25, 50, 75, 100}
         elif total < 500:
             self.milestones = {20, 40, 60, 80, 100}
-
+        elif total < 1000:
             self.milestones = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}
+        else:
+            self.milestones = {
+                5,
+                10,
+                15,
+                20,
+                25,
+                30,
+                35,
+                40,
+                45,
+                50,
+                55,
+                60,
+                65,
+                70,
+                75,
+                80,
+                85,
+                90,
+                95,
+                100,
+            }
 
         self.shown_milestones = set()
         self.start_time = time.time()
         self.last_update_time = self.start_time
+        self.heartbeat_stop = threading.Event()
+        self.heartbeat_thread = None
+
+    def _write_status_file(self, status: str = "processing") -> None:
+        """Write current progress to JSON status file using atomic write."""
+        if not self.status_file:
+            return
+
+        try:
+            import json
+            import os
+
+            elapsed = time.time() - self.start_time
+            percent = (self.completed / self.total * 100) if self.total > 0 else 0
+            rate = self.completed / elapsed if elapsed > 0 else 0
+            eta = (
+                (self.total - self.completed) / rate * 1.15
+                if rate > 0 and percent >= 5
+                else None
+            )
+
+            # Write to temp file then atomic rename to prevent partial reads
+            from datetime import datetime
+
+            temp_file = self.status_file + ".tmp"
+            with open(temp_file, "w") as f:
+                json.dump(
+                    {
+                        "status": status,
+                        "progress": f"{self.completed}/{self.total}",
+                        "percent": round(percent, 1),
+                        "elapsed_sec": round(elapsed),
+                        "eta_sec": round(eta) if eta else None,
+                        "updated_at": datetime.now().isoformat(),
+                    },
+                    f,
+                )
+            os.replace(temp_file, self.status_file)
+        except Exception:
+            pass
+
+    def start_heartbeat(self) -> None:
+        """Start background heartbeat thread for time-based progress updates."""
+        if self.heartbeat_thread is None or not self.heartbeat_thread.is_alive():
+            self.heartbeat_stop.clear()
+            self.heartbeat_thread = threading.Thread(
+                target=self._heartbeat_loop, daemon=True
+            )
+            self.heartbeat_thread.start()
+            # Write initial status
+            self._write_status_file(status="processing")
+
+    def _heartbeat_loop(self) -> None:
+        """Background loop that logs progress at time intervals."""
+        while not self.heartbeat_stop.wait(self.heartbeat_interval):
+            with self.lock:
+                # Only log if we haven't shown a milestone recently
+                time_since_update = time.time() - self.last_update_time
+                if (
+                    time_since_update >= self.heartbeat_interval
+                    and self.completed < self.total
+                ):
+                    elapsed = time.time() - self.start_time
+                    percent = int((self.completed / self.total) * 100)
+                    elapsed_str = self._format_time(elapsed)
+                    self.logger.info(
+                        f"[Processing] {self.completed:,}/{self.total:,} batches ({percent}%) | "
+                        f"Elapsed: {elapsed_str}"
+                    )
+                    self.last_update_time = time.time()
 
     def update(self, n: int = 1) -> None:
         """
@@ -508,7 +663,7 @@ class ProgressTracker:
             self.completed += n
             percent = int((self.completed / self.total) * 100)
 
-            # Show milestone messages (
+            # Show milestone messages (5%, 10%, 15%... for large datasets)
             for milestone in sorted(self.milestones):
                 if percent >= milestone and milestone not in self.shown_milestones:
                     self.shown_milestones.add(milestone)
@@ -517,20 +672,36 @@ class ProgressTracker:
                     elapsed = time.time() - self.start_time
                     rate = self.completed / elapsed if elapsed > 0 else 0
                     remaining_items = self.total - self.completed
-
+
+                    # Calculate ETA with padding for overhead (loading, joins, etc.)
+                    # Don't show ETA until we have some samples (at least 5% complete)
+                    if rate > 0 and self.completed >= max(5, self.total * 0.05):
+                        eta_seconds = (
+                            remaining_items / rate
+                        ) * 1.15  # Add 15% padding for overhead
+                    else:
+                        eta_seconds = 0
 
                     # Format time strings
-                    eta_str =
+                    eta_str = (
+                        self._format_time(eta_seconds)
+                        if eta_seconds > 0
+                        else "calculating..."
+                    )
                     elapsed_str = self._format_time(elapsed)
 
                     # Build progress message
-                    msg = f"Progress: {self.completed}/{self.total} ({percent}%)"
+                    msg = f"Progress: {self.completed:,}/{self.total:,} batches ({percent}%)"
                     if percent < 100:
                         msg += f" | Elapsed: {elapsed_str} | ETA: {eta_str}"
                     else:
                         msg += f" | Total time: {elapsed_str}"
 
                     self.logger.info(msg)
+                    self.last_update_time = time.time()
+
+            # Update status file for API consumption
+            self._write_status_file()
 
     @staticmethod
     def _format_time(seconds: float) -> str:
@@ -544,14 +715,21 @@ class ProgressTracker:
             hours = seconds / 3600
             return f"{hours:.1f}h"
 
-    def finish(self) -> None:
-        """
+    def finish(self, output_file: str = None) -> None:
+        """Stop heartbeat and log completion."""
+        # Stop heartbeat thread
+        self.heartbeat_stop.set()
+        if self.heartbeat_thread and self.heartbeat_thread.is_alive():
+            self.heartbeat_thread.join(timeout=1)
+
         with self.lock:
             total_time = time.time() - self.start_time
             time_str = self._format_time(total_time)
-            self.
-
-
+            msg = f"Processing complete: {self.completed:,}/{self.total:,} batches in {time_str}"
+            self.logger.info(msg)
+
+            # Write final status
+            self._write_status_file(status="completed")
 
 
 # ============================================================================
@@ -983,7 +1161,6 @@ def process_ee_batch(
 def whisp_stats_geojson_to_df_concurrent(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -996,6 +1173,7 @@ def whisp_stats_geojson_to_df_concurrent(
     logger: logging.Logger = None,
     # Format parameters (auto-detect from config if not provided)
     decimal_places: int = None,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON concurrently to compute Whisp statistics with automatic formatting.
@@ -1010,8 +1188,6 @@ def whisp_stats_geojson_to_df_concurrent(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry column from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -1059,6 +1235,25 @@ def whisp_stats_geojson_to_df_concurrent(
     gdf = _load_geojson_silently(input_geojson_filepath)
     logger.info(f"Loaded {len(gdf):,} features")
 
+    # Validate external_id_column if provided (lightweight client-side check)
+    if external_id_column and external_id_column not in gdf.columns:
+        # Exclude geometry column from available columns list
+        available_cols = [c for c in gdf.columns if c != gdf.geometry.name]
+        raise ValueError(
+            f"Column '{external_id_column}' not found in GeoJSON properties. "
+            f"Available columns: {available_cols}"
+        )
+
+    # Check completeness of external_id_column (warn if nulls exist)
+    if external_id_column and external_id_column in gdf.columns:
+        null_count = gdf[external_id_column].isna().sum()
+        if null_count > 0:
+            null_pct = (null_count / len(gdf)) * 100
+            logger.warning(
+                f"Column '{external_id_column}' has {null_count:,} null values ({null_pct:.1f}% of {len(gdf):,} features). "
+                f"These features may have missing external IDs in output."
+            )
+
     if validate_geometries:
         gdf = clean_geodataframe(
             gdf, remove_nulls=False, repair_geometries=False, logger=logger
@@ -1101,13 +1296,18 @@ def whisp_stats_geojson_to_df_concurrent(
 
     # Batch the data
     batches = batch_geodataframe(gdf_for_ee, batch_size)
-    logger.info(
+    logger.info(
+        f"Processing {len(gdf_for_ee):,} features in {len(batches)} batches (concurrent mode)..."
+    )
 
     # Setup semaphore for EE concurrency control
     ee_semaphore = threading.BoundedSemaphore(max_concurrent)
 
-    # Progress tracker
-    progress = ProgressTracker(
+    # Progress tracker with heartbeat for long-running jobs
+    progress = ProgressTracker(
+        len(batches), logger=logger, heartbeat_interval=180, status_file=status_file
+    )
+    progress.start_heartbeat()
 
     results = []
 
@@ -1148,73 +1348,77 @@ def whisp_stats_geojson_to_df_concurrent(
     pyogrio_logger.setLevel(logging.CRITICAL)
 
     try:
-
-
-
-
-
-
-
-        # Track which batches failed for retry
-        batch_map = {i: batch for i, batch in enumerate(batches)}
-        batch_futures = {future: i for future, i in futures.items()}
-
-        for future in as_completed(futures):
-            batch_idx = batch_futures[future]
-            try:
-                batch_idx, df_server, df_client = future.result()
-
-                # Merge server and client results
-                if plot_id_column not in df_server.columns:
-                    df_server[plot_id_column] = range(len(df_server))
-
-                # Keep all EE statistics from server (all columns with _sum and _median suffixes)
-                # These are the actual EE processing results
-                df_server_clean = df_server.copy()
-
-                # Keep external metadata: plot_id, external_id, geometry, geometry type, and centroids from client
-                # (formatted wrapper handles keep_external_columns parameter)
-                keep_external_columns = [plot_id_column]
-                if (
-                    external_id_column
-                    and external_id_column in df_client.columns
-                ):
-                    keep_external_columns.append(external_id_column)
-                if "geometry" in df_client.columns:
-                    keep_external_columns.append("geometry")
-                # Keep geometry type column (Geometry_type)
-                if geometry_type_column in df_client.columns:
-                    keep_external_columns.append(geometry_type_column)
-                # Also keep centroid columns (Centroid_lon, Centroid_lat)
-                centroid_cols = [
-                    c for c in df_client.columns if c.startswith("Centroid_")
-                ]
-                keep_external_columns.extend(centroid_cols)
-
-                df_client_clean = df_client[
-                    [c for c in keep_external_columns if c in df_client.columns]
-                ].drop_duplicates()
-
-                merged = df_server_clean.merge(
-                    df_client_clean,
-                    on=plot_id_column,
-                    how="left",
-                    suffixes=("_ee", "_client"),
-                )
-                results.append(merged)
-                progress.update()
-
-            except Exception as e:
-                # Batch failed - fail fast with clear guidance
-                error_msg = str(e)
-                logger.error(f"Batch {batch_idx} failed: {error_msg[:100]}")
-                logger.debug(f"Full error: {error_msg}")
+        # Don't suppress stdout here - we want progress messages to show in Colab
+        with ThreadPoolExecutor(max_workers=pool_workers) as executor:
+            futures = {
+                executor.submit(process_batch, i, batch): i
+                for i, batch in enumerate(batches)
+            }
 
-
-
+            # Track which batches failed for retry
+            batch_map = {i: batch for i, batch in enumerate(batches)}
+            batch_futures = {future: i for future, i in futures.items()}
 
-
-
+            for future in as_completed(futures):
+                batch_idx = batch_futures[future]
+                try:
+                    batch_idx, df_server, df_client = future.result()
+
+                    # Merge server and client results
+                    if plot_id_column not in df_server.columns:
+                        df_server[plot_id_column] = range(len(df_server))
+
+                    # Keep all EE statistics from server (all columns with _sum and _median suffixes)
+                    # These are the actual EE processing results
+                    df_server_clean = df_server.copy()
+
+                    # Keep external metadata: plot_id, external_id, geometry, geometry type, and centroids from client
+                    # (formatted wrapper handles keep_external_columns parameter)
+                    keep_external_columns = [plot_id_column]
+                    if external_id_column and external_id_column in df_client.columns:
+                        keep_external_columns.append(external_id_column)
+                    if "geometry" in df_client.columns:
+                        keep_external_columns.append("geometry")
+                    # Keep geometry type column (Geometry_type)
+                    if geometry_type_column in df_client.columns:
+                        keep_external_columns.append(geometry_type_column)
+                    # Also keep centroid columns (Centroid_lon, Centroid_lat)
+                    centroid_cols = [
+                        c for c in df_client.columns if c.startswith("Centroid_")
+                    ]
+                    keep_external_columns.extend(centroid_cols)
+
+                    df_client_clean = df_client[
+                        [c for c in keep_external_columns if c in df_client.columns]
+                    ]
+                    # Don't drop duplicates - we need one row per feature (one per plot_id)
+                    # Each plot_id should have exactly one row with its metadata
+
+                    merged = df_server_clean.merge(
+                        df_client_clean,
+                        on=plot_id_column,
+                        how="left",
+                        suffixes=("_ee", "_client"),
+                    )
+                    results.append(merged)
+                    progress.update()
+
+                except Exception as e:
+                    # Batch failed - fail fast with clear guidance
+                    error_msg = str(e)
+                    logger.error(f"Batch {batch_idx} failed: {error_msg[:100]}")
+                    logger.debug(f"Full error: {error_msg}")
+
+                    # Get original batch for error reporting
+                    original_batch = batch_map[batch_idx]
+
+                    # Add to batch errors for final reporting
+                    batch_errors.append((batch_idx, original_batch, error_msg))
+    except (KeyboardInterrupt, SystemExit) as interrupt:
+        logger.warning("Processing interrupted by user")
+        # Update status file with interrupted state
+        progress._write_status_file(status="interrupted")
+        raise interrupt
     finally:
         # Restore logger levels
         fiona_logger.setLevel(old_fiona_level)
@@ -1565,7 +1769,7 @@ def whisp_stats_geojson_to_df_concurrent(
         )
         raise retry_e
 
-        logger.info(f"
+        logger.info(f"Processing complete: {len(formatted):,} features")
         return formatted
     else:
         logger.error(" No results produced")
@@ -1580,7 +1784,6 @@ def whisp_stats_geojson_to_df_concurrent(
 def whisp_stats_geojson_to_df_sequential(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -1605,8 +1808,6 @@ def whisp_stats_geojson_to_df_sequential(
         Path to input GeoJSON
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -1646,6 +1847,25 @@ def whisp_stats_geojson_to_df_sequential(
     gdf = _load_geojson_silently(input_geojson_filepath)
     logger.info(f"Loaded {len(gdf):,} features")
 
+    # Validate external_id_column if provided (lightweight client-side check)
+    if external_id_column and external_id_column not in gdf.columns:
+        # Exclude geometry column from available columns list
+        available_cols = [c for c in gdf.columns if c != gdf.geometry.name]
+        raise ValueError(
+            f"Column '{external_id_column}' not found in GeoJSON properties. "
+            f"Available columns: {available_cols}"
+        )
+
+    # Check completeness of external_id_column (warn if nulls exist)
+    if external_id_column and external_id_column in gdf.columns:
+        null_count = gdf[external_id_column].isna().sum()
+        if null_count > 0:
+            null_pct = (null_count / len(gdf)) * 100
+            logger.warning(
+                f"Column '{external_id_column}' has {null_count:,} null values ({null_pct:.1f}% of {len(gdf):,} features). "
+                f"These features may have missing external IDs in output."
+            )
+
     # Clean geometries (preserve both null and invalid geometries by default)
     gdf = clean_geodataframe(
         gdf, remove_nulls=False, repair_geometries=False, logger=logger
@@ -1696,7 +1916,9 @@ def whisp_stats_geojson_to_df_sequential(
     reducer = ee.Reducer.sum().combine(ee.Reducer.median(), sharedInputs=True)
 
     # Process server-side with error handling for bad bands
-    logger.info(
+    logger.info(
+        f"Processing {len(gdf):,} features with Earth Engine (sequential mode)..."
+    )
     try:
         results_fc = whisp_image.reduceRegions(collection=fc, reducer=reducer, scale=10)
         df_server = convert_ee_to_df(results_fc)
@@ -1782,7 +2004,7 @@ def whisp_stats_geojson_to_df_sequential(
         convert_water_flag=True,
     )
 
-    logger.info(f"
+    logger.info(f"Processing complete: {len(formatted):,} features")
 
     # Consolidate external_id_column to standardized 'external_id'
     if external_id_column:
@@ -1815,7 +2037,6 @@ def whisp_stats_geojson_to_df_sequential(
 def whisp_formatted_stats_geojson_to_df_concurrent(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -1833,6 +2054,7 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON concurrently with automatic formatting and validation.
@@ -1848,8 +2070,6 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry column from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -1917,7 +2137,6 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
     df_raw = whisp_stats_geojson_to_df_concurrent(
         input_geojson_filepath=input_geojson_filepath,
         external_id_column=external_id_column,
-        remove_geom=remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,
@@ -1928,6 +2147,7 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         max_retries=max_retries,
         add_metadata_server=add_metadata_server,
         logger=logger,
+        status_file=status_file,
     )
 
     # Step 2: Format the output
@@ -2030,7 +2250,6 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
 def whisp_formatted_stats_geojson_to_df_sequential(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -2044,6 +2263,7 @@ def whisp_formatted_stats_geojson_to_df_sequential(
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON sequentially with automatic formatting and validation.
@@ -2059,8 +2279,6 @@ def whisp_formatted_stats_geojson_to_df_sequential(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -2120,7 +2338,6 @@ def whisp_formatted_stats_geojson_to_df_sequential(
     df_raw = whisp_stats_geojson_to_df_sequential(
         input_geojson_filepath=input_geojson_filepath,
         external_id_column=external_id_column,
-        remove_geom=remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,
@@ -2233,7 +2450,6 @@ def whisp_formatted_stats_geojson_to_df_sequential(
 def whisp_formatted_stats_geojson_to_df_fast(
     input_geojson_filepath: str,
     external_id_column: str = None,
-    remove_geom: bool = False,
     national_codes: List[str] = None,
     unit_type: str = "ha",
     whisp_image: ee.Image = None,
@@ -2252,6 +2468,7 @@ def whisp_formatted_stats_geojson_to_df_fast(
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Process GeoJSON to Whisp statistics with optimized fast processing.
@@ -2267,8 +2484,6 @@ def whisp_formatted_stats_geojson_to_df_fast(
         Path to input GeoJSON file
     external_id_column : str, optional
         Column name for external IDs
-    remove_geom : bool
-        Remove geometry column from output
     national_codes : List[str], optional
         ISO2 codes for national datasets
     unit_type : str
@@ -2339,7 +2554,6 @@ def whisp_formatted_stats_geojson_to_df_fast(
         return whisp_formatted_stats_geojson_to_df_concurrent(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
@@ -2356,13 +2570,13 @@ def whisp_formatted_stats_geojson_to_df_fast(
             water_flag_threshold=water_flag_threshold,
             sort_column=sort_column,
             geometry_audit_trail=geometry_audit_trail,
+            status_file=status_file,
         )
     else:  # sequential
         logger.debug("Routing to sequential processing...")
         return whisp_formatted_stats_geojson_to_df_sequential(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
@@ -2374,4 +2588,5 @@ def whisp_formatted_stats_geojson_to_df_fast(
             water_flag_threshold=water_flag_threshold,
             sort_column=sort_column,
             geometry_audit_trail=geometry_audit_trail,
+            status_file=status_file,
         )
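
The new status_file plumbing above lets ProgressTracker write a small JSON snapshot that an API or web app can poll while a long concurrent job runs. Below is a minimal polling sketch; the field names (status, progress, percent, elapsed_sec, eta_sec, updated_at) and the auto-generated filename come from the diff, while the polling loop, paths, and interval are illustrative assumptions rather than part of the package.

import json
import time
from pathlib import Path

# Assumed path: passing a directory as status_file makes the tracker write
# "whisp_processing_status.json" inside it (see ProgressTracker.__init__ above).
status_path = Path("/tmp/whisp_run/whisp_processing_status.json")

def poll_status(path: Path, interval_sec: int = 30) -> None:
    # Print progress snapshots until the tracker reports a terminal state.
    while True:
        if path.exists():
            snapshot = json.loads(path.read_text())
            print(
                f"{snapshot['status']}: {snapshot['progress']} "
                f"({snapshot['percent']}%), eta {snapshot.get('eta_sec')} s"
            )
            if snapshot["status"] in ("completed", "interrupted"):
                break
        time.sleep(interval_sec)
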
openforis_whisp/data_checks.py
CHANGED
@@ -750,23 +750,43 @@ def validate_geojson_constraints(
     return results
 
 
-def
+def suggest_processing_mode(
+    feature_count,
+    mean_area_ha=None,
+    mean_vertices=None,
+    feature_type="polygon",
+    verbose=True,
+):
     """
-    Suggest processing
+    Suggest processing mode based on feature characteristics.
+
+    Decision thresholds from comprehensive benchmark data (Nov 2025):
 
-
-    -
-    -
-    -
+    POINTS:
+    - Break-even: 750-1000 features
+    - Sequential faster: < 750 features
+    - Concurrent faster: >= 750 features
+
+    POLYGONS (area-based thresholds):
+    - Tiny (< 1 ha): break-even ~500 features
+    - Small (1-5 ha, simple): break-even ~500 features
+    - Small (1-5 ha, complex 20-50v): break-even ~500 features
+    - Medium (5-20 ha): break-even ~250 features
+    - Large (20-100 ha): break-even ~250 features
+    - Very large (50-200 ha): break-even ~250 features
+
+    Vertex complexity adjustment: High vertex counts (>50) favor concurrent at lower thresholds
 
     Parameters:
     -----------
-
-        Number of polygons
-    mean_area_ha : float
-        Mean area per polygon in hectares
+    feature_count : int
+        Number of features (polygons or points)
+    mean_area_ha : float, optional
+        Mean area per polygon in hectares (required for polygons, ignored for points)
     mean_vertices : float, optional
-        Mean number of vertices per polygon (
+        Mean number of vertices per polygon (influences decision for complex geometries)
+    feature_type : str
+        'polygon', 'multipolygon', or 'point' (default: 'polygon')
     verbose : bool
         Print recommendation explanation
 
@@ -775,31 +795,63 @@ def suggest_method(polygon_count, mean_area_ha, mean_vertices=None, verbose=True
     str: 'concurrent' or 'sequential'
     """
 
-    #
-    if
-        breakeven =
-        method = "concurrent" if
-
-
-
-
+    # Points: simple threshold-based decision
+    if feature_type == "point":
+        breakeven = 750
+        method = "concurrent" if feature_count >= breakeven else "sequential"
+
+        if verbose:
+            print(f"\nMETHOD RECOMMENDATION (Points)")
+            print(f"  Features: {feature_count} points")
+            print(f"  Break-even: {breakeven} features | Method: {method.upper()}")
+
+        return method
+
+    # Polygons and MultiPolygons: area and complexity-based decision
+    # MultiPolygons use same breakpoints as Polygons
+    if mean_area_ha is None:
+        # Default to conservative threshold if area unknown
+        breakeven = 500
+        method = "concurrent" if feature_count >= breakeven else "sequential"
+
+        if verbose:
+            print(f"\nMETHOD RECOMMENDATION (Polygons - area unknown)")
+            print(f"  Features: {feature_count} polygons")
+            print(
+                f"  Break-even: {breakeven} (conservative) | Method: {method.upper()}"
+            )
+
+        return method
+
+    # Area-based thresholds from benchmark data
+    if mean_area_ha >= 20:  # Large to very large polygons
+        breakeven = 250
+    elif mean_area_ha >= 5:  # Medium polygons
         breakeven = 250
-
+    elif mean_area_ha >= 1:  # Small polygons
+        # Vertex complexity matters more for small polygons
+        if mean_vertices is not None and mean_vertices >= 30:
+            breakeven = 500  # Complex small polygons
+        else:
+            breakeven = 500  # Simple small polygons
+    else:  # Tiny polygons (< 1 ha)
+        breakeven = 500
+
+    # Vertex complexity adjustment for high-complexity geometries
+    if mean_vertices is not None and mean_vertices >= 50:
+        # High complexity: reduce breakeven by 20% (concurrent beneficial sooner)
+        breakeven = int(breakeven * 0.8)
 
-
-    if mean_vertices is not None and mean_vertices > 500:
-        # Reduce breakeven by 25% for very complex geometries
-        adjusted_breakeven = int(breakeven * 0.75)
-        method = "concurrent" if polygon_count >= adjusted_breakeven else "sequential"
+    method = "concurrent" if feature_count >= breakeven else "sequential"
 
     if verbose:
-        print(f"\nMETHOD RECOMMENDATION")
+        print(f"\nMETHOD RECOMMENDATION (Polygons)")
         print(
-            f"
+            f"  Features: {feature_count} | Mean Area: {mean_area_ha:.1f} ha", end=""
         )
         if mean_vertices is not None:
            print(f" | Mean Vertices: {mean_vertices:.1f}", end="")
        print()
-        print(f"
+        print(f"  Break-even: {breakeven} features | Method: {method.upper()}")
 
     return method
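
A quick usage sketch of the renamed helper. The function name, parameters, and thresholds come from the hunk above; the sample feature counts and areas are invented for illustration, and the module-level import path is assumed.

from openforis_whisp.data_checks import suggest_processing_mode

# 300 medium polygons (~12 ha each): above the ~250-feature break-even, so concurrent
mode = suggest_processing_mode(feature_count=300, mean_area_ha=12.0, mean_vertices=40)
print(mode)  # expected: "concurrent"

# 400 points: below the 750-feature break-even for points, so sequential
mode = suggest_processing_mode(feature_count=400, feature_type="point", verbose=False)
print(mode)  # expected: "sequential"
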
openforis_whisp/datasets.py
CHANGED
@@ -1160,6 +1160,20 @@ def nci_ocs2020_prep():
     ).selfMask()  # cocoa from national land cover map for Côte d'Ivoire
 
 
+# nCM - Cameroon
+# data from Aurelie Shapiro (FAO) working directly with country experts - info on methods and accuracy assessment to follow
+
+
+def ncm_treecover_2020_prep():
+    return (
+        ee.Image("projects/ee-cocoacmr/assets/land_cover/CMR_TNTMMU_2020")
+        .select("FNF_2020")
+        .eq(1)
+        .rename("nCM_Treecover_2020")
+        .selfMask()
+    )
+
+
 # ============================================================================
 # CONTEXT BANDS (Administrative boundaries and water mask)
 # ============================================================================
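
The new Cameroon layer follows the same pattern as the other national prep functions: select a classification band, threshold it to a binary mask, rename it, and self-mask so only tree-cover pixels carry values. A hedged sketch of calling it is below; it assumes an authenticated Earth Engine session and read access to the referenced asset.

import ee

ee.Initialize()  # assumes ee.Authenticate() has already been run

from openforis_whisp.datasets import ncm_treecover_2020_prep

ncm_img = ncm_treecover_2020_prep()
print(ncm_img.bandNames().getInfo())  # expected: ['nCM_Treecover_2020']
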
openforis_whisp/logger.py
CHANGED
@@ -8,9 +8,21 @@ BASE_MSG_FORMAT = (
 
 class StdoutLogger:
     def __init__(self, name: str, msg_format: str = BASE_MSG_FORMAT) -> None:
-
-
-
+        # Create handler that auto-flushes for Colab/notebook visibility
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setFormatter(logging.Formatter(msg_format))
+        handler.setLevel(logging.DEBUG)
+
+        # Override emit to force flush after each message
+        original_emit = handler.emit
+
+        def emit_with_flush(record):
+            original_emit(record)
+            sys.stdout.flush()
+
+        handler.emit = emit_with_flush
+
+        self.handler = handler
         self.logger = logging.getLogger(name)
         self.logger.addHandler(self.handler)
         self.logger.propagate = False
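
The wrapped emit simply forces a flush after every record so log lines appear immediately in Colab/Jupyter cells instead of sitting in the stdout buffer. A minimal usage sketch follows; the logger name and message are illustrative.

from openforis_whisp.logger import StdoutLogger

log = StdoutLogger("whisp.example").logger
log.info("flushed to stdout as soon as it is emitted")
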
openforis_whisp/parameters/lookup_gee_datasets.csv
CHANGED

@@ -2,9 +2,9 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
 EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
 GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
 TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
-GFC_TC_2020,50,,treecover,naturally_reg_2020,
+GFC_TC_2020,50,,treecover,naturally_reg_2020,0,0,0,float32,1,0,g_glad_gfc_10pc_prep
 Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
-ESA_TC_2020,70,,treecover,naturally_reg_2020,
+ESA_TC_2020,70,,treecover,naturally_reg_2020,0,0,0,float32,1,0,g_esa_worldcover_trees_prep
 TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
 Oil_palm_Descals,90,,commodities,NA,1,1,0,float32,1,0,g_creaf_descals_palm_prep
 Oil_palm_FDaP,100,,commodities,NA,1,1,0,float32,1,0,g_fdap_palm_prep
@@ -197,3 +197,4 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
 nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
 nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
 nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep
+nCM_Treecover_2020,3100,CM,treecover,NA,1,0,0,float32,1,0,ncm_treecover_2020_prep
openforis_whisp/pd_schemas.py
CHANGED
@@ -1,5 +1,10 @@
-
-
+# Support both old and new pandera import paths
+try:
+    import pandera.pandas as pa
+    from pandera.typing.pandas import DataFrame, Series
+except (ImportError, ModuleNotFoundError):
+    import pandera as pa
+    from pandera.typing import DataFrame, Series
 
 # Define a schema for validating a DataFrame related to GEE (Google Earth Engine) datasets.
 class DataLookupSchema(pa.DataFrameModel):
openforis_whisp/reformat.py
CHANGED
@@ -1,5 +1,10 @@
 # !pip install pandera[io] # special version used
-
+# Support both old and new pandera import paths
+try:
+    import pandera.pandas as pa
+except (ImportError, ModuleNotFoundError):
+    import pandera as pa
+
 import pandas as pd
 import os
 import logging
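
Both pd_schemas.py and reformat.py now try the newer pandera.pandas entry point first and fall back to the classic top-level import, which keeps the schemas working across older and newer pandera releases. The snippet below is an illustrative way to check which path a given environment resolves; it is not part of the package.

import pandera

try:
    import pandera.pandas as pa  # newer pandera releases expose the pandas backend here
    source = "pandera.pandas"
except (ImportError, ModuleNotFoundError):
    import pandera as pa  # older releases keep everything at the top level
    source = "pandera"

print(f"pandera {pandera.__version__}: building schemas from {source}")
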
openforis_whisp/stats.py
CHANGED
@@ -88,7 +88,6 @@ def get_admin_boundaries_fc():
 def whisp_formatted_stats_geojson_to_df_legacy(
     input_geojson_filepath: Path | str,
     external_id_column=None,
-    remove_geom=False,
     national_codes=None,
     unit_type="ha",
     whisp_image=None,
@@ -147,7 +146,6 @@ def whisp_formatted_stats_geojson_to_df_legacy(
     return whisp_formatted_stats_ee_to_df(
         feature_collection,
         external_id_column,
-        remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,
@@ -167,6 +165,7 @@ def whisp_formatted_stats_geojson_to_df(
     batch_size: int = 10,
     max_concurrent: int = 20,
     geometry_audit_trail: bool = False,
+    status_file: str = None,
 ) -> pd.DataFrame:
     """
     Main entry point for converting GeoJSON to Whisp statistics.
@@ -188,11 +187,7 @@ def whisp_formatted_stats_geojson_to_df(
         The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
         This column must exist as a property in ALL features of the GeoJSON file.
         Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
-    remove_geom : bool, default=False
-        If True, the geometry of the GeoJSON is removed from the output DataFrame.
     national_codes : list, optional
-        List of ISO2 country codes to include national datasets.
-    unit_type: str, optional
         Whether to use hectares ("ha") or percentage ("percent"), by default "ha".
     whisp_image : ee.Image, optional
         Pre-combined multiband Earth Engine Image containing all Whisp datasets.
@@ -224,6 +219,13 @@ def whisp_formatted_stats_geojson_to_df(
 
         Processing metadata stored in df.attrs['processing_metadata'].
         These columns enable full transparency for geometry modifications during processing.
+    status_file : str, optional
+        Path to JSON status file or directory for real-time progress tracking.
+        If a directory is provided, creates 'whisp_processing_status.json' in that directory.
+        Updates every 3 minutes and at progress milestones (5%, 10%, etc.).
+        Format: {"status": "processing", "progress": "450/1000", "percent": 45.0,
+        "elapsed_sec": 120, "eta_sec": 145, "updated_at": "2025-11-13T14:23:45"}
+        Most useful for large concurrent jobs. Works in both concurrent and sequential modes.
 
     Returns
     -------
@@ -283,7 +285,6 @@ def whisp_formatted_stats_geojson_to_df(
         return whisp_formatted_stats_geojson_to_df_legacy(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
@@ -306,7 +307,6 @@ def whisp_formatted_stats_geojson_to_df(
         return whisp_formatted_stats_geojson_to_df_fast(
             input_geojson_filepath=input_geojson_filepath,
             external_id_column=external_id_column,
-            remove_geom=remove_geom,
             national_codes=national_codes,
             unit_type=unit_type,
             whisp_image=whisp_image,
@@ -315,6 +315,7 @@ def whisp_formatted_stats_geojson_to_df(
             batch_size=batch_size,
             max_concurrent=max_concurrent,
             geometry_audit_trail=geometry_audit_trail,
+            status_file=status_file,
         )
     else:
         raise ValueError(
@@ -473,7 +474,6 @@ def whisp_formatted_stats_ee_to_df(
 def whisp_stats_geojson_to_df(
     input_geojson_filepath: Path | str,
     external_id_column=None,
-    remove_geom=False,
     national_codes=None,
     unit_type="ha",
     whisp_image=None,  # New parameter
@@ -506,7 +506,6 @@ def whisp_stats_geojson_to_df(
     return whisp_stats_ee_to_df(
         feature_collection,
         external_id_column,
-        remove_geom,
         national_codes=national_codes,
         unit_type=unit_type,
         whisp_image=whisp_image,  # Pass through
@@ -990,7 +989,7 @@ def whisp_stats_ee_to_drive(
         )
         task.start()
         print(
-            "Exporting to Google Drive: '
+            "Exporting to Google Drive: 'whisp_output_table.csv'. To track progress: https://code.earthengine.google.com/tasks"
         )
     except Exception as e:
         print(f"An error occurred during the export: {e}")
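
With status_file wired through the main entry point, a long-running job can be pointed at a writable directory and monitored from outside the process. A hedged example follows; the GeoJSON path, column name, and output directory are placeholders, and it assumes the function is re-exported at the package root (otherwise import it from openforis_whisp.stats).

import openforis_whisp as whisp

df = whisp.whisp_formatted_stats_geojson_to_df(
    input_geojson_filepath="plots.geojson",  # placeholder input file
    external_id_column="farm_id",            # placeholder property name
    status_file="/tmp/whisp_run",            # directory -> whisp_processing_status.json inside it
)
print(df.shape)
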
openforis_whisp/utils.py
CHANGED
@@ -5,6 +5,8 @@ import os
 import pandas as pd
 import random
 import numpy as np
+import logging
+import sys
 
 import urllib.request
 import os
@@ -19,6 +21,23 @@ from shapely.validation import make_valid
 
 from .logger import StdoutLogger
 
+# Configure the "whisp" logger with auto-flush handler for Colab visibility
+_whisp_logger = logging.getLogger("whisp")
+if not _whisp_logger.handlers:
+    _handler = logging.StreamHandler(sys.stdout)
+    _handler.setLevel(logging.DEBUG)
+    _handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
+    # Override emit to force flush after each message for Colab
+    _original_emit = _handler.emit
+
+    def _emit_with_flush(record):
+        _original_emit(record)
+        sys.stdout.flush()
+
+    _handler.emit = _emit_with_flush
+    _whisp_logger.addHandler(_handler)
+    _whisp_logger.setLevel(logging.INFO)
+    _whisp_logger.propagate = False  # Don't propagate to root to avoid duplicates
 
 logger = StdoutLogger(__name__)
 
{openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 3.0.0a3
+Version: 3.0.0a4
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
openforis_whisp-3.0.0a4.dist-info/RECORD
ADDED

@@ -0,0 +1,20 @@
+openforis_whisp/__init__.py,sha256=5zJK84LYnlslxSajdCz6ZIYxRS4xgN3sGxSD6_GXEHs,3547
+openforis_whisp/advanced_stats.py,sha256=FC1YasSZ93jplF1qBgDopzBIsO2ueXnidomQU3rpP_Q,100006
+openforis_whisp/data_checks.py,sha256=ErIKGbCa3R8eYP0sVoAl-ZUl607W1QrG0Jr2SIVgm2I,34056
+openforis_whisp/data_conversion.py,sha256=L2IsiUyQUt3aHgSYGbIhgPGwM7eyS3nLVEoNO9YqQeM,21888
+openforis_whisp/datasets.py,sha256=F1WxXc93mxxmN-WHa0bf-XX-FloSQyEBJKmnrQEHYn8,53855
+openforis_whisp/logger.py,sha256=gFkRTwJDJKIBWcHDOK74Uln3JM7fAybURo7pQpGL790,3395
+openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
+openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
+openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
+openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=7KdnFocEgbZO5m8JmWQchzZTurg9rJ96y17z8UyLtI0,17537
+openforis_whisp/pd_schemas.py,sha256=0z-oPmYIDUIn7mNY41W_uUpmTwjoR7e254mOCoHVsOg,2878
+openforis_whisp/reformat.py,sha256=gvhIa-_kTT5BSO8LuVmJ1TQcf_NwheskXboFM9e0KJY,32758
+openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
+openforis_whisp/stats.py,sha256=pTSYs77ISRBOIglRpq4SUx3lKRkrUZOKROLRX5IP9IY,63941
+openforis_whisp/utils.py,sha256=AISWF-MpfFdYkhd6bei4BViw2Iag20mmq61ykrF9YTk,31287
+openforis_whisp-3.0.0a4.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+openforis_whisp-3.0.0a4.dist-info/METADATA,sha256=ak2Dw632lgOtXEXkl5-haYK7vF3hPaJ6IkaRRJRdH0Y,16684
+openforis_whisp-3.0.0a4.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openforis_whisp-3.0.0a4.dist-info/RECORD,,
openforis_whisp-3.0.0a3.dist-info/RECORD
REMOVED

@@ -1,20 +0,0 @@
-openforis_whisp/__init__.py,sha256=s42Q0VJdzm8mgnxfYg1hUEJPM2VLWIva2h-mdKyr444,3538
-openforis_whisp/advanced_stats.py,sha256=tvhgNTCGlT3aYecUPP6QCTO0FRrjk0qjs95NoVZvIt4,90935
-openforis_whisp/data_checks.py,sha256=KwgD72FA_n7joiJadGRpzntd2sLo0aqGNbOjRkB8iQI,32293
-openforis_whisp/data_conversion.py,sha256=L2IsiUyQUt3aHgSYGbIhgPGwM7eyS3nLVEoNO9YqQeM,21888
-openforis_whisp/datasets.py,sha256=aGJy0OYN4d0nsH3_IOYlHl-WCB7KFwZwMJ-dBi5Hc5Y,53470
-openforis_whisp/logger.py,sha256=9M6_3mdpoiWfC-pDwM9vKmB2l5Gul6Rb5rNTNh-_nzs,3054
-openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
-openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
-openforis_whisp/parameters/lookup_gaul1_admin.py,sha256=cQr5liRdXi85QieTxrz4VAkn0COvRCp82ZV0dYFWOio,474980
-openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
-openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
-openforis_whisp/reformat.py,sha256=MPjP5lb218GTcTpd_Qvbj5ER_8EY4JjLDteQaS5OZCQ,32620
-openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
-openforis_whisp/stats.py,sha256=nVzQpSu7BoSb2S6HheLeoK_pmguZ9Lyw0ZfbTTMVq4Q,63720
-openforis_whisp/utils.py,sha256=Q-EwhUaohk63WCx7Rr5VuR3X-oGtgILZDc8JsjbWhgg,30538
-openforis_whisp-3.0.0a3.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
-openforis_whisp-3.0.0a3.dist-info/METADATA,sha256=6xuNhUpQWyzKU3m13FnJ7SX39jAVry1YEKNAdH0D2to,16684
-openforis_whisp-3.0.0a3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-openforis_whisp-3.0.0a3.dist-info/RECORD,,
{openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a4.dist-info}/LICENSE
File without changes

{openforis_whisp-3.0.0a3.dist-info → openforis_whisp-3.0.0a4.dist-info}/WHEEL
File without changes