openforis-whisp 3.0.0a6__py3-none-any.whl → 3.0.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
1
  """
2
2
  Data validation and constraint checking functions for WHISP.
3
3
 
4
- Provides validation functions to check GeoJSON data against defined limits
4
+ Provides validation functions to check GeoJSON data against user-defined limits
5
5
  and thresholds, raising informative errors when constraints are violated.
6
+ Note: Defaults in each function are not necessarily enforced.
6
7
  """
7
8
 
8
9
  import json
@@ -13,26 +14,6 @@ from shapely.geometry import Polygon as ShapelyPolygon, shape as shapely_shape
13
14
  # (estimation preferred here as allows efficient processing speed and limits overhead of checking file)
14
15
 
15
16
 
16
- def _convert_projected_area_to_ha(area_sq_units: float, crs: str = None) -> float:
17
- """
18
- Convert area from projected CRS units to hectares.
19
-
20
- Most projected CRS use meters as units, so:
21
- - area_sq_units is in square meters
22
- - 1 hectare = 10,000 m²
23
-
24
- Args:
25
- area_sq_units: Area in square units of the projection (typically square meters)
26
- crs: CRS string for reference (e.g., 'EPSG:3857'). Used for validation.
27
-
28
- Returns:
29
- Area in hectares
30
- """
31
- # Standard conversion: 1 hectare = 10,000 m²
32
- # Most projected CRS use meters, so this works universally
33
- return area_sq_units / 10000
34
-
35
-
36
17
  def _estimate_area_from_bounds(coords, area_conversion_factor: float) -> float:
37
18
  """
38
19
  Estimate area from bounding box when actual area calculation fails.
@@ -75,6 +56,8 @@ def analyze_geojson(
75
56
  metrics=[
76
57
  "count",
77
58
  "geometry_types",
59
+ "crs",
60
+ "file_size_mb",
78
61
  "min_area_ha",
79
62
  "mean_area_ha",
80
63
  "median_area_ha",
@@ -107,6 +90,8 @@ def analyze_geojson(
107
90
  Which metrics to return. Available metrics:
108
91
  - 'count': number of polygons
109
92
  - 'geometry_types': dict of geometry type counts (e.g., {'Polygon': 95, 'MultiPolygon': 5})
93
+ - 'crs': coordinate reference system (e.g., 'EPSG:4326'; only available when geojson_data is a file path)
94
+ - 'file_size_mb': file size in megabytes (only available when geojson_data is a file path)
110
95
  - 'min_area_ha', 'mean_area_ha', 'median_area_ha', 'max_area_ha': area statistics (hectares) (accurate only at equator)
111
96
  - 'area_percentiles': dict with p25, p50 (median), p75, p90 area values (accurate only at equator)
112
97
  - 'min_vertices', 'mean_vertices', 'median_vertices', 'max_vertices': vertex count statistics
@@ -123,6 +108,8 @@ def analyze_geojson(
123
108
  dict with requested metrics:
124
109
  - 'count': number of polygons
125
110
  - 'geometry_types': {'Polygon': int, 'MultiPolygon': int, ...}
111
+ - 'crs': coordinate reference system string (e.g., 'EPSG:4326', only when geojson_data is a file path)
112
+ - 'file_size_mb': file size in megabytes (float, only when geojson_data is a file path)
126
113
  - 'min_area_ha': minimum area among all polygons in hectares
127
114
  - 'mean_area_ha': mean area per polygon in hectares (calculated from coordinates)
128
115
  - 'median_area_ha': median area among all polygons in hectares
@@ -134,8 +121,28 @@ def analyze_geojson(
134
121
  - 'max_vertices': maximum number of vertices among all polygons
135
122
  - 'vertex_percentiles': {'p25': int, 'p50': int, 'p75': int, 'p90': int}
136
123
  """
124
+ # Handle None metrics (use all default metrics)
125
+ if metrics is None:
126
+ metrics = [
127
+ "count",
128
+ "geometry_types",
129
+ "crs",
130
+ "file_size_mb",
131
+ "min_area_ha",
132
+ "mean_area_ha",
133
+ "median_area_ha",
134
+ "max_area_ha",
135
+ "area_percentiles",
136
+ "min_vertices",
137
+ "mean_vertices",
138
+ "median_vertices",
139
+ "max_vertices",
140
+ "vertex_percentiles",
141
+ ]
142
+
137
143
  results = {}
138
144
  crs_warning = None
145
+ detected_crs = None
139
146
  file_path = None
140
147
 
141
148
  try:
@@ -145,6 +152,35 @@ def analyze_geojson(
145
152
  if not file_path.exists():
146
153
  raise FileNotFoundError(f"GeoJSON file not found: {file_path}")
147
154
 
155
+ # Quick CRS detection BEFORE loading full file (if requested)
156
+ if "crs" in metrics:
157
+ try:
158
+ # Use fiona which only reads file metadata (fast, doesn't load features)
159
+ import fiona
160
+
161
+ with fiona.open(file_path) as src:
162
+ if src.crs:
163
+ # Convert fiona CRS dict to EPSG string
164
+ crs_dict = src.crs
165
+ if "init" in crs_dict:
166
+ # Old format: {'init': 'epsg:4326'}
167
+ detected_crs = (
168
+ crs_dict["init"].upper().replace("EPSG:", "EPSG:")
169
+ )
170
+ elif isinstance(crs_dict, dict) and crs_dict:
171
+ # No 'init' key: fall back to the JSON-serialized CRS dict (json is imported at module top)
172
+ detected_crs = json.dumps(crs_dict)
173
+ else:
174
+ # No CRS means WGS84 by GeoJSON spec
175
+ detected_crs = "EPSG:4326"
176
+
177
+ # Check if CRS is WGS84
178
+ if detected_crs and detected_crs != "EPSG:4326":
179
+ crs_warning = f"⚠️ CRS is {detected_crs}, not EPSG:4326. Area metrics will be inaccurate. Data will be auto-reprojected during processing."
180
+ except Exception as e:
181
+ # If fiona fails, assume WGS84 (GeoJSON default)
182
+ detected_crs = "EPSG:4326"
183
+
148
184
  # Try UTF-8 first (most common), then fall back to auto-detection
149
185
  try:
150
186
  with open(file_path, "r", encoding="utf-8") as f:
@@ -166,26 +202,29 @@ def analyze_geojson(
166
202
  with open(file_path, "r", encoding="latin-1") as f:
167
203
  geojson_data = json.load(f)
168
204
 
169
- # Detect CRS from file if available
170
- try:
171
- import geopandas as gpd
172
-
173
- gdf = gpd.read_file(file_path)
174
- if gdf.crs and gdf.crs != "EPSG:4326":
175
- crs_warning = f"⚠️ CRS is {gdf.crs}, not EPSG:4326. Area metrics will be inaccurate. Data will be auto-reprojected during processing."
176
- except Exception:
177
- pass # If we can't detect CRS, continue without warning
178
-
179
205
  features = geojson_data.get("features", [])
180
206
 
181
- # Add CRS warning to results if detected
182
- if crs_warning:
183
- results["crs_warning"] = crs_warning
184
- print(crs_warning)
207
+ # Add file size if requested and available
208
+ if "file_size_mb" in metrics and file_path is not None:
209
+ size_bytes = file_path.stat().st_size
210
+ results["file_size_mb"] = round(size_bytes / (1024 * 1024), 2)
211
+
212
+ # Add CRS info if requested and detected
213
+ if "crs" in metrics and detected_crs:
214
+ results["crs"] = detected_crs
215
+ # Add warning if not WGS84
216
+ if crs_warning:
217
+ results["crs_warning"] = crs_warning
218
+ print(crs_warning)
185
219
 
186
220
  if "count" in metrics:
187
221
  results["count"] = len(features)
188
222
 
223
+ # Initialize tracking variables (used in quality logging later)
224
+ bbox_fallback_count = 0
225
+ geometry_skip_count = 0
226
+ polygon_type_stats = {}
227
+
189
228
  # Single sweep through features - compute all area/vertex metrics at once
190
229
  if any(
191
230
  m in metrics
@@ -208,11 +247,6 @@ def analyze_geojson(
208
247
  geometry_type_counts = {}
209
248
  valid_polygons = 0
210
249
 
211
- # Tracking for fallback geometries
212
- bbox_fallback_count = 0 # Geometries that used bounding box estimate
213
- geometry_skip_count = 0 # Geometries completely skipped
214
- polygon_type_stats = {} # Track stats by geometry type
215
-
216
250
  # Detect CRS to determine area conversion factor
217
251
  area_conversion_factor = 1232100 # Default: WGS84 (degrees to ha)
218
252
  detected_crs = None
@@ -489,6 +523,7 @@ def _check_metric_constraints(
489
523
  max_max_area_ha=None,
490
524
  max_mean_vertices=None,
491
525
  max_max_vertices=10_000,
526
+ max_file_size_mb=None,
492
527
  ):
493
528
  """
494
529
  Check if computed metrics violate any constraints.
@@ -499,7 +534,7 @@ def _check_metric_constraints(
499
534
  -----------
500
535
  metrics : dict
501
536
  Dictionary of computed metrics with keys: count, mean_area_ha, max_area_ha,
502
- mean_vertices, max_vertices
537
+ mean_vertices, max_vertices, file_size_mb (optional)
503
538
  max_polygon_count : int
504
539
  Maximum allowed number of polygons
505
540
  max_mean_area_ha : float
@@ -510,6 +545,8 @@ def _check_metric_constraints(
510
545
  Maximum allowed mean vertices per polygon
511
546
  max_max_vertices : int, optional
512
547
  Maximum allowed vertices per polygon
548
+ max_file_size_mb : float, optional
549
+ Maximum allowed file size in megabytes
513
550
 
514
551
  Returns:
515
552
  --------
@@ -523,6 +560,7 @@ def _check_metric_constraints(
523
560
  max_area = metrics["max_area_ha"]
524
561
  mean_vertices = metrics["mean_vertices"]
525
562
  max_vertices_value = metrics["max_vertices"]
563
+ file_size_mb = metrics.get("file_size_mb")
526
564
 
527
565
  if polygon_count > max_polygon_count:
528
566
  violations.append(
@@ -549,41 +587,63 @@ def _check_metric_constraints(
549
587
  f"Max vertices ({max_vertices_value:,}) exceeds limit ({max_max_vertices:,})"
550
588
  )
551
589
 
590
+ if (
591
+ max_file_size_mb is not None
592
+ and file_size_mb is not None
593
+ and file_size_mb > max_file_size_mb
594
+ ):
595
+ violations.append(
596
+ f"File size ({file_size_mb:.2f} MB) exceeds limit ({max_file_size_mb:.2f} MB)"
597
+ )
598
+
552
599
  return violations
553
600
 
554
601
 
555
- def validate_geojson_constraints(
556
- geojson_data: Path | str | dict,
602
+ def check_geojson_limits(
603
+ geojson_data: Path | str | dict = None,
604
+ analysis_results: dict = None,
557
605
  max_polygon_count=250_000,
558
- max_mean_area_ha=10_000,
559
- max_max_area_ha=None,
560
- max_mean_vertices=None,
561
- max_max_vertices=10_000,
606
+ max_mean_area_ha=50_000,
607
+ max_max_area_ha=50_000,
608
+ max_mean_vertices=50_000,
609
+ max_max_vertices=50_000,
610
+ max_file_size_mb=None,
611
+ allowed_crs=["EPSG:4326"],
562
612
  verbose=True,
563
613
  ):
564
614
  """
565
- Validate GeoJSON data against defined constraints.
615
+ Check GeoJSON data against defined limits for processing readiness.
566
616
 
567
617
  Raises ValueError if any metrics exceed the specified limits.
568
618
  Uses analyze_geojson to compute metrics efficiently in a single sweep.
569
619
 
570
620
  Parameters:
571
621
  -----------
572
- geojson_data : Path | str | dict
622
+ geojson_data : Path | str | dict, optional
573
623
  GeoJSON FeatureCollection to validate. Can be:
574
624
  - dict: GeoJSON FeatureCollection dictionary
575
625
  - str: Path to GeoJSON file as string
576
626
  - Path: pathlib.Path to GeoJSON file
627
+ Note: Cannot be used together with analysis_results
628
+ analysis_results : dict, optional
629
+ Pre-computed results from analyze_geojson(). Must contain keys:
630
+ 'count', 'mean_area_ha', 'max_area_ha', 'mean_vertices', 'max_vertices'
631
+ Note: Cannot be used together with geojson_data
577
632
  max_polygon_count : int, optional
578
633
  Maximum allowed number of polygons (default: 250,000)
579
634
  max_mean_area_ha : float, optional
580
- Maximum allowed mean area per polygon in hectares (default: 10,000)
635
+ Maximum allowed mean area per polygon in hectares (default: 50,000)
581
636
  max_max_area_ha : float, optional
582
- Maximum allowed maximum area per polygon in hectares (default: None, no limit)
637
+ Maximum allowed maximum area per polygon in hectares (default: 50,000)
583
638
  max_mean_vertices : float, optional
584
- Maximum allowed mean vertices per polygon (default: None, no limit)
639
+ Maximum allowed mean vertices per polygon (default: 50,000)
585
640
  max_max_vertices : int, optional
586
- Maximum allowed vertices per polygon (default: 10,000)
641
+ Maximum allowed vertices per polygon (default: 50,000)
642
+ max_file_size_mb : float, optional
643
+ Maximum allowed file size in megabytes (default: None, no limit)
644
+ allowed_crs : list, optional
645
+ List of allowed coordinate reference systems (default: ["EPSG:4326"])
646
+ Set to None to skip CRS validation
587
647
  verbose : bool
588
648
  Print validation results (default: True)
589
649
 
@@ -603,22 +663,25 @@ def validate_geojson_constraints(
603
663
  Raises:
604
664
  -------
605
665
  ValueError
606
- If any constraint is violated
666
+ If any constraint is violated, or if both geojson_data and analysis_results are provided,
667
+ or if neither is provided
607
668
  """
608
- from openforis_whisp.data_conversion import convert_geojson_to_ee
609
- from shapely.geometry import Polygon as ShapelyPolygon
669
+ # Validate input parameters
670
+ if geojson_data is not None and analysis_results is not None:
671
+ raise ValueError(
672
+ "Cannot provide both 'geojson_data' and 'analysis_results'. "
673
+ "Please provide only one input source."
674
+ )
610
675
 
611
- # Load GeoJSON from file if path provided
612
- if isinstance(geojson_data, (str, Path)):
613
- file_path = Path(geojson_data)
614
- if not file_path.exists():
615
- raise FileNotFoundError(f"GeoJSON file not found: {file_path}")
616
- with open(file_path, "r") as f:
617
- geojson_data = json.load(f)
676
+ if geojson_data is None and analysis_results is None:
677
+ raise ValueError(
678
+ "Must provide either 'geojson_data' or 'analysis_results'. "
679
+ "Both cannot be None."
680
+ )
618
681
 
619
682
  if verbose:
620
683
  print("\n" + "=" * 80)
621
- print("GEOJSON CONSTRAINT VALIDATION")
684
+ print("GEOJSON LIMITS CHECK")
622
685
  print("=" * 80)
623
686
  print("\nConstraint Limits:")
624
687
  print(f" - Max polygon count: {max_polygon_count:,}")
@@ -629,90 +692,47 @@ def validate_geojson_constraints(
629
692
  print(f" - Max mean vertices: {max_mean_vertices:,}")
630
693
  if max_max_vertices is not None:
631
694
  print(f" - Max vertices per polygon: {max_max_vertices:,}")
632
-
633
- # Collect all metrics we need to compute
634
- metrics_to_compute = [
635
- "count",
636
- "mean_area_ha",
637
- "max_area_ha",
638
- "mean_vertices",
639
- "max_vertices",
640
- ]
641
-
642
- # Import analyze_geojson (will be available after function is defined elsewhere)
643
- # For now, we'll compute it here efficiently in a single sweep
644
- features = geojson_data.get("features", [])
645
-
646
- # Single sweep computation
647
- total_area = 0
648
- total_vertices = 0
649
- max_area = 0
650
- max_vertices_value = 0
651
- valid_polygons = 0
652
-
653
- for feature in features:
654
- try:
655
- coords = feature["geometry"]["coordinates"]
656
- geom_type = feature["geometry"]["type"]
657
-
658
- if geom_type == "Polygon":
659
- # Count vertices
660
- feature_vertices = 0
661
- for ring in coords:
662
- feature_vertices += len(ring)
663
- total_vertices += feature_vertices
664
- max_vertices_value = max(max_vertices_value, feature_vertices)
665
-
666
- # Calculate area
667
- try:
668
- poly = ShapelyPolygon(coords[0])
669
- area_ha = abs(poly.area) * 1232100
670
- total_area += area_ha
671
- max_area = max(max_area, area_ha)
672
- except:
673
- pass
674
- valid_polygons += 1
675
-
676
- elif geom_type == "MultiPolygon":
677
- # Count vertices
678
- feature_vertices = 0
679
- for polygon in coords:
680
- for ring in polygon:
681
- feature_vertices += len(ring)
682
- total_vertices += feature_vertices
683
- max_vertices_value = max(max_vertices_value, feature_vertices)
684
-
685
- # Calculate area
686
- try:
687
- for polygon in coords:
688
- poly = ShapelyPolygon(polygon[0])
689
- area_ha = abs(poly.area) * 1232100
690
- total_area += area_ha
691
- max_area = max(max_area, area_ha)
692
- except:
693
- pass
694
- valid_polygons += 1
695
-
696
- except:
697
- continue
698
-
699
- # Compute means
700
- polygon_count = len(features)
701
- mean_area = total_area / valid_polygons if valid_polygons > 0 else 0
702
- mean_vertices = total_vertices / valid_polygons if valid_polygons > 0 else 0
703
-
695
+ if max_file_size_mb is not None:
696
+ print(f" - Max file size (MB): {max_file_size_mb:.2f}")
697
+
698
+ # Get metrics either from analysis_results or by analyzing geojson_data
699
+ if analysis_results is not None:
700
+ # Use pre-computed analysis results
701
+ metrics = analysis_results
702
+ else:
703
+ # Use analyze_geojson to compute all required metrics in a single sweep
704
+ metrics_to_compute = [
705
+ "count",
706
+ "file_size_mb",
707
+ "mean_area_ha",
708
+ "max_area_ha",
709
+ "mean_vertices",
710
+ "max_vertices",
711
+ ]
712
+ # Add CRS if validation is requested
713
+ if allowed_crs is not None:
714
+ metrics_to_compute.append("crs")
715
+ metrics = analyze_geojson(geojson_data, metrics=metrics_to_compute)
716
+
717
+ # Build results dict with required keys
704
718
  results = {
705
- "count": polygon_count,
706
- "mean_area_ha": round(mean_area, 2),
707
- "max_area_ha": round(max_area, 2),
708
- "mean_vertices": round(mean_vertices, 2),
709
- "max_vertices": max_vertices_value,
719
+ "count": metrics.get("count", 0),
720
+ "file_size_mb": metrics.get("file_size_mb"),
721
+ "mean_area_ha": metrics.get("mean_area_ha", 0),
722
+ "max_area_ha": metrics.get("max_area_ha", 0),
723
+ "mean_vertices": metrics.get("mean_vertices", 0),
724
+ "max_vertices": metrics.get("max_vertices", 0),
725
+ "crs": metrics.get("crs"),
710
726
  "valid": True,
711
727
  }
712
728
 
713
729
  if verbose:
714
730
  print("\nComputed Metrics:")
715
731
  print(f" - Polygon count: {results['count']:,}")
732
+ if results.get("file_size_mb") is not None:
733
+ print(f" - File size (MB): {results['file_size_mb']:,.2f}")
734
+ if results.get("crs") is not None:
735
+ print(f" - CRS: {results['crs']}")
716
736
  print(f" - Mean area (ha): {results['mean_area_ha']:,}")
717
737
  print(f" - Max area (ha): {results['max_area_ha']:,}")
718
738
  print(f" - Mean vertices: {results['mean_vertices']:,}")
@@ -726,34 +746,48 @@ def validate_geojson_constraints(
726
746
  max_max_area_ha=max_max_area_ha,
727
747
  max_mean_vertices=max_mean_vertices,
728
748
  max_max_vertices=max_max_vertices,
749
+ max_file_size_mb=max_file_size_mb,
729
750
  )
730
751
 
752
+ # Check CRS if validation is requested
753
+ if allowed_crs is not None and results.get("crs"):
754
+ if results["crs"] not in allowed_crs:
755
+ violations.append(
756
+ f"CRS '{results['crs']}' is not in allowed list: {allowed_crs}"
757
+ )
758
+
731
759
  # Report results
732
760
  if verbose:
733
761
  print("\n" + "=" * 80)
734
762
  if violations:
735
- print("VALIDATION FAILED")
763
+ print("LIMITS CHECK FAILED")
736
764
  print("=" * 80)
737
765
  for violation in violations:
738
766
  print(f"\n{violation}")
739
767
  results["valid"] = False
740
768
  else:
741
- print("VALIDATION PASSED")
769
+ print("LIMITS CHECK PASSED")
742
770
  print("=" * 80)
743
771
  print("\nAll metrics within acceptable limits")
744
772
 
745
773
  # Raise error with detailed message if any constraint violated
746
774
  if violations:
747
- error_message = "Constraint validation failed:\n" + "\n".join(violations)
775
+ error_message = "GeoJSON limits check failed:\n" + "\n".join(violations)
748
776
  raise ValueError(error_message)
749
777
 
750
778
  return results
751
779
 
752
780
 
781
+ # Backward compatibility aliases
782
+ screen_geojson = check_geojson_limits
783
+ validate_geojson_constraints = check_geojson_limits
784
+
785
+
753
786
  def suggest_processing_mode(
754
787
  feature_count,
755
788
  mean_area_ha=None,
756
789
  mean_vertices=None,
790
+ file_size_mb=None,
757
791
  feature_type="polygon",
758
792
  verbose=True,
759
793
  ):
@@ -762,6 +796,9 @@ def suggest_processing_mode(
762
796
 
763
797
  Decision thresholds from comprehensive benchmark data (Nov 2025):
764
798
 
799
+ FILE SIZE:
800
+ - Files >= 10 MB: recommend sequential mode (avoids payload size limits)
801
+
765
802
  POINTS:
766
803
  - Break-even: 750-1000 features
767
804
  - Sequential faster: < 750 features
@@ -785,6 +822,8 @@ def suggest_processing_mode(
785
822
  Mean area per polygon in hectares (required for polygons, ignored for points)
786
823
  mean_vertices : float, optional
787
824
  Mean number of vertices per polygon (influences decision for complex geometries)
825
+ file_size_mb : float, optional
826
+ File size in megabytes (if >= 10 MB, recommends sequential mode)
788
827
  feature_type : str
789
828
  'polygon', 'multipolygon', or 'point' (default: 'polygon')
790
829
  verbose : bool
@@ -795,6 +834,14 @@ def suggest_processing_mode(
795
834
  str: 'concurrent' or 'sequential'
796
835
  """
797
836
 
837
+ # File size check: large files should use sequential mode
838
+ if file_size_mb is not None and file_size_mb >= 10:
839
+ if verbose:
840
+ print(f"\nMETHOD RECOMMENDATION (File Size Constraint)")
841
+ print(f" File size: {file_size_mb:.2f} MB (>= 10 MB threshold)")
842
+ print(f" Method: SEQUENTIAL (avoids payload size limits)")
843
+ return "sequential"
844
+
798
845
  # Points: simple threshold-based decision
799
846
  if feature_type == "point":
800
847
  breakeven = 750
@@ -61,8 +61,9 @@ def g_esa_worldcover_trees_prep():
61
61
 
62
62
  # EUFO_2020
63
63
  def g_jrc_gfc_2020_prep():
64
- jrc_gfc2020_raw = ee.ImageCollection("JRC/GFC2020/V2")
65
- return jrc_gfc2020_raw.mosaic().rename("EUFO_2020").selfMask()
64
+ # JRC GFC2020 V3 is a single Image with band 'Map'
65
+ jrc_gfc2020 = ee.Image("JRC/GFC2020/V3").select("Map")
66
+ return jrc_gfc2020.rename("EUFO_2020").selfMask()
66
67
 
67
68
 
68
69
  # GFC_TC_2020
@@ -373,14 +374,12 @@ def g_esri_2020_2023_crop_prep():
373
374
 
374
375
  # RADD_year_2019 to RADD_year_< current year >
375
376
  def g_radd_year_prep():
376
- from datetime import datetime
377
-
378
377
  radd = ee.ImageCollection("projects/radar-wur/raddalert/v1")
379
378
  radd_date = (
380
379
  radd.filterMetadata("layer", "contains", "alert").select("Date").mosaic()
381
380
  )
382
381
  start_year = 19
383
- current_year = datetime.now().year % 100
382
+ current_year = CURRENT_YEAR_2DIGIT
384
383
 
385
384
  def make_band(year, img_stack):
386
385
  start = year * 1000
@@ -859,12 +859,14 @@ def format_stats_dataframe(
859
859
  )
860
860
  df.rename(columns={area_col: area_col_stripped}, inplace=True)
861
861
 
862
- # 10) reorder by plotId column if present
863
- df = (
864
- df.sort_values(sort_column).reset_index(drop=True)
865
- if sort_column in df.columns
866
- else df
867
- )
862
+ # 10) reorder by plotId column numerically if present (column is string but contains int values)
863
+ if sort_column in df.columns:
864
+ df["_sort_key"] = pd.to_numeric(df[sort_column], errors="coerce")
865
+ df = (
866
+ df.sort_values(by="_sort_key")
867
+ .drop(columns=["_sort_key"])
868
+ .reset_index(drop=True)
869
+ )
868
870
 
869
871
  # 11) Defragment final DataFrame and return
870
872
  return df.copy()