openforis-whisp 3.0.0a1__py3-none-any.whl → 3.0.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/advanced_stats.py +387 -29
- openforis_whisp/data_checks.py +178 -15
- openforis_whisp/data_conversion.py +154 -59
- openforis_whisp/stats.py +21 -6
- {openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/METADATA +1 -1
- {openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/RECORD +8 -8
- {openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/LICENSE +0 -0
- {openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/WHEEL +0 -0
openforis_whisp/advanced_stats.py
CHANGED

@@ -32,7 +32,7 @@ import os
 import subprocess
 from contextlib import redirect_stdout, contextmanager
 from pathlib import Path
-from typing import Optional, List, Dict, Any, Tuple
+from typing import Optional, List, Dict, Any, Tuple, Union
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import tempfile
 
@@ -203,6 +203,57 @@ def _extract_decimal_places(format_string: str) -> int:
     return 2  # Default to 2 decimal places
 
 
+def _normalize_keep_external_columns(
+    keep_external_columns: Union[bool, List[str]],
+    all_columns: List[str],
+    plot_id_column: str = "plotId",
+) -> List[str]:
+    """
+    Normalize keep_external_columns parameter to a list of column names.
+
+    Converts flexible user input (bool or list) to a concrete list of columns to keep.
+
+    Parameters
+    ----------
+    keep_external_columns : bool or List[str]
+        - False: keep nothing (return empty list)
+        - True: keep all columns except geometry and plot_id
+        - List[str]: keep specific columns (return as-is)
+    all_columns : List[str]
+        All available columns to choose from
+    plot_id_column : str
+        Name of plot ID column to exclude
+
+    Returns
+    -------
+    List[str]
+        Columns to keep from external (GeoJSON) data
+
+    Examples
+    --------
+    >>> cols = _normalize_keep_external_columns(False, ["id", "Country", "geom"], "id")
+    >>> cols
+    []
+
+    >>> cols = _normalize_keep_external_columns(True, ["id", "Country", "geom"], "id")
+    >>> cols
+    ['Country']
+
+    >>> cols = _normalize_keep_external_columns(["Country"], ["id", "Country", "geom"], "id")
+    >>> cols
+    ['Country']
+    """
+    if keep_external_columns is True:
+        # Keep all columns except geometry and plot_id
+        return [c for c in all_columns if c not in [plot_id_column, "geometry"]]
+    elif keep_external_columns is False:
+        # Keep nothing
+        return []
+    else:
+        # Use provided list (handle None case)
+        return keep_external_columns or []
+
+
 def _add_admin_context(
     df: pd.DataFrame, admin_code_col: str = "admin_code_median", debug: bool = False
 ) -> pd.DataFrame:
@@ -226,7 +277,7 @@ def _add_admin_context(
     pd.DataFrame
         DataFrame with added Country, ProducerCountry, Admin_Level_1 columns
     """
-    logger = logging.getLogger("whisp
+    logger = logging.getLogger("whisp")
 
     # Return early if admin code column doesn't exist
     if admin_code_col not in df.columns:

@@ -347,7 +398,7 @@ def join_admin_codes(
     pd.DataFrame
         DataFrame with added Country, ProducerCountry, Admin_Level_1 columns
     """
-    logger = logging.getLogger("whisp
+    logger = logging.getLogger("whisp")
 
    # Return early if admin code column doesn't exist
     if id_col not in df.columns:
@@ -408,8 +459,9 @@ class ProgressTracker:
     """
     Track batch processing progress with time estimation.
 
-    Shows progress at
-    time remaining based on
+    Shows progress at adaptive milestones (more frequent for small datasets,
+    less frequent for large datasets) with estimated time remaining based on
+    processing speed.
     """
 
     def __init__(self, total: int, logger: logging.Logger = None):
@@ -426,8 +478,19 @@ class ProgressTracker:
         self.total = total
         self.completed = 0
         self.lock = threading.Lock()
-        self.logger = logger or logging.getLogger("whisp
-
+        self.logger = logger or logging.getLogger("whisp")
+
+        # Adaptive milestones based on dataset size
+        # Small datasets (< 50): show every 25% (not too spammy)
+        # Medium (50-500): show every 20%
+        # Large (500+): show every 10% (more frequent feedback on long runs)
+        if total < 50:
+            self.milestones = {25, 50, 75, 100}
+        elif total < 500:
+            self.milestones = {20, 40, 60, 80, 100}
+        else:
+            self.milestones = {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}
+
         self.shown_milestones = set()
         self.start_time = time.time()
         self.last_update_time = self.start_time
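For reference, the adaptive-milestone rule added above can be exercised in isolation. A minimal sketch; milestones_for is a hypothetical stand-in for the branch in ProgressTracker.__init__:

    def milestones_for(total: int) -> set:
        # Mirrors the adaptive thresholds introduced in ProgressTracker above
        if total < 50:
            return {25, 50, 75, 100}
        elif total < 500:
            return {20, 40, 60, 80, 100}
        return set(range(10, 101, 10))

    assert milestones_for(30) == {25, 50, 75, 100}
    assert milestones_for(5000) == {10, 20, 30, 40, 50, 60, 70, 80, 90, 100}
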
@@ -544,9 +607,11 @@ def validate_ee_endpoint(endpoint_type: str = "high-volume", raise_error: bool =
         )
         msg += "ee.Reset()\n"
         if endpoint_type == "high-volume":
-            msg +=
+            msg += (
+                "ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')"
+            )
         else:
-            msg += "
+            msg += "ee.Initialize() # Uses standard endpoint by default"
 
     if raise_error:
         raise RuntimeError(msg)
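The remediation the error message points at, as a runnable sketch. It assumes the earthengine-api package is installed and ee.Authenticate() has already been run:

    import ee

    ee.Reset()
    # High-volume endpoint, recommended for concurrent batch processing:
    ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')
    # Or the standard endpoint:
    # ee.Initialize()
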
@@ -713,8 +778,8 @@ def convert_batch_to_ee(batch_gdf: gpd.GeoDataFrame) -> ee.FeatureCollection:
     """
     Convert a batch GeoDataFrame to EE FeatureCollection efficiently.
 
-    OPTIMIZATION:
-    This
+    OPTIMIZATION: Passes GeoDataFrame directly to convert_geojson_to_ee to preserve CRS.
+    This ensures proper coordinate system handling and reprojection to WGS84 if needed.
 
     Preserves the __row_id__ column if present so it can be retrieved after processing.
 

@@ -728,10 +793,13 @@ def convert_batch_to_ee(batch_gdf: gpd.GeoDataFrame) -> ee.FeatureCollection:
     ee.FeatureCollection
         EE FeatureCollection with __row_id__ as a feature property
     """
-    #
-    #
-
-
+    # Pass GeoDataFrame directly to preserve CRS metadata
+    # convert_geojson_to_ee will handle:
+    # - CRS detection and conversion to WGS84 if needed
+    # - Data type sanitization (datetime, object columns)
+    # - Geometry validation and Z-coordinate stripping
+
+    fc = convert_geojson_to_ee(batch_gdf, enforce_wgs84=True, strip_z_coords=True)
 
     # If __row_id__ is in the original GeoDataFrame, it will be preserved
     # as a feature property in the GeoJSON and thus in the EE FeatureCollection
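A minimal usage sketch of the pathway above (hypothetical file name; convert_geojson_to_ee is defined in openforis_whisp/data_conversion.py, diffed further down):

    import geopandas as gpd

    batch_gdf = gpd.read_file("plots.geojson")  # hypothetical input
    fc = convert_geojson_to_ee(batch_gdf, enforce_wgs84=True, strip_z_coords=True)
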
@@ -763,7 +831,7 @@ def clean_geodataframe(
     gpd.GeoDataFrame
         Cleaned GeoDataFrame
     """
-    logger = logger or logging.getLogger("whisp
+    logger = logger or logging.getLogger("whisp")
 
     if remove_nulls:
         null_count = gdf.geometry.isna().sum()

@@ -828,7 +896,7 @@ def process_ee_batch(
     RuntimeError
         If processing fails after all retries
     """
-    logger = logger or logging.getLogger("whisp
+    logger = logger or logging.getLogger("whisp")
 
     for attempt in range(max_retries):
         try:

@@ -955,7 +1023,7 @@ def whisp_stats_geojson_to_df_concurrent(
     """
     from openforis_whisp.reformat import format_stats_dataframe
 
-    logger = logger or logging.getLogger("whisp
+    logger = logger or logging.getLogger("whisp")
 
     # Suppress verbose output from dependencies (dynamically adjust based on max_concurrent)
     _suppress_verbose_output(max_concurrent=max_concurrent)
@@ -978,6 +1046,16 @@ def whisp_stats_geojson_to_df_concurrent(
     # Add stable plotIds for merging (starting from 1, not 0)
     gdf[plot_id_column] = range(1, len(gdf) + 1)
 
+    # Strip unnecessary properties before sending to EE
+    # Keep only: geometry, plot_id_column, and external_id_column
+    # This prevents duplication of GeoJSON properties in EE results
+    keep_cols = ["geometry", plot_id_column]
+    if external_id_column and external_id_column in gdf.columns:
+        keep_cols.append(external_id_column)
+
+    gdf_for_ee = gdf[keep_cols].copy()
+    logger.debug(f"Stripped GeoJSON to essential columns: {keep_cols}")
+
     # Create image if not provided
     if whisp_image is None:
         logger.debug("Creating Whisp image...")
@@ -1001,8 +1079,8 @@ def whisp_stats_geojson_to_df_concurrent(
     reducer = ee.Reducer.sum().combine(ee.Reducer.median(), sharedInputs=True)
 
     # Batch the data
-    batches = batch_geodataframe(
-    logger.info(f"Processing {len(
+    batches = batch_geodataframe(gdf_for_ee, batch_size)
+    logger.info(f"Processing {len(gdf_for_ee):,} features in {len(batches)} batches")
 
     # Setup semaphore for EE concurrency control
     ee_semaphore = threading.BoundedSemaphore(max_concurrent)
@@ -1064,8 +1142,35 @@ def whisp_stats_geojson_to_df_concurrent(
     if plot_id_column not in df_server.columns:
         df_server[plot_id_column] = range(len(df_server))
 
-
-
+    # Keep all EE statistics from server (all columns with _sum and _median suffixes)
+    # These are the actual EE processing results
+    df_server_clean = df_server.copy()
+
+    # Keep external metadata: plot_id, external_id, geometry, geometry type, and centroids from client
+    # (formatted wrapper handles keep_external_columns parameter)
+    keep_external_columns = [plot_id_column]
+    if (
+        external_id_column
+        and external_id_column in df_client.columns
+    ):
+        keep_external_columns.append(external_id_column)
+    if "geometry" in df_client.columns:
+        keep_external_columns.append("geometry")
+    # Keep geometry type column (Geometry_type)
+    if geometry_type_column in df_client.columns:
+        keep_external_columns.append(geometry_type_column)
+    # Also keep centroid columns (Centroid_lon, Centroid_lat)
+    centroid_cols = [
+        c for c in df_client.columns if c.startswith("Centroid_")
+    ]
+    keep_external_columns.extend(centroid_cols)
+
+    df_client_clean = df_client[
+        [c for c in keep_external_columns if c in df_client.columns]
+    ].drop_duplicates()
+
+    merged = df_server_clean.merge(
+        df_client_clean,
         on=plot_id_column,
         how="left",
         suffixes=("_ee", "_client"),
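To make the shape of the merge above concrete, a toy illustration with pandas (column names illustrative only):

    import pandas as pd

    df_server = pd.DataFrame({"plotId": [1, 2], "Area_sum": [3.2, 0.8]})
    df_client = pd.DataFrame(
        {"plotId": [1, 2], "Centroid_lon": [12.5, 13.1], "Centroid_lat": [41.9, 42.0]}
    )
    merged = df_server.merge(df_client, on="plotId", how="left", suffixes=("_ee", "_client"))
    # merged now carries EE statistics plus client-side metadata, one row per plot
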
@@ -1442,7 +1547,7 @@ def whisp_stats_geojson_to_df_sequential(
     """
     from openforis_whisp.reformat import format_stats_dataframe
 
-    logger = logger or logging.getLogger("whisp
+    logger = logger or logging.getLogger("whisp")
 
     # Suppress verbose output from dependencies (sequential has lower concurrency, use default)
     _suppress_verbose_output(max_concurrent=1)

@@ -1469,6 +1574,16 @@ def whisp_stats_geojson_to_df_sequential(
     row_id_col = "__row_id__"
     gdf[row_id_col] = range(len(gdf))
 
+    # Strip unnecessary properties before sending to EE
+    # Keep only: geometry, plot_id_column, and external_id_column
+    # This prevents duplication of GeoJSON properties in EE results
+    keep_cols = ["geometry", plot_id_column, row_id_col]
+    if external_id_column and external_id_column in gdf.columns:
+        keep_cols.append(external_id_column)
+
+    gdf_for_ee = gdf[keep_cols].copy()
+    logger.debug(f"Stripped GeoJSON to essential columns: {keep_cols}")
+
     # Create image if not provided
     if whisp_image is None:
         logger.debug("Creating Whisp image...")
@@ -1491,7 +1606,7 @@ def whisp_stats_geojson_to_df_sequential(
     # Convert to EE (suppress print statements from convert_geojson_to_ee)
     logger.debug("Converting to EE FeatureCollection...")
     with redirect_stdout(io.StringIO()):
-        fc = convert_geojson_to_ee(
+        fc = convert_geojson_to_ee(gdf_for_ee, enforce_wgs84=True, strip_z_coords=True)
 
     # Create reducer
     reducer = ee.Reducer.sum().combine(ee.Reducer.median(), sharedInputs=True)
@@ -1633,6 +1748,7 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
     convert_water_flag: bool = True,
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
+    include_geometry_audit_trail: bool = False,
 ) -> pd.DataFrame:
     """
     Process GeoJSON concurrently with automatic formatting and validation.

@@ -1683,15 +1799,26 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         Water flag ratio threshold (default 0.5)
     sort_column : str
         Column to sort by (default "plotId", None to skip)
+    include_geometry_audit_trail : bool, default False
+        If True, includes audit trail columns:
+        - geo_original: Original input geometry (before EE processing)
+        - geometry_type_original: Original geometry type
+        - geometry_type: Processed geometry type (from EE)
+        - geometry_type_changed: Boolean flag if geometry changed
+        - geometry_type_transition: Description of how it changed
+        These columns enable full transparency and auditability for compliance tracking.
 
     Returns
     -------
     pd.DataFrame
-        Validated, formatted results DataFrame
+        Validated, formatted results DataFrame with optional audit trail
     """
     from openforis_whisp.reformat import format_stats_dataframe
+    from datetime import datetime, timezone
+    import json
+    from shapely.geometry import mapping
 
-    logger = logger or logging.getLogger("whisp
+    logger = logger or logging.getLogger("whisp")
 
     # Auto-detect decimal places from config if not provided
     if decimal_places is None:

@@ -1699,6 +1826,9 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         decimal_places = _extract_decimal_places(stats_area_columns_formatting)
         logger.debug(f"Using decimal_places={decimal_places} from config")
 
+    # Normalize keep_external_columns parameter early (will be used in merge logic later)
+    # Load GeoJSON temporarily to get column names for normalization
+
     # Step 1: Get raw stats
     logger.debug("Step 1/2: Extracting statistics (concurrent)...")
     df_raw = whisp_stats_geojson_to_df_concurrent(
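The audit trail added below serializes the original geometries with shapely's mapping(); for reference, that pattern in isolation:

    import json
    from shapely.geometry import Point, mapping

    geo_original = json.dumps(mapping(Point(12.5, 41.9)))
    # '{"type": "Point", "coordinates": [12.5, 41.9]}'
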
@@ -1759,6 +1889,113 @@ def whisp_formatted_stats_geojson_to_df_concurrent(
         custom_bands=custom_bands,
     )
 
+    # Step 2c: Add audit trail columns (AFTER validation to preserve columns)
+    if include_geometry_audit_trail:
+        logger.debug("Adding audit trail columns...")
+        try:
+            # Capture original geometries AFTER we have the raw stats
+            logger.debug("Capturing original geometries for audit trail...")
+            gdf_original = _load_geojson_silently(input_geojson_filepath)
+
+            # Use plotId from df_validated to maintain mapping
+            df_original_geom = pd.DataFrame(
+                {
+                    "plotId": df_validated["plotId"].values[: len(gdf_original)],
+                    "geo_original": gdf_original["geometry"].apply(
+                        lambda g: json.dumps(mapping(g)) if g is not None else None
+                    ),
+                    "geometry_type_original": gdf_original["geometry"].geom_type.values,
+                }
+            )
+
+            # Merge original geometries back
+            df_validated = df_validated.merge(df_original_geom, on="plotId", how="left")
+
+            # Extract geometry type from processed 'geo' column if it exists
+            # Note: 'geo' column may not exist after validation removes extra columns
+            if "geo" in df_validated.columns:
+                # Use geo column from validated dataframe
+                def extract_geom_type(x):
+                    try:
+                        if isinstance(x, dict):
+                            return x.get("type")
+                        elif isinstance(x, str):
+                            # Handle both JSON strings and Python dict string representations
+                            try:
+                                parsed = json.loads(x)
+                            except:
+                                # Try ast.literal_eval for Python dict representations
+                                import ast
+
+                                parsed = ast.literal_eval(x)
+                            return (
+                                parsed.get("type") if isinstance(parsed, dict) else None
+                            )
+                    except:
+                        pass
+                    return None
+
+                df_validated["geometry_type"] = df_validated["geo"].apply(
+                    extract_geom_type
+                )
+            else:
+                # If geo doesn't exist, just use the original type
+                df_validated["geometry_type"] = df_validated["geometry_type_original"]
+
+            # Flag if geometry changed
+            df_validated["geometry_type_changed"] = (
+                df_validated["geometry_type_original"] != df_validated["geometry_type"]
+            )
+
+            # Classify the geometry type transition
+            def classify_transition(orig, proc):
+                if orig == proc:
+                    return "no_change"
+                elif proc == "LineString":
+                    return f"{orig}_simplified_to_linestring"
+                elif proc == "Point":
+                    return f"{orig}_simplified_to_point"
+                else:
+                    return f"{orig}_to_{proc}"
+
+            df_validated["geometry_type_transition"] = df_validated.apply(
+                lambda row: classify_transition(
+                    row["geometry_type_original"], row["geometry_type"]
+                ),
+                axis=1,
+            )
+
+            # Store processing metadata
+            df_validated.attrs["processing_metadata"] = {
+                "whisp_version": "2.0",
+                "processing_date": datetime.now().isoformat(),
+                "processing_mode": "concurrent",
+                "ee_endpoint": "high_volume",
+                "validate_geometries": validate_geometries,
+                "datasets_used": national_codes or [],
+                "include_geometry_audit_trail": True,
+            }
+
+            logger.info(
+                f"Audit trail added: {df_validated['geometry_type_changed'].sum()} geometries with type changes"
+            )
+
+        except Exception as e:
+            logger.warning(f"Error adding audit trail: {e}")
+            # Continue without audit trail if something fails
+
+    # Add processing metadata column using pd.concat to avoid fragmentation warning
+    metadata_dict = {
+        "whisp_version": "3.0.0a1",
+        "processing_timestamp_utc": datetime.now(timezone.utc).strftime(
+            "%Y-%m-%d %H:%M:%S UTC"
+        ),
+    }
+    metadata_series = pd.Series(
+        [metadata_dict] * len(df_validated), name="whisp_processing_metadata"
+    )
+    df_validated = pd.concat([df_validated, metadata_series], axis=1)
+
     logger.info("Concurrent processing + formatting + validation complete")
     return df_validated
 
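A sketch of how the audit trail is consumed downstream (hypothetical input path; the positional input argument and column names are assumed from the code above):

    df = whisp_formatted_stats_geojson_to_df_concurrent(
        "plots.geojson",  # hypothetical input
        include_geometry_audit_trail=True,
    )
    changed = df[df["geometry_type_changed"]]
    print(changed[["plotId", "geometry_type_original", "geometry_type",
                   "geometry_type_transition"]])
    print(df.attrs.get("processing_metadata", {}))
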
@@ -1779,6 +2016,7 @@ def whisp_formatted_stats_geojson_to_df_sequential(
     convert_water_flag: bool = True,
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
+    include_geometry_audit_trail: bool = False,
 ) -> pd.DataFrame:
     """
     Process GeoJSON sequentially with automatic formatting and validation.

@@ -1821,15 +2059,26 @@ def whisp_formatted_stats_geojson_to_df_sequential(
         Water flag ratio threshold (default 0.5)
     sort_column : str
         Column to sort by (default "plotId", None to skip)
+    include_geometry_audit_trail : bool, default True
+        If True, includes audit trail columns:
+        - geo_original: Original input geometry (before EE processing)
+        - geometry_type_original: Original geometry type
+        - geometry_type: Processed geometry type (from EE)
+        - geometry_type_changed: Boolean flag if geometry changed
+        - geometry_type_transition: Description of how it changed
+        These columns enable full transparency and auditability for EUDR compliance.
 
     Returns
     -------
     pd.DataFrame
-        Validated, formatted results DataFrame
+        Validated, formatted results DataFrame with optional audit trail
     """
     from openforis_whisp.reformat import format_stats_dataframe
+    from datetime import datetime, timezone
+    import json
+    from shapely.geometry import mapping
 
-    logger = logger or logging.getLogger("whisp
+    logger = logger or logging.getLogger("whisp")
 
     # Auto-detect decimal places from config if not provided
     if decimal_places is None:
@@ -1893,6 +2142,112 @@ def whisp_formatted_stats_geojson_to_df_sequential(
         custom_bands=custom_bands,
     )
 
+    # Step 2c: Add audit trail columns (AFTER validation to preserve columns)
+    if include_geometry_audit_trail:
+        logger.debug("Adding audit trail columns...")
+        try:
+            # Capture original geometries AFTER we have the raw stats
+            logger.debug("Capturing original geometries for audit trail...")
+            gdf_original = _load_geojson_silently(input_geojson_filepath)
+
+            # Use plotId from df_validated to maintain mapping
+            df_original_geom = pd.DataFrame(
+                {
+                    "plotId": df_validated["plotId"].values[: len(gdf_original)],
+                    "geo_original": gdf_original["geometry"].apply(
+                        lambda g: json.dumps(mapping(g)) if g is not None else None
+                    ),
+                    "geometry_type_original": gdf_original["geometry"].geom_type.values,
+                }
+            )
+
+            # Merge original geometries back
+            df_validated = df_validated.merge(df_original_geom, on="plotId", how="left")
+
+            # Extract geometry type from processed 'geo' column if it exists
+            # Note: 'geo' column may not exist after validation removes extra columns
+            if "geo" in df_validated.columns:
+                # Use geo column from validated dataframe
+                def extract_geom_type(x):
+                    try:
+                        if isinstance(x, dict):
+                            return x.get("type")
+                        elif isinstance(x, str):
+                            # Handle both JSON strings and Python dict string representations
+                            try:
+                                parsed = json.loads(x)
+                            except:
+                                # Try ast.literal_eval for Python dict representations
+                                import ast
+
+                                parsed = ast.literal_eval(x)
+                            return (
+                                parsed.get("type") if isinstance(parsed, dict) else None
+                            )
+                    except:
+                        pass
+                    return None
+
+                df_validated["geometry_type"] = df_validated["geo"].apply(
+                    extract_geom_type
+                )
+            else:
+                # If geo doesn't exist, just use the original type
+                df_validated["geometry_type"] = df_validated["geometry_type_original"]
+
+            # Flag if geometry changed
+            df_validated["geometry_type_changed"] = (
+                df_validated["geometry_type_original"] != df_validated["geometry_type"]
+            )
+
+            # Classify the geometry type transition
+            def classify_transition(orig, proc):
+                if orig == proc:
+                    return "no_change"
+                elif proc == "LineString":
+                    return f"{orig}_simplified_to_linestring"
+                elif proc == "Point":
+                    return f"{orig}_simplified_to_point"
+                else:
+                    return f"{orig}_to_{proc}"
+
+            df_validated["geometry_type_transition"] = df_validated.apply(
+                lambda row: classify_transition(
+                    row["geometry_type_original"], row["geometry_type"]
+                ),
+                axis=1,
+            )
+
+            # Store processing metadata
+            df_validated.attrs["processing_metadata"] = {
+                "whisp_version": "2.0",
+                "processing_date": datetime.now().isoformat(),
+                "processing_mode": "sequential",
+                "ee_endpoint": "standard",
+                "datasets_used": national_codes or [],
+                "include_geometry_audit_trail": True,
+            }
+
+            logger.info(
+                f"Audit trail added: {df_validated['geometry_type_changed'].sum()} geometries with type changes"
+            )
+
+        except Exception as e:
+            logger.warning(f"Error adding audit trail: {e}")
+            # Continue without audit trail if something fails
+
+    # Add processing metadata column using pd.concat to avoid fragmentation warning
+    metadata_dict = {
+        "whisp_version": "3.0.0a1",
+        "processing_timestamp_utc": datetime.now(timezone.utc).strftime(
+            "%Y-%m-%d %H:%M:%S UTC"
+        ),
+    }
+    metadata_series = pd.Series(
+        [metadata_dict] * len(df_validated), name="whisp_processing_metadata"
+    )
+    df_validated = pd.concat([df_validated, metadata_series], axis=1)
+
     logger.info("Sequential processing + formatting + validation complete")
     return df_validated
 
@@ -1923,6 +2278,7 @@ def whisp_formatted_stats_geojson_to_df_fast(
     convert_water_flag: bool = True,
     water_flag_threshold: float = 0.5,
     sort_column: str = "plotId",
+    include_geometry_audit_trail: bool = False,
 ) -> pd.DataFrame:
     """
     Process GeoJSON to Whisp statistics with optimized fast processing.

@@ -1999,7 +2355,7 @@ def whisp_formatted_stats_geojson_to_df_fast(
     ...     mode="sequential"
     ... )
     """
-    logger = logging.getLogger("whisp
+    logger = logging.getLogger("whisp")
 
     # Determine processing mode
     if mode == "auto":

@@ -2050,6 +2406,7 @@ def whisp_formatted_stats_geojson_to_df_fast(
             convert_water_flag=convert_water_flag,
             water_flag_threshold=water_flag_threshold,
             sort_column=sort_column,
+            include_geometry_audit_trail=include_geometry_audit_trail,
         )
     else:  # sequential
         logger.debug("Routing to sequential processing...")

@@ -2067,4 +2424,5 @@ def whisp_formatted_stats_geojson_to_df_fast(
             convert_water_flag=convert_water_flag,
             water_flag_threshold=water_flag_threshold,
             sort_column=sort_column,
+            include_geometry_audit_trail=include_geometry_audit_trail,
         )
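A hedged usage sketch of the dispatcher above (hypothetical path; mode values per the code are "auto", "concurrent", and "sequential"):

    df = whisp_formatted_stats_geojson_to_df_fast(
        "plots.geojson",  # hypothetical input
        mode="auto",      # routes to concurrent or sequential automatically
        include_geometry_audit_trail=False,
    )
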
openforis_whisp/data_checks.py
CHANGED

@@ -7,10 +7,69 @@ and thresholds, raising informative errors when constraints are violated.
 
 import json
 from pathlib import Path
-from shapely.geometry import Polygon as ShapelyPolygon
+from shapely.geometry import Polygon as ShapelyPolygon, shape as shapely_shape
 
 # Note: area summary stats are estimations for use in deciding pathways for analysis
 # (estimation preferred here as allows efficient processing speed and limits overhead of checking file)
+
+
+def _convert_projected_area_to_ha(area_sq_units: float, crs: str = None) -> float:
+    """
+    Convert area from projected CRS units to hectares.
+
+    Most projected CRS use meters as units, so:
+    - area_sq_units is in square meters
+    - 1 hectare = 10,000 m²
+
+    Args:
+        area_sq_units: Area in square units of the projection (typically square meters)
+        crs: CRS string for reference (e.g., 'EPSG:3857'). Used for validation.
+
+    Returns:
+        Area in hectares
+    """
+    # Standard conversion: 1 hectare = 10,000 m²
+    # Most projected CRS use meters, so this works universally
+    return area_sq_units / 10000
+
+
+def _estimate_area_from_bounds(coords, area_conversion_factor: float) -> float:
+    """
+    Estimate area from bounding box when actual area calculation fails.
+    Extracts bounding box and calculates its area as a fallback estimate.
+    Returns area in hectares.
+    """
+    try:
+        # Flatten all coordinates to find bounds
+        all_coords = []
+
+        def flatten_coords(c):
+            if isinstance(c[0], (list, tuple)) and isinstance(c[0][0], (list, tuple)):
+                for sub in c:
+                    flatten_coords(sub)
+            else:
+                all_coords.extend(c)
+
+        flatten_coords(coords)
+        if not all_coords:
+            return 0
+
+        # Extract lon/lat values
+        lons = [c[0] for c in all_coords]
+        lats = [c[1] for c in all_coords]
+
+        min_lon, max_lon = min(lons), max(lons)
+        min_lat, max_lat = min(lats), max(lats)
+
+        # Bounding box area
+        bbox_area = (max_lon - min_lon) * (max_lat - min_lat)
+
+        # Apply conversion factor
+        return abs(bbox_area) * area_conversion_factor
+    except:
+        return 0
+
+
 def analyze_geojson(
     geojson_data: Path | str | dict,
     metrics=[
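A quick check of the projected-area helper above (values chosen for illustration):

    assert _convert_projected_area_to_ha(25_000.0) == 2.5  # 25,000 m² → 2.5 ha
    assert _convert_projected_area_to_ha(10_000.0) == 1.0  # 1 ha by definition
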
@@ -76,6 +135,8 @@ def analyze_geojson(
     - 'vertex_percentiles': {'p25': int, 'p50': int, 'p75': int, 'p90': int}
     """
     results = {}
+    crs_warning = None
+    file_path = None
 
     try:
         # Load GeoJSON from file if path provided
@@ -83,11 +144,45 @@ def analyze_geojson(
             file_path = Path(geojson_data)
             if not file_path.exists():
                 raise FileNotFoundError(f"GeoJSON file not found: {file_path}")
-
-
+
+            # Try UTF-8 first (most common), then fall back to auto-detection
+            try:
+                with open(file_path, "r", encoding="utf-8") as f:
+                    geojson_data = json.load(f)
+            except UnicodeDecodeError:
+                # Auto-detect encoding if UTF-8 fails
+                try:
+                    import chardet
+
+                    with open(file_path, "rb") as f:
+                        raw_data = f.read()
+                    detected = chardet.detect(raw_data)
+                    encoding = detected.get("encoding", "latin-1")
+
+                    with open(file_path, "r", encoding=encoding, errors="replace") as f:
+                        geojson_data = json.load(f)
+                except Exception:
+                    # Final fallback: use latin-1 which accepts all byte values
+                    with open(file_path, "r", encoding="latin-1") as f:
+                        geojson_data = json.load(f)
+
+            # Detect CRS from file if available
+            try:
+                import geopandas as gpd
+
+                gdf = gpd.read_file(file_path)
+                if gdf.crs and gdf.crs != "EPSG:4326":
+                    crs_warning = f"⚠️ CRS is {gdf.crs}, not EPSG:4326. Area metrics will be inaccurate. Data will be auto-reprojected during processing."
+            except Exception:
+                pass  # If we can't detect CRS, continue without warning
 
         features = geojson_data.get("features", [])
 
+        # Add CRS warning to results if detected
+        if crs_warning:
+            results["crs_warning"] = crs_warning
+            print(crs_warning)
+
         if "count" in metrics:
             results["count"] = len(features)
 
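The encoding fallback above leans on chardet's detect(); that step in isolation (hypothetical file name):

    import chardet

    with open("plots.geojson", "rb") as f:  # hypothetical input
        detected = chardet.detect(f.read())
    encoding = detected.get("encoding", "latin-1")
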
@@ -113,6 +208,29 @@ def analyze_geojson(
         geometry_type_counts = {}
         valid_polygons = 0
 
+        # Tracking for fallback geometries
+        bbox_fallback_count = 0  # Geometries that used bounding box estimate
+        geometry_skip_count = 0  # Geometries completely skipped
+        polygon_type_stats = {}  # Track stats by geometry type
+
+        # Detect CRS to determine area conversion factor
+        area_conversion_factor = 1232100  # Default: WGS84 (degrees to ha)
+        detected_crs = None
+
+        # Try to detect CRS from file if available
+        if file_path:
+            try:
+                import geopandas as gpd
+
+                gdf_temp = gpd.read_file(str(file_path))
+                detected_crs = gdf_temp.crs
+                if detected_crs and detected_crs != "EPSG:4326":
+                    # Projected CRS typically uses meters, so convert m² to ha
+                    # 1 ha = 10,000 m²
+                    area_conversion_factor = 1 / 10000
+            except Exception:
+                pass  # Use default if CRS detection fails
+
         for feature in features:
             try:
                 coords = feature["geometry"]["coordinates"]
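For orientation, the default factor of 1232100 follows from treating one degree as roughly 111 km at the equator (a rough estimate, consistent with the module note that these areas are estimations):

    deg_km = 111.0                # ~kilometres per degree (equatorial approximation)
    factor = (deg_km ** 2) * 100  # deg² → km² → ha (1 km² = 100 ha)
    assert factor == 1_232_100.0
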
@@ -133,13 +251,27 @@ def analyze_geojson(
 
                 # Calculate area from coordinates using shapely
                 try:
-
-
-                    #
-                    area_ha = abs(
+                    # Use shapely.geometry.shape to properly handle all geometry components
+                    geom = shapely_shape(feature["geometry"])
+                    # Convert using detected CRS
+                    area_ha = abs(geom.area) * area_conversion_factor
                     areas.append(area_ha)
-                except:
-
+                except Exception as e:
+                    # Fallback: estimate from bounding box if geometry fails
+                    bbox_area = _estimate_area_from_bounds(
+                        coords, area_conversion_factor
+                    )
+                    if bbox_area > 0:
+                        areas.append(bbox_area)
+                        bbox_fallback_count += 1
+                        polygon_type_stats["Polygon_bbox"] = (
+                            polygon_type_stats.get("Polygon_bbox", 0) + 1
+                        )
+                    else:
+                        geometry_skip_count += 1
+                        polygon_type_stats["Polygon_skipped"] = (
+                            polygon_type_stats.get("Polygon_skipped", 0) + 1
+                        )
                 valid_polygons += 1
 
             elif geom_type == "MultiPolygon":
@@ -152,12 +284,28 @@ def analyze_geojson(
 
                 # Calculate area from coordinates using shapely
                 try:
-
-
-
-
-
-
+                    # Use shapely.geometry.shape to properly handle MultiPolygon
+                    geom = shapely_shape(feature["geometry"])
+                    # Convert using detected CRS - use total area of all parts
+                    area_ha = abs(geom.area) * area_conversion_factor
+                    areas.append(area_ha)
+                except Exception as e:
+                    # Fallback: estimate from bounding box if geometry fails
+                    bbox_area = _estimate_area_from_bounds(
+                        coords, area_conversion_factor
+                    )
+                    if bbox_area > 0:
+                        areas.append(bbox_area)
+                        bbox_fallback_count += 1
+                        polygon_type_stats["MultiPolygon_bbox"] = (
+                            polygon_type_stats.get("MultiPolygon_bbox", 0) + 1
+                        )
+                    else:
+                        geometry_skip_count += 1
+                        polygon_type_stats["MultiPolygon_skipped"] = (
+                            polygon_type_stats.get("MultiPolygon_skipped", 0)
+                            + 1
+                        )
                 valid_polygons += 1
 
         except:
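To make the conversion concrete, shapely's shape() plus the WGS84 factor (a rough estimate only, per the module note above):

    from shapely.geometry import shape

    square = shape({
        "type": "Polygon",
        "coordinates": [[[0, 0], [0.1, 0], [0.1, 0.1], [0, 0.1], [0, 0]]],
    })
    area_ha = abs(square.area) * 1232100  # 0.01 deg² → ~12,321 ha
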
@@ -312,6 +460,21 @@ def analyze_geojson(
             else {"p25": 0, "p50": 0, "p75": 0, "p90": 0}
         )
 
+        # Add geometry quality logging to results
+        if bbox_fallback_count > 0 or geometry_skip_count > 0:
+            geometry_quality_log = (
+                f"Geometry quality summary:\n"
+                f" - Bounding box fallback used: {bbox_fallback_count} features\n"
+                f" - Geometries skipped: {geometry_skip_count} features"
+            )
+            if polygon_type_stats:
+                geometry_quality_log += "\n - Breakdown:"
+                for stat_type, count in sorted(polygon_type_stats.items()):
+                    geometry_quality_log += f"\n   - {stat_type}: {count}"
+
+            results["geometry_quality_note"] = geometry_quality_log
+            print(geometry_quality_log)
+
         return results
 
     except Exception as e:
openforis_whisp/data_conversion.py
CHANGED

@@ -12,67 +12,81 @@ import geopandas as gpd
 import ee
 
 
-
-
-
-
-
+# ============================================================================
+# HELPER FUNCTIONS FOR UNIFIED PROCESSING PATHWAY
+# ============================================================================
+
+
+def _sanitize_geodataframe(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """
-
-
-
-
+    Sanitize GeoDataFrame data types for JSON serialization.
+
+    Converts problematic data types that cannot be directly serialized:
+    - DateTime/Timestamp columns → ISO format strings
+    - Object columns → strings
+    - Skips geometry column
 
     Args:
-
-            or a GeoJSON dictionary object.
-        enforce_wgs84 (bool): Whether to enforce WGS 84 projection (EPSG:4326). Defaults to True.
-            Only applies when input is a file path (dicts are assumed to be in WGS84).
-        strip_z_coords (bool): Whether to automatically strip Z coordinates from 3D geometries. Defaults to True.
+        gdf (gpd.GeoDataFrame): Input GeoDataFrame
 
     Returns:
-
+        gpd.GeoDataFrame: GeoDataFrame with sanitized data types
+    """
+    gdf = gdf.copy()
+    for col in gdf.columns:
+        if col != gdf.geometry.name:  # Skip geometry column
+            # Handle datetime/timestamp columns
+            if pd.api.types.is_datetime64_any_dtype(gdf[col]):
+                gdf[col] = gdf[col].dt.strftime("%Y-%m-%d %H:%M:%S").fillna("")
+            # Handle other problematic types
+            elif gdf[col].dtype == "object":
+                # Convert any remaining non-serializable objects to strings
+                gdf[col] = gdf[col].astype(str)
+    return gdf
+
+
+def _ensure_wgs84_crs(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     """
-
-        # Input is already a GeoJSON dictionary - skip file reading
-        geojson_data = geojson_filepath
-    elif isinstance(geojson_filepath, (str, Path)):
-        file_path = os.path.abspath(geojson_filepath)
+    Ensure GeoDataFrame uses WGS 84 (EPSG:4326) coordinate reference system.
 
-
-
+    - If CRS is None, assumes WGS 84
+    - If CRS is not WGS 84, converts to WGS 84
+    - If already WGS 84, returns unchanged
 
-
-
-        if col != gdf.geometry.name:  # Skip geometry column
-            # Handle datetime/timestamp columns
-            if pd.api.types.is_datetime64_any_dtype(gdf[col]):
-                gdf[col] = gdf[col].dt.strftime("%Y-%m-%d %H:%M:%S").fillna("")
-            # Handle other problematic types
-            elif gdf[col].dtype == "object":
-                # Convert any remaining non-serializable objects to strings
-                gdf[col] = gdf[col].astype(str)
-
-        # Check and convert CRS if needed
-        if enforce_wgs84:
-            if gdf.crs is None:
-                # Assuming WGS 84 if no CRS defined
-                pass
-            elif gdf.crs != "EPSG:4326":
-                gdf = gdf.to_crs("EPSG:4326")
-
-        # Convert to GeoJSON
-        geojson_data = json.loads(gdf.to_json())
-    else:
-        raise ValueError(
-            "Input must be a file path (str or Path) or a GeoJSON dictionary object (dict)"
-        )
+    Args:
+        gdf (gpd.GeoDataFrame): Input GeoDataFrame
 
-
-
-
+    Returns:
+        gpd.GeoDataFrame: GeoDataFrame in WGS 84
+    """
+    if gdf.crs is None:
+        # Assuming WGS 84 if no CRS defined
+        return gdf
+    elif gdf.crs != "EPSG:4326":
+        return gdf.to_crs("EPSG:4326")
+    return gdf
+
+
+def _create_ee_feature_collection(
+    geojson_data: dict, strip_z_coords: bool = True, input_source: str = "input"
+) -> ee.FeatureCollection:
+    """
+    Create Earth Engine FeatureCollection from GeoJSON dict with error recovery.
+
+    Attempts to create EE FeatureCollection. If it fails due to 3D coordinates
+    and strip_z_coords is True, automatically strips Z values and retries.
+
+    Args:
+        geojson_data (dict): GeoJSON data dictionary
+        strip_z_coords (bool): Whether to retry with 2D geometries on failure
+        input_source (str): Description of input source for logging
+
+    Returns:
+        ee.FeatureCollection: Earth Engine FeatureCollection
 
+    Raises:
+        ee.EEException: If conversion fails even after retries
+    """
     try:
         feature_collection = ee.FeatureCollection(
             create_feature_collection(geojson_data)
@@ -81,16 +95,16 @@ def convert_geojson_to_ee(
     except ee.EEException as e:
         if "Invalid GeoJSON geometry" in str(e) and strip_z_coords:
             # Apply print_once deduplication for Z-coordinate stripping messages
-            if not hasattr(
-
+            if not hasattr(_create_ee_feature_collection, "_printed_z_messages"):
+                _create_ee_feature_collection._printed_z_messages = set()
 
-            z_message_key = f"z_coords_{
-            if z_message_key not in
+            z_message_key = f"z_coords_{input_source}"
+            if z_message_key not in _create_ee_feature_collection._printed_z_messages:
                 print(
                     "Warning: Invalid GeoJSON geometry detected, likely due to 3D coordinates."
                 )
                 print("Attempting to fix by stripping Z coordinates...")
-
+                _create_ee_feature_collection._printed_z_messages.add(z_message_key)
 
             # Apply Z-coordinate stripping
             geojson_data_fixed = _strip_z_coordinates_from_geojson(geojson_data)
@@ -101,10 +115,15 @@ def convert_geojson_to_ee(
                 create_feature_collection(geojson_data_fixed)
             )
 
-            success_message_key = f"z_coords_success_{
-            if
+            success_message_key = f"z_coords_success_{input_source}"
+            if (
+                success_message_key
+                not in _create_ee_feature_collection._printed_z_messages
+            ):
                 print("Successfully converted after stripping Z coordinates")
-
+                _create_ee_feature_collection._printed_z_messages.add(
+                    success_message_key
+                )
 
             return feature_collection
         except Exception as retry_error:
@@ -115,6 +134,82 @@ def convert_geojson_to_ee(
     raise e
 
 
+def convert_geojson_to_ee(
+    geojson_input: Union[str, Path, dict, gpd.GeoDataFrame],
+    enforce_wgs84: bool = True,
+    strip_z_coords: bool = True,
+) -> ee.FeatureCollection:
+    """
+    Converts GeoJSON data to an Earth Engine FeatureCollection.
+
+    Accepts flexible input types with a unified processing pathway:
+    - File path (str or Path) → loads with GeoPandas
+    - GeoJSON dict → uses directly
+    - GeoDataFrame → uses directly
+
+    Automatically handles:
+    - CRS conversion to WGS 84 (EPSG:4326) if needed
+    - DateTime/Timestamp columns → converts to ISO strings before JSON serialization
+    - Non-serializable objects → converts to strings
+    - 3D coordinates → strips Z values when necessary
+    - Z-coordinate errors → retries with 2D geometries if enabled
+
+    Args:
+        geojson_input (Union[str, Path, dict, gpd.GeoDataFrame]):
+            - File path (str or Path) to GeoJSON file
+            - GeoJSON dictionary object
+            - GeoPandas GeoDataFrame
+        enforce_wgs84 (bool): Whether to enforce WGS 84 projection (EPSG:4326).
+            Defaults to True. Only applies to file path and GeoDataFrame inputs.
+        strip_z_coords (bool): Whether to automatically strip Z coordinates from 3D geometries.
+            Defaults to True.
+
+    Returns:
+        ee.FeatureCollection: Earth Engine FeatureCollection created from the GeoJSON.
+
+    Raises:
+        ValueError: If input type is unsupported or GeoJSON validation fails.
+        ee.EEException: If GeoJSON cannot be converted even after retries.
+    """
+    # UNIFIED INPUT NORMALIZATION: Convert all inputs to GeoDataFrame first
+    if isinstance(geojson_input, gpd.GeoDataFrame):
+        gdf = geojson_input.copy()
+        input_source = "GeoDataFrame"
+    elif isinstance(geojson_input, dict):
+        # Convert dict to GeoDataFrame for unified processing
+        gdf = gpd.GeoDataFrame.from_features(geojson_input.get("features", []))
+        input_source = "dict"
+    elif isinstance(geojson_input, (str, Path)):
+        # Load file and convert to GeoDataFrame
+        file_path = os.path.abspath(geojson_input)
+        gdf = gpd.read_file(file_path)
+        input_source = f"file ({file_path})"
+    else:
+        raise ValueError(
+            f"Input must be a file path (str or Path), GeoJSON dict, or GeoDataFrame. "
+            f"Got {type(geojson_input).__name__}"
+        )
+
+    # UNIFIED DATA SANITIZATION PATHWAY
+    # Handle problematic data types before JSON conversion
+    gdf = _sanitize_geodataframe(gdf)
+
+    # UNIFIED CRS HANDLING
+    if enforce_wgs84:
+        gdf = _ensure_wgs84_crs(gdf)
+
+    # UNIFIED GEOJSON CONVERSION
+    geojson_data = json.loads(gdf.to_json())
+
+    # UNIFIED VALIDATION
+    validation_errors = validate_geojson(geojson_data)
+    if validation_errors:
+        raise ValueError(f"GeoJSON validation errors: {validation_errors}")
+
+    # UNIFIED EE CONVERSION with error recovery
+    return _create_ee_feature_collection(geojson_data, strip_z_coords, input_source)
+
+
 def _strip_z_coordinates_from_geojson(geojson_data: dict) -> dict:
     """
     Helper function to strip Z coordinates from GeoJSON data.
openforis_whisp/stats.py
CHANGED

@@ -151,7 +151,7 @@ def whisp_formatted_stats_geojson_to_df_legacy(
     from shapely.validation import make_valid
     import logging as py_logging
 
-    logger = py_logging.getLogger("whisp
+    logger = py_logging.getLogger("whisp")
 
     # Load GeoJSON file
     with open(input_geojson_filepath, "r") as f:

@@ -169,11 +169,14 @@ def whisp_formatted_stats_geojson_to_df_legacy(
             lambda g: make_valid(g) if g and not g.is_valid else g
         )
 
-        #
-
-
-        #
-
+        # Pass GeoDataFrame directly to preserve CRS metadata
+        # convert_geojson_to_ee will handle:
+        # - CRS detection and conversion to WGS84 if needed
+        # - Data type sanitization (datetime, object columns)
+        # - Geometry validation and Z-coordinate stripping
+        feature_collection = convert_geojson_to_ee(
+            gdf, enforce_wgs84=True, strip_z_coords=True
+        )
     else:
         # Original path - no validation
         feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))

@@ -201,6 +204,7 @@ def whisp_formatted_stats_geojson_to_df(
     batch_size: int = 10,
     max_concurrent: int = 20,
     validate_geometries: bool = False,
+    include_geometry_audit_trail: bool = False,
 ) -> pd.DataFrame:
     """
     Main entry point for converting GeoJSON to Whisp statistics.

@@ -253,6 +257,16 @@ def whisp_formatted_stats_geojson_to_df(
         Set to True to automatically fix invalid/self-intersecting polygons.
         For production workflows, it's recommended to use geometry validation and
         cleaning tools BEFORE processing with this function.
+    include_geometry_audit_trail : bool, default True
+        If True (default), includes audit trail columns:
+        - geo_original: Original input geometry
+        - geometry_type_original: Original geometry type
+        - geometry_type: Processed geometry type (from EE)
+        - geometry_type_changed: Boolean flag if geometry changed
+        - geometry_degradation_type: Description of how it changed
+
+        Processing metadata stored in df.attrs['processing_metadata'].
+        These columns enable full transparency for geometry modifications during processing.
 
     Returns
     -------

@@ -345,6 +359,7 @@ def whisp_formatted_stats_geojson_to_df(
             batch_size=batch_size,
             max_concurrent=max_concurrent,
             validate_geometries=validate_geometries,
+            include_geometry_audit_trail=include_geometry_audit_trail,
         )
     else:
         raise ValueError(
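Putting the main entry point together (hypothetical input path; the module path is openforis_whisp/stats.py as diffed above, whether it is also re-exported at package level is an assumption):

    from openforis_whisp.stats import whisp_formatted_stats_geojson_to_df

    df = whisp_formatted_stats_geojson_to_df(
        "plots.geojson",                    # hypothetical input
        validate_geometries=True,           # fix invalid polygons before EE
        include_geometry_audit_trail=True,  # keep geo_original etc. for auditing
    )
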
{openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 3.0.0a1
+Version: 3.0.0a2
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
{openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/RECORD
CHANGED

@@ -1,7 +1,7 @@
 openforis_whisp/__init__.py,sha256=-r_9LFxbV6d-o4s0_huhaXxve6GIzCwl3pXKuJo6ixE,3663
-openforis_whisp/advanced_stats.py,sha256=
-openforis_whisp/data_checks.py,sha256=
-openforis_whisp/data_conversion.py,sha256=
+openforis_whisp/advanced_stats.py,sha256=xrwKHG-c44_UkFha7TFgf71mo9UMw5ZZL3XQTPF5luM,92681
+openforis_whisp/data_checks.py,sha256=KwgD72FA_n7joiJadGRpzntd2sLo0aqGNbOjRkB8iQI,32293
+openforis_whisp/data_conversion.py,sha256=L2IsiUyQUt3aHgSYGbIhgPGwM7eyS3nLVEoNO9YqQeM,21888
 openforis_whisp/datasets.py,sha256=aGJy0OYN4d0nsH3_IOYlHl-WCB7KFwZwMJ-dBi5Hc5Y,53470
 openforis_whisp/logger.py,sha256=9M6_3mdpoiWfC-pDwM9vKmB2l5Gul6Rb5rNTNh-_nzs,3054
 openforis_whisp/parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -12,9 +12,9 @@ openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wof
 openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
 openforis_whisp/reformat.py,sha256=mIooJ3zfSTDY3_Mx3OAW4jpfQ72q3zasG9tl58PdfN4,33729
 openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
-openforis_whisp/stats.py,sha256=
+openforis_whisp/stats.py,sha256=dCQXx6KKEV99owqyPURk6CL97kQQARjetFrIz1ZbIvs,65725
 openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
-openforis_whisp-3.0.
-openforis_whisp-3.0.
-openforis_whisp-3.0.
-openforis_whisp-3.0.
+openforis_whisp-3.0.0a2.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
+openforis_whisp-3.0.0a2.dist-info/METADATA,sha256=wG4vc7B-f0JXmNkTUh4wJ-H0KPpbgyU9OfMwGewZq_A,16684
+openforis_whisp-3.0.0a2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+openforis_whisp-3.0.0a2.dist-info/RECORD,,
{openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/LICENSE
File without changes

{openforis_whisp-3.0.0a1.dist-info → openforis_whisp-3.0.0a2.dist-info}/WHEEL
File without changes