openforis-whisp 3.0.0a5__tar.gz → 3.0.0a6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/PKG-INFO +1 -1
  2. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/pyproject.toml +1 -1
  3. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/advanced_stats.py +25 -31
  4. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/parameters/lookup_context_and_metadata.csv +1 -1
  5. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/LICENSE +0 -0
  6. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/README.md +0 -0
  7. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/__init__.py +0 -0
  8. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/data_checks.py +0 -0
  9. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/data_conversion.py +0 -0
  10. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/datasets.py +0 -0
  11. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/logger.py +0 -0
  12. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/parameters/__init__.py +0 -0
  13. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/parameters/config_runtime.py +0 -0
  14. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/parameters/lookup_gaul1_admin.py +0 -0
  15. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/parameters/lookup_gee_datasets.csv +0 -0
  16. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/pd_schemas.py +0 -0
  17. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/reformat.py +0 -0
  18. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/risk.py +0 -0
  19. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/stats.py +0 -0
  20. {openforis_whisp-3.0.0a5 → openforis_whisp-3.0.0a6}/src/openforis_whisp/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: openforis-whisp
3
- Version: 3.0.0a5
3
+ Version: 3.0.0a6
4
4
  Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
5
5
  License: MIT
6
6
  Keywords: whisp,geospatial,data-processing
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "openforis-whisp"
7
- version = "3.0.0a5"
7
+ version = "3.0.0a6"
8
8
  description = "Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations."
9
9
  repository = "https://github.com/forestdatapartnership/whisp"
10
10
  authors = ["Andy Arnell <andrew.arnell@fao.org>"]
@@ -1148,13 +1148,11 @@ def process_ee_batch(
1148
1148
  f"Columns from EE: {list(df.columns)}"
1149
1149
  )
1150
1150
  # Use 1-indexed range to match client-side assignment
1151
- df[plot_id_column] = range(1, len(df) + 1)
1151
+ df[plot_id_column] = [str(i) for i in range(1, len(df) + 1)]
1152
1152
 
1153
- # Ensure plotId is integer type (EE may return as string)
1153
+ # Ensure plotId is string type (consistent with creation)
1154
1154
  if plot_id_column in df.columns:
1155
- df[plot_id_column] = pd.to_numeric(
1156
- df[plot_id_column], errors="coerce"
1157
- ).astype("Int64")
1155
+ df[plot_id_column] = df[plot_id_column].astype(str)
1158
1156
 
1159
1157
  # Ensure all column names are strings (fixes pandas .str accessor issues)
1160
1158
  df.columns = df.columns.astype(str)
@@ -1310,7 +1308,7 @@ def whisp_stats_geojson_to_df_concurrent(
1310
1308
  )
1311
1309
 
1312
1310
  # Add stable plotIds for merging (starting from 1, not 0)
1313
- gdf[plot_id_column] = range(1, len(gdf) + 1)
1311
+ gdf[plot_id_column] = [str(i) for i in range(1, len(gdf) + 1)]
1314
1312
 
1315
1313
  # Strip unnecessary properties before sending to EE
1316
1314
  # Keep only: geometry, plot_id_column, and external_id
@@ -1323,7 +1321,7 @@ def whisp_stats_geojson_to_df_concurrent(
1323
1321
 
1324
1322
  gdf_for_ee = gdf[keep_cols].copy()
1325
1323
 
1326
- # CRITICAL: Convert external_id to string to prevent EE from confusing it with integer plotId
1324
+ # CRITICAL: Convert external_id to string (both plotId and external_id are now strings)
1327
1325
  if external_id_column and "external_id" in gdf_for_ee.columns:
1328
1326
  gdf_for_ee["external_id"] = gdf_for_ee["external_id"].astype(str)
1329
1327
  logger.debug(f"Converted external_id column to string type")
@@ -1432,15 +1430,15 @@ def whisp_stats_geojson_to_df_concurrent(
1432
1430
  range(1, len(df_server) + 1), dtype="Int64"
1433
1431
  )
1434
1432
  else:
1435
- df_server[plot_id_column] = pd.to_numeric(
1436
- df_server[plot_id_column], errors="coerce"
1437
- ).astype("Int64")
1433
+ df_server[plot_id_column] = df_server[plot_id_column].astype(
1434
+ str
1435
+ )
1438
1436
 
1439
- # Ensure plotId is Int64 in client data too
1437
+ # Ensure plotId is string in client data too
1440
1438
  if plot_id_column in df_client.columns:
1441
- df_client[plot_id_column] = pd.to_numeric(
1442
- df_client[plot_id_column], errors="coerce"
1443
- ).astype("Int64")
1439
+ df_client[plot_id_column] = df_client[plot_id_column].astype(
1440
+ str
1441
+ )
1444
1442
 
1445
1443
  # Keep all EE statistics from server (all columns with _sum and _median suffixes)
1446
1444
  # These are the actual EE processing results
@@ -1751,15 +1749,15 @@ def whisp_stats_geojson_to_df_concurrent(
1751
1749
  # Use 1-indexed range to match client-side assignment
1752
1750
  df_server[plot_id_column] = range(1, len(df_server) + 1)
1753
1751
 
1754
- # Ensure plotId is integer type (EE may return as string)
1752
+ # Ensure plotId is string type (consistent with creation)
1755
1753
  if plot_id_column in df_server.columns:
1756
- df_server[plot_id_column] = pd.to_numeric(
1757
- df_server[plot_id_column], errors="coerce"
1758
- ).astype("Int64")
1754
+ df_server[plot_id_column] = df_server[
1755
+ plot_id_column
1756
+ ].astype(str)
1759
1757
  if plot_id_column in df_client.columns:
1760
- df_client[plot_id_column] = pd.to_numeric(
1761
- df_client[plot_id_column], errors="coerce"
1762
- ).astype("Int64")
1758
+ df_client[plot_id_column] = df_client[
1759
+ plot_id_column
1760
+ ].astype(str)
1763
1761
 
1764
1762
  # Drop external_id from df_server if it exists (already in df_client)
1765
1763
  if "external_id" in df_server.columns:
@@ -1958,7 +1956,7 @@ def whisp_stats_geojson_to_df_sequential(
1958
1956
  )
1959
1957
 
1960
1958
  # Add stable plotIds for merging (starting from 1, not 0)
1961
- gdf[plot_id_column] = range(1, len(gdf) + 1)
1959
+ gdf[plot_id_column] = [str(i) for i in range(1, len(gdf) + 1)]
1962
1960
 
1963
1961
  # Strip unnecessary properties before sending to EE
1964
1962
  # Keep only: geometry, plot_id_column, and external_id
@@ -1971,7 +1969,7 @@ def whisp_stats_geojson_to_df_sequential(
1971
1969
 
1972
1970
  gdf_for_ee = gdf[keep_cols].copy()
1973
1971
 
1974
- # CRITICAL: Convert external_id to string to prevent EE from confusing it with integer plotId
1972
+ # CRITICAL: Convert external_id to string (both plotId and external_id are now strings)
1975
1973
  if external_id_column and "external_id" in gdf_for_ee.columns:
1976
1974
  gdf_for_ee["external_id"] = gdf_for_ee["external_id"].astype(str)
1977
1975
  logger.debug(f"Converted external_id column to string type")
@@ -2051,11 +2049,9 @@ def whisp_stats_geojson_to_df_sequential(
2051
2049
 
2052
2050
  logger.info("Server-side processing complete")
2053
2051
 
2054
- # Ensure plotId is Int64 type for fast merges
2052
+ # Ensure plotId is string type for consistent merges
2055
2053
  if plot_id_column in df_server.columns:
2056
- df_server[plot_id_column] = pd.to_numeric(
2057
- df_server[plot_id_column], errors="coerce"
2058
- ).astype("Int64")
2054
+ df_server[plot_id_column] = df_server[plot_id_column].astype(str)
2059
2055
 
2060
2056
  # Add client-side metadata if requested
2061
2057
  if add_metadata_client_side:
@@ -2066,11 +2062,9 @@ def whisp_stats_geojson_to_df_sequential(
2066
2062
  return_attributes_only=True,
2067
2063
  )
2068
2064
 
2069
- # Ensure plotId is Int64 type for fast merges
2065
+ # Ensure plotId is string type for consistent merges
2070
2066
  if plot_id_column in df_client.columns:
2071
- df_client[plot_id_column] = pd.to_numeric(
2072
- df_client[plot_id_column], errors="coerce"
2073
- ).astype("Int64")
2067
+ df_client[plot_id_column] = df_client[plot_id_column].astype(str)
2074
2068
 
2075
2069
  # Drop external_id from df_server if it exists (keep from df_client - more reliable)
2076
2070
  if "external_id" in df_server.columns:
@@ -1,5 +1,5 @@
1
1
  name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude_from_output,col_type,is_nullable,is_required,corresponding_variable
2
- plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,int64,1,0,plot_id_column
2
+ plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,plot_id_column
3
3
  external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,external_id_column
4
4
  Area,-8,,context_and_metadata,context_and_metadata,NA,NA,0,float32,1,1,geometry_area_column
5
5
  Geometry_type,-7,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_type_column