openforis-whisp 2.0.0b1__tar.gz → 2.0.0b2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/PKG-INFO +1 -1
  2. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/pyproject.toml +1 -1
  3. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/__init__.py +2 -1
  4. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/data_conversion.py +11 -0
  5. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/datasets.py +7 -32
  6. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/parameters/lookup_gee_datasets.csv +2 -5
  7. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/risk.py +29 -29
  8. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/stats.py +92 -0
  9. openforis_whisp-2.0.0b2/src/openforis_whisp/utils.py +487 -0
  10. openforis_whisp-2.0.0b1/src/openforis_whisp/parameters/__init__.py +0 -15
  11. openforis_whisp-2.0.0b1/src/openforis_whisp/utils.py +0 -194
  12. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/LICENSE +0 -0
  13. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/README.md +0 -0
  14. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/logger.py +0 -0
  15. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/parameters/config_runtime.py +0 -0
  16. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/parameters/lookup_context_and_metadata.csv +0 -0
  17. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/pd_schemas.py +0 -0
  18. {openforis_whisp-2.0.0b1 → openforis_whisp-2.0.0b2}/src/openforis_whisp/reformat.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 2.0.0b1
+Version: 2.0.0b2
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "openforis-whisp"
-version = "2.0.0b1"
+version = "2.0.0b2"
 description = "Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations."
 repository = "https://github.com/forestdatapartnership/whisp"
 authors = ["Andy Arnell <andrew.arnell@fao.org>"]
--- a/src/openforis_whisp/__init__.py
+++ b/src/openforis_whisp/__init__.py
@@ -43,6 +43,7 @@ from openforis_whisp.stats import (
     whisp_formatted_stats_ee_to_geojson,
     whisp_formatted_stats_geojson_to_df,
     whisp_formatted_stats_geojson_to_geojson,
+    set_point_geometry_area_to_zero,
     convert_iso3_to_iso2,
 )

@@ -71,4 +72,4 @@ from openforis_whisp.data_conversion import (

 from openforis_whisp.risk import whisp_risk, detect_unit_type

-from openforis_whisp.utils import get_example_data_path
+from openforis_whisp.utils import get_example_data_path, generate_test_polygons
--- a/src/openforis_whisp/data_conversion.py
+++ b/src/openforis_whisp/data_conversion.py
@@ -42,6 +42,17 @@ def convert_geojson_to_ee(
     # Use GeoPandas to read the file and handle CRS
     gdf = gpd.read_file(file_path)

+    # NEW: Handle problematic data types before JSON conversion
+    for col in gdf.columns:
+        if col != gdf.geometry.name:  # Skip geometry column
+            # Handle datetime/timestamp columns
+            if pd.api.types.is_datetime64_any_dtype(gdf[col]):
+                gdf[col] = gdf[col].dt.strftime("%Y-%m-%d %H:%M:%S").fillna("")
+            # Handle other problematic types
+            elif gdf[col].dtype == "object":
+                # Convert any remaining non-serializable objects to strings
+                gdf[col] = gdf[col].astype(str)
+
     # Check and convert CRS if needed
     if enforce_wgs84:
         if gdf.crs is None:
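The sanitization block above exists because GeoPandas happily reads Timestamp and mixed-object columns that GeoJSON serialization then rejects when the GeoDataFrame is converted for Earth Engine. A minimal standalone sketch of the same idea, using a hypothetical two-row fixture:

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

# Hypothetical fixture: a datetime column and a mixed "object" column,
# both of which break plain JSON serialization.
gdf = gpd.GeoDataFrame(
    {
        "survey_date": pd.to_datetime(["2023-05-01", None]),
        "notes": [{"a": 1}, "plain text"],
    },
    geometry=[Point(0, 0), Point(1, 1)],
    crs="EPSG:4326",
)

for col in gdf.columns:
    if col != gdf.geometry.name:
        if pd.api.types.is_datetime64_any_dtype(gdf[col]):
            # NaT becomes NaN after strftime, hence the fillna("")
            gdf[col] = gdf[col].dt.strftime("%Y-%m-%d %H:%M:%S").fillna("")
        elif gdf[col].dtype == "object":
            gdf[col] = gdf[col].astype(str)

print(gdf.to_json())  # now serializes cleanly

Dates become plain strings and arbitrary objects are stringified, so the downstream GeoJSON conversion cannot hit a non-serializable value.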
--- a/src/openforis_whisp/datasets.py
+++ b/src/openforis_whisp/datasets.py
@@ -340,45 +340,20 @@ def g_esri_2023_tc_prep():


 # ESRI 2023 - Crop
-def g_esri_2023_crop_prep():
+def g_esri_2020_2023_crop_prep():
     esri_lulc10_raw = ee.ImageCollection(
         "projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS"
     )
-    esri_lulc10_crop = (
-        esri_lulc10_raw.filterDate("2023-01-01", "2023-12-31").mosaic().eq(5)
+    esri_lulc10_crop_2020 = (
+        esri_lulc10_raw.filterDate("2020-01-01", "2020-12-31").mosaic().eq(5)
     )
-    return esri_lulc10_crop.rename("ESRI_2023_crop")
-
-
-# GLC_FCS30D 2022
-
-# GLC_FCS30D Tree Cover
-# forest classes + swamp + mangrove / what to do with shrubland?
-def g_glc_fcs30d_tc_2022_prep():
-    GLC_FCS30D = (
-        ee.ImageCollection("projects/sat-io/open-datasets/GLC-FCS30D/annual")
-        .mosaic()
-        .select(22)
-    )
-    GLC_FCS30D_TC = (
-        (GLC_FCS30D.gte(51))
-        .And(GLC_FCS30D.lte(92))
-        .Or(GLC_FCS30D.eq(181))
-        .Or(GLC_FCS30D.eq(185))
+    esri_lulc10_crop_2023 = (
+        esri_lulc10_raw.filterDate("2023-01-01", "2023-12-31").mosaic().eq(5)
     )
-    return GLC_FCS30D_TC.rename("GLC_FCS30D_TC_2022")

+    newCrop = esri_lulc10_crop_2023.And(esri_lulc10_crop_2020.Not())

-# GLC_FCS30D crop
-# 10 Rainfed cropland; 11 Herbaceous cover; 12 Tree or shrub cover (Orchard); 20 Irrigated cropland
-def g_glc_fcs30d_crop_2022_prep():
-    GLC_FCS30D = (
-        ee.ImageCollection("projects/sat-io/open-datasets/GLC-FCS30D/annual")
-        .mosaic()
-        .select(22)
-    )
-    GLC_FCS30D_crop = GLC_FCS30D.gte(10).And(GLC_FCS30D.lte(20))
-    return GLC_FCS30D_crop.rename("GLC_FCS30D_crop_2022")
+    return newCrop.rename("ESRI_crop_gain_2020_2023")


 #### disturbances by year
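The replacement band encodes apparent crop gain rather than a single-year crop mask: a pixel qualifies only when ESRI LULC class 5 (crops) is present in the 2023 mosaic and absent in 2020. A sketch of how such a band could be summarized over a plot; it assumes an initialized Earth Engine session, and the plot geometry is hypothetical:

import ee

ee.Initialize()  # assumes you are already authenticated to Earth Engine

lulc = ee.ImageCollection(
    "projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS"
)
crop_2020 = lulc.filterDate("2020-01-01", "2020-12-31").mosaic().eq(5)
crop_2023 = lulc.filterDate("2023-01-01", "2023-12-31").mosaic().eq(5)

# Crop in 2023 AND NOT crop in 2020 -> converted after the 2020 baseline
crop_gain = crop_2023.And(crop_2020.Not()).rename("ESRI_crop_gain_2020_2023")

# Hypothetical 1 km plot; sum pixel areas of the gain mask, in hectares
plot = ee.Geometry.Point(-55.0, -12.0).buffer(1000)
gain_ha = crop_gain.multiply(ee.Image.pixelArea()).divide(10_000).reduceRegion(
    reducer=ee.Reducer.sum(), geometry=plot, scale=10, maxPixels=1e9
)
print(gain_ha.getInfo())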
--- a/src/openforis_whisp/parameters/lookup_gee_datasets.csv
+++ b/src/openforis_whisp/parameters/lookup_gee_datasets.csv
@@ -2,7 +2,7 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
 EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
 GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
 TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
-GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_gfc_tc_2020_prep
+GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
 ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
 TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
@@ -163,13 +163,11 @@ GFT_planted_plantation,1900,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_gft_
 IIASA_planted_plantation,1910,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_iiasa_planted_prep
 TMF_regrowth_2023,2000,,NA,treecover_after_2020,0,1,0,float32,1,0,g_tmf_regrowth_prep
 ESRI_2023_TC,2010,,NA,treecover_after_2020,0,1,0,float32,1,0,g_esri_2023_tc_prep
-GLC_FCS30D_TC_2022,2020,,NA,treecover_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_tc_2022_prep
 Oil_palm_2023_FDaP,2100,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_palm_2023_prep
 Rubber_2023_FDaP,2110,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_rubber_2023_prep
 Coffee_FDaP_2023,2111,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_coffee_2023_prep
 Cocoa_2023_FDaP,2120,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_cocoa_2023_prep
-ESRI_2023_crop,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2023_crop_prep
-GLC_FCS30D_crop_2022,2140,,NA,agri_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_crop_2022_prep
+ESRI_crop_gain_2020_2023,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2020_2023_crop_prep
 GFW_logging_before_2020,2200,,NA,logging_concession,0,1,0,float32,1,0,g_logging_concessions_prep
 nCO_ideam_forest_2020,2310,CO,treecover,NA,1,1,0,float32,1,0,nco_ideam_forest_2020_prep
 nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,1,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
@@ -199,4 +197,3 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
 nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
 nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
 nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep
-
--- a/src/openforis_whisp/risk.py
+++ b/src/openforis_whisp/risk.py
@@ -161,9 +161,8 @@ def whisp_risk(
             lookup_df_copy, custom_bands_info, df.columns
         )
         print(f"Including custom bands: {list(custom_bands_info.keys())}")
-        # print(f"appended custom bands info to lookup table")
     if national_codes:
-        print(f"Filtering by national codes: {national_codes}")
+        print(f"Including additional national data for: {national_codes}")
         # Filter by national codes
         filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
             lookup_df=lookup_df_copy,
@@ -473,7 +472,6 @@ def add_indicators(
     return df


-# Update add_indicator_column to use the unit_type parameter
 def add_indicator_column(
     df: data_lookup_type,
     input_columns: list[str],
@@ -482,49 +480,51 @@ def add_indicator_column(
     low_name: str = "no",
     high_name: str = "yes",
     sum_comparison: bool = False,
-    unit_type: str = None,  # unit_type parameter
+    unit_type: str = None,
 ) -> data_lookup_type:
-    """
-    Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
+    """Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign."""

-    Parameters:
-    df (data_lookup_type): The pandas DataFrame to which the column will be added.
-    input_columns (list): List of column names to check for threshold.
-    threshold (float): The threshold value to compare against.
-    new_column_name (str): The name of the new column to be added.
-        The '>' sign is used for comparisons.
-        When 'sum_comparison' == True, the threshold is compared to the sum of all columns listed in 'input_columns'; when False, each column in the list is compared to the threshold individually.
-    low_name (str): The name for the value when below or equal to threshold (default is 'no').
-    high_name (str): The name for the value when above threshold (default is 'yes').
-    sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
-    unit_type (str): Whether values are in "ha" or "percent".
-
-    Returns:
-    data_lookup_type: The DataFrame with the new column added.
-    """
     # Create a new column and initialize with low_name
     new_column = pd.Series(low_name, index=df.index, name=new_column_name)

-    # Default behavior: use '>' for single column comparison
     if sum_comparison:
         # Sum all values in specified columns and compare to threshold
         sum_values = df[input_columns].sum(axis=1)
         new_column[sum_values > threshold] = high_name
     else:
-        # Check if any values in specified columns are above the threshold and update the new column accordingly
+        # Check if any values in specified columns are above the threshold
         for col in input_columns:
-            # So that the threshold is always in percent: if outputs are in ha, the code converts to percent (dividing by the geometry_area_column column).
-            # Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
             if unit_type == "ha":
                 df[geometry_area_column] = pd.to_numeric(
                     df[geometry_area_column], errors="coerce"
                 )
-                val_to_check = clamp(
-                    ((df[col] / df[geometry_area_column]) * 100), 0, 100
-                )
+
+                # Handle points (Area = 0) separately
+                is_point = df[geometry_area_column] == 0
+
+                # For points: any value > 0 exceeds threshold
+                point_mask = is_point & (df[col] > 0)
+                new_column[point_mask] = high_name
+
+                # For polygons: convert to percentage and check threshold
+                polygon_mask = ~is_point
+                if polygon_mask.any():
+                    val_to_check = clamp(
+                        (
+                            (
+                                df.loc[polygon_mask, col]
+                                / df.loc[polygon_mask, geometry_area_column]
+                            )
+                            * 100
+                        ),
+                        0,
+                        100,
+                    )
+                    new_column[polygon_mask & (val_to_check > threshold)] = high_name
             else:
+                # For percentage values, use direct comparison
                 val_to_check = df[col]
-            new_column[val_to_check > threshold] = high_name
+                new_column[val_to_check > threshold] = high_name

     # Concatenate the new column to the DataFrame
     df = pd.concat([df, new_column], axis=1)
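The rewritten branch changes how "ha" values are judged: a point has zero area, so the old percentage conversion divided by zero; now any non-zero overlap on a point plot trips the indicator, while polygons keep the clamp-to-percent comparison. A toy illustration follows; clamp and the area column name are whisp internals, so the stand-ins below are hypothetical:

import pandas as pd

geometry_area_column = "Area"  # stand-in for whisp's configured column name

def clamp(series, lower, upper):
    # Assumed behaviour of whisp's clamp: clip values into [lower, upper]
    return series.clip(lower=lower, upper=upper)

df = pd.DataFrame(
    {
        "Area": [0.0, 10.0, 10.0],     # row 0: point geometry (area set to 0)
        "EUFO_2020": [0.3, 0.5, 2.0],  # overlap with the dataset, in hectares
    }
)
threshold = 10  # percent

is_point = df[geometry_area_column] == 0

# Points: any non-zero overlap counts as exceeding the threshold
point_hits = is_point & (df["EUFO_2020"] > 0)
print(point_hits.tolist())  # [True, False, False]

# Polygons: convert hectares to percent of plot area, clamp, then compare
pct = clamp(df.loc[~is_point, "EUFO_2020"] / df.loc[~is_point, "Area"] * 100, 0, 100)
print((pct > threshold).tolist())  # [False, True] -> 5% vs 20% of plot area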
--- a/src/openforis_whisp/stats.py
+++ b/src/openforis_whisp/stats.py
@@ -592,9 +592,58 @@ def whisp_stats_ee_to_df(
         print(f"An error occurred during the ISO3 to ISO2 conversion: {e}")
         return pd.DataFrame()  # Return an empty DataFrame in case of error

+    # NEW: Set area to 0 for point geometries
+    try:
+        df_stats = set_point_geometry_area_to_zero(df_stats)
+    except Exception as e:
+        print(f"An error occurred during point geometry area adjustment: {e}")
+        # Continue without the adjustment rather than failing completely
+
     return df_stats


+def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Set the geometry area column to 0 for features with Point geometry type.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing geometry type and area columns
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with area set to 0 for Point geometries
+    """
+    # Check if required columns exist
+    if geometry_type_column not in df.columns:
+        print(
+            f"Warning: {geometry_type_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+
+    if geometry_area_column not in df.columns:
+        print(
+            f"Warning: {geometry_area_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+
+    # Create a copy to avoid modifying the original
+    df_modified = df.copy()
+
+    # Set area to 0 where geometry type is Point
+    point_mask = df_modified[geometry_type_column] == "Point"
+    df_modified.loc[point_mask, geometry_area_column] = 0.0
+
+    # Log the changes
+    num_points = point_mask.sum()
+    if num_points > 0:
+        print(f"Set area to 0 for {num_points} Point geometries")
+
+    return df_modified
+
+
 def whisp_stats_ee_to_drive(
     feature_collection: ee.FeatureCollection,
     external_id_column=None,
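Since set_point_geometry_area_to_zero is also exported from the package __init__ (see the first hunk), it can be called directly on a stats frame. A sketch, assuming whisp's configured column names resolve to "geometry_type" and "Area"; both are assumptions here, as the real names come from the runtime config:

import pandas as pd
from openforis_whisp import set_point_geometry_area_to_zero

# Hypothetical stats frame: the point row picked up a tiny area upstream
df_stats = pd.DataFrame(
    {
        "geometry_type": ["Point", "Polygon"],
        "Area": [0.0786, 12.5],
    }
)

df_fixed = set_point_geometry_area_to_zero(df_stats)
print(df_fixed["Area"].tolist())  # [0.0, 12.5] once the column names match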
@@ -1226,3 +1275,46 @@ def debug_feature_collection_properties(feature_collection, max_features=5):

     except Exception as e:
         return {"error": f"Error during debugging: {str(e)}"}
+
+
+# helper function to set area to 0 for point geometries
+def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Set the geometry area column to 0 for features with Point geometry type.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing geometry type and area columns
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with area set to 0 for Point geometries
+    """
+    # Check if required columns exist
+    if geometry_type_column not in df.columns:
+        print(
+            f"Warning: {geometry_type_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+
+    if geometry_area_column not in df.columns:
+        print(
+            f"Warning: {geometry_area_column} column not found. Skipping area adjustment for points."
+        )
+        return df
+
+    # Create a copy to avoid modifying the original
+    df_modified = df.copy()
+
+    # Set area to 0 where geometry type is Point
+    point_mask = df_modified[geometry_type_column] == "Point"
+    df_modified.loc[point_mask, geometry_area_column] = 0.0
+
+    # Log the changes
+    num_points = point_mask.sum()
+    # if num_points > 0:
+    #     print(f"Set area to 0 for {num_points} Point geometries")
+
+    return df_modified
--- /dev/null
+++ b/src/openforis_whisp/utils.py
@@ -0,0 +1,487 @@
+import base64
+import ee
+import math
+import os
+import pandas as pd
+import random
+import numpy as np
+
+import urllib.request
+import os
+
+import importlib.resources as pkg_resources
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+from shapely.geometry import Polygon, Point, mapping
+from shapely.validation import make_valid
+
+from .logger import StdoutLogger
+
+
+logger = StdoutLogger(__name__)
+
+
+def get_example_data_path(filename):
+    """
+    Get the path to an example data file included in the package.
+
+    Parameters:
+    -----------
+    filename : str
+        The name of the example data file.
+
+    Returns:
+    --------
+    str
+        The path to the example data file.
+    """
+    return os.path.join("..", "tests", "fixtures", filename)
+
+
+def load_env_vars() -> None:
+    """Loads the environment variables required for testing the codebase.
+
+    Returns
+    -------
+    out : None
+    """
+
+    all_dotenv_paths = [Path(__file__).parents[2] / ".env", Path.cwd() / ".env"]
+    dotenv_loaded = False
+
+    for dotenv_path in all_dotenv_paths:
+        logger.logger.debug(f"dotenv_path: {dotenv_path}")
+        if dotenv_path.exists():
+            dotenv_loaded = load_dotenv(dotenv_path)
+            break
+
+    if not dotenv_loaded:
+        raise DotEnvNotFoundError
+    logger.logger.info(f"Loaded environment variables from '{dotenv_path}'")
+
+
+def init_ee() -> None:
+    """Initialize earth engine according to the environment"""
+
+    # only do the initialization if the credentials are missing
+    if not ee.data._credentials:
+
+        # if in test env use the private key
+        if "EE_PRIVATE_KEY" in os.environ:
+
+            # key needs to be decoded into a file
+            content = base64.b64decode(os.environ["EE_PRIVATE_KEY"]).decode()
+            with open("ee_private_key.json", "w") as f:
+                f.write(content)
+
+            # connection to the service account
+            service_account = "test-sepal-ui@sepal-ui.iam.gserviceaccount.com"
+            credentials = ee.ServiceAccountCredentials(
+                service_account, "ee_private_key.json"
+            )
+            ee.Initialize(credentials)
+            logger.logger.info(f"Used env var")
+
+        # if in local env use the local user credential
+        else:
+            try:
+                load_env_vars()
+                logger.logger.info("Called 'ee.Initialize()'.")
+                ee.Initialize(project=os.environ["PROJECT"])
+            except ee.ee_exception.EEException:
+                logger.logger.info("Called 'ee.Authenticate()'.")
+                ee.Authenticate()
+                ee.Initialize(project=os.environ["PROJECT"])
+
+
+def clear_ee_credentials():
+
+    path_to_creds = Path().home() / ".config" / "earthengine" / "credentials"
+    if not path_to_creds.exists():
+        logger.logger.error(
+            f"GEE credentials file '{path_to_creds}' not found, could not de-authenticate."
+        )
+    else:
+        path_to_creds.unlink()
+        logger.logger.warning(f"GEE credentials file deleted.")
+
+
+def remove_geometry_from_feature_collection(feature_collection):
+    """Define the function to remove geometry from features in a feature collection"""
+    # Function to remove geometry from features
+    def remove_geometry(feature):
+        # Remove the geometry property
+        feature = feature.setGeometry(None)
+        return feature
+
+    # Apply the function to remove geometry to the feature collection
+    feature_collection_no_geometry = feature_collection.map(remove_geometry)
+    return feature_collection_no_geometry
+
+
+# Compute centroids of each polygon including the external_id_column
+def get_centroid(feature, external_id_column="external_id"):
+    keepProperties = [external_id_column]
+    # Get the centroid of the feature's geometry.
+    centroid = feature.geometry().centroid(1)
+    # Return a new Feature, copying properties from the old Feature.
+    return ee.Feature(centroid).copyProperties(feature, keepProperties)
+
+
+def buffer_point_to_required_area(feature, area, area_unit):
+    """buffers feature to get a given area (needs math library); area unit in 'ha' or 'km2' (the default)"""
+    area = feature.get("REP_AREA")
+
+    # buffer_size = get_radius_m_to_buffer_for_given_area(area,"km2")  # should work but untested in this function
+
+    buffer_size = (
+        (ee.Number(feature.get("REP_AREA")).divide(math.pi)).sqrt().multiply(1000)
+    )  # calculating radius in metres from REP_AREA in km2
+
+    return ee.Feature(feature).buffer(buffer_size, 1)
+    ### buffering (incl., max error parameter should be 0m. But put as 1m anyhow - doesn't seem to make too much of a difference for speed)
+
+
+def get_radius_m_to_buffer_to_required_area(area, area_unit="km2"):
+    """gets radius in metres to buffer to get an area (needs math library); area unit ha or km2 (the default)"""
+    if area_unit == "km2":
+        unit_fix_factor = 1000
+    elif area_unit == "ha":
+        unit_fix_factor = 100
+    radius = ee.Number(area).divide(math.pi).sqrt().multiply(unit_fix_factor)
+    return radius
+
+
+class DotEnvNotFoundError(FileNotFoundError):
+    def __init__(self) -> None:
+        super().__init__(
+            "Running tests requires setting an appropriate '.env' in the root directory or in your current working "
+            "directory. You may copy and edit the '.env.template' file from the root directory or from the README.",
+        )
+
+
+def get_example_geojson(filename="geojson_example.geojson", cache=True):
+    """
+    Download example geojson file for testing whisp functionality.
+
+    Parameters:
+    -----------
+    filename : str
+        Local filename to save the geojson
+    cache : bool
+        If True, cache file in user directory to avoid re-downloading
+
+    Returns:
+    --------
+    str
+        Path to the downloaded geojson file
+    """
+    url = "https://raw.githubusercontent.com/forestdatapartnership/whisp/main/tests/fixtures/geojson_example.geojson"
+
+    if cache:
+        cache_dir = os.path.join(os.path.expanduser("~"), ".whisp_cache")
+        os.makedirs(cache_dir, exist_ok=True)
+        filepath = os.path.join(cache_dir, filename)
+
+        if os.path.exists(filepath):
+            return filepath
+    else:
+        filepath = filename
+
+    try:
+        urllib.request.urlretrieve(url, filepath)
+        return filepath
+    except Exception as e:
+        raise RuntimeError(f"Failed to download example geojson: {e}")
+
+
+def generate_random_polygon(
+    min_lon, min_lat, max_lon, max_lat, min_area_ha=1, max_area_ha=10, vertex_count=20
+):
+    """
+    Generate a random polygon with exact vertex count control.
+
+    Parameters
+    ----------
+    min_lon : float
+        Minimum longitude
+    min_lat : float
+        Minimum latitude
+    max_lon : float
+        Maximum longitude
+    max_lat : float
+        Maximum latitude
+    min_area_ha : float
+        Minimum area in hectares
+    max_area_ha : float
+        Maximum area in hectares
+    vertex_count : int
+        Exact number of vertices for the polygon
+
+    Returns
+    -------
+    tuple
+        (Polygon, actual_area_ha)
+    """
+    target_area_ha = random.uniform(min_area_ha, max_area_ha)
+    center_lon = random.uniform(min_lon, max_lon)
+    center_lat = random.uniform(min_lat, max_lat)
+
+    # Estimate radius for target area
+    target_area_m2 = target_area_ha * 10000  # hectares to square meters
+    radius_meters = math.sqrt(target_area_m2 / math.pi)
+    radius_degrees = radius_meters / (111320 * math.cos(math.radians(center_lat)))
+
+    # Create center point
+    center_point = Point(center_lon, center_lat)
+
+    # Use buffer with resolution to control vertices for smaller vertex counts
+    if vertex_count <= 50:
+        poly = center_point.buffer(radius_degrees, resolution=vertex_count // 4)
+
+    # Manual vertex creation for higher vertex counts
+    if vertex_count > 50:
+        angles = np.linspace(0, 2 * math.pi, vertex_count, endpoint=False)
+
+        base_radius = radius_degrees
+
+        # Smooth sine wave variations for natural look
+        freq1 = random.uniform(2, 5)
+        amp1 = random.uniform(0.08, 0.15)
+        freq2 = random.uniform(8, 15)
+        amp2 = random.uniform(0.03, 0.08)
+
+        radius_variation = amp1 * np.sin(
+            freq1 * angles + random.uniform(0, 2 * math.pi)
+        ) + amp2 * np.sin(freq2 * angles + random.uniform(0, 2 * math.pi))
+
+        radii = base_radius * (1.0 + radius_variation)
+        radii = np.maximum(radii, base_radius * 0.6)
+
+        xs = center_lon + radii * np.cos(angles)
+        ys = center_lat + radii * np.sin(angles)
+
+        xs = np.clip(xs, min_lon, max_lon)
+        ys = np.clip(ys, min_lat, max_lat)
+
+        vertices = list(zip(xs, ys))
+        vertices.append(vertices[0])
+
+        poly = Polygon(vertices)
+
+        if not poly.is_valid:
+            poly = make_valid(poly)
+            if hasattr(poly, "geoms"):
+                poly = max(poly.geoms, key=lambda p: p.area)
+
+    else:
+        # Resample to get exact vertex count for buffered circles
+        coords = list(poly.exterior.coords)
+
+        if len(coords) - 1 != vertex_count:
+            angles = np.linspace(0, 2 * math.pi, vertex_count, endpoint=False)
+
+            new_coords = []
+            for angle in angles:
+                x = center_lon + radius_degrees * math.cos(angle)
+                y = center_lat + radius_degrees * math.sin(angle)
+
+                dx = random.uniform(-radius_degrees * 0.08, radius_degrees * 0.08)
+                dy = random.uniform(-radius_degrees * 0.08, radius_degrees * 0.08)
+
+                new_x = np.clip(x + dx, min_lon, max_lon)
+                new_y = np.clip(y + dy, min_lat, max_lat)
+                new_coords.append((new_x, new_y))
+
+            new_coords.append(new_coords[0])
+            poly = Polygon(new_coords)
+
+    # Calculate actual area
+    area_sq_degrees = poly.area
+    area_sq_meters = (
+        area_sq_degrees * (111320 * math.cos(math.radians(center_lat))) ** 2
+    )
+    actual_area_ha = area_sq_meters / 10000
+
+    return poly, actual_area_ha
+
+
+def generate_test_polygons(
+    bounds,
+    num_polygons=25,
+    min_area_ha=1,
+    max_area_ha=10,
+    min_number_vert=10,
+    max_number_vert=20,
+):
+    """
+    Generate synthetic test polygons with exact vertex count control.
+
+    This utility is useful for testing WHISP processing with controlled test data,
+    especially when you need polygons with specific characteristics (area, complexity).
+
+    Parameters
+    ----------
+    bounds : list or ee.Geometry
+        Either a list of [min_lon, min_lat, max_lon, max_lat] or an Earth Engine Geometry.
+        Examples:
+        - Simple bounds: [-81.0, -19.3, -31.5, 9.6]
+        - EE Geometry: ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017').filter(
+              ee.Filter.eq('country_na', 'Brazil')).first().geometry()
+    num_polygons : int, optional
+        Number of polygons to generate (default: 25)
+    min_area_ha : float, optional
+        Minimum area in hectares (default: 1)
+    max_area_ha : float, optional
+        Maximum area in hectares (default: 10)
+    min_number_vert : int, optional
+        Minimum number of vertices per polygon (default: 10)
+    max_number_vert : int, optional
+        Maximum number of vertices per polygon (default: 20)
+
+    Returns
+    -------
+    dict
+        GeoJSON FeatureCollection with generated polygons. Each feature includes:
+        - internal_id: Sequential ID starting from 1
+        - requested_vertices: Number of vertices requested
+        - actual_vertices: Actual number of vertices created
+        - requested_area_ha: Target area in hectares
+        - actual_area_ha: Actual area in hectares
+
+    Examples
+    --------
+    >>> import openforis_whisp as whisp
+    >>> import ee
+    >>>
+    >>> # Using simple bounds (list)
+    >>> bounds_list = [-81.0, -19.3, -31.5, 9.6]
+    >>> geojson = whisp.generate_test_polygons(bounds_list, num_polygons=100)
+    >>>
+    >>> # Using Earth Engine Geometry
+    >>> brazil = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017').filter(
+    ...     ee.Filter.eq('country_na', 'Brazil')
+    ... ).first().geometry()
+    >>> geojson = whisp.generate_test_polygons(brazil, num_polygons=100,
+    ...     min_area_ha=100, max_area_ha=1000)
+    >>>
+    >>> # Save to file
+    >>> import json
+    >>> with open('test_polygons.geojson', 'w') as f:
+    ...     json.dump(geojson, f)
+    """
+
+    # Handle Earth Engine Geometry or simple bounds
+    if hasattr(bounds, "bounds"):  # It's an ee.Geometry
+        logger.logger.info("Extracting bounds from Earth Engine Geometry...")
+        try:
+            bounds_geom = (
+                bounds.bounds()
+                if not hasattr(bounds, "coordinates")
+                or bounds.type().getInfo() != "Rectangle"
+                else bounds
+            )
+            bounds_coords = bounds_geom.coordinates().getInfo()[0]
+            min_lon = min(coord[0] for coord in bounds_coords)
+            max_lon = max(coord[0] for coord in bounds_coords)
+            min_lat = min(coord[1] for coord in bounds_coords)
+            max_lat = max(coord[1] for coord in bounds_coords)
+            logger.logger.info(
+                f"Bounds: [{min_lon:.2f}, {min_lat:.2f}, {max_lon:.2f}, {max_lat:.2f}]"
+            )
+        except Exception as e:
+            raise ValueError(
+                f"Failed to extract bounds from Earth Engine Geometry: {e}"
+            )
+    elif isinstance(bounds, (list, tuple)) and len(bounds) == 4:
+        min_lon, min_lat, max_lon, max_lat = bounds
+    else:
+        raise ValueError(
+            "bounds must be either:\n"
+            "  - A list of [min_lon, min_lat, max_lon, max_lat]\n"
+            "  - An Earth Engine Geometry (ee.Geometry, ee.Feature.geometry(), etc.)"
+        )
+
+    # Validate parameters
+    if min_number_vert > max_number_vert:
+        raise ValueError(
+            f"min_number_vert ({min_number_vert}) cannot be greater than max_number_vert ({max_number_vert})"
+        )
+    if min_area_ha > max_area_ha:
+        raise ValueError(
+            f"min_area_ha ({min_area_ha}) cannot be greater than max_area_ha ({max_area_ha})"
+        )
+    if num_polygons < 1:
+        raise ValueError(f"num_polygons must be at least 1 (got {num_polygons})")
+
+    logger.logger.info(
+        f"Generating {num_polygons} test polygons with {min_number_vert}-{max_number_vert} vertices..."
+    )
+
+    features = []
+
+    # Pre-generate all random values
+    vertex_counts = np.random.randint(
+        min_number_vert, max_number_vert + 1, num_polygons
+    )
+    target_areas = np.random.uniform(min_area_ha, max_area_ha, num_polygons)
+
+    for i in range(num_polygons):
+        if i > 0 and i % 250 == 0:
+            logger.logger.info(
+                f"Generated {i}/{num_polygons} polygons ({i/num_polygons*100:.0f}%)..."
+            )
+
+        requested_vertices = vertex_counts[i]
+
+        polygon, actual_area = generate_random_polygon(
+            min_lon,
+            min_lat,
+            max_lon,
+            max_lat,
+            min_area_ha=target_areas[i] * 0.9,
+            max_area_ha=target_areas[i] * 1.1,
+            vertex_count=requested_vertices,
+        )
+
+        actual_vertex_count = len(list(polygon.exterior.coords)) - 1
+
+        properties = {
+            "internal_id": i + 1,
+            "requested_vertices": int(requested_vertices),
+            "actual_vertices": int(actual_vertex_count),
+            "requested_area_ha": round(target_areas[i], 2),
+            "actual_area_ha": round(actual_area, 2),
+        }
+
+        feature = {
+            "type": "Feature",
+            "properties": properties,
+            "geometry": mapping(polygon),
+        }
+
+        features.append(feature)
+
+    logger.logger.info(f"Generated {num_polygons} polygons!")
+
+    # Print summary statistics
+    actual_vertex_counts = [f["properties"]["actual_vertices"] for f in features]
+    requested_vertex_counts = [f["properties"]["requested_vertices"] for f in features]
+
+    logger.logger.info(
+        f"Vertex count - Requested: {min(requested_vertex_counts)}-{max(requested_vertex_counts)}, "
+        f"Actual: {min(actual_vertex_counts)}-{max(actual_vertex_counts)}"
+    )
+
+    actual_area_counts = [f["properties"]["actual_area_ha"] for f in features]
+    requested_area_counts = [f["properties"]["requested_area_ha"] for f in features]
+
+    logger.logger.info(
+        f"Area (ha) - Requested: {min(requested_area_counts):.1f}-{max(requested_area_counts):.1f}, "
+        f"Actual: {min(actual_area_counts):.1f}-{max(actual_area_counts):.1f}"
+    )
+
+    geojson = {"type": "FeatureCollection", "features": features}
+    return geojson
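Taken together with the __init__.py change above, the new generator is importable as whisp.generate_test_polygons. A minimal round trip following the docstring; the bounding box is arbitrary and purely illustrative:

import json
import openforis_whisp as whisp

geojson = whisp.generate_test_polygons(
    bounds=[-55.0, -12.0, -54.0, -11.0],
    num_polygons=10,
    min_area_ha=2,
    max_area_ha=20,
)

with open("test_polygons.geojson", "w") as f:
    json.dump(geojson, f)

print(len(geojson["features"]))  # 10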
--- a/src/openforis_whisp/parameters/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-!!! BAD PRACTICE, ALWAYS IMPORT YOUR MODULES EXPLICITLY !!!
-
-Module to gather all parameters.
-
-If you use a module to import all the functions here, you only have 1 call to make
-"""
-
-# from .config_runtime import *
-
-# from .config_asr_url_info import *
-
-# from .config_asr_credentials import *
-
-# from parameters.config_ceo import *
--- a/src/openforis_whisp/utils.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import base64
-import ee
-import math
-import os
-import pandas as pd
-
-import urllib.request
-import os
-
-import importlib.resources as pkg_resources
-
-from dotenv import load_dotenv
-from pathlib import Path
-
-from .logger import StdoutLogger
-
-
-logger = StdoutLogger(__name__)
-
-
-def get_example_data_path(filename):
-    """
-    Get the path to an example data file included in the package.
-
-    Parameters:
-    -----------
-    filename : str
-        The name of the example data file.
-
-    Returns:
-    --------
-    str
-        The path to the example data file.
-    """
-    return os.path.join("..", "tests", "fixtures", filename)
-
-
-def load_env_vars() -> None:
-    """Loads the environment variables required for testing the codebase.
-
-    Returns
-    -------
-    out : None
-    """
-
-    all_dotenv_paths = [Path(__file__).parents[2] / ".env", Path.cwd() / ".env"]
-    dotenv_loaded = False
-
-    for dotenv_path in all_dotenv_paths:
-        logger.logger.debug(f"dotenv_path: {dotenv_path}")
-        if dotenv_path.exists():
-            dotenv_loaded = load_dotenv(dotenv_path)
-            break
-
-    if not dotenv_loaded:
-        raise DotEnvNotFoundError
-    logger.logger.info(f"Loaded environment variables from '{dotenv_path}'")
-
-
-def init_ee() -> None:
-    """Initialize earth engine according to the environment"""
-
-    # only do the initialization if the credentials are missing
-    if not ee.data._credentials:
-
-        # if in test env use the private key
-        if "EE_PRIVATE_KEY" in os.environ:
-
-            # key needs to be decoded into a file
-            content = base64.b64decode(os.environ["EE_PRIVATE_KEY"]).decode()
-            with open("ee_private_key.json", "w") as f:
-                f.write(content)
-
-            # connection to the service account
-            service_account = "test-sepal-ui@sepal-ui.iam.gserviceaccount.com"
-            credentials = ee.ServiceAccountCredentials(
-                service_account, "ee_private_key.json"
-            )
-            ee.Initialize(credentials)
-            logger.logger.info(f"Used env var")
-
-        # if in local env use the local user credential
-        else:
-            try:
-                load_env_vars()
-                logger.logger.info("Called 'ee.Initialize()'.")
-                ee.Initialize(project=os.environ["PROJECT"])
-            except ee.ee_exception.EEException:
-                logger.logger.info("Called 'ee.Authenticate()'.")
-                ee.Authenticate()
-                ee.Initialize(project=os.environ["PROJECT"])
-
-
-def clear_ee_credentials():
-
-    path_to_creds = Path().home() / ".config" / "earthengine" / "credentials"
-    if not path_to_creds.exists():
-        logger.logger.error(
-            f"GEE credentials file '{path_to_creds}' not found, could not de-authenticate."
-        )
-    else:
-        path_to_creds.unlink()
-        logger.logger.warning(f"GEE credentials file deleted.")
-
-
-def remove_geometry_from_feature_collection(feature_collection):
-    """Define the function to remove geometry from features in a feature collection"""
-    # Function to remove geometry from features
-    def remove_geometry(feature):
-        # Remove the geometry property
-        feature = feature.setGeometry(None)
-        return feature
-
-    # Apply the function to remove geometry to the feature collection
-    feature_collection_no_geometry = feature_collection.map(remove_geometry)
-    return feature_collection_no_geometry
-
-
-# Compute centroids of each polygon including the external_id_column
-def get_centroid(feature, external_id_column="external_id"):
-    keepProperties = [external_id_column]
-    # Get the centroid of the feature's geometry.
-    centroid = feature.geometry().centroid(1)
-    # Return a new Feature, copying properties from the old Feature.
-    return ee.Feature(centroid).copyProperties(feature, keepProperties)
-
-
-def buffer_point_to_required_area(feature, area, area_unit):
-    """buffers feature to get a given area (needs math library); area unit in 'ha' or 'km2' (the default)"""
-    area = feature.get("REP_AREA")
-
-    # buffer_size = get_radius_m_to_buffer_for_given_area(area,"km2")  # should work but untested in this function
-
-    buffer_size = (
-        (ee.Number(feature.get("REP_AREA")).divide(math.pi)).sqrt().multiply(1000)
-    )  # calculating radius in metres from REP_AREA in km2
-
-    return ee.Feature(feature).buffer(buffer_size, 1)
-    ### buffering (incl., max error parameter should be 0m. But put as 1m anyhow - doesn't seem to make too much of a difference for speed)
-
-
-def get_radius_m_to_buffer_to_required_area(area, area_unit="km2"):
-    """gets radius in metres to buffer to get an area (needs math library); area unit ha or km2 (the default)"""
-    if area_unit == "km2":
-        unit_fix_factor = 1000
-    elif area_unit == "ha":
-        unit_fix_factor = 100
-    radius = ee.Number(area).divide(math.pi).sqrt().multiply(unit_fix_factor)
-    return radius
-
-
-class DotEnvNotFoundError(FileNotFoundError):
-    def __init__(self) -> None:
-        super().__init__(
-            "Running tests requires setting an appropriate '.env' in the root directory or in your current working "
-            "directory. You may copy and edit the '.env.template' file from the root directory or from the README.",
-        )
-
-
-def get_example_geojson(filename="geojson_example.geojson", cache=True):
-    """
-    Download example geojson file for testing whisp functionality.
-
-    Parameters:
-    -----------
-    filename : str
-        Local filename to save the geojson
-    cache : bool
-        If True, cache file in user directory to avoid re-downloading
-
-    Returns:
-    --------
-    str
-        Path to the downloaded geojson file
-    """
-    url = "https://raw.githubusercontent.com/forestdatapartnership/whisp/main/tests/fixtures/geojson_example.geojson"
-
-    if cache:
-        cache_dir = os.path.join(os.path.expanduser("~"), ".whisp_cache")
-        os.makedirs(cache_dir, exist_ok=True)
-        filepath = os.path.join(cache_dir, filename)
-
-        if os.path.exists(filepath):
-            return filepath
-    else:
-        filepath = filename
-
-    try:
-        urllib.request.urlretrieve(url, filepath)
-        return filepath
-    except Exception as e:
-        raise RuntimeError(f"Failed to download example geojson: {e}")
-
-