openforis-whisp 2.0.0b1__py3-none-any.whl → 2.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +2 -1
- openforis_whisp/data_conversion.py +11 -0
- openforis_whisp/datasets.py +7 -32
- openforis_whisp/parameters/lookup_gee_datasets.csv +2 -5
- openforis_whisp/risk.py +29 -29
- openforis_whisp/stats.py +92 -0
- openforis_whisp/utils.py +298 -5
- {openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b2.dist-info}/METADATA +1 -1
- openforis_whisp-2.0.0b2.dist-info/RECORD +16 -0
- openforis_whisp/parameters/__init__.py +0 -15
- openforis_whisp-2.0.0b1.dist-info/RECORD +0 -17
- {openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b2.dist-info}/LICENSE +0 -0
- {openforis_whisp-2.0.0b1.dist-info → openforis_whisp-2.0.0b2.dist-info}/WHEEL +0 -0
openforis_whisp/__init__.py
CHANGED
|
@@ -43,6 +43,7 @@ from openforis_whisp.stats import (
|
|
|
43
43
|
whisp_formatted_stats_ee_to_geojson,
|
|
44
44
|
whisp_formatted_stats_geojson_to_df,
|
|
45
45
|
whisp_formatted_stats_geojson_to_geojson,
|
|
46
|
+
set_point_geometry_area_to_zero,
|
|
46
47
|
convert_iso3_to_iso2,
|
|
47
48
|
)
|
|
48
49
|
|
|
@@ -71,4 +72,4 @@ from openforis_whisp.data_conversion import (
|
|
|
71
72
|
|
|
72
73
|
from openforis_whisp.risk import whisp_risk, detect_unit_type
|
|
73
74
|
|
|
74
|
-
from openforis_whisp.utils import get_example_data_path
|
|
75
|
+
from openforis_whisp.utils import get_example_data_path, generate_test_polygons
|
|
@@ -42,6 +42,17 @@ def convert_geojson_to_ee(
|
|
|
42
42
|
# Use GeoPandas to read the file and handle CRS
|
|
43
43
|
gdf = gpd.read_file(file_path)
|
|
44
44
|
|
|
45
|
+
# NEW: Handle problematic data types before JSON conversion
|
|
46
|
+
for col in gdf.columns:
|
|
47
|
+
if col != gdf.geometry.name: # Skip geometry column
|
|
48
|
+
# Handle datetime/timestamp columns
|
|
49
|
+
if pd.api.types.is_datetime64_any_dtype(gdf[col]):
|
|
50
|
+
gdf[col] = gdf[col].dt.strftime("%Y-%m-%d %H:%M:%S").fillna("")
|
|
51
|
+
# Handle other problematic types
|
|
52
|
+
elif gdf[col].dtype == "object":
|
|
53
|
+
# Convert any remaining non-serializable objects to strings
|
|
54
|
+
gdf[col] = gdf[col].astype(str)
|
|
55
|
+
|
|
45
56
|
# Check and convert CRS if needed
|
|
46
57
|
if enforce_wgs84:
|
|
47
58
|
if gdf.crs is None:
|
openforis_whisp/datasets.py
CHANGED
|
@@ -340,45 +340,20 @@ def g_esri_2023_tc_prep():
|
|
|
340
340
|
|
|
341
341
|
|
|
342
342
|
# ESRI 2023 - Crop
|
|
343
|
-
def
|
|
343
|
+
def g_esri_2020_2023_crop_prep():
|
|
344
344
|
esri_lulc10_raw = ee.ImageCollection(
|
|
345
345
|
"projects/sat-io/open-datasets/landcover/ESRI_Global-LULC_10m_TS"
|
|
346
346
|
)
|
|
347
|
-
|
|
348
|
-
esri_lulc10_raw.filterDate("
|
|
347
|
+
esri_lulc10_crop_2020 = (
|
|
348
|
+
esri_lulc10_raw.filterDate("2020-01-01", "2020-12-31").mosaic().eq(5)
|
|
349
349
|
)
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
# GLC_FCS30D 2022
|
|
354
|
-
|
|
355
|
-
# GLC_FCS30D Tree Cover
|
|
356
|
-
# forest classes + swamp + mangrove / what to do with shrubland?
|
|
357
|
-
def g_glc_fcs30d_tc_2022_prep():
|
|
358
|
-
GLC_FCS30D = (
|
|
359
|
-
ee.ImageCollection("projects/sat-io/open-datasets/GLC-FCS30D/annual")
|
|
360
|
-
.mosaic()
|
|
361
|
-
.select(22)
|
|
362
|
-
)
|
|
363
|
-
GLC_FCS30D_TC = (
|
|
364
|
-
(GLC_FCS30D.gte(51))
|
|
365
|
-
.And(GLC_FCS30D.lte(92))
|
|
366
|
-
.Or(GLC_FCS30D.eq(181))
|
|
367
|
-
.Or(GLC_FCS30D.eq(185))
|
|
350
|
+
esri_lulc10_crop_2023 = (
|
|
351
|
+
esri_lulc10_raw.filterDate("2023-01-01", "2023-12-31").mosaic().eq(5)
|
|
368
352
|
)
|
|
369
|
-
return GLC_FCS30D_TC.rename("GLC_FCS30D_TC_2022")
|
|
370
353
|
|
|
354
|
+
newCrop = esri_lulc10_crop_2023.And(esri_lulc10_crop_2020.Not())
|
|
371
355
|
|
|
372
|
-
|
|
373
|
-
# 10 Rainfed cropland; 11 Herbaceous cover; 12 Tree or shrub cover (Orchard); 20 Irrigated cropland
|
|
374
|
-
def g_glc_fcs30d_crop_2022_prep():
|
|
375
|
-
GLC_FCS30D = (
|
|
376
|
-
ee.ImageCollection("projects/sat-io/open-datasets/GLC-FCS30D/annual")
|
|
377
|
-
.mosaic()
|
|
378
|
-
.select(22)
|
|
379
|
-
)
|
|
380
|
-
GLC_FCS30D_crop = GLC_FCS30D.gte(10).And(GLC_FCS30D.lte(20))
|
|
381
|
-
return GLC_FCS30D_crop.rename("GLC_FCS30D_crop_2022")
|
|
356
|
+
return newCrop.rename("ESRI_crop_gain_2020_2023")
|
|
382
357
|
|
|
383
358
|
|
|
384
359
|
#### disturbances by year
|
|
@@ -2,7 +2,7 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
|
|
|
2
2
|
EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
|
|
3
3
|
GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
|
|
4
4
|
TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
|
|
5
|
-
GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,
|
|
5
|
+
GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
|
|
6
6
|
Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
|
|
7
7
|
ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
|
|
8
8
|
TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
|
|
@@ -163,13 +163,11 @@ GFT_planted_plantation,1900,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_gft_
|
|
|
163
163
|
IIASA_planted_plantation,1910,,NA,planted_plantation_2020,0,1,0,float32,1,0,g_iiasa_planted_prep
|
|
164
164
|
TMF_regrowth_2023,2000,,NA,treecover_after_2020,0,1,0,float32,1,0,g_tmf_regrowth_prep
|
|
165
165
|
ESRI_2023_TC,2010,,NA,treecover_after_2020,0,1,0,float32,1,0,g_esri_2023_tc_prep
|
|
166
|
-
GLC_FCS30D_TC_2022,2020,,NA,treecover_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_tc_2022_prep
|
|
167
166
|
Oil_palm_2023_FDaP,2100,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_palm_2023_prep
|
|
168
167
|
Rubber_2023_FDaP,2110,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_rubber_2023_prep
|
|
169
168
|
Coffee_FDaP_2023,2111,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_coffee_2023_prep
|
|
170
169
|
Cocoa_2023_FDaP,2120,,NA,agri_after_2020,0,1,0,float32,1,0,g_fdap_cocoa_2023_prep
|
|
171
|
-
|
|
172
|
-
GLC_FCS30D_crop_2022,2140,,NA,agri_after_2020,0,1,0,float32,1,0,g_glc_fcs30d_crop_2022_prep
|
|
170
|
+
ESRI_crop_gain_2020_2023,2130,,NA,agri_after_2020,0,1,0,float32,1,0,g_esri_2020_2023_crop_prep
|
|
173
171
|
GFW_logging_before_2020,2200,,NA,logging_concession,0,1,0,float32,1,0,g_logging_concessions_prep
|
|
174
172
|
nCO_ideam_forest_2020,2310,CO,treecover,NA,1,1,0,float32,1,0,nco_ideam_forest_2020_prep
|
|
175
173
|
nCO_ideam_eufo_commission_2020,2320,CO,commodities,NA,1,1,0,float32,1,0,nco_ideam_eufo_commission_2020_prep
|
|
@@ -199,4 +197,3 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
|
|
|
199
197
|
nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
|
|
200
198
|
nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
|
|
201
199
|
nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep
|
|
202
|
-
|
openforis_whisp/risk.py
CHANGED
|
@@ -161,9 +161,8 @@ def whisp_risk(
|
|
|
161
161
|
lookup_df_copy, custom_bands_info, df.columns
|
|
162
162
|
)
|
|
163
163
|
print(f"Including custom bands: {list(custom_bands_info.keys())}")
|
|
164
|
-
# print(f"appended custom bands info to lookup table")
|
|
165
164
|
if national_codes:
|
|
166
|
-
print(f"
|
|
165
|
+
print(f"Including additional national data for: {national_codes}")
|
|
167
166
|
# Filter by national codes
|
|
168
167
|
filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
|
|
169
168
|
lookup_df=lookup_df_copy,
|
|
@@ -473,7 +472,6 @@ def add_indicators(
|
|
|
473
472
|
return df
|
|
474
473
|
|
|
475
474
|
|
|
476
|
-
# Update add_indicator_column to use the unit_type parameter
|
|
477
475
|
def add_indicator_column(
|
|
478
476
|
df: data_lookup_type,
|
|
479
477
|
input_columns: list[str],
|
|
@@ -482,49 +480,51 @@ def add_indicator_column(
|
|
|
482
480
|
low_name: str = "no",
|
|
483
481
|
high_name: str = "yes",
|
|
484
482
|
sum_comparison: bool = False,
|
|
485
|
-
unit_type: str = None,
|
|
483
|
+
unit_type: str = None,
|
|
486
484
|
) -> data_lookup_type:
|
|
487
|
-
"""
|
|
488
|
-
Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
|
|
485
|
+
"""Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign."""
|
|
489
486
|
|
|
490
|
-
Parameters:
|
|
491
|
-
df (data_lookup_type): The pandas DataFrame to which the column will be added.
|
|
492
|
-
input_columns (list): List of column names to check for threshold.
|
|
493
|
-
threshold (float): The threshold value to compare against.
|
|
494
|
-
new_column_name (str): The name of the new column to be added.
|
|
495
|
-
The '>' sign is used for comparisons.
|
|
496
|
-
When 'sum comparison' == True, then the threshold is compared to the sum of all those listed in 'input_columns', as opposed to when Flalse, when each column in the list is compared to the threshold individually
|
|
497
|
-
low_name (str): The name for the value when below or equal to threshold (default is 'no').
|
|
498
|
-
high_name (str): The name for the value when above threshold (default is 'yes').
|
|
499
|
-
sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
|
|
500
|
-
unit_type (str): Whether values are in "ha" or "percent".
|
|
501
|
-
|
|
502
|
-
Returns:
|
|
503
|
-
data_lookup_type: The DataFrame with the new column added.
|
|
504
|
-
"""
|
|
505
487
|
# Create a new column and initialize with low_name
|
|
506
488
|
new_column = pd.Series(low_name, index=df.index, name=new_column_name)
|
|
507
489
|
|
|
508
|
-
# Default behavior: use '>' for single column comparison
|
|
509
490
|
if sum_comparison:
|
|
510
491
|
# Sum all values in specified columns and compare to threshold
|
|
511
492
|
sum_values = df[input_columns].sum(axis=1)
|
|
512
493
|
new_column[sum_values > threshold] = high_name
|
|
513
494
|
else:
|
|
514
|
-
# Check if any values in specified columns are above the threshold
|
|
495
|
+
# Check if any values in specified columns are above the threshold
|
|
515
496
|
for col in input_columns:
|
|
516
|
-
# So that threshold is always in percent, if outputs are in ha, the code converts to percent (based on dividing by the geometry_area_column column.
|
|
517
|
-
# Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
|
|
518
497
|
if unit_type == "ha":
|
|
519
498
|
df[geometry_area_column] = pd.to_numeric(
|
|
520
499
|
df[geometry_area_column], errors="coerce"
|
|
521
500
|
)
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
501
|
+
|
|
502
|
+
# Handle points (Area = 0) separately
|
|
503
|
+
is_point = df[geometry_area_column] == 0
|
|
504
|
+
|
|
505
|
+
# For points: any value > 0 exceeds threshold
|
|
506
|
+
point_mask = is_point & (df[col] > 0)
|
|
507
|
+
new_column[point_mask] = high_name
|
|
508
|
+
|
|
509
|
+
# For polygons: convert to percentage and check threshold
|
|
510
|
+
polygon_mask = ~is_point
|
|
511
|
+
if polygon_mask.any():
|
|
512
|
+
val_to_check = clamp(
|
|
513
|
+
(
|
|
514
|
+
(
|
|
515
|
+
df.loc[polygon_mask, col]
|
|
516
|
+
/ df.loc[polygon_mask, geometry_area_column]
|
|
517
|
+
)
|
|
518
|
+
* 100
|
|
519
|
+
),
|
|
520
|
+
0,
|
|
521
|
+
100,
|
|
522
|
+
)
|
|
523
|
+
new_column[polygon_mask & (val_to_check > threshold)] = high_name
|
|
525
524
|
else:
|
|
525
|
+
# For percentage values, use direct comparison
|
|
526
526
|
val_to_check = df[col]
|
|
527
|
-
|
|
527
|
+
new_column[val_to_check > threshold] = high_name
|
|
528
528
|
|
|
529
529
|
# Concatenate the new column to the DataFrame
|
|
530
530
|
df = pd.concat([df, new_column], axis=1)
|
openforis_whisp/stats.py
CHANGED
|
@@ -592,9 +592,58 @@ def whisp_stats_ee_to_df(
|
|
|
592
592
|
print(f"An error occurred during the ISO3 to ISO2 conversion: {e}")
|
|
593
593
|
return pd.DataFrame() # Return an empty DataFrame in case of error
|
|
594
594
|
|
|
595
|
+
# NEW: Set area to 0 for point geometries
|
|
596
|
+
try:
|
|
597
|
+
df_stats = set_point_geometry_area_to_zero(df_stats)
|
|
598
|
+
except Exception as e:
|
|
599
|
+
print(f"An error occurred during point geometry area adjustment: {e}")
|
|
600
|
+
# Continue without the adjustment rather than failing completely
|
|
601
|
+
|
|
595
602
|
return df_stats
|
|
596
603
|
|
|
597
604
|
|
|
605
|
+
def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
    """
    Zero out the geometry area of every row whose geometry type is "Point".

    Parameters
    ----------
    df : pd.DataFrame
        Table expected to carry the columns named by the module-level
        ``geometry_type_column`` and ``geometry_area_column`` settings.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with the area column set to 0.0 on Point rows. When
        either required column is absent a warning is printed and ``df``
        itself is returned unchanged.
    """
    # NOTE(review): this module contains a second definition with the same
    # name further down; being later in the file it replaces this one at
    # import time -- confirm which of the two is meant to survive.

    # Both columns are required; bail out on the first one that is missing.
    for required_column in (geometry_type_column, geometry_area_column):
        if required_column not in df.columns:
            print(
                f"Warning: {required_column} column not found. Skipping area adjustment for points."
            )
            return df

    # Work on a copy so the caller's frame is left untouched.
    adjusted = df.copy()
    is_point = adjusted[geometry_type_column] == "Point"
    adjusted.loc[is_point, geometry_area_column] = 0.0

    # Report how many rows were changed (only when there were any).
    point_total = is_point.sum()
    if point_total > 0:
        print(f"Set area to 0 for {point_total} Point geometries")

    return adjusted
|
|
645
|
+
|
|
646
|
+
|
|
598
647
|
def whisp_stats_ee_to_drive(
|
|
599
648
|
feature_collection: ee.FeatureCollection,
|
|
600
649
|
external_id_column=None,
|
|
@@ -1226,3 +1275,46 @@ def debug_feature_collection_properties(feature_collection, max_features=5):
|
|
|
1226
1275
|
|
|
1227
1276
|
except Exception as e:
|
|
1228
1277
|
return {"error": f"Error during debugging: {str(e)}"}
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
# helper function to set area to 0 for point geometries
|
|
1281
|
+
def set_point_geometry_area_to_zero(df: pd.DataFrame) -> pd.DataFrame:
    """
    Set the geometry area column to 0 for features with Point geometry type.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the columns named by the module-level
        ``geometry_type_column`` and ``geometry_area_column`` settings.

    Returns
    -------
    pd.DataFrame
        A copy of ``df`` with the area set to 0.0 wherever the geometry type
        equals "Point". If either required column is missing, a warning is
        printed and ``df`` itself is returned unchanged.
    """
    # Without both columns there is nothing to adjust: warn and hand the
    # input back as-is.
    if geometry_type_column not in df.columns:
        print(
            f"Warning: {geometry_type_column} column not found. Skipping area adjustment for points."
        )
        return df

    if geometry_area_column not in df.columns:
        print(
            f"Warning: {geometry_area_column} column not found. Skipping area adjustment for points."
        )
        return df

    # Create a copy to avoid modifying the caller's DataFrame.
    df_modified = df.copy()

    # Exact match on "Point"; other geometry types keep their area.
    point_mask = df_modified[geometry_type_column] == "Point"
    df_modified.loc[point_mask, geometry_area_column] = 0.0

    return df_modified
|
openforis_whisp/utils.py
CHANGED
|
@@ -3,6 +3,8 @@ import ee
|
|
|
3
3
|
import math
|
|
4
4
|
import os
|
|
5
5
|
import pandas as pd
|
|
6
|
+
import random
|
|
7
|
+
import numpy as np
|
|
6
8
|
|
|
7
9
|
import urllib.request
|
|
8
10
|
import os
|
|
@@ -12,6 +14,9 @@ import importlib.resources as pkg_resources
|
|
|
12
14
|
from dotenv import load_dotenv
|
|
13
15
|
from pathlib import Path
|
|
14
16
|
|
|
17
|
+
from shapely.geometry import Polygon, Point, mapping
|
|
18
|
+
from shapely.validation import make_valid
|
|
19
|
+
|
|
15
20
|
from .logger import StdoutLogger
|
|
16
21
|
|
|
17
22
|
|
|
@@ -160,31 +165,31 @@ class DotEnvNotFoundError(FileNotFoundError):
|
|
|
160
165
|
def get_example_geojson(filename="geojson_example.geojson", cache=True):
|
|
161
166
|
"""
|
|
162
167
|
Download example geojson file for testing whisp functionality.
|
|
163
|
-
|
|
168
|
+
|
|
164
169
|
Parameters:
|
|
165
170
|
-----------
|
|
166
171
|
filename : str
|
|
167
172
|
Local filename to save the geojson
|
|
168
173
|
cache : bool
|
|
169
174
|
If True, cache file in user directory to avoid re-downloading
|
|
170
|
-
|
|
175
|
+
|
|
171
176
|
Returns:
|
|
172
177
|
--------
|
|
173
178
|
str
|
|
174
179
|
Path to the downloaded geojson file
|
|
175
180
|
"""
|
|
176
181
|
url = "https://raw.githubusercontent.com/forestdatapartnership/whisp/main/tests/fixtures/geojson_example.geojson"
|
|
177
|
-
|
|
182
|
+
|
|
178
183
|
if cache:
|
|
179
184
|
cache_dir = os.path.join(os.path.expanduser("~"), ".whisp_cache")
|
|
180
185
|
os.makedirs(cache_dir, exist_ok=True)
|
|
181
186
|
filepath = os.path.join(cache_dir, filename)
|
|
182
|
-
|
|
187
|
+
|
|
183
188
|
if os.path.exists(filepath):
|
|
184
189
|
return filepath
|
|
185
190
|
else:
|
|
186
191
|
filepath = filename
|
|
187
|
-
|
|
192
|
+
|
|
188
193
|
try:
|
|
189
194
|
urllib.request.urlretrieve(url, filepath)
|
|
190
195
|
return filepath
|
|
@@ -192,3 +197,291 @@ def get_example_geojson(filename="geojson_example.geojson", cache=True):
|
|
|
192
197
|
raise RuntimeError(f"Failed to download example geojson: {e}")
|
|
193
198
|
|
|
194
199
|
|
|
200
|
+
def generate_random_polygon(
    min_lon, min_lat, max_lon, max_lat, min_area_ha=1, max_area_ha=10, vertex_count=20
):
    """
    Generate a random, roughly circular polygon inside a lon/lat bounding box.

    A centre is drawn uniformly inside the box and a target area uniformly
    from ``[min_area_ha, max_area_ha]``. ``vertex_count`` vertices are placed
    at evenly spaced angles around the centre, at the radius of a circle with
    the target area:

    * more than 50 vertices: the radius is modulated by two random sine
      waves and floored at 60% of the base radius;
    * 50 or fewer: every vertex is shifted by up to 8% of the base radius
      along each axis.

    Offsets are computed in metres and converted to degrees with separate
    latitude and longitude scales, then clipped to the bounding box. If
    clipping leaves an invalid ring it is repaired with ``make_valid`` and
    the largest-area part is kept, so the final vertex count and area may
    differ from the request.

    Parameters
    ----------
    min_lon : float
        Minimum longitude
    min_lat : float
        Minimum latitude
    max_lon : float
        Maximum longitude
    max_lat : float
        Maximum latitude
    min_area_ha : float
        Minimum target area in hectares
    max_area_ha : float
        Maximum target area in hectares
    vertex_count : int
        Number of vertices to place (at least 3)

    Returns
    -------
    tuple
        (polygon, actual_area_ha), where ``actual_area_ha`` is the area of
        the returned geometry converted from square degrees with the same
        local latitude/longitude scales.

    Raises
    ------
    ValueError
        If ``vertex_count`` is smaller than 3.
    """
    vertex_count = int(vertex_count)
    if vertex_count < 3:
        raise ValueError(f"vertex_count must be at least 3 (got {vertex_count})")

    # Draw order (area, lon, lat) is kept stable for seeded runs.
    target_area_ha = random.uniform(min_area_ha, max_area_ha)
    center_lon = random.uniform(min_lon, max_lon)
    center_lat = random.uniform(min_lat, max_lat)

    # Local metres-per-degree scales. Only the longitude scale shrinks with
    # cos(latitude); using it for both axes would stretch the shape
    # north-south and misreport its area away from the equator.
    metres_per_deg_lat = 111320
    metres_per_deg_lon = metres_per_deg_lat * max(
        math.cos(math.radians(center_lat)), 1e-6  # floor avoids /0 at the poles
    )

    # Radius of the circle whose area equals the target (ha -> m^2).
    radius_m = math.sqrt(target_area_ha * 10000 / math.pi)
    angles = np.linspace(0, 2 * math.pi, vertex_count, endpoint=False)

    if vertex_count > 50:
        # Smooth sine wave variations for a natural-looking outline.
        freq1 = random.uniform(2, 5)
        amp1 = random.uniform(0.08, 0.15)
        freq2 = random.uniform(8, 15)
        amp2 = random.uniform(0.03, 0.08)

        radius_variation = amp1 * np.sin(
            freq1 * angles + random.uniform(0, 2 * math.pi)
        ) + amp2 * np.sin(freq2 * angles + random.uniform(0, 2 * math.pi))

        radii_m = np.maximum(radius_m * (1.0 + radius_variation), radius_m * 0.6)
        east_m = radii_m * np.cos(angles)
        north_m = radii_m * np.sin(angles)
    else:
        # Independent jitter per vertex and axis (x drawn before y).
        jitter_m = radius_m * 0.08
        east_offsets = []
        north_offsets = []
        for angle in angles:
            east_offsets.append(
                radius_m * math.cos(angle) + random.uniform(-jitter_m, jitter_m)
            )
            north_offsets.append(
                radius_m * math.sin(angle) + random.uniform(-jitter_m, jitter_m)
            )
        east_m = np.asarray(east_offsets)
        north_m = np.asarray(north_offsets)

    # Convert to degrees and keep every vertex inside the requested box.
    xs = np.clip(center_lon + east_m / metres_per_deg_lon, min_lon, max_lon)
    ys = np.clip(center_lat + north_m / metres_per_deg_lat, min_lat, max_lat)

    vertices = list(zip(xs.tolist(), ys.tolist()))
    vertices.append(vertices[0])  # close the ring explicitly
    poly = Polygon(vertices)

    # Clipping can fold the ring onto the box edge; repair on every path.
    if not poly.is_valid:
        poly = make_valid(poly)
        if hasattr(poly, "geoms"):
            poly = max(poly.geoms, key=lambda part: part.area)

    # Square degrees -> square metres -> hectares.
    area_sq_meters = poly.area * metres_per_deg_lon * metres_per_deg_lat
    actual_area_ha = area_sq_meters / 10000

    return poly, actual_area_ha
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def generate_test_polygons(
    bounds,
    num_polygons=25,
    min_area_ha=1,
    max_area_ha=10,
    min_number_vert=10,
    max_number_vert=20,
):
    """
    Generate synthetic test polygons with per-polygon vertex-count control.

    This utility is useful for testing WHISP processing with controlled test data,
    especially when you need polygons with specific characteristics (area, complexity).

    For each polygon a vertex count and a target area are pre-drawn; the
    polygon itself is built by ``generate_random_polygon`` with an area range
    of +/-10% around that target.

    Parameters
    ----------
    bounds : list or ee.Geometry
        Either a list of [min_lon, min_lat, max_lon, max_lat] or an Earth Engine Geometry.
        Examples:
        - Simple bounds: [-81.0, -19.3, -31.5, 9.6]
        - EE Geometry: ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017').filter(
            ee.Filter.eq('country_na', 'Brazil')).first().geometry()
    num_polygons : int, optional
        Number of polygons to generate (default: 25)
    min_area_ha : float, optional
        Minimum area in hectares (default: 1)
    max_area_ha : float, optional
        Maximum area in hectares (default: 10)
    min_number_vert : int, optional
        Minimum number of vertices per polygon (default: 10)
    max_number_vert : int, optional
        Maximum number of vertices per polygon (default: 20)

    Returns
    -------
    dict
        GeoJSON FeatureCollection with generated polygons. Each feature includes:
        - internal_id: Sequential ID starting from 1
        - requested_vertices: Number of vertices requested
        - actual_vertices: Actual number of vertices created
        - requested_area_ha: Target area in hectares
        - actual_area_ha: Actual area in hectares

    Raises
    ------
    ValueError
        If a minimum exceeds its maximum, ``min_number_vert`` is below 3,
        ``num_polygons`` is below 1, ``bounds`` is of an unsupported form, or
        the bounds cannot be read from the Earth Engine geometry.

    Examples
    --------
    >>> import openforis_whisp as whisp
    >>> import ee
    >>>
    >>> # Using simple bounds (list)
    >>> bounds_list = [-81.0, -19.3, -31.5, 9.6]
    >>> geojson = whisp.generate_test_polygons(bounds_list, num_polygons=100)
    >>>
    >>> # Using Earth Engine Geometry
    >>> brazil = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017').filter(
    ...     ee.Filter.eq('country_na', 'Brazil')
    ... ).first().geometry()
    >>> geojson = whisp.generate_test_polygons(brazil, num_polygons=100,
    ...                                         min_area_ha=100, max_area_ha=1000)
    >>>
    >>> # Save to file
    >>> import json
    >>> with open('test_polygons.geojson', 'w') as f:
    ...     json.dump(geojson, f)
    """

    # Validate the cheap arguments first so bad input fails before any
    # Earth Engine request is made.
    if min_number_vert > max_number_vert:
        raise ValueError(
            f"min_number_vert ({min_number_vert}) cannot be greater than max_number_vert ({max_number_vert})"
        )
    if min_number_vert < 3:
        # Fewer than three vertices cannot form a polygon ring.
        raise ValueError(
            f"min_number_vert must be at least 3 (got {min_number_vert})"
        )
    if min_area_ha > max_area_ha:
        raise ValueError(
            f"min_area_ha ({min_area_ha}) cannot be greater than max_area_ha ({max_area_ha})"
        )
    if num_polygons < 1:
        raise ValueError(f"num_polygons must be at least 1 (got {num_polygons})")

    # Handle Earth Engine Geometry or simple bounds
    if hasattr(bounds, "bounds"):  # duck-typed as an ee.Geometry
        logger.logger.info("Extracting bounds from Earth Engine Geometry...")
        try:
            bounds_geom = (
                bounds.bounds()
                if not hasattr(bounds, "coordinates")
                or bounds.type().getInfo() != "Rectangle"
                else bounds
            )
            # First ring of the bounding geometry; its extremes give the box.
            bounds_coords = bounds_geom.coordinates().getInfo()[0]
            min_lon = min(coord[0] for coord in bounds_coords)
            max_lon = max(coord[0] for coord in bounds_coords)
            min_lat = min(coord[1] for coord in bounds_coords)
            max_lat = max(coord[1] for coord in bounds_coords)
            logger.logger.info(
                f"Bounds: [{min_lon:.2f}, {min_lat:.2f}, {max_lon:.2f}, {max_lat:.2f}]"
            )
        except Exception as e:
            # Keep the original exception attached for debugging.
            raise ValueError(
                f"Failed to extract bounds from Earth Engine Geometry: {e}"
            ) from e
    elif isinstance(bounds, (list, tuple)) and len(bounds) == 4:
        min_lon, min_lat, max_lon, max_lat = bounds
    else:
        raise ValueError(
            "bounds must be either:\n"
            "  - A list of [min_lon, min_lat, max_lon, max_lat]\n"
            "  - An Earth Engine Geometry (ee.Geometry, ee.Feature.geometry(), etc.)"
        )

    logger.logger.info(
        f"Generating {num_polygons} test polygons with {min_number_vert}-{max_number_vert} vertices..."
    )

    features = []

    # Pre-generate all random values
    vertex_counts = np.random.randint(
        min_number_vert, max_number_vert + 1, num_polygons
    )
    target_areas = np.random.uniform(min_area_ha, max_area_ha, num_polygons)

    for i in range(num_polygons):
        if i > 0 and i % 250 == 0:
            logger.logger.info(
                f"Generated {i}/{num_polygons} polygons ({i/num_polygons*100:.0f}%)..."
            )

        # Plain Python scalars keep the properties JSON-friendly.
        requested_vertices = int(vertex_counts[i])
        target_area = float(target_areas[i])

        polygon, actual_area = generate_random_polygon(
            min_lon,
            min_lat,
            max_lon,
            max_lat,
            min_area_ha=target_area * 0.9,
            max_area_ha=target_area * 1.1,
            vertex_count=requested_vertices,
        )

        # The exterior ring repeats its first coordinate, hence the -1.
        actual_vertex_count = len(list(polygon.exterior.coords)) - 1

        properties = {
            "internal_id": i + 1,
            "requested_vertices": requested_vertices,
            "actual_vertices": int(actual_vertex_count),
            "requested_area_ha": round(target_area, 2),
            "actual_area_ha": round(actual_area, 2),
        }

        feature = {
            "type": "Feature",
            "properties": properties,
            "geometry": mapping(polygon),
        }

        features.append(feature)

    logger.logger.info(f"Generated {num_polygons} polygons!")

    # Log summary statistics
    actual_vertex_counts = [f["properties"]["actual_vertices"] for f in features]
    requested_vertex_counts = [f["properties"]["requested_vertices"] for f in features]

    logger.logger.info(
        f"Vertex count - Requested: {min(requested_vertex_counts)}-{max(requested_vertex_counts)}, "
        f"Actual: {min(actual_vertex_counts)}-{max(actual_vertex_counts)}"
    )

    actual_area_counts = [f["properties"]["actual_area_ha"] for f in features]
    requested_area_counts = [f["properties"]["requested_area_ha"] for f in features]

    logger.logger.info(
        f"Area (ha) - Requested: {min(requested_area_counts):.1f}-{max(requested_area_counts):.1f}, "
        f"Actual: {min(actual_area_counts):.1f}-{max(actual_area_counts):.1f}"
    )

    geojson = {"type": "FeatureCollection", "features": features}
    return geojson
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: openforis-whisp
|
|
3
|
-
Version: 2.0.0b1
|
|
3
|
+
Version: 2.0.0b2
|
|
4
4
|
Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: whisp,geospatial,data-processing
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
openforis_whisp/__init__.py,sha256=bnEZ4_X-mJInltSKVI0STfvrb09Df-z21buIVFDif5w,2524
|
|
2
|
+
openforis_whisp/data_conversion.py,sha256=Mc6dXbvoHBeRzl3o83pyKeI5_sPC8Yc90Tj4bN6_Bv8,17519
|
|
3
|
+
openforis_whisp/datasets.py,sha256=TNIj3yffQkf_QvfOo0cwKOqIvhd_AVcEf0bz3QGsy3Q,50776
|
|
4
|
+
openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
|
|
5
|
+
openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
|
|
6
|
+
openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
|
|
7
|
+
openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=UDvZrQsL5rXJn6CW6P3wofUrPLRmUFZWt6ETbXaxBMs,17454
|
|
8
|
+
openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
|
|
9
|
+
openforis_whisp/reformat.py,sha256=rtkKs8z1mJd5JD9rXuMk1tbbbTvQxCCh68tA4hIQAv8,25445
|
|
10
|
+
openforis_whisp/risk.py,sha256=d_Di5XB8BnHdVXG56xdHTcpB4-CIF5vo2ZRMQRG7Pek,34420
|
|
11
|
+
openforis_whisp/stats.py,sha256=_emqJ2xW6fgGevX8Dt1kRvLDL2vBgPyS4idrAuO_BDY,48124
|
|
12
|
+
openforis_whisp/utils.py,sha256=5HHtbK62Swn4-jnlSe1Jc-hVnJhLKMuDW0_ayHY7mIg,17130
|
|
13
|
+
openforis_whisp-2.0.0b2.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
|
|
14
|
+
openforis_whisp-2.0.0b2.dist-info/METADATA,sha256=Y1a-63w6UrU--JDvbR5eJzlPPNmf6mJT3xrOWtcSJ3c,16684
|
|
15
|
+
openforis_whisp-2.0.0b2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
16
|
+
openforis_whisp-2.0.0b2.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
!!! BAD PRACTICE, ALWAYS IMPORT YOUR MODULES EXPLICITELY !!!
|
|
3
|
-
|
|
4
|
-
Module to gather all parameters.
|
|
5
|
-
|
|
6
|
-
If you use a module import all the functions here you only have 1 call to make
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
# from .config_runtime import *
|
|
10
|
-
|
|
11
|
-
# from .config_asr_url_info import *
|
|
12
|
-
|
|
13
|
-
# from .config_asr_credentials import *
|
|
14
|
-
|
|
15
|
-
# from parameters.config_ceo import *
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
openforis_whisp/__init__.py,sha256=W-uMGp87DCuqJWY31fyYkzDXXZp3g1HOXZoHThJqvJU,2462
|
|
2
|
-
openforis_whisp/data_conversion.py,sha256=_HSjYozNO1xAOAk-uGmzTVCTOc3W7x3GDlvEUgrnj_Q,16909
|
|
3
|
-
openforis_whisp/datasets.py,sha256=gQg-JjcZuCd8-4_J2CN4oNwo-2qwNmKpwS_JV6zf-Jc,51516
|
|
4
|
-
openforis_whisp/logger.py,sha256=n9k0EhAZYZKesnfskv8KyWnkGbjqRqk84ulx9-u_Jsc,2308
|
|
5
|
-
openforis_whisp/parameters/__init__.py,sha256=KL7iORJVjSpZatYjoyWckcmQJnE89_DBC8R6_0_eR6o,349
|
|
6
|
-
openforis_whisp/parameters/config_runtime.py,sha256=NOo39MAi60XCwEx5pwkS0EHKJBh0XY1q06y4j0HAABg,1421
|
|
7
|
-
openforis_whisp/parameters/lookup_context_and_metadata.csv,sha256=KgK0ik_Gd4t_Nq5cUkGPT4ZFZVO93HWSG82jRrOukt4,1298
|
|
8
|
-
openforis_whisp/parameters/lookup_gee_datasets.csv,sha256=rYAMwbd1kwakgucdXrDicJfu4Nvjk8LWBYN7FuIpdDY,17626
|
|
9
|
-
openforis_whisp/pd_schemas.py,sha256=W_ocS773LHfc05dJqvWRa-bRdX0wKFoNp0lMxgFx94Y,2681
|
|
10
|
-
openforis_whisp/reformat.py,sha256=rtkKs8z1mJd5JD9rXuMk1tbbbTvQxCCh68tA4hIQAv8,25445
|
|
11
|
-
openforis_whisp/risk.py,sha256=_YMF-2X1OZXrNMFdNPuJicnG8ktAhlFToJfthWwiRHE,35111
|
|
12
|
-
openforis_whisp/stats.py,sha256=uwyiPXVptpCFSviz-_otXCpHeHnY2IwE2dPwvI7tAAM,45226
|
|
13
|
-
openforis_whisp/utils.py,sha256=ywOl-Hd2FzSYrOVIjtqGaNoZqkI34UChkZMbdjgXWZ0,6492
|
|
14
|
-
openforis_whisp-2.0.0b1.dist-info/LICENSE,sha256=nqyqICO95iw_iwzP1t_IIAf7ZX3DPbL_M9WyQfh2q1k,1085
|
|
15
|
-
openforis_whisp-2.0.0b1.dist-info/METADATA,sha256=NDh_KnflmxTUaA2-w3yTCrZYKFcbgvH8NJu5soe7doc,16684
|
|
16
|
-
openforis_whisp-2.0.0b1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
17
|
-
openforis_whisp-2.0.0b1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|