openforis-whisp 1.0.0a1__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +6 -3
- openforis_whisp/data_conversion.py +36 -13
- openforis_whisp/datasets.py +743 -176
- openforis_whisp/logger.py +38 -2
- openforis_whisp/parameters/config_runtime.py +4 -7
- openforis_whisp/parameters/lookup_context_and_metadata.csv +13 -13
- openforis_whisp/parameters/lookup_gee_datasets.csv +202 -172
- openforis_whisp/reformat.py +245 -96
- openforis_whisp/risk.py +255 -102
- openforis_whisp/stats.py +271 -70
- openforis_whisp-2.0.0a1.dist-info/METADATA +381 -0
- openforis_whisp-2.0.0a1.dist-info/RECORD +17 -0
- openforis_whisp-1.0.0a1.dist-info/METADATA +0 -231
- openforis_whisp-1.0.0a1.dist-info/RECORD +0 -17
- {openforis_whisp-1.0.0a1.dist-info → openforis_whisp-2.0.0a1.dist-info}/LICENSE +0 -0
- {openforis_whisp-1.0.0a1.dist-info → openforis_whisp-2.0.0a1.dist-info}/WHEEL +0 -0
openforis_whisp/risk.py
CHANGED
|
@@ -2,12 +2,15 @@ import pandas as pd
|
|
|
2
2
|
|
|
3
3
|
from .pd_schemas import data_lookup_type
|
|
4
4
|
|
|
5
|
+
|
|
5
6
|
from openforis_whisp.parameters.config_runtime import (
|
|
6
|
-
percent_or_ha,
|
|
7
7
|
geometry_area_column,
|
|
8
8
|
DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH,
|
|
9
|
+
stats_unit_type_column, # Add this import
|
|
9
10
|
)
|
|
10
11
|
|
|
12
|
+
from openforis_whisp.reformat import filter_lookup_by_country_codes
|
|
13
|
+
|
|
11
14
|
# could embed this in each function below that uses lookup_gee_datasets_df.
|
|
12
15
|
lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
|
|
13
16
|
DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH
|
|
@@ -17,25 +20,78 @@ lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
|
|
|
17
20
|
# requires lookup_gee_datasets_df
|
|
18
21
|
|
|
19
22
|
|
|
23
|
+
# Add function to detect unit type from dataframe
|
|
24
|
+
def detect_unit_type(df, explicit_unit_type=None):
    """
    Determine the unit type from the dataframe or use the override value.

    Args:
        df (DataFrame): Input DataFrame. Only consulted when
            ``explicit_unit_type`` is None.
        explicit_unit_type (str, optional): Override unit type ('ha' or 'percent').

    Returns:
        str: The unit type to use for calculations.

    Raises:
        ValueError: If the override is not 'ha' or 'percent', if the unit type
            column is missing or holds no rows, if the dataframe mixes unit
            types, or if the single detected unit type is not recognized.
    """
    valid_unit_types = ("ha", "percent")

    # An explicit override wins over anything stored in the dataframe.
    if explicit_unit_type is not None:
        if explicit_unit_type not in valid_unit_types:
            raise ValueError(
                f"Invalid unit type: {explicit_unit_type}. Must be 'ha' or 'percent'."
            )
        return explicit_unit_type

    # Without an override the dataframe must carry the unit type column.
    if stats_unit_type_column not in df.columns:
        raise ValueError(
            f"Column '{stats_unit_type_column}' not found in dataframe. "
            "Please provide 'explicit_unit_type' parameter to specify the unit type."
        )

    unit_types = df[stats_unit_type_column].unique()

    # A dataframe with no rows yields no unit types; report that as the
    # documented ValueError instead of letting unit_types[0] raise IndexError.
    if len(unit_types) == 0:
        raise ValueError(
            f"Column '{stats_unit_type_column}' contains no values, so the unit "
            "type cannot be detected. "
            "Please provide 'explicit_unit_type' parameter to specify the unit type."
        )

    # Thresholds are applied per dataframe, so all rows must share one unit.
    if len(unit_types) > 1:
        raise ValueError(
            f"Mixed unit types in dataframe: {unit_types}. All rows must use the same unit type."
        )

    unit_type = unit_types[0]

    if unit_type not in valid_unit_types:
        raise ValueError(
            f"Unrecognized unit type: {unit_type}. Must be 'ha' or 'percent'."
        )

    return unit_type
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Update whisp_risk to accept and pass the unit_type parameter
|
|
20
76
|
def whisp_risk(
|
|
21
77
|
df: data_lookup_type, # CHECK THIS
|
|
22
78
|
ind_1_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
23
79
|
ind_2_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
24
80
|
ind_3_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
25
81
|
ind_4_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
26
|
-
ind_5_pcent_threshold: float = 10,
|
|
27
|
-
ind_6_pcent_threshold: float = 10,
|
|
28
|
-
ind_7_pcent_threshold: float = 10,
|
|
29
|
-
ind_8_pcent_threshold: float = 10,
|
|
30
|
-
ind_9_pcent_threshold: float = 10,
|
|
31
|
-
ind_10_pcent_threshold: float = 10,
|
|
32
|
-
ind_11_pcent_threshold: float = 10,
|
|
82
|
+
ind_5_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
83
|
+
ind_6_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
84
|
+
ind_7_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
85
|
+
ind_8_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
86
|
+
ind_9_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
87
|
+
ind_10_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
88
|
+
ind_11_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
|
|
33
89
|
ind_1_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
34
90
|
ind_2_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
35
91
|
ind_3_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
36
92
|
ind_4_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
37
93
|
ind_5_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
38
|
-
ind_6_input_columns: pd.Series = None,
|
|
94
|
+
ind_6_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
39
95
|
ind_7_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
40
96
|
ind_8_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
41
97
|
ind_9_input_columns: pd.Series = None, # see lookup_gee_datasets for details
|
|
@@ -46,14 +102,16 @@ def whisp_risk(
|
|
|
46
102
|
ind_3_name: str = "Ind_03_disturbance_before_2020",
|
|
47
103
|
ind_4_name: str = "Ind_04_disturbance_after_2020",
|
|
48
104
|
ind_5_name: str = "Ind_05_primary_2020",
|
|
49
|
-
ind_6_name: str ="Ind_06_nat_reg_forest_2020",
|
|
50
|
-
ind_7_name: str ="Ind_07_planted_plantations_2020",
|
|
51
|
-
ind_8_name: str ="
|
|
52
|
-
ind_9_name: str ="
|
|
53
|
-
ind_10_name: str ="
|
|
54
|
-
ind_11_name: str ="
|
|
105
|
+
ind_6_name: str = "Ind_06_nat_reg_forest_2020",
|
|
106
|
+
ind_7_name: str = "Ind_07_planted_plantations_2020",
|
|
107
|
+
ind_8_name: str = "Ind_08_planted_plantations_after_2020",
|
|
108
|
+
ind_9_name: str = "Ind_09_treecover_after_2020",
|
|
109
|
+
ind_10_name: str = "Ind_10_agri_after_2020",
|
|
110
|
+
ind_11_name: str = "Ind_11_logging_concession_before_2020",
|
|
55
111
|
low_name: str = "no",
|
|
56
112
|
high_name: str = "yes",
|
|
113
|
+
explicit_unit_type: str = None,
|
|
114
|
+
national_codes: list[str] = None, # List of ISO2 country codes to filter by
|
|
57
115
|
) -> data_lookup_type:
|
|
58
116
|
"""
|
|
59
117
|
Adds the EUDR (European Union Deforestation Regulation) risk columns to the DataFrame based on indicator values.
|
|
@@ -74,34 +132,69 @@ def whisp_risk(
|
|
|
74
132
|
ind_4_name (str, optional): Name of the fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
|
|
75
133
|
low_name (str, optional): Value shown in table if less than or equal to the threshold. Defaults to "no".
|
|
76
134
|
high_name (str, optional): Value shown in table if more than the threshold. Defaults to "yes".
|
|
135
|
+
explicit_unit_type (str, optional): Override the autodetected unit type ('ha' or 'percent').
|
|
136
|
+
If not provided, will detect from dataframe 'unit' column.
|
|
77
137
|
|
|
78
138
|
Returns:
|
|
79
139
|
data_lookup_type: DataFrame with added 'EUDR_risk' column.
|
|
80
140
|
"""
|
|
141
|
+
# Determine the unit type to use based on input data and override
|
|
142
|
+
unit_type = detect_unit_type(df, explicit_unit_type)
|
|
81
143
|
|
|
144
|
+
print(f"Using unit type: {unit_type}")
|
|
145
|
+
|
|
146
|
+
lookup_df_copy = lookup_gee_datasets_df.copy()
|
|
147
|
+
|
|
148
|
+
# filter by national codes (even if None - this removes all country columns unless specified)
|
|
149
|
+
filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
|
|
150
|
+
lookup_df=lookup_df_copy,
|
|
151
|
+
filter_col="ISO2_code",
|
|
152
|
+
national_codes=national_codes,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# Rest of the function remains the same, but pass unit_type to add_indicators
|
|
82
156
|
if ind_1_input_columns is None:
|
|
83
|
-
ind_1_input_columns = get_cols_ind_01_treecover(
|
|
157
|
+
ind_1_input_columns = get_cols_ind_01_treecover(filtered_lookup_gee_datasets_df)
|
|
84
158
|
if ind_2_input_columns is None:
|
|
85
|
-
ind_2_input_columns = get_cols_ind_02_commodities(
|
|
159
|
+
ind_2_input_columns = get_cols_ind_02_commodities(
|
|
160
|
+
filtered_lookup_gee_datasets_df
|
|
161
|
+
)
|
|
86
162
|
if ind_3_input_columns is None:
|
|
87
|
-
ind_3_input_columns = get_cols_ind_03_dist_before_2020(
|
|
163
|
+
ind_3_input_columns = get_cols_ind_03_dist_before_2020(
|
|
164
|
+
filtered_lookup_gee_datasets_df
|
|
165
|
+
)
|
|
88
166
|
if ind_4_input_columns is None:
|
|
89
|
-
ind_4_input_columns = get_cols_ind_04_dist_after_2020(
|
|
167
|
+
ind_4_input_columns = get_cols_ind_04_dist_after_2020(
|
|
168
|
+
filtered_lookup_gee_datasets_df
|
|
169
|
+
)
|
|
90
170
|
if ind_5_input_columns is None:
|
|
91
|
-
ind_5_input_columns = get_cols_ind_05_primary_2020(
|
|
171
|
+
ind_5_input_columns = get_cols_ind_05_primary_2020(
|
|
172
|
+
filtered_lookup_gee_datasets_df
|
|
173
|
+
)
|
|
92
174
|
if ind_6_input_columns is None:
|
|
93
|
-
ind_6_input_columns = get_cols_ind_06_nat_reg_2020(
|
|
175
|
+
ind_6_input_columns = get_cols_ind_06_nat_reg_2020(
|
|
176
|
+
filtered_lookup_gee_datasets_df
|
|
177
|
+
)
|
|
94
178
|
if ind_7_input_columns is None:
|
|
95
|
-
ind_7_input_columns = get_cols_ind_07_planted_2020(
|
|
179
|
+
ind_7_input_columns = get_cols_ind_07_planted_2020(
|
|
180
|
+
filtered_lookup_gee_datasets_df
|
|
181
|
+
)
|
|
96
182
|
if ind_8_input_columns is None:
|
|
97
|
-
ind_8_input_columns =
|
|
183
|
+
ind_8_input_columns = get_cols_ind_08_planted_after_2020(
|
|
184
|
+
filtered_lookup_gee_datasets_df
|
|
185
|
+
)
|
|
98
186
|
if ind_9_input_columns is None:
|
|
99
|
-
ind_9_input_columns =
|
|
187
|
+
ind_9_input_columns = get_cols_ind_09_treecover_after_2020(
|
|
188
|
+
filtered_lookup_gee_datasets_df
|
|
189
|
+
)
|
|
100
190
|
if ind_10_input_columns is None:
|
|
101
|
-
ind_10_input_columns =
|
|
191
|
+
ind_10_input_columns = get_cols_ind_10_agri_after_2020(
|
|
192
|
+
filtered_lookup_gee_datasets_df
|
|
193
|
+
)
|
|
102
194
|
if ind_11_input_columns is None:
|
|
103
|
-
ind_11_input_columns =
|
|
104
|
-
|
|
195
|
+
ind_11_input_columns = get_cols_ind_11_logging_before_2020(
|
|
196
|
+
filtered_lookup_gee_datasets_df
|
|
197
|
+
)
|
|
105
198
|
|
|
106
199
|
# Check range of values
|
|
107
200
|
check_range(ind_1_pcent_threshold)
|
|
@@ -115,7 +208,7 @@ def whisp_risk(
|
|
|
115
208
|
check_range(ind_9_pcent_threshold)
|
|
116
209
|
check_range(ind_10_pcent_threshold)
|
|
117
210
|
check_range(ind_11_pcent_threshold)
|
|
118
|
-
|
|
211
|
+
|
|
119
212
|
input_cols = [
|
|
120
213
|
ind_1_input_columns,
|
|
121
214
|
ind_2_input_columns,
|
|
@@ -127,7 +220,7 @@ def whisp_risk(
|
|
|
127
220
|
ind_8_input_columns,
|
|
128
221
|
ind_9_input_columns,
|
|
129
222
|
ind_10_input_columns,
|
|
130
|
-
ind_11_input_columns,
|
|
223
|
+
ind_11_input_columns,
|
|
131
224
|
]
|
|
132
225
|
thresholds = [
|
|
133
226
|
ind_1_pcent_threshold,
|
|
@@ -137,12 +230,24 @@ def whisp_risk(
|
|
|
137
230
|
ind_5_pcent_threshold,
|
|
138
231
|
ind_6_pcent_threshold,
|
|
139
232
|
ind_7_pcent_threshold,
|
|
140
|
-
ind_8_pcent_threshold,
|
|
233
|
+
ind_8_pcent_threshold,
|
|
141
234
|
ind_9_pcent_threshold,
|
|
142
235
|
ind_10_pcent_threshold,
|
|
143
236
|
ind_11_pcent_threshold,
|
|
144
237
|
]
|
|
145
|
-
names = [
|
|
238
|
+
names = [
|
|
239
|
+
ind_1_name,
|
|
240
|
+
ind_2_name,
|
|
241
|
+
ind_3_name,
|
|
242
|
+
ind_4_name,
|
|
243
|
+
ind_5_name,
|
|
244
|
+
ind_6_name,
|
|
245
|
+
ind_7_name,
|
|
246
|
+
ind_8_name,
|
|
247
|
+
ind_9_name,
|
|
248
|
+
ind_10_name,
|
|
249
|
+
ind_11_name,
|
|
250
|
+
]
|
|
146
251
|
[check_range(threshold) for threshold in thresholds]
|
|
147
252
|
|
|
148
253
|
df_w_indicators = add_indicators(
|
|
@@ -152,6 +257,7 @@ def whisp_risk(
|
|
|
152
257
|
names,
|
|
153
258
|
low_name,
|
|
154
259
|
high_name,
|
|
260
|
+
unit_type, # Pass the unit type
|
|
155
261
|
)
|
|
156
262
|
|
|
157
263
|
df_w_indicators_and_risk_pcrop = add_eudr_risk_pcrop_col(
|
|
@@ -169,20 +275,20 @@ def whisp_risk(
|
|
|
169
275
|
ind_3_name=ind_3_name,
|
|
170
276
|
ind_4_name=ind_4_name,
|
|
171
277
|
)
|
|
172
|
-
|
|
278
|
+
|
|
173
279
|
df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
|
|
174
280
|
df=df_w_indicators,
|
|
175
|
-
ind_1_name=ind_1_name,
|
|
176
|
-
ind_2_name=ind_2_name,
|
|
177
|
-
ind_3_name=ind_3_name,
|
|
281
|
+
ind_1_name=ind_1_name,
|
|
282
|
+
ind_2_name=ind_2_name,
|
|
283
|
+
ind_3_name=ind_3_name,
|
|
178
284
|
ind_4_name=ind_4_name,
|
|
179
|
-
ind_5_name=ind_5_name,
|
|
180
|
-
ind_6_name=ind_6_name,
|
|
181
|
-
ind_7_name=ind_7_name,
|
|
285
|
+
ind_5_name=ind_5_name,
|
|
286
|
+
ind_6_name=ind_6_name,
|
|
287
|
+
ind_7_name=ind_7_name,
|
|
182
288
|
ind_8_name=ind_8_name,
|
|
183
289
|
ind_9_name=ind_9_name,
|
|
184
290
|
ind_10_name=ind_10_name,
|
|
185
|
-
ind_11_name=ind_11_name
|
|
291
|
+
ind_11_name=ind_11_name,
|
|
186
292
|
)
|
|
187
293
|
|
|
188
294
|
return df_w_indicators_and_risk_timber
|
|
@@ -252,16 +358,17 @@ def add_eudr_risk_acrop_col(
|
|
|
252
358
|
for index, row in df.iterrows():
|
|
253
359
|
# If there is no tree cover in 2020, set EUDR_risk_soy to "low"
|
|
254
360
|
if row[ind_1_name] == "no" or row[ind_2_name] == "yes":
|
|
255
|
-
df.at[index,
|
|
361
|
+
df.at[index, "risk_acrop"] = "low"
|
|
256
362
|
# If there is tree cover in 2020 and disturbances post 2020, set risk_acrop to "high"
|
|
257
363
|
elif row[ind_1_name] == "yes" and row[ind_4_name] == "yes":
|
|
258
|
-
df.at[index,
|
|
364
|
+
df.at[index, "risk_acrop"] = "high"
|
|
259
365
|
# If tree cover and no disturbances post 2020, set EUDR_risk to "more_info_needed"
|
|
260
|
-
else
|
|
261
|
-
df.at[index,
|
|
366
|
+
else:
|
|
367
|
+
df.at[index, "risk_acrop"] = "more_info_needed"
|
|
262
368
|
|
|
263
369
|
return df
|
|
264
|
-
|
|
370
|
+
|
|
371
|
+
|
|
265
372
|
def add_eudr_risk_timber_col(
|
|
266
373
|
df: data_lookup_type,
|
|
267
374
|
ind_1_name: str,
|
|
@@ -275,7 +382,7 @@ def add_eudr_risk_timber_col(
|
|
|
275
382
|
ind_9_name: str,
|
|
276
383
|
ind_10_name: str,
|
|
277
384
|
ind_11_name: str,
|
|
278
|
-
)-> data_lookup_type:
|
|
385
|
+
) -> data_lookup_type:
|
|
279
386
|
"""
|
|
280
387
|
Adds the EUDR (European Union Deforestation Regulation) timber risk column ('risk_timber') to the DataFrame based on indicator values.
|
|
281
388
|
|
|
@@ -289,36 +396,47 @@ def add_eudr_risk_timber_col(
|
|
|
289
396
|
Returns:
|
|
290
397
|
DataFrame: DataFrame with added 'EUDR_risk' column.
|
|
291
398
|
"""
|
|
292
|
-
|
|
399
|
+
|
|
293
400
|
for index, row in df.iterrows():
|
|
294
401
|
# If there is a commodity in 2020 OR if there is planted-plantation in 2020 AND no agriculture in 2023, set EUDR_risk_degrad to "low"
|
|
295
|
-
if row[ind_2_name] == "yes" or (
|
|
296
|
-
|
|
402
|
+
if row[ind_2_name] == "yes" or (
|
|
403
|
+
row[ind_7_name] == "yes" and row[ind_10_name] == "no"
|
|
404
|
+
):
|
|
405
|
+
df.at[index, "risk_timber"] = "low"
|
|
297
406
|
# If there is no tree cover, set EUDR_risk_degrad to "low"? no because of unstocked forests
|
|
298
407
|
# if row[ind_1_name] == "no" or row[ind_3_name] == "yes" or row[ind_7_name] == "yes":
|
|
299
408
|
# df.at[index, 'EUDR_risk_degrad'] = "low"
|
|
300
|
-
|
|
301
|
-
elif (
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
409
|
+
# If primary or naturally regenerating or planted forest in 2020 AND agricultural use in 2023, set EUDR_risk to high
|
|
410
|
+
elif (
|
|
411
|
+
row[ind_5_name] == "yes"
|
|
412
|
+
or row[ind_6_name] == "yes"
|
|
413
|
+
or row[ind_7_name] == "yes"
|
|
414
|
+
) and row[ind_10_name] == "yes":
|
|
415
|
+
df.at[index, "risk_timber"] = "high"
|
|
416
|
+
# If primary or naturally regenerating AND planted post 2020, set EUDR_risk to "high"
|
|
417
|
+
elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
|
|
418
|
+
ind_8_name
|
|
419
|
+
] == "yes":
|
|
420
|
+
df.at[index, "risk_timber"] = "high"
|
|
306
421
|
# If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
|
|
307
|
-
#elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
|
|
308
|
-
# df.at[index, 'EUDR_risk_timber'] = "high"
|
|
309
|
-
|
|
422
|
+
# elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
|
|
423
|
+
# df.at[index, 'EUDR_risk_timber'] = "high"
|
|
424
|
+
|
|
310
425
|
# If primary forest OR naturally regenerating AND an information on management practice OR tree cover post 2020, set EUDR_risk_degrad to "low"
|
|
311
|
-
elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
|
|
312
|
-
|
|
426
|
+
elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
|
|
427
|
+
row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
|
|
428
|
+
):
|
|
429
|
+
df.at[index, "risk_timber"] = "low"
|
|
313
430
|
# If primary or naturally regenerating and no other info, set EUDR_risk to "more_info_needed"
|
|
314
|
-
elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes"
|
|
315
|
-
df.at[index,
|
|
431
|
+
elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
|
|
432
|
+
df.at[index, "risk_timber"] = "more_info_needed"
|
|
316
433
|
# If none of the above conditions are met, set EUDR_risk to "high"
|
|
317
434
|
else:
|
|
318
|
-
df.at[index,
|
|
319
|
-
|
|
435
|
+
df.at[index, "risk_timber"] = "high"
|
|
436
|
+
|
|
320
437
|
return df
|
|
321
|
-
|
|
438
|
+
|
|
439
|
+
|
|
322
440
|
def add_indicators(
|
|
323
441
|
df: data_lookup_type,
|
|
324
442
|
input_cols: list[str],
|
|
@@ -326,6 +444,7 @@ def add_indicators(
|
|
|
326
444
|
names: list[str],
|
|
327
445
|
low_name: str = "no",
|
|
328
446
|
high_name: str = "yes",
|
|
447
|
+
unit_type: str = None,
|
|
329
448
|
) -> data_lookup_type:
|
|
330
449
|
for input_col, threshold, name in zip(input_cols, thresholds, names):
|
|
331
450
|
df = add_indicator_column(
|
|
@@ -335,19 +454,22 @@ def add_indicators(
|
|
|
335
454
|
new_column_name=name,
|
|
336
455
|
low_name=low_name,
|
|
337
456
|
high_name=high_name,
|
|
457
|
+
sum_comparison=False,
|
|
458
|
+
unit_type=unit_type, # Pass the unit type
|
|
338
459
|
)
|
|
339
|
-
|
|
340
460
|
return df
|
|
341
461
|
|
|
342
462
|
|
|
463
|
+
# Update add_indicator_column to use the unit_type parameter
|
|
343
464
|
def add_indicator_column(
|
|
344
465
|
df: data_lookup_type,
|
|
345
466
|
input_columns: list[str],
|
|
346
467
|
threshold: float,
|
|
347
468
|
new_column_name: str,
|
|
348
|
-
low_name: str = "
|
|
349
|
-
high_name: str = "
|
|
469
|
+
low_name: str = "no",
|
|
470
|
+
high_name: str = "yes",
|
|
350
471
|
sum_comparison: bool = False,
|
|
472
|
+
unit_type: str = None, # unit_type parameter
|
|
351
473
|
) -> data_lookup_type:
|
|
352
474
|
"""
|
|
353
475
|
Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
|
|
@@ -362,6 +484,7 @@ def add_indicator_column(
|
|
|
362
484
|
low_name (str): The name for the value when below or equal to threshold (default is 'no').
|
|
363
485
|
high_name (str): The name for the value when above threshold (default is 'yes').
|
|
364
486
|
sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
|
|
487
|
+
unit_type (str): Whether values are in "ha" or "percent".
|
|
365
488
|
|
|
366
489
|
Returns:
|
|
367
490
|
data_lookup_type: The DataFrame with the new column added.
|
|
@@ -379,7 +502,10 @@ def add_indicator_column(
|
|
|
379
502
|
for col in input_columns:
|
|
380
503
|
# So that the threshold is always in percent: if outputs are in ha, the code converts them to percent by dividing by the geometry_area_column column.
|
|
381
504
|
# Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
|
|
382
|
-
if
|
|
505
|
+
if unit_type == "ha":
|
|
506
|
+
df[geometry_area_column] = pd.to_numeric(
|
|
507
|
+
df[geometry_area_column], errors="coerce"
|
|
508
|
+
)
|
|
383
509
|
val_to_check = clamp(
|
|
384
510
|
((df[col] / df[geometry_area_column]) * 100), 0, 100
|
|
385
511
|
)
|
|
@@ -475,6 +601,7 @@ def get_cols_ind_04_dist_after_2020(lookup_gee_datasets_df):
|
|
|
475
601
|
]
|
|
476
602
|
)
|
|
477
603
|
|
|
604
|
+
|
|
478
605
|
def get_cols_ind_05_primary_2020(lookup_gee_datasets_df):
|
|
479
606
|
"""
|
|
480
607
|
Generate a list of dataset names for primary forests in 2020
|
|
@@ -488,10 +615,13 @@ def get_cols_ind_05_primary_2020(lookup_gee_datasets_df):
|
|
|
488
615
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
489
616
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
490
617
|
]
|
|
491
|
-
return list(
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
618
|
+
return list(
|
|
619
|
+
lookup_gee_datasets_df["name"][
|
|
620
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
621
|
+
& (lookup_gee_datasets_df["theme_timber"] == "primary")
|
|
622
|
+
]
|
|
623
|
+
)
|
|
624
|
+
|
|
495
625
|
|
|
496
626
|
def get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df):
|
|
497
627
|
"""
|
|
@@ -506,10 +636,13 @@ def get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df):
|
|
|
506
636
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
507
637
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
508
638
|
]
|
|
509
|
-
return list(
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
639
|
+
return list(
|
|
640
|
+
lookup_gee_datasets_df["name"][
|
|
641
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
642
|
+
& (lookup_gee_datasets_df["theme_timber"] == "naturally_reg_2020")
|
|
643
|
+
]
|
|
644
|
+
)
|
|
645
|
+
|
|
513
646
|
|
|
514
647
|
def get_cols_ind_07_planted_2020(lookup_gee_datasets_df):
|
|
515
648
|
"""
|
|
@@ -524,11 +657,15 @@ def get_cols_ind_07_planted_2020(lookup_gee_datasets_df):
|
|
|
524
657
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
525
658
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
526
659
|
]
|
|
527
|
-
return list(
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
660
|
+
return list(
|
|
661
|
+
lookup_gee_datasets_df["name"][
|
|
662
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
663
|
+
& (lookup_gee_datasets_df["theme_timber"] == "planted_plantation_2020")
|
|
664
|
+
]
|
|
665
|
+
)
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
def get_cols_ind_08_planted_after_2020(lookup_gee_datasets_df):
|
|
532
669
|
"""
|
|
533
670
|
Generate a list of dataset names for planted and plantation forests post 2020
|
|
534
671
|
|
|
@@ -541,11 +678,18 @@ def get_cols_ind_08_planted_post_2020(lookup_gee_datasets_df):
|
|
|
541
678
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
542
679
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
543
680
|
]
|
|
544
|
-
return list(
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
681
|
+
return list(
|
|
682
|
+
lookup_gee_datasets_df["name"][
|
|
683
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
684
|
+
& (
|
|
685
|
+
lookup_gee_datasets_df["theme_timber"]
|
|
686
|
+
== "planted_plantation_after_2020"
|
|
687
|
+
)
|
|
688
|
+
]
|
|
689
|
+
)
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def get_cols_ind_09_treecover_after_2020(lookup_gee_datasets_df):
|
|
549
693
|
"""
|
|
550
694
|
Generate a list of dataset names for treecover post 2020
|
|
551
695
|
|
|
@@ -558,12 +702,15 @@ def get_cols_ind_09_treecover_post_2020(lookup_gee_datasets_df):
|
|
|
558
702
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
559
703
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
560
704
|
]
|
|
561
|
-
return list(
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
705
|
+
return list(
|
|
706
|
+
lookup_gee_datasets_df["name"][
|
|
707
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
708
|
+
& (lookup_gee_datasets_df["theme_timber"] == "treecover_post2020")
|
|
709
|
+
]
|
|
710
|
+
)
|
|
711
|
+
|
|
565
712
|
|
|
566
|
-
def
|
|
713
|
+
def get_cols_ind_10_agri_after_2020(lookup_gee_datasets_df):
|
|
567
714
|
"""
|
|
568
715
|
Generate a list of dataset names for croplands post 2020
|
|
569
716
|
|
|
@@ -576,12 +723,15 @@ def get_cols_ind_10_agri_post_2020(lookup_gee_datasets_df):
|
|
|
576
723
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
577
724
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
578
725
|
]
|
|
579
|
-
return list(
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
726
|
+
return list(
|
|
727
|
+
lookup_gee_datasets_df["name"][
|
|
728
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
729
|
+
& (lookup_gee_datasets_df["theme_timber"] == "agri_after_2020")
|
|
730
|
+
]
|
|
731
|
+
)
|
|
732
|
+
|
|
583
733
|
|
|
584
|
-
def
|
|
734
|
+
def get_cols_ind_11_logging_before_2020(lookup_gee_datasets_df):
|
|
585
735
|
"""
|
|
586
736
|
Generate a list of dataset names for logging concessions (2020 if available)
|
|
587
737
|
|
|
@@ -594,11 +744,14 @@ def get_cols_ind_11_logging(lookup_gee_datasets_df):
|
|
|
594
744
|
lookup_gee_datasets_df = lookup_gee_datasets_df[
|
|
595
745
|
lookup_gee_datasets_df["exclude_from_output"] != 1
|
|
596
746
|
]
|
|
597
|
-
return list(
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
747
|
+
return list(
|
|
748
|
+
lookup_gee_datasets_df["name"][
|
|
749
|
+
(lookup_gee_datasets_df["use_for_risk_timber"] == 1)
|
|
750
|
+
& (lookup_gee_datasets_df["theme_timber"] == "logging_concession")
|
|
751
|
+
]
|
|
752
|
+
)
|
|
753
|
+
|
|
754
|
+
|
|
602
755
|
def clamp(
|
|
603
756
|
value: float | pd.Series, min_val: float, max_val: float
|
|
604
757
|
) -> float | pd.Series:
|
|
@@ -621,4 +774,4 @@ def clamp(
|
|
|
621
774
|
|
|
622
775
|
def check_range(value: float) -> None:
    """
    Validate that a value lies within the closed interval [0, 100].

    Args:
        value (float): The number to validate.

    Raises:
        ValueError: If the value is not between 0 and 100 inclusive.
    """
    # Keep the chained comparison: values that compare False on both sides
    # (e.g. NaN) must still be rejected.
    within_bounds = 0 <= value <= 100
    if within_bounds:
        return
    raise ValueError("Value must be between 0 and 100.")
|