openforis-whisp 0.1.0a8__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openforis_whisp/risk.py CHANGED
@@ -2,12 +2,15 @@ import pandas as pd
2
2
 
3
3
  from .pd_schemas import data_lookup_type
4
4
 
5
+
5
6
  from openforis_whisp.parameters.config_runtime import (
6
- percent_or_ha,
7
7
  geometry_area_column,
8
8
  DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH,
9
+ stats_unit_type_column, # Add this import
9
10
  )
10
11
 
12
+ from openforis_whisp.reformat import filter_lookup_by_country_codes
13
+
11
14
  # could embed this in each function below that uses lookup_gee_datasets_df.
12
15
  lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
13
16
  DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH
@@ -17,22 +20,98 @@ lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
17
20
  # requires lookup_gee_datasets_df
18
21
 
19
22
 
23
+ # Add function to detect unit type from dataframe
24
def detect_unit_type(df, explicit_unit_type=None):
    """
    Determine the unit type from the dataframe or use the override value.

    Args:
        df (DataFrame): Input DataFrame. Only inspected when no override is given.
        explicit_unit_type (str, optional): Override unit type ('ha' or 'percent').

    Returns:
        str: The unit type to use for calculations ('ha' or 'percent').

    Raises:
        ValueError: If the override is not 'ha' or 'percent'; if the unit type
            column is missing or has no rows and no override is provided; if
            there are mixed unit types in the dataframe; or if the single unit
            type found is not recognized.
    """
    valid_unit_types = ("ha", "percent")

    # An explicit override always wins; the dataframe is not consulted at all.
    if explicit_unit_type is not None:
        if explicit_unit_type not in valid_unit_types:
            raise ValueError(
                f"Invalid unit type: {explicit_unit_type}. Must be 'ha' or 'percent'."
            )
        return explicit_unit_type

    # Without an override the unit type has to come from the dataframe itself.
    if stats_unit_type_column not in df.columns:
        raise ValueError(
            f"Column '{stats_unit_type_column}' not found in dataframe. "
            "Please provide 'explicit_unit_type' parameter to specify the unit type."
        )

    unit_types = df[stats_unit_type_column].unique()

    # An empty column gives nothing to detect; fail with a clear ValueError
    # instead of an IndexError from indexing the empty result below.
    if len(unit_types) == 0:
        raise ValueError(
            f"Column '{stats_unit_type_column}' has no rows, so the unit type cannot be detected. "
            "Please provide 'explicit_unit_type' parameter to specify the unit type."
        )

    # All rows must agree on one unit type.
    if len(unit_types) > 1:
        raise ValueError(
            f"Mixed unit types in dataframe: {unit_types}. All rows must use the same unit type."
        )

    unit_type = unit_types[0]

    # Validate that the single detected unit type is one we know how to handle.
    if unit_type not in valid_unit_types:
        raise ValueError(
            f"Unrecognized unit type: {unit_type}. Must be 'ha' or 'percent'."
        )

    return unit_type
73
+
74
+
75
+ # Update whisp_risk to accept and pass the unit_type parameter
20
76
  def whisp_risk(
21
77
  df: data_lookup_type, # CHECK THIS
22
78
  ind_1_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
23
79
  ind_2_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
24
80
  ind_3_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
25
81
  ind_4_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
82
+ ind_5_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
83
+ ind_6_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
84
+ ind_7_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
85
+ ind_8_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
86
+ ind_9_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
87
+ ind_10_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
88
+ ind_11_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
26
89
  ind_1_input_columns: pd.Series = None, # see lookup_gee_datasets for details
27
90
  ind_2_input_columns: pd.Series = None, # see lookup_gee_datasets for details
28
91
  ind_3_input_columns: pd.Series = None, # see lookup_gee_datasets for details
29
92
  ind_4_input_columns: pd.Series = None, # see lookup_gee_datasets for details
30
- ind_1_name: str = "Indicator_1_treecover",
31
- ind_2_name: str = "Indicator_2_commodities",
32
- ind_3_name: str = "Indicator_3_disturbance_before_2020",
33
- ind_4_name: str = "Indicator_4_disturbance_after_2020",
93
+ ind_5_input_columns: pd.Series = None, # see lookup_gee_datasets for details
94
+ ind_6_input_columns: pd.Series = None, # see lookup_gee_datasets for details
95
+ ind_7_input_columns: pd.Series = None, # see lookup_gee_datasets for details
96
+ ind_8_input_columns: pd.Series = None, # see lookup_gee_datasets for details
97
+ ind_9_input_columns: pd.Series = None, # see lookup_gee_datasets for details
98
+ ind_10_input_columns: pd.Series = None, # see lookup_gee_datasets for details
99
+ ind_11_input_columns: pd.Series = None, # see lookup_gee_datasets for details
100
+ ind_1_name: str = "Ind_01_treecover",
101
+ ind_2_name: str = "Ind_02_commodities",
102
+ ind_3_name: str = "Ind_03_disturbance_before_2020",
103
+ ind_4_name: str = "Ind_04_disturbance_after_2020",
104
+ ind_5_name: str = "Ind_05_primary_2020",
105
+ ind_6_name: str = "Ind_06_nat_reg_forest_2020",
106
+ ind_7_name: str = "Ind_07_planted_plantations_2020",
107
+ ind_8_name: str = "Ind_08_planted_plantations_after_2020",
108
+ ind_9_name: str = "Ind_09_treecover_after_2020",
109
+ ind_10_name: str = "Ind_10_agri_after_2020",
110
+ ind_11_name: str = "Ind_11_logging_concession_before_2020",
34
111
  low_name: str = "no",
35
112
  high_name: str = "yes",
113
+ explicit_unit_type: str = None,
114
+ national_codes: list[str] = None, # List of ISO2 country codes to filter by
36
115
  ) -> data_lookup_type:
37
116
  """
38
117
  Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
@@ -53,39 +132,122 @@ def whisp_risk(
53
132
  ind_4_name (str, optional): Name of the fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
54
133
  low_name (str, optional): Value shown in table if less than or equal to the threshold. Defaults to "no".
55
134
  high_name (str, optional): Value shown in table if more than the threshold. Defaults to "yes".
135
+ explicit_unit_type (str, optional): Override the autodetected unit type ('ha' or 'percent').
136
+ If not provided, will detect from dataframe 'unit' column.
56
137
 
57
138
  Returns:
58
139
  data_lookup_type: DataFrame with the indicator columns and the 'risk_pcrop', 'risk_acrop' and 'risk_timber' columns added.
59
140
  """
141
+ # Determine the unit type to use based on input data and override
142
+ unit_type = detect_unit_type(df, explicit_unit_type)
143
+
144
+ print(f"Using unit type: {unit_type}")
60
145
 
146
+ lookup_df_copy = lookup_gee_datasets_df.copy()
147
+
148
+ # filter by national codes (even if None - this removes all country columns unless specified)
149
+ filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
150
+ lookup_df=lookup_df_copy,
151
+ filter_col="ISO2_code",
152
+ national_codes=national_codes,
153
+ )
154
+
155
+ # Rest of the function remains the same, but pass unit_type to add_indicators
61
156
  if ind_1_input_columns is None:
62
- ind_1_input_columns = get_cols_ind_1_treecover(lookup_gee_datasets_df)
157
+ ind_1_input_columns = get_cols_ind_01_treecover(filtered_lookup_gee_datasets_df)
63
158
  if ind_2_input_columns is None:
64
- ind_2_input_columns = get_cols_ind_2_commodities(lookup_gee_datasets_df)
159
+ ind_2_input_columns = get_cols_ind_02_commodities(
160
+ filtered_lookup_gee_datasets_df
161
+ )
65
162
  if ind_3_input_columns is None:
66
- ind_3_input_columns = get_cols_ind_3_dist_before_2020(lookup_gee_datasets_df)
163
+ ind_3_input_columns = get_cols_ind_03_dist_before_2020(
164
+ filtered_lookup_gee_datasets_df
165
+ )
67
166
  if ind_4_input_columns is None:
68
- ind_4_input_columns = get_cols_ind_4_dist_after_2020(lookup_gee_datasets_df)
167
+ ind_4_input_columns = get_cols_ind_04_dist_after_2020(
168
+ filtered_lookup_gee_datasets_df
169
+ )
170
+ if ind_5_input_columns is None:
171
+ ind_5_input_columns = get_cols_ind_05_primary_2020(
172
+ filtered_lookup_gee_datasets_df
173
+ )
174
+ if ind_6_input_columns is None:
175
+ ind_6_input_columns = get_cols_ind_06_nat_reg_2020(
176
+ filtered_lookup_gee_datasets_df
177
+ )
178
+ if ind_7_input_columns is None:
179
+ ind_7_input_columns = get_cols_ind_07_planted_2020(
180
+ filtered_lookup_gee_datasets_df
181
+ )
182
+ if ind_8_input_columns is None:
183
+ ind_8_input_columns = get_cols_ind_08_planted_after_2020(
184
+ filtered_lookup_gee_datasets_df
185
+ )
186
+ if ind_9_input_columns is None:
187
+ ind_9_input_columns = get_cols_ind_09_treecover_after_2020(
188
+ filtered_lookup_gee_datasets_df
189
+ )
190
+ if ind_10_input_columns is None:
191
+ ind_10_input_columns = get_cols_ind_10_agri_after_2020(
192
+ filtered_lookup_gee_datasets_df
193
+ )
194
+ if ind_11_input_columns is None:
195
+ ind_11_input_columns = get_cols_ind_11_logging_before_2020(
196
+ filtered_lookup_gee_datasets_df
197
+ )
69
198
 
70
199
  # Check range of values
71
200
  check_range(ind_1_pcent_threshold)
72
201
  check_range(ind_2_pcent_threshold)
73
202
  check_range(ind_3_pcent_threshold)
74
203
  check_range(ind_4_pcent_threshold)
204
+ check_range(ind_5_pcent_threshold)
205
+ check_range(ind_6_pcent_threshold)
206
+ check_range(ind_7_pcent_threshold)
207
+ check_range(ind_8_pcent_threshold)
208
+ check_range(ind_9_pcent_threshold)
209
+ check_range(ind_10_pcent_threshold)
210
+ check_range(ind_11_pcent_threshold)
75
211
 
76
212
  input_cols = [
77
213
  ind_1_input_columns,
78
214
  ind_2_input_columns,
79
215
  ind_3_input_columns,
80
216
  ind_4_input_columns,
217
+ ind_5_input_columns,
218
+ ind_6_input_columns,
219
+ ind_7_input_columns,
220
+ ind_8_input_columns,
221
+ ind_9_input_columns,
222
+ ind_10_input_columns,
223
+ ind_11_input_columns,
81
224
  ]
82
225
  thresholds = [
83
226
  ind_1_pcent_threshold,
84
227
  ind_2_pcent_threshold,
85
228
  ind_3_pcent_threshold,
86
229
  ind_4_pcent_threshold,
230
+ ind_5_pcent_threshold,
231
+ ind_6_pcent_threshold,
232
+ ind_7_pcent_threshold,
233
+ ind_8_pcent_threshold,
234
+ ind_9_pcent_threshold,
235
+ ind_10_pcent_threshold,
236
+ ind_11_pcent_threshold,
237
+ ]
238
+ names = [
239
+ ind_1_name,
240
+ ind_2_name,
241
+ ind_3_name,
242
+ ind_4_name,
243
+ ind_5_name,
244
+ ind_6_name,
245
+ ind_7_name,
246
+ ind_8_name,
247
+ ind_9_name,
248
+ ind_10_name,
249
+ ind_11_name,
87
250
  ]
88
- names = [ind_1_name, ind_2_name, ind_3_name, ind_4_name]
89
251
  [check_range(threshold) for threshold in thresholds]
90
252
 
91
253
  df_w_indicators = add_indicators(
@@ -95,9 +257,18 @@ def whisp_risk(
95
257
  names,
96
258
  low_name,
97
259
  high_name,
260
+ unit_type, # Pass the unit type
261
+ )
262
+
263
+ df_w_indicators_and_risk_pcrop = add_eudr_risk_pcrop_col(
264
+ df=df_w_indicators,
265
+ ind_1_name=ind_1_name,
266
+ ind_2_name=ind_2_name,
267
+ ind_3_name=ind_3_name,
268
+ ind_4_name=ind_4_name,
98
269
  )
99
270
 
100
- df_w_indicators_and_risk = add_eudr_risk_col(
271
+ df_w_indicators_and_risk_acrop = add_eudr_risk_acrop_col(
101
272
  df=df_w_indicators,
102
273
  ind_1_name=ind_1_name,
103
274
  ind_2_name=ind_2_name,
@@ -105,10 +276,25 @@ def whisp_risk(
105
276
  ind_4_name=ind_4_name,
106
277
  )
107
278
 
108
- return df_w_indicators_and_risk
279
+ df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
280
+ df=df_w_indicators,
281
+ ind_1_name=ind_1_name,
282
+ ind_2_name=ind_2_name,
283
+ ind_3_name=ind_3_name,
284
+ ind_4_name=ind_4_name,
285
+ ind_5_name=ind_5_name,
286
+ ind_6_name=ind_6_name,
287
+ ind_7_name=ind_7_name,
288
+ ind_8_name=ind_8_name,
289
+ ind_9_name=ind_9_name,
290
+ ind_10_name=ind_10_name,
291
+ ind_11_name=ind_11_name,
292
+ )
293
+
294
+ return df_w_indicators_and_risk_timber
109
295
 
110
296
 
111
- def add_eudr_risk_col(
297
+ def add_eudr_risk_pcrop_col(
112
298
  df: data_lookup_type,
113
299
  ind_1_name: str,
114
300
  ind_2_name: str,
@@ -136,13 +322,117 @@ def add_eudr_risk_col(
136
322
  or row[ind_2_name] == "yes"
137
323
  or row[ind_3_name] == "yes"
138
324
  ):
139
- df.at[index, "EUDR_risk"] = "low"
325
+ df.at[index, "risk_pcrop"] = "low"
140
326
  # If none of the first three indicators suggest low risk and Indicator 4 suggests no risk, set risk_pcrop to "more_info_needed"
141
327
  elif row[ind_4_name] == "no":
142
- df.at[index, "EUDR_risk"] = "more_info_needed"
328
+ df.at[index, "risk_pcrop"] = "more_info_needed"
329
+ # If none of the above conditions are met, set risk_pcrop to "high"
330
+ else:
331
+ df.at[index, "risk_pcrop"] = "high"
332
+
333
+ return df
334
+
335
+
336
def add_eudr_risk_acrop_col(
    df: "data_lookup_type",
    ind_1_name: str,
    ind_2_name: str,
    ind_3_name: str,
    ind_4_name: str,
) -> "data_lookup_type":
    """
    Add the 'risk_acrop' column to the DataFrame based on indicator values.

    Indicator columns are compared against the literal strings "yes" and "no".
    Rules are applied per row, first match wins:

    1. "low" if indicator 1 is "no" or indicator 2 is "yes".
    2. "high" if indicator 1 is "yes" and indicator 4 is "yes".
    3. "more_info_needed" otherwise.

    The input DataFrame is modified in place and also returned.

    Args:
        df (DataFrame): Input DataFrame containing the indicator columns.
        ind_1_name (str): Name of the first indicator column.
        ind_2_name (str): Name of the second indicator column.
        ind_3_name (str): Name of the third indicator column (not used by these rules;
            kept so the signature matches the other risk functions).
        ind_4_name (str): Name of the fourth indicator column.

    Returns:
        DataFrame: The same DataFrame with the 'risk_acrop' column added.
    """
    # Positional (numpy) masks: rows are addressed by position, not by index label,
    # so a non-unique index cannot make rows overwrite each other.
    ind_1_is_no = (df[ind_1_name] == "no").to_numpy()
    ind_1_is_yes = (df[ind_1_name] == "yes").to_numpy()
    ind_2_is_yes = (df[ind_2_name] == "yes").to_numpy()
    ind_4_is_yes = (df[ind_4_name] == "yes").to_numpy()

    low = ind_1_is_no | ind_2_is_yes
    high = ind_1_is_yes & ind_4_is_yes

    # Assign from lowest to highest precedence so that later writes win.
    # Creating the column up front also guarantees it exists for an empty frame.
    df["risk_acrop"] = "more_info_needed"
    df.loc[high, "risk_acrop"] = "high"
    df.loc[low, "risk_acrop"] = "low"

    return df
370
+
371
+
372
def add_eudr_risk_timber_col(
    df: "data_lookup_type",
    ind_1_name: str,
    ind_2_name: str,
    ind_3_name: str,
    ind_4_name: str,
    ind_5_name: str,
    ind_6_name: str,
    ind_7_name: str,
    ind_8_name: str,
    ind_9_name: str,
    ind_10_name: str,
    ind_11_name: str,
) -> "data_lookup_type":
    """
    Add the 'risk_timber' column to the DataFrame based on indicator values.

    Indicator columns are compared against the literal strings "yes" and "no".
    Rules are applied per row, first match wins:

    1. "low" if indicator 2 is "yes", or indicator 7 is "yes" and indicator 10 is "no".
    2. "high" if indicator 5, 6 or 7 is "yes" and indicator 10 is "yes".
    3. "high" if indicator 5 or 6 is "yes" and indicator 8 is "yes".
    4. "low" if indicator 5 or 6 is "yes" and indicator 9 or 11 is "yes".
    5. "more_info_needed" if indicator 5 or 6 is "yes".
    6. "high" otherwise.

    The input DataFrame is modified in place and also returned.

    Args:
        df (DataFrame): Input DataFrame containing the indicator columns.
        ind_1_name (str): Name of the first indicator column (not used by these rules).
        ind_2_name (str): Name of the second indicator column.
        ind_3_name (str): Name of the third indicator column (not used by these rules).
        ind_4_name (str): Name of the fourth indicator column (not used by these rules).
        ind_5_name (str): Name of the fifth indicator column.
        ind_6_name (str): Name of the sixth indicator column.
        ind_7_name (str): Name of the seventh indicator column.
        ind_8_name (str): Name of the eighth indicator column.
        ind_9_name (str): Name of the ninth indicator column.
        ind_10_name (str): Name of the tenth indicator column.
        ind_11_name (str): Name of the eleventh indicator column.

    Returns:
        DataFrame: The same DataFrame with the 'risk_timber' column added.
    """

    def _equals(column: str, value: str):
        # Positional (numpy) mask: rows are addressed by position, not by index
        # label, so a non-unique index cannot make rows overwrite each other.
        return (df[column] == value).to_numpy()

    ind_2_yes = _equals(ind_2_name, "yes")
    ind_5_yes = _equals(ind_5_name, "yes")
    ind_6_yes = _equals(ind_6_name, "yes")
    ind_7_yes = _equals(ind_7_name, "yes")
    ind_8_yes = _equals(ind_8_name, "yes")
    ind_9_yes = _equals(ind_9_name, "yes")
    ind_10_yes = _equals(ind_10_name, "yes")
    ind_10_no = _equals(ind_10_name, "no")
    ind_11_yes = _equals(ind_11_name, "yes")

    ind_5_or_6_yes = ind_5_yes | ind_6_yes

    # Assign from lowest to highest precedence so that later writes win; this
    # reproduces the first-match-wins order of the rules in the docstring.
    # Creating the column up front also guarantees it exists for an empty frame.
    df["risk_timber"] = "high"  # rule 6 (fallback)
    df.loc[ind_5_or_6_yes, "risk_timber"] = "more_info_needed"  # rule 5
    df.loc[ind_5_or_6_yes & (ind_9_yes | ind_11_yes), "risk_timber"] = "low"  # rule 4
    df.loc[ind_5_or_6_yes & ind_8_yes, "risk_timber"] = "high"  # rule 3
    df.loc[(ind_5_or_6_yes | ind_7_yes) & ind_10_yes, "risk_timber"] = "high"  # rule 2
    df.loc[ind_2_yes | (ind_7_yes & ind_10_no), "risk_timber"] = "low"  # rule 1

    return df
148
438
 
@@ -154,6 +444,7 @@ def add_indicators(
154
444
  names: list[str],
155
445
  low_name: str = "no",
156
446
  high_name: str = "yes",
447
+ unit_type: str = None,
157
448
  ) -> data_lookup_type:
158
449
  for input_col, threshold, name in zip(input_cols, thresholds, names):
159
450
  df = add_indicator_column(
@@ -163,19 +454,22 @@ def add_indicators(
163
454
  new_column_name=name,
164
455
  low_name=low_name,
165
456
  high_name=high_name,
457
+ sum_comparison=False,
458
+ unit_type=unit_type, # Pass the unit type
166
459
  )
167
-
168
460
  return df
169
461
 
170
462
 
463
+ # Update add_indicator_column to use the unit_type parameter
171
464
  def add_indicator_column(
172
465
  df: data_lookup_type,
173
466
  input_columns: list[str],
174
467
  threshold: float,
175
468
  new_column_name: str,
176
- low_name: str = "yes",
177
- high_name: str = "no",
469
+ low_name: str = "no",
470
+ high_name: str = "yes",
178
471
  sum_comparison: bool = False,
472
+ unit_type: str = None, # unit_type parameter
179
473
  ) -> data_lookup_type:
180
474
  """
181
475
  Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
@@ -190,6 +484,7 @@ def add_indicator_column(
190
484
  low_name (str): The name for the value when below or equal to threshold (default is 'no').
191
485
  high_name (str): The name for the value when above threshold (default is 'yes').
192
486
  sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
487
+ unit_type (str): Whether values are in "ha" or "percent".
193
488
 
194
489
  Returns:
195
490
  data_lookup_type: The DataFrame with the new column added.
@@ -207,7 +502,10 @@ def add_indicator_column(
207
502
  for col in input_columns:
208
503
  # So that threshold is always in percent, if outputs are in ha, the code converts to percent (based on dividing by the geometry_area_column column).
209
504
  # Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
210
- if percent_or_ha == "ha":
505
+ if unit_type == "ha":
506
+ df[geometry_area_column] = pd.to_numeric(
507
+ df[geometry_area_column], errors="coerce"
508
+ )
211
509
  val_to_check = clamp(
212
510
  ((df[col] / df[geometry_area_column]) * 100), 0, 100
213
511
  )
@@ -220,7 +518,7 @@ def add_indicator_column(
220
518
  return df
221
519
 
222
520
 
223
- def get_cols_ind_1_treecover(lookup_gee_datasets_df):
521
+ def get_cols_ind_01_treecover(lookup_gee_datasets_df):
224
522
  """
225
523
  Generate a list of dataset names for the treecover theme, excluding those marked for exclusion.
226
524
 
@@ -241,7 +539,7 @@ def get_cols_ind_1_treecover(lookup_gee_datasets_df):
241
539
  )
242
540
 
243
541
 
244
- def get_cols_ind_2_commodities(lookup_gee_datasets_df):
542
+ def get_cols_ind_02_commodities(lookup_gee_datasets_df):
245
543
  """
246
544
  Generate a list of dataset names for the commodities theme, excluding those marked for exclusion.
247
545
 
@@ -262,7 +560,7 @@ def get_cols_ind_2_commodities(lookup_gee_datasets_df):
262
560
  )
263
561
 
264
562
 
265
- def get_cols_ind_3_dist_before_2020(lookup_gee_datasets_df):
563
+ def get_cols_ind_03_dist_before_2020(lookup_gee_datasets_df):
266
564
  """
267
565
  Generate a list of dataset names for the disturbance before 2020 theme, excluding those marked for exclusion.
268
566
 
@@ -283,7 +581,7 @@ def get_cols_ind_3_dist_before_2020(lookup_gee_datasets_df):
283
581
  )
284
582
 
285
583
 
286
- def get_cols_ind_4_dist_after_2020(lookup_gee_datasets_df):
584
+ def get_cols_ind_04_dist_after_2020(lookup_gee_datasets_df):
287
585
  """
288
586
  Generate a list of dataset names for the disturbance after 2020 theme, excluding those marked for exclusion.
289
587
 
@@ -304,6 +602,156 @@ def get_cols_ind_4_dist_after_2020(lookup_gee_datasets_df):
304
602
  )
305
603
 
306
604
 
605
def _get_cols_for_timber_theme(lookup_gee_datasets_df, theme):
    """
    Return dataset names used for the timber risk calculation under one theme.

    A row is selected when its "exclude_from_output" value is not 1, its
    "use_for_risk_timber" value is 1 and its "theme_timber" value equals `theme`.

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.
        theme (str): Value of the "theme_timber" column to select.

    Returns:
        list: Values of the "name" column for the selected rows, in table order.
    """
    selected = (
        (lookup_gee_datasets_df["exclude_from_output"] != 1)
        & (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
        & (lookup_gee_datasets_df["theme_timber"] == theme)
    )
    return list(lookup_gee_datasets_df.loc[selected, "name"])


def get_cols_ind_05_primary_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for primary forests in 2020

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - primary forest in 2020, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(lookup_gee_datasets_df, "primary")


def get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for naturally regenerating forests in 2020

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - naturally regenerating forests in 2020, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(lookup_gee_datasets_df, "naturally_reg_2020")


def get_cols_ind_07_planted_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for planted and plantation forests in 2020

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - planted and plantation forests in 2020, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(
        lookup_gee_datasets_df, "planted_plantation_2020"
    )


def get_cols_ind_08_planted_after_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for planted and plantation forests post 2020

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - planted and plantation forests post 2020, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(
        lookup_gee_datasets_df, "planted_plantation_after_2020"
    )


def get_cols_ind_09_treecover_after_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for treecover post 2020

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - treecover post 2020, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(lookup_gee_datasets_df, "treecover_post2020")


def get_cols_ind_10_agri_after_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for croplands post 2020

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - croplands post 2020, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(lookup_gee_datasets_df, "agri_after_2020")


def get_cols_ind_11_logging_before_2020(lookup_gee_datasets_df):
    """
    Generate a list of dataset names for logging concessions (2020 if available)

    Args:
        lookup_gee_datasets_df (pd.DataFrame): DataFrame containing dataset information.

    Returns:
        list: List of dataset names set to be used in the risk calculations for the degradation - logging concessions, excluding those marked for exclusion.
    """
    return _get_cols_for_timber_theme(lookup_gee_datasets_df, "logging_concession")
753
+
754
+
307
755
  def clamp(
308
756
  value: float | pd.Series, min_val: float, max_val: float
309
757
  ) -> float | pd.Series: