openforis-whisp 1.0.0a1__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openforis_whisp/risk.py CHANGED
@@ -2,12 +2,15 @@ import pandas as pd
2
2
 
3
3
  from .pd_schemas import data_lookup_type
4
4
 
5
+
5
6
  from openforis_whisp.parameters.config_runtime import (
6
- percent_or_ha,
7
7
  geometry_area_column,
8
8
  DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH,
9
+ stats_unit_type_column, # Add this import
9
10
  )
10
11
 
12
+ from openforis_whisp.reformat import filter_lookup_by_country_codes
13
+
11
14
  # could embed this in each function below that uses lookup_gee_datasets_df.
12
15
  lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
13
16
  DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH
@@ -17,25 +20,78 @@ lookup_gee_datasets_df: data_lookup_type = pd.read_csv(
17
20
  # requires lookup_gee_datasets_df
18
21
 
19
22
 
23
+ # Add function to detect unit type from dataframe
24
def detect_unit_type(df, explicit_unit_type=None):
    """
    Determine the unit type from the dataframe or use the override value.

    The unit type is read from the column named by the module-level
    ``stats_unit_type_column`` setting, unless ``explicit_unit_type`` is given,
    in which case the dataframe is not inspected at all.

    Args:
        df (DataFrame): Input DataFrame. Only consulted when no override is given.
        explicit_unit_type (str, optional): Override unit type ('ha' or 'percent').

    Returns:
        str: The unit type to use for calculations ('ha' or 'percent').

    Raises:
        ValueError: If the override is not a recognized unit type, if the unit
            type column is missing or holds no values, if the dataframe mixes
            several unit types, or if the detected unit type is not recognized.
    """
    valid_unit_types = ("ha", "percent")

    # An explicit override wins over anything stored in the dataframe.
    if explicit_unit_type is not None:
        if explicit_unit_type not in valid_unit_types:
            raise ValueError(
                f"Invalid unit type: {explicit_unit_type}. Must be 'ha' or 'percent'."
            )
        return explicit_unit_type

    # Without an override the unit type column is the only source of truth.
    if stats_unit_type_column not in df.columns:
        raise ValueError(
            f"Column '{stats_unit_type_column}' not found in dataframe. "
            "Please provide 'explicit_unit_type' parameter to specify the unit type."
        )

    unit_types = df[stats_unit_type_column].unique()

    # A dataframe without rows yields no unit type at all; report that as a
    # ValueError instead of letting the indexing below fail with an IndexError.
    if len(unit_types) == 0:
        raise ValueError(
            f"Column '{stats_unit_type_column}' contains no values. "
            "Please provide 'explicit_unit_type' parameter to specify the unit type."
        )

    # All rows must agree on one unit type, otherwise thresholds are ambiguous.
    if len(unit_types) > 1:
        raise ValueError(
            f"Mixed unit types in dataframe: {unit_types}. All rows must use the same unit type."
        )

    unit_type = unit_types[0]

    # The single detected value still has to be one of the supported units.
    if unit_type not in valid_unit_types:
        raise ValueError(
            f"Unrecognized unit type: {unit_type}. Must be 'ha' or 'percent'."
        )

    return unit_type
73
+
74
+
75
+ # Update whisp_risk to accept and pass the unit_type parameter
20
76
  def whisp_risk(
21
77
  df: data_lookup_type, # CHECK THIS
22
78
  ind_1_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
23
79
  ind_2_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
24
80
  ind_3_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
25
81
  ind_4_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
26
- ind_5_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
27
- ind_6_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
28
- ind_7_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
29
- ind_8_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
30
- ind_9_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
31
- ind_10_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
32
- ind_11_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
82
+ ind_5_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
83
+ ind_6_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
84
+ ind_7_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
85
+ ind_8_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
86
+ ind_9_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
87
+ ind_10_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
88
+ ind_11_pcent_threshold: float = 10, # default values (draft decision tree and parameters)
33
89
  ind_1_input_columns: pd.Series = None, # see lookup_gee_datasets for details
34
90
  ind_2_input_columns: pd.Series = None, # see lookup_gee_datasets for details
35
91
  ind_3_input_columns: pd.Series = None, # see lookup_gee_datasets for details
36
92
  ind_4_input_columns: pd.Series = None, # see lookup_gee_datasets for details
37
93
  ind_5_input_columns: pd.Series = None, # see lookup_gee_datasets for details
38
- ind_6_input_columns: pd.Series = None, # see lookup_gee_datasets for details
94
+ ind_6_input_columns: pd.Series = None, # see lookup_gee_datasets for details
39
95
  ind_7_input_columns: pd.Series = None, # see lookup_gee_datasets for details
40
96
  ind_8_input_columns: pd.Series = None, # see lookup_gee_datasets for details
41
97
  ind_9_input_columns: pd.Series = None, # see lookup_gee_datasets for details
@@ -46,14 +102,16 @@ def whisp_risk(
46
102
  ind_3_name: str = "Ind_03_disturbance_before_2020",
47
103
  ind_4_name: str = "Ind_04_disturbance_after_2020",
48
104
  ind_5_name: str = "Ind_05_primary_2020",
49
- ind_6_name: str ="Ind_06_nat_reg_forest_2020",
50
- ind_7_name: str ="Ind_07_planted_plantations_2020",
51
- ind_8_name: str ="Ind_08_planted_plantations_post_2020",
52
- ind_9_name: str ="Ind_09_treecover_post_2020",
53
- ind_10_name: str ="Ind_10_agri_post_2020",
54
- ind_11_name: str ="Ind_11_logging_concession",
105
+ ind_6_name: str = "Ind_06_nat_reg_forest_2020",
106
+ ind_7_name: str = "Ind_07_planted_plantations_2020",
107
+ ind_8_name: str = "Ind_08_planted_plantations_after_2020",
108
+ ind_9_name: str = "Ind_09_treecover_after_2020",
109
+ ind_10_name: str = "Ind_10_agri_after_2020",
110
+ ind_11_name: str = "Ind_11_logging_concession_before_2020",
55
111
  low_name: str = "no",
56
112
  high_name: str = "yes",
113
+ explicit_unit_type: str = None,
114
+ national_codes: list[str] = None, # List of ISO2 country codes to filter by
57
115
  ) -> data_lookup_type:
58
116
  """
59
117
  Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
@@ -74,34 +132,69 @@ def whisp_risk(
74
132
  ind_4_name (str, optional): Name of the fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
75
133
  low_name (str, optional): Value shown in table if less than or equal to the threshold. Defaults to "no".
76
134
  high_name (str, optional): Value shown in table if more than the threshold. Defaults to "yes".
135
+ explicit_unit_type (str, optional): Override the autodetected unit type ('ha' or 'percent').
136
+ If not provided, will detect from dataframe 'unit' column.
77
137
 
78
138
  Returns:
79
139
  data_lookup_type: DataFrame with added 'EUDR_risk' column.
80
140
  """
141
+ # Determine the unit type to use based on input data and override
142
+ unit_type = detect_unit_type(df, explicit_unit_type)
81
143
 
144
+ print(f"Using unit type: {unit_type}")
145
+
146
+ lookup_df_copy = lookup_gee_datasets_df.copy()
147
+
148
+ # filter by national codes (even if None - this removes all country columns unless specified)
149
+ filtered_lookup_gee_datasets_df = filter_lookup_by_country_codes(
150
+ lookup_df=lookup_df_copy,
151
+ filter_col="ISO2_code",
152
+ national_codes=national_codes,
153
+ )
154
+
155
+ # Rest of the function remains the same, but pass unit_type to add_indicators
82
156
  if ind_1_input_columns is None:
83
- ind_1_input_columns = get_cols_ind_01_treecover(lookup_gee_datasets_df)
157
+ ind_1_input_columns = get_cols_ind_01_treecover(filtered_lookup_gee_datasets_df)
84
158
  if ind_2_input_columns is None:
85
- ind_2_input_columns = get_cols_ind_02_commodities(lookup_gee_datasets_df)
159
+ ind_2_input_columns = get_cols_ind_02_commodities(
160
+ filtered_lookup_gee_datasets_df
161
+ )
86
162
  if ind_3_input_columns is None:
87
- ind_3_input_columns = get_cols_ind_03_dist_before_2020(lookup_gee_datasets_df)
163
+ ind_3_input_columns = get_cols_ind_03_dist_before_2020(
164
+ filtered_lookup_gee_datasets_df
165
+ )
88
166
  if ind_4_input_columns is None:
89
- ind_4_input_columns = get_cols_ind_04_dist_after_2020(lookup_gee_datasets_df)
167
+ ind_4_input_columns = get_cols_ind_04_dist_after_2020(
168
+ filtered_lookup_gee_datasets_df
169
+ )
90
170
  if ind_5_input_columns is None:
91
- ind_5_input_columns = get_cols_ind_05_primary_2020(lookup_gee_datasets_df)
171
+ ind_5_input_columns = get_cols_ind_05_primary_2020(
172
+ filtered_lookup_gee_datasets_df
173
+ )
92
174
  if ind_6_input_columns is None:
93
- ind_6_input_columns = get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df)
175
+ ind_6_input_columns = get_cols_ind_06_nat_reg_2020(
176
+ filtered_lookup_gee_datasets_df
177
+ )
94
178
  if ind_7_input_columns is None:
95
- ind_7_input_columns = get_cols_ind_07_planted_2020(lookup_gee_datasets_df)
179
+ ind_7_input_columns = get_cols_ind_07_planted_2020(
180
+ filtered_lookup_gee_datasets_df
181
+ )
96
182
  if ind_8_input_columns is None:
97
- ind_8_input_columns = get_cols_ind_08_planted_post_2020(lookup_gee_datasets_df)
183
+ ind_8_input_columns = get_cols_ind_08_planted_after_2020(
184
+ filtered_lookup_gee_datasets_df
185
+ )
98
186
  if ind_9_input_columns is None:
99
- ind_9_input_columns = get_cols_ind_09_treecover_post_2020(lookup_gee_datasets_df)
187
+ ind_9_input_columns = get_cols_ind_09_treecover_after_2020(
188
+ filtered_lookup_gee_datasets_df
189
+ )
100
190
  if ind_10_input_columns is None:
101
- ind_10_input_columns = get_cols_ind_10_agri_post_2020(lookup_gee_datasets_df)
191
+ ind_10_input_columns = get_cols_ind_10_agri_after_2020(
192
+ filtered_lookup_gee_datasets_df
193
+ )
102
194
  if ind_11_input_columns is None:
103
- ind_11_input_columns = get_cols_ind_11_logging(lookup_gee_datasets_df)
104
-
195
+ ind_11_input_columns = get_cols_ind_11_logging_before_2020(
196
+ filtered_lookup_gee_datasets_df
197
+ )
105
198
 
106
199
  # Check range of values
107
200
  check_range(ind_1_pcent_threshold)
@@ -115,7 +208,7 @@ def whisp_risk(
115
208
  check_range(ind_9_pcent_threshold)
116
209
  check_range(ind_10_pcent_threshold)
117
210
  check_range(ind_11_pcent_threshold)
118
-
211
+
119
212
  input_cols = [
120
213
  ind_1_input_columns,
121
214
  ind_2_input_columns,
@@ -127,7 +220,7 @@ def whisp_risk(
127
220
  ind_8_input_columns,
128
221
  ind_9_input_columns,
129
222
  ind_10_input_columns,
130
- ind_11_input_columns,
223
+ ind_11_input_columns,
131
224
  ]
132
225
  thresholds = [
133
226
  ind_1_pcent_threshold,
@@ -137,12 +230,24 @@ def whisp_risk(
137
230
  ind_5_pcent_threshold,
138
231
  ind_6_pcent_threshold,
139
232
  ind_7_pcent_threshold,
140
- ind_8_pcent_threshold,
233
+ ind_8_pcent_threshold,
141
234
  ind_9_pcent_threshold,
142
235
  ind_10_pcent_threshold,
143
236
  ind_11_pcent_threshold,
144
237
  ]
145
- names = [ind_1_name, ind_2_name, ind_3_name, ind_4_name,ind_5_name,ind_6_name,ind_7_name,ind_8_name,ind_9_name,ind_10_name,ind_11_name]
238
+ names = [
239
+ ind_1_name,
240
+ ind_2_name,
241
+ ind_3_name,
242
+ ind_4_name,
243
+ ind_5_name,
244
+ ind_6_name,
245
+ ind_7_name,
246
+ ind_8_name,
247
+ ind_9_name,
248
+ ind_10_name,
249
+ ind_11_name,
250
+ ]
146
251
  [check_range(threshold) for threshold in thresholds]
147
252
 
148
253
  df_w_indicators = add_indicators(
@@ -152,6 +257,7 @@ def whisp_risk(
152
257
  names,
153
258
  low_name,
154
259
  high_name,
260
+ unit_type, # Pass the unit type
155
261
  )
156
262
 
157
263
  df_w_indicators_and_risk_pcrop = add_eudr_risk_pcrop_col(
@@ -169,20 +275,20 @@ def whisp_risk(
169
275
  ind_3_name=ind_3_name,
170
276
  ind_4_name=ind_4_name,
171
277
  )
172
-
278
+
173
279
  df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
174
280
  df=df_w_indicators,
175
- ind_1_name=ind_1_name,
176
- ind_2_name=ind_2_name,
177
- ind_3_name=ind_3_name,
281
+ ind_1_name=ind_1_name,
282
+ ind_2_name=ind_2_name,
283
+ ind_3_name=ind_3_name,
178
284
  ind_4_name=ind_4_name,
179
- ind_5_name=ind_5_name,
180
- ind_6_name=ind_6_name,
181
- ind_7_name=ind_7_name,
285
+ ind_5_name=ind_5_name,
286
+ ind_6_name=ind_6_name,
287
+ ind_7_name=ind_7_name,
182
288
  ind_8_name=ind_8_name,
183
289
  ind_9_name=ind_9_name,
184
290
  ind_10_name=ind_10_name,
185
- ind_11_name=ind_11_name
291
+ ind_11_name=ind_11_name,
186
292
  )
187
293
 
188
294
  return df_w_indicators_and_risk_timber
@@ -252,16 +358,17 @@ def add_eudr_risk_acrop_col(
252
358
  for index, row in df.iterrows():
253
359
  # If there is no tree cover in 2020, set EUDR_risk_soy to "low"
254
360
  if row[ind_1_name] == "no" or row[ind_2_name] == "yes":
255
- df.at[index, 'risk_acrop'] = "low"
361
+ df.at[index, "risk_acrop"] = "low"
256
362
  # If there is tree cover in 2020 and disturbances post 2020, set EUDR_risk_soy to "high"
257
363
  elif row[ind_1_name] == "yes" and row[ind_4_name] == "yes":
258
- df.at[index, 'risk_acrop'] = "high"
364
+ df.at[index, "risk_acrop"] = "high"
259
365
  # If tree cover and no disturbances post 2020, set EUDR_risk to "more_info_needed"
260
- else :
261
- df.at[index, 'risk_acrop'] = "more_info_needed"
366
+ else:
367
+ df.at[index, "risk_acrop"] = "more_info_needed"
262
368
 
263
369
  return df
264
-
370
+
371
+
265
372
  def add_eudr_risk_timber_col(
266
373
  df: data_lookup_type,
267
374
  ind_1_name: str,
@@ -275,7 +382,7 @@ def add_eudr_risk_timber_col(
275
382
  ind_9_name: str,
276
383
  ind_10_name: str,
277
384
  ind_11_name: str,
278
- )-> data_lookup_type:
385
+ ) -> data_lookup_type:
279
386
  """
280
387
  Adds the EUDR (European Union Deforestation Risk) column to the DataFrame based on indicator values.
281
388
 
@@ -289,36 +396,47 @@ def add_eudr_risk_timber_col(
289
396
  Returns:
290
397
  DataFrame: DataFrame with added 'EUDR_risk' column.
291
398
  """
292
-
399
+
293
400
  for index, row in df.iterrows():
294
401
  # If there is a commodity in 2020 OR if there is planted-plantation in 2020 AND no agriculture in 2023, set EUDR_risk_degrad to "low"
295
- if row[ind_2_name] == "yes" or (row[ind_7_name] == "yes" and row[ind_10_name] == "no"):
296
- df.at[index, 'risk_timber'] = "low"
402
+ if row[ind_2_name] == "yes" or (
403
+ row[ind_7_name] == "yes" and row[ind_10_name] == "no"
404
+ ):
405
+ df.at[index, "risk_timber"] = "low"
297
406
  # If there is no tree cover, set EUDR_risk_degrad to "low"? no because of unstocked forests
298
407
  # if row[ind_1_name] == "no" or row[ind_3_name] == "yes" or row[ind_7_name] == "yes":
299
408
  # df.at[index, 'EUDR_risk_degrad'] = "low"
300
- # If primary or naturally regenerating or planted forest in 2020 AND agricultural use in 2023, set EUDR_risk to high
301
- elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
302
- df.at[index, 'risk_timber'] = "high"
303
- #If primary or naturally regenerating AND planted post 2020, set EUDR_risk to "high"
304
- elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[ind_8_name] == "yes":
305
- df.at[index, 'risk_timber'] = "high"
409
+ # If primary or naturally regenerating or planted forest in 2020 AND agricultural use in 2023, set EUDR_risk to high
410
+ elif (
411
+ row[ind_5_name] == "yes"
412
+ or row[ind_6_name] == "yes"
413
+ or row[ind_7_name] == "yes"
414
+ ) and row[ind_10_name] == "yes":
415
+ df.at[index, "risk_timber"] = "high"
416
+ # If primary or naturally regenerating AND planted post 2020, set EUDR_risk to "high"
417
+ elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
418
+ ind_8_name
419
+ ] == "yes":
420
+ df.at[index, "risk_timber"] = "high"
306
421
  # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
307
- #elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
308
- # df.at[index, 'EUDR_risk_timber'] = "high"
309
-
422
+ # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
423
+ # df.at[index, 'EUDR_risk_timber'] = "high"
424
+
310
425
  # If primary forest OR naturally regenerating AND an information on management practice OR tree cover post 2020, set EUDR_risk_degrad to "low"
311
- elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (row[ind_9_name] == "yes" or row[ind_11_name] == "yes"):
312
- df.at[index, 'risk_timber'] = "low"
426
+ elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
427
+ row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
428
+ ):
429
+ df.at[index, "risk_timber"] = "low"
313
430
  # If primary or naturally regenerating and no other info, set EUDR_risk to "more_info_needed"
314
- elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes" :
315
- df.at[index, 'risk_timber'] = "more_info_needed"
431
+ elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
432
+ df.at[index, "risk_timber"] = "more_info_needed"
316
433
  # If none of the above conditions are met, set EUDR_risk to "high"
317
434
  else:
318
- df.at[index, 'risk_timber'] = "high"
319
-
435
+ df.at[index, "risk_timber"] = "high"
436
+
320
437
  return df
321
-
438
+
439
+
322
440
  def add_indicators(
323
441
  df: data_lookup_type,
324
442
  input_cols: list[str],
@@ -326,6 +444,7 @@ def add_indicators(
326
444
  names: list[str],
327
445
  low_name: str = "no",
328
446
  high_name: str = "yes",
447
+ unit_type: str = None,
329
448
  ) -> data_lookup_type:
330
449
  for input_col, threshold, name in zip(input_cols, thresholds, names):
331
450
  df = add_indicator_column(
@@ -335,19 +454,22 @@ def add_indicators(
335
454
  new_column_name=name,
336
455
  low_name=low_name,
337
456
  high_name=high_name,
457
+ sum_comparison=False,
458
+ unit_type=unit_type, # Pass the unit type
338
459
  )
339
-
340
460
  return df
341
461
 
342
462
 
463
+ # Update add_indicator_column to use the unit_type parameter
343
464
  def add_indicator_column(
344
465
  df: data_lookup_type,
345
466
  input_columns: list[str],
346
467
  threshold: float,
347
468
  new_column_name: str,
348
- low_name: str = "yes",
349
- high_name: str = "no",
469
+ low_name: str = "no",
470
+ high_name: str = "yes",
350
471
  sum_comparison: bool = False,
472
+ unit_type: str = None, # unit_type parameter
351
473
  ) -> data_lookup_type:
352
474
  """
353
475
  Add a new column to the DataFrame based on the specified columns, threshold, and comparison sign.
@@ -362,6 +484,7 @@ def add_indicator_column(
362
484
  low_name (str): The name for the value when below or equal to threshold (default is 'no').
363
485
  high_name (str): The name for the value when above threshold (default is 'yes').
364
486
  sum_comparison (bool): If True, sum all values in input_columns and compare to threshold (default is False).
487
+ unit_type (str): Whether values are in "ha" or "percent".
365
488
 
366
489
  Returns:
367
490
  data_lookup_type: The DataFrame with the new column added.
@@ -379,7 +502,10 @@ def add_indicator_column(
379
502
  for col in input_columns:
380
503
  # So that the threshold is always in percent: if outputs are in ha, the code converts them to percent (by dividing by the geometry_area_column column).
381
504
  # Clamping is needed due to differences in decimal places (meaning input values may go just over 100)
382
- if percent_or_ha == "ha":
505
+ if unit_type == "ha":
506
+ df[geometry_area_column] = pd.to_numeric(
507
+ df[geometry_area_column], errors="coerce"
508
+ )
383
509
  val_to_check = clamp(
384
510
  ((df[col] / df[geometry_area_column]) * 100), 0, 100
385
511
  )
@@ -475,6 +601,7 @@ def get_cols_ind_04_dist_after_2020(lookup_gee_datasets_df):
475
601
  ]
476
602
  )
477
603
 
604
+
478
605
  def get_cols_ind_05_primary_2020(lookup_gee_datasets_df):
479
606
  """
480
607
  Generate a list of dataset names for primary forests in 2020
@@ -488,10 +615,13 @@ def get_cols_ind_05_primary_2020(lookup_gee_datasets_df):
488
615
  lookup_gee_datasets_df = lookup_gee_datasets_df[
489
616
  lookup_gee_datasets_df["exclude_from_output"] != 1
490
617
  ]
491
- return list(lookup_gee_datasets_df["name"][
492
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
493
- (lookup_gee_datasets_df["theme_timber"] == "primary")
494
- ])
618
+ return list(
619
+ lookup_gee_datasets_df["name"][
620
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
621
+ & (lookup_gee_datasets_df["theme_timber"] == "primary")
622
+ ]
623
+ )
624
+
495
625
 
496
626
  def get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df):
497
627
  """
@@ -506,10 +636,13 @@ def get_cols_ind_06_nat_reg_2020(lookup_gee_datasets_df):
506
636
  lookup_gee_datasets_df = lookup_gee_datasets_df[
507
637
  lookup_gee_datasets_df["exclude_from_output"] != 1
508
638
  ]
509
- return list(lookup_gee_datasets_df["name"][
510
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
511
- (lookup_gee_datasets_df["theme_timber"] == "naturally_reg_2020")
512
- ])
639
+ return list(
640
+ lookup_gee_datasets_df["name"][
641
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
642
+ & (lookup_gee_datasets_df["theme_timber"] == "naturally_reg_2020")
643
+ ]
644
+ )
645
+
513
646
 
514
647
  def get_cols_ind_07_planted_2020(lookup_gee_datasets_df):
515
648
  """
@@ -524,11 +657,15 @@ def get_cols_ind_07_planted_2020(lookup_gee_datasets_df):
524
657
  lookup_gee_datasets_df = lookup_gee_datasets_df[
525
658
  lookup_gee_datasets_df["exclude_from_output"] != 1
526
659
  ]
527
- return list(lookup_gee_datasets_df["name"][
528
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
529
- (lookup_gee_datasets_df["theme_timber"] == "planted_plantation_2020")
530
- ])
531
- def get_cols_ind_08_planted_post_2020(lookup_gee_datasets_df):
660
+ return list(
661
+ lookup_gee_datasets_df["name"][
662
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
663
+ & (lookup_gee_datasets_df["theme_timber"] == "planted_plantation_2020")
664
+ ]
665
+ )
666
+
667
+
668
+ def get_cols_ind_08_planted_after_2020(lookup_gee_datasets_df):
532
669
  """
533
670
  Generate a list of dataset names for planted and plantation forests post 2020
534
671
 
@@ -541,11 +678,18 @@ def get_cols_ind_08_planted_post_2020(lookup_gee_datasets_df):
541
678
  lookup_gee_datasets_df = lookup_gee_datasets_df[
542
679
  lookup_gee_datasets_df["exclude_from_output"] != 1
543
680
  ]
544
- return list(lookup_gee_datasets_df["name"][
545
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
546
- (lookup_gee_datasets_df["theme_timber"] == "planted_plantation_post_2020")
547
- ])
548
- def get_cols_ind_09_treecover_post_2020(lookup_gee_datasets_df):
681
+ return list(
682
+ lookup_gee_datasets_df["name"][
683
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
684
+ & (
685
+ lookup_gee_datasets_df["theme_timber"]
686
+ == "planted_plantation_after_2020"
687
+ )
688
+ ]
689
+ )
690
+
691
+
692
+ def get_cols_ind_09_treecover_after_2020(lookup_gee_datasets_df):
549
693
  """
550
694
  Generate a list of dataset names for treecover post 2020
551
695
 
@@ -558,12 +702,15 @@ def get_cols_ind_09_treecover_post_2020(lookup_gee_datasets_df):
558
702
  lookup_gee_datasets_df = lookup_gee_datasets_df[
559
703
  lookup_gee_datasets_df["exclude_from_output"] != 1
560
704
  ]
561
- return list(lookup_gee_datasets_df["name"][
562
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
563
- (lookup_gee_datasets_df["theme_timber"] == "treecover_post2020")
564
- ])
705
+ return list(
706
+ lookup_gee_datasets_df["name"][
707
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
708
+ & (lookup_gee_datasets_df["theme_timber"] == "treecover_post2020")
709
+ ]
710
+ )
711
+
565
712
 
566
- def get_cols_ind_10_agri_post_2020(lookup_gee_datasets_df):
713
+ def get_cols_ind_10_agri_after_2020(lookup_gee_datasets_df):
567
714
  """
568
715
  Generate a list of dataset names for croplands post 2020
569
716
 
@@ -576,12 +723,15 @@ def get_cols_ind_10_agri_post_2020(lookup_gee_datasets_df):
576
723
  lookup_gee_datasets_df = lookup_gee_datasets_df[
577
724
  lookup_gee_datasets_df["exclude_from_output"] != 1
578
725
  ]
579
- return list(lookup_gee_datasets_df["name"][
580
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
581
- (lookup_gee_datasets_df["theme_timber"] == "agri_post_2020")
582
- ])
726
+ return list(
727
+ lookup_gee_datasets_df["name"][
728
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
729
+ & (lookup_gee_datasets_df["theme_timber"] == "agri_after_2020")
730
+ ]
731
+ )
732
+
583
733
 
584
- def get_cols_ind_11_logging(lookup_gee_datasets_df):
734
+ def get_cols_ind_11_logging_before_2020(lookup_gee_datasets_df):
585
735
  """
586
736
  Generate a list of dataset names for logging concessions (2020 if available)
587
737
 
@@ -594,11 +744,14 @@ def get_cols_ind_11_logging(lookup_gee_datasets_df):
594
744
  lookup_gee_datasets_df = lookup_gee_datasets_df[
595
745
  lookup_gee_datasets_df["exclude_from_output"] != 1
596
746
  ]
597
- return list(lookup_gee_datasets_df["name"][
598
- (lookup_gee_datasets_df["use_for_risk_timber"] == 1) &
599
- (lookup_gee_datasets_df["theme_timber"] == "logging_concession")
600
- ])
601
-
747
+ return list(
748
+ lookup_gee_datasets_df["name"][
749
+ (lookup_gee_datasets_df["use_for_risk_timber"] == 1)
750
+ & (lookup_gee_datasets_df["theme_timber"] == "logging_concession")
751
+ ]
752
+ )
753
+
754
+
602
755
  def clamp(
603
756
  value: float | pd.Series, min_val: float, max_val: float
604
757
  ) -> float | pd.Series:
@@ -621,4 +774,4 @@ def clamp(
621
774
 
622
775
  def check_range(value: float) -> None:
623
776
  if not (0 <= value <= 100):
624
- raise ValueError("Value must be between 0 and 100.")
777
+ raise ValueError("Value must be between 0 and 100.")