openforis-whisp 2.0.0b2__py3-none-any.whl → 3.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,8 +7,10 @@
7
7
  # b) a prefix of "nXX_" if it is national/sub-national dataset (where XX is replaced by that country code), or a prefix of 'g_' if it covers more than one country.
8
8
  # c) a name for your image, defined by ".rename('add_your_image_name_here')". This becomes the column header in the output table.
9
9
 
10
- # NB for all the above you will need to be running the package in editable mode for these local changes to take effect.
11
- # Editable mode runs the package locally and thus changes to any files are reflected immediately.
10
+ # Tips:
11
+ # -Avoid getInfo() and for loops to speed up processing by keeping everything in the Earth Engine API.
12
+ # -For all the above you will need to be running the package in editable mode for these local changes to take effect.
13
+ # Editable mode runs the package locally and thus changes to any files are reflected immediately.
12
14
 
13
15
  import ee
14
16
 
@@ -24,6 +26,11 @@ from datetime import datetime
24
26
  # defining here instead of importing from config_runtime, to allow functioning as more of a standalone script
25
27
  geometry_area_column = "Area"
26
28
 
29
+ # Calculate current year once at module load time (not in functions)
30
+ # This avoids repeated datetime calls and potential .getInfo() calls
31
+ CURRENT_YEAR = datetime.now().year
32
+ CURRENT_YEAR_2DIGIT = CURRENT_YEAR % 100 # Last two digits for RADD datasets
33
+
27
34
  import inspect
28
35
 
29
36
  import logging
@@ -49,13 +56,13 @@ def g_esa_worldcover_trees_prep():
49
56
  esa_worldcover_trees_2020 = esa_worldcover_2020_raw.eq(95).Or(
50
57
  esa_worldcover_2020_raw.eq(10)
51
58
  ) # get trees and mangroves
52
- return esa_worldcover_trees_2020.rename("ESA_TC_2020")
59
+ return esa_worldcover_trees_2020.rename("ESA_TC_2020").selfMask()
53
60
 
54
61
 
55
62
  # EUFO_2020
56
63
  def g_jrc_gfc_2020_prep():
57
64
  jrc_gfc2020_raw = ee.ImageCollection("JRC/GFC2020/V2")
58
- return jrc_gfc2020_raw.mosaic().rename("EUFO_2020")
65
+ return jrc_gfc2020_raw.mosaic().rename("EUFO_2020").selfMask()
59
66
 
60
67
 
61
68
  # GFC_TC_2020
@@ -64,7 +71,7 @@ def g_glad_gfc_10pc_prep():
64
71
  gfc_treecover2000 = gfc.select(["treecover2000"])
65
72
  gfc_loss2001_2020 = gfc.select(["lossyear"]).lte(20)
66
73
  gfc_treecover2020 = gfc_treecover2000.where(gfc_loss2001_2020.eq(1), 0)
67
- return gfc_treecover2020.gt(10).rename("GFC_TC_2020")
74
+ return gfc_treecover2020.gt(10).rename("GFC_TC_2020").selfMask()
68
75
 
69
76
 
70
77
  # GLAD_Primary
@@ -77,8 +84,10 @@ def g_glad_pht_prep():
77
84
  )
78
85
  gfc = ee.Image("UMD/hansen/global_forest_change_2024_v1_12")
79
86
  gfc_loss2001_2020 = gfc.select(["lossyear"]).lte(20)
80
- return primary_ht_forests2001.where(gfc_loss2001_2020.eq(1), 0).rename(
81
- "GLAD_Primary"
87
+ return (
88
+ primary_ht_forests2001.where(gfc_loss2001_2020.eq(1), 0)
89
+ .rename("GLAD_Primary")
90
+ .selfMask()
82
91
  )
83
92
 
84
93
 
@@ -90,7 +99,7 @@ def g_jrc_tmf_undisturbed_prep():
90
99
  .mosaic()
91
100
  .eq(1)
92
101
  ) # update from https://github.com/forestdatapartnership/whisp/issues/42
93
- return TMF_undist_2020.rename("TMF_undist")
102
+ return TMF_undist_2020.rename("TMF_undist").selfMask()
94
103
 
95
104
 
96
105
  # Forest Persistence FDaP
@@ -99,7 +108,7 @@ def g_fdap_forest_prep():
99
108
  "projects/forestdatapartnership/assets/community_forests/ForestPersistence_2020"
100
109
  )
101
110
  fdap_forest = fdap_forest_raw.gt(0.75)
102
- return fdap_forest.rename("Forest_FDaP")
111
+ return fdap_forest.rename("Forest_FDaP").selfMask()
103
112
 
104
113
 
105
114
  #########################primary forest
@@ -107,27 +116,27 @@ def g_fdap_forest_prep():
107
116
  def g_gft_primary_prep():
108
117
  gft_raw = ee.ImageCollection("JRC/GFC2020_subtypes/V0").mosaic()
109
118
  gft_primary = gft_raw.eq(10)
110
- return gft_primary.rename("GFT_primary")
119
+ return gft_primary.rename("GFT_primary").selfMask()
111
120
 
112
121
 
113
122
  # Intact Forest Landscape 2020
114
123
  def g_ifl_2020_prep():
115
124
  IFL_2020 = ee.Image("users/potapovpeter/IFL_2020")
116
- return IFL_2020.rename("IFL_2020")
125
+ return IFL_2020.rename("IFL_2020").selfMask()
117
126
 
118
127
 
119
128
  # European Primary Forest Dataset
120
129
  def g_epfd_prep():
121
130
  EPFD = ee.FeatureCollection("HU_BERLIN/EPFD/V2/polygons")
122
131
  EPFD_binary = ee.Image().paint(EPFD, 1)
123
- return EPFD_binary.rename("European_Primary_Forest")
132
+ return EPFD_binary.rename("European_Primary_Forest").selfMask()
124
133
 
125
134
 
126
135
  # EUFO JRC Global forest type - naturally regenerating planted/plantation forests
127
136
  def g_gft_nat_reg_prep():
128
137
  gft_raw = ee.ImageCollection("JRC/GFC2020_subtypes/V0").mosaic()
129
138
  gft_nat_reg = gft_raw.eq(1)
130
- return gft_nat_reg.rename("GFT_naturally_regenerating")
139
+ return gft_nat_reg.rename("GFT_naturally_regenerating").selfMask()
131
140
 
132
141
 
133
142
  #########################planted and plantation forests
@@ -136,13 +145,13 @@ def g_gft_nat_reg_prep():
136
145
  def g_gft_plantation_prep():
137
146
  gft_raw = ee.ImageCollection("JRC/GFC2020_subtypes/V0").mosaic()
138
147
  gft_plantation = gft_raw.eq(20)
139
- return gft_plantation.rename("GFT_planted_plantation")
148
+ return gft_plantation.rename("GFT_planted_plantation").selfMask()
140
149
 
141
150
 
142
151
  def g_iiasa_planted_prep():
143
152
  iiasa = ee.Image("projects/sat-io/open-datasets/GFM/FML_v3-2")
144
153
  iiasa_PL = iiasa.eq(31).Or(iiasa.eq(32))
145
- return iiasa_PL.rename("IIASA_planted_plantation")
154
+ return iiasa_PL.rename("IIASA_planted_plantation").selfMask()
146
155
 
147
156
 
148
157
  #########################TMF regrowth in 2023
@@ -151,7 +160,7 @@ def g_tmf_regrowth_prep():
151
160
  TMF_AC = ee.ImageCollection("projects/JRC/TMF/v1_2024/AnnualChanges").mosaic()
152
161
  TMF_AC_2023 = TMF_AC.select("Dec2023")
153
162
  Regrowth_TMF = TMF_AC_2023.eq(4)
154
- return Regrowth_TMF.rename("TMF_regrowth_2023")
163
+ return Regrowth_TMF.rename("TMF_regrowth_2023").selfMask()
155
164
 
156
165
 
157
166
  ############tree crops
@@ -168,7 +177,7 @@ def g_jrc_tmf_plantation_prep():
168
177
  plantation_2020 = plantation.where(
169
178
  deforestation_year.gte(2021), 0
170
179
  ) # update from https://github.com/forestdatapartnership/whisp/issues/42
171
- return plantation_2020.rename("TMF_plant")
180
+ return plantation_2020.rename("TMF_plant").selfMask()
172
181
 
173
182
 
174
183
  # # Oil_palm_Descals
@@ -181,20 +190,22 @@ def g_creaf_descals_palm_prep():
181
190
  )
182
191
  .mosaic()
183
192
  .select("minNBR_date")
184
- )
193
+ ).selfMask()
185
194
 
186
195
  # Calculate the year of plantation and select all below and including 2020
187
196
  oil_palm_plantation_year = img.divide(365).add(1970).floor().lte(2020)
188
197
 
189
198
  # Create a mask for plantations in the year 2020 or earlier
190
- plantation_2020 = oil_palm_plantation_year.lte(2020).selfMask()
191
- return plantation_2020.rename("Oil_palm_Descals")
199
+ plantation_2020 = oil_palm_plantation_year.lte(2020)
200
+ return plantation_2020.rename("Oil_palm_Descals").selfMask()
192
201
 
193
202
 
194
203
  # Cocoa_ETH
195
204
  def g_eth_kalischek_cocoa_prep():
196
- return ee.Image("projects/ee-nk-cocoa/assets/cocoa_map_threshold_065").rename(
197
- "Cocoa_ETH"
205
+ return (
206
+ ee.Image("projects/ee-nk-cocoa/assets/cocoa_map_threshold_065")
207
+ .rename("Cocoa_ETH")
208
+ .selfMask()
198
209
  )
199
210
 
200
211
 
@@ -212,7 +223,7 @@ def g_fdap_palm_prep():
212
223
  .mosaic()
213
224
  .gt(0.88) # Precision and recall ~78% at 0.88 threshold.
214
225
  )
215
- return fdap_palm.rename("Oil_palm_FDaP")
226
+ return fdap_palm.rename("Oil_palm_FDaP").selfMask()
216
227
 
217
228
 
218
229
  def g_fdap_palm_2023_prep():
@@ -224,7 +235,7 @@ def g_fdap_palm_2023_prep():
224
235
  .mosaic()
225
236
  .gt(0.88) # Precision and recall ~78% at 0.88 threshold.
226
237
  )
227
- return fdap_palm.rename("Oil_palm_2023_FDaP")
238
+ return fdap_palm.rename("Oil_palm_2023_FDaP").selfMask()
228
239
 
229
240
 
230
241
  # Cocoa FDaP
@@ -237,7 +248,7 @@ def g_fdap_cocoa_prep():
237
248
  .mosaic()
238
249
  .gt(0.96) # Precision and recall ~87% 0.96 threshold.
239
250
  )
240
- return fdap_cocoa.rename("Cocoa_FDaP")
251
+ return fdap_cocoa.rename("Cocoa_FDaP").selfMask()
241
252
 
242
253
 
243
254
  def g_fdap_cocoa_2023_prep():
@@ -249,7 +260,7 @@ def g_fdap_cocoa_2023_prep():
249
260
  .mosaic()
250
261
  .gt(0.96) # Precision and recall ~87% 0.96 threshold.
251
262
  )
252
- return fdap_cocoa.rename("Cocoa_2023_FDaP")
263
+ return fdap_cocoa.rename("Cocoa_2023_FDaP").selfMask()
253
264
 
254
265
 
255
266
  # Rubber FDaP
@@ -262,7 +273,7 @@ def g_fdap_rubber_prep():
262
273
  .mosaic()
263
274
  .gt(0.59) # Precision and recall ~80% 0.59 threshold.
264
275
  )
265
- return fdap_rubber.rename("Rubber_FDaP")
276
+ return fdap_rubber.rename("Rubber_FDaP").selfMask()
266
277
 
267
278
 
268
279
  def g_fdap_rubber_2023_prep():
@@ -274,7 +285,7 @@ def g_fdap_rubber_2023_prep():
274
285
  .mosaic()
275
286
  .gt(0.59) # Threshold for Rubber
276
287
  )
277
- return fdap_rubber.rename("Rubber_2023_FDaP")
288
+ return fdap_rubber.rename("Rubber_2023_FDaP").selfMask()
278
289
 
279
290
 
280
291
  # # Coffee FDaP
@@ -291,7 +302,7 @@ def g_fdap_coffee_2020_prep():
291
302
  .gt(0.99) # Precision and recall ~54% 0.99 threshold.
292
303
  )
293
304
 
294
- return coffee_2020.rename("Coffee_FDaP")
305
+ return coffee_2020.rename("Coffee_FDaP").selfMask()
295
306
 
296
307
 
297
308
  def g_fdap_coffee_2023_prep():
@@ -306,7 +317,7 @@ def g_fdap_coffee_2023_prep():
306
317
  .mosaic()
307
318
  .gt(0.99) # Precision and recall ~54% 0.99 threshold.
308
319
  )
309
- return coffee_2023.rename("Coffee_FDaP_2023")
320
+ return coffee_2023.rename("Coffee_FDaP_2023").selfMask()
310
321
 
311
322
 
312
323
  # Rubber_RBGE - from Royal Botanical Gardens of Edinburgh (RBGE) NB for 2021
@@ -315,14 +326,16 @@ def g_rbge_rubber_prep():
315
326
  ee.Image(
316
327
  "users/wangyxtina/MapRubberPaper/rRubber10m202122_perc1585DifESAdist5pxPF"
317
328
  )
318
- .unmask()
319
329
  .rename("Rubber_RBGE")
330
+ .selfMask()
320
331
  )
321
332
 
322
333
 
323
334
  # soy 2020 South America
324
335
  def g_soy_song_2020_prep():
325
- return ee.Image("projects/glad/soy_annual_SA/2020").unmask().rename("Soy_Song_2020")
336
+ return (
337
+ ee.Image("projects/glad/soy_annual_SA/2020").rename("Soy_Song_2020").selfMask()
338
+ )
326
339
 
327
340
 
328
341
  ##############
@@ -336,7 +349,7 @@ def g_esri_2023_tc_prep():
336
349
  esri_lulc10_TC = (
337
350
  esri_lulc10_raw.filterDate("2023-01-01", "2023-12-31").mosaic().eq(2)
338
351
  )
339
- return esri_lulc10_TC.rename("ESRI_2023_TC")
352
+ return esri_lulc10_TC.rename("ESRI_2023_TC").selfMask()
340
353
 
341
354
 
342
355
  # ESRI 2023 - Crop
@@ -353,7 +366,7 @@ def g_esri_2020_2023_crop_prep():
353
366
 
354
367
  newCrop = esri_lulc10_crop_2023.And(esri_lulc10_crop_2020.Not())
355
368
 
356
- return newCrop.rename("ESRI_crop_gain_2020_2023")
369
+ return newCrop.rename("ESRI_crop_gain_2020_2023").selfMask()
357
370
 
358
371
 
359
372
  #### disturbances by year
@@ -363,23 +376,13 @@ def g_radd_year_prep():
363
376
  from datetime import datetime
364
377
 
365
378
  radd = ee.ImageCollection("projects/radar-wur/raddalert/v1")
366
-
367
379
  radd_date = (
368
380
  radd.filterMetadata("layer", "contains", "alert").select("Date").mosaic()
369
381
  )
370
- # date of avaialbility
371
- start_year = 19 ## (starts 2019 in Africa, then 2020 for S America and Asia: https://data.globalforestwatch.org/datasets/gfw::deforestation-alerts-radd/about
372
-
373
- current_year = (
374
- datetime.now().year
375
- % 100
376
- # NB the % 100 part gets last two digits needed
377
- )
382
+ start_year = 19
383
+ current_year = datetime.now().year % 100
378
384
 
379
- img_stack = None
380
- # Generate an image based on GFC with one band of forest tree loss per year from 2001 to <current year>
381
- for year in range(start_year, current_year + 1):
382
- # gfc_loss_year = gfc.select(['lossyear']).eq(i).And(gfc.select(['treecover2000']).gt(10)) # use any definition of loss
385
+ def make_band(year, img_stack):
383
386
  start = year * 1000
384
387
  end = year * 1000 + 365
385
388
  radd_year = (
@@ -387,13 +390,39 @@ def g_radd_year_prep():
387
390
  .updateMask(radd_date.lte(end))
388
391
  .gt(0)
389
392
  .rename("RADD_year_" + "20" + str(year))
393
+ .selfMask()
394
+ )
395
+ return ee.Image(img_stack).addBands(radd_year)
396
+
397
+ years = ee.List.sequence(start_year, current_year)
398
+ first_year = ee.Number(years.get(0))
399
+ start = first_year.multiply(1000)
400
+ end = first_year.multiply(1000).add(365)
401
+ band_name = ee.String("RADD_year_").cat("20").cat(first_year.format("%02d"))
402
+ first_band = (
403
+ radd_date.updateMask(radd_date.gte(start))
404
+ .updateMask(radd_date.lte(end))
405
+ .gt(0)
406
+ .rename(band_name)
407
+ .selfMask()
408
+ )
409
+
410
+ def make_band(year, img_stack):
411
+ year_num = ee.Number(year)
412
+ start = year_num.multiply(1000)
413
+ end = year_num.multiply(1000).add(365)
414
+ band_name = ee.String("RADD_year_").cat("20").cat(year_num.format("%02d"))
415
+ radd_year = (
416
+ radd_date.updateMask(radd_date.gte(start))
417
+ .updateMask(radd_date.lte(end))
418
+ .gt(0)
419
+ .rename(band_name)
420
+ .selfMask()
390
421
  )
422
+ return ee.Image(img_stack).addBands(radd_year)
391
423
 
392
- if img_stack is None:
393
- img_stack = radd_year
394
- else:
395
- img_stack = img_stack.addBands(radd_year)
396
- return img_stack
424
+ img_stack = years.slice(1).iterate(make_band, first_band)
425
+ return ee.Image(img_stack)
397
426
 
398
427
 
399
428
  # TMF_def_2000 to TMF_def_2023
@@ -403,7 +432,9 @@ def g_tmf_def_per_year_prep():
403
432
  img_stack = None
404
433
  # Generate an image with one band of TMF deforestation per year from 2000 to 2024
405
434
  for i in range(0, 24 + 1):
406
- tmf_def_year = tmf_def.eq(2000 + i).rename("TMF_def_" + str(2000 + i))
435
+ year_num = ee.Number(2000 + i)
436
+ band_name = ee.String("TMF_def_").cat(year_num.format("%d"))
437
+ tmf_def_year = tmf_def.eq(year_num).rename(band_name).selfMask()
407
438
  if img_stack is None:
408
439
  img_stack = tmf_def_year
409
440
  else:
@@ -418,7 +449,9 @@ def g_tmf_deg_per_year_prep():
418
449
  img_stack = None
419
450
  # Generate an image with one band of TMF degradation per year from 2000 to 2024
420
451
  for i in range(0, 24 + 1):
421
- tmf_def_year = tmf_def.eq(2000 + i).rename("TMF_deg_" + str(2000 + i))
452
+ year_num = ee.Number(2000 + i)
453
+ band_name = ee.String("TMF_deg_").cat(year_num.format("%d"))
454
+ tmf_def_year = tmf_def.eq(year_num).rename(band_name).selfMask()
422
455
  if img_stack is None:
423
456
  img_stack = tmf_def_year
424
457
  else:
@@ -433,10 +466,12 @@ def g_glad_gfc_loss_per_year_prep():
433
466
  img_stack = None
434
467
  # Generate an image based on GFC with one band of forest tree loss per year from 2001 to 2024
435
468
  for i in range(1, 24 + 1):
469
+ year_num = ee.Number(2000 + i)
470
+ band_name = ee.String("GFC_loss_year_").cat(year_num.format("%d"))
436
471
  gfc_loss_year = (
437
472
  gfc.select(["lossyear"]).eq(i).And(gfc.select(["treecover2000"]).gt(10))
438
473
  )
439
- gfc_loss_year = gfc_loss_year.rename("GFC_loss_year_" + str(2000 + i))
474
+ gfc_loss_year = gfc_loss_year.rename(band_name).selfMask()
440
475
  if img_stack is None:
441
476
  img_stack = gfc_loss_year
442
477
  else:
@@ -457,6 +492,8 @@ def g_modis_fire_prep():
457
492
  img_stack = None
458
493
 
459
494
  for year in range(start_year, end_year + 1):
495
+ year_num = ee.Number(year)
496
+ band_name = ee.String("MODIS_fire_").cat(year_num.format("%d"))
460
497
  date_st = f"{year}-01-01"
461
498
  date_ed = f"{year}-12-31"
462
499
  modis_year = (
@@ -464,7 +501,8 @@ def g_modis_fire_prep():
464
501
  .mosaic()
465
502
  .select(["BurnDate"])
466
503
  .gte(0)
467
- .rename(f"MODIS_fire_{year}")
504
+ .rename(band_name)
505
+ .selfMask()
468
506
  )
469
507
  img_stack = modis_year if img_stack is None else img_stack.addBands(modis_year)
470
508
 
@@ -484,6 +522,8 @@ def g_esa_fire_prep():
484
522
  img_stack = None
485
523
 
486
524
  for year in range(start_year, end_year + 1):
525
+ year_num = ee.Number(year)
526
+ band_name = ee.String("ESA_fire_").cat(year_num.format("%d"))
487
527
  date_st = f"{year}-01-01"
488
528
  date_ed = f"{year}-12-31"
489
529
  esa_year = (
@@ -491,107 +531,18 @@ def g_esa_fire_prep():
491
531
  .mosaic()
492
532
  .select(["BurnDate"])
493
533
  .gte(0)
494
- .rename(f"ESA_fire_{year}")
534
+ .rename(band_name)
535
+ .selfMask()
495
536
  )
496
537
  img_stack = esa_year if img_stack is None else img_stack.addBands(esa_year)
497
538
 
498
539
  return img_stack
499
540
 
500
541
 
501
- # # DIST_alert_2024 to DIST_alert_< current year >
502
- # # Notes:
503
- # # 1) so far only available for 2024 onwards in GEE
504
- # # TO DO - see if gee asset for pre 2020-2024 is available from GLAD team, else download from nasa and put in Whisp assets
505
- # # 2) masked alerts (as dist alerts are for all vegetation) to JRC EUFO 2020 layer, as close to EUDR definition
506
- # # TO DO - ask opinions on if others (such as treecover data from GLAD team) should be used instead
507
-
508
-
509
- # def glad_dist_year_prep():
510
-
511
- # # Load the vegetation disturbance collections
512
-
513
- # # Vegetation disturbance status (0-8, class flag, 8-bit)
514
- # VEGDISTSTATUS = ee.ImageCollection(
515
- # "projects/glad/HLSDIST/current/VEG-DIST-STATUS"
516
- # ).mosaic()
517
- # # Initial vegetation disturbance date (>0: days since 2020-12-31, 16-bit)
518
- # VEGDISTDATE = ee.ImageCollection(
519
- # "projects/glad/HLSDIST/current/VEG-DIST-DATE"
520
- # ).mosaic()
521
-
522
- # # NB relies on initial date of disturbance - consider if last date needed? : VEGLASTDATE = ee.ImageCollection("projects/glad/HLSDIST/current/VEG-LAST-DATE").mosaic(); # Last assessed observation date (≥1, days, 16-bit)
523
-
524
- # # Key for high-confidence alerts (values 3, 6, 7, 8)
525
- # high_conf_values = [3, 6, 7, 8]
526
- # # where:
527
- # # 3 = <50% loss, high confidence, ongoing
528
- # # 6 = ≥50% loss, high confidence, ongoing
529
- # # 7 = <50% loss, high confidence, finished
530
- # # 8 = ≥50% loss, high confidence, finished
531
- # # Note could use <50% loss (i.e. only 6 and 7) for if want to be more strict
532
-
533
- # # Create high-confidence mask
534
- # dist_high_conf = VEGDISTSTATUS.remap(
535
- # high_conf_values, [1] * len(high_conf_values), 0
536
- # )
537
-
538
- # # Determine start year and current year dynamically
539
- # start_year = 2024 # Set the first year of interest
540
- # current_year = datetime.now().year
541
-
542
- # # Calculate days since December 31, 2020 for start and end dates (server-side)
543
- # start_of_2020 = ee.Date("2020-12-31").millis().divide(86400000).int()
544
-
545
- # # Create a list to hold the yearly images
546
- # yearly_images = []
547
-
548
- # for year in range(start_year, current_year + 1):
549
- # start_of_year = (
550
- # ee.Date(f"{year}-01-01")
551
- # .millis()
552
- # .divide(86400000)
553
- # .int()
554
- # .subtract(start_of_2020)
555
- # )
556
- # start_of_next_year = (
557
- # ee.Date(f"{year + 1}-01-01")
558
- # .millis()
559
- # .divide(86400000)
560
- # .int()
561
- # .subtract(start_of_2020)
562
- # )
563
-
564
- # # Filter VEG-DIST-DATE for the selected year
565
- # dist_year = VEGDISTDATE.gte(start_of_year).And(
566
- # VEGDISTDATE.lt(start_of_next_year)
567
- # )
568
-
569
- # # Apply high-confidence mask and rename the band
570
- # high_conf_year = dist_year.updateMask(dist_high_conf).rename(
571
- # f"DIST_year_{year}"
572
- # )
573
-
574
- # # Append the year's data to the list
575
- # yearly_images.append(high_conf_year)
576
-
577
- # # Combine all yearly images into a single image
578
- # img_stack = ee.Image.cat(yearly_images)
579
-
580
- # # Rename the bands correctly
581
- # band_names = [f"DIST_year_{year}" for year in range(start_year, current_year + 1)]
582
- # img_stack = img_stack.select(img_stack.bandNames(), band_names)
583
-
584
- # return img_stack.updateMask(
585
- # jrc_gfc_2020_prep()
586
- # ) # mask yearly dist alerts to forest cover in 2020
587
-
588
-
589
542
  #### disturbances combined (split into before and after 2020)
590
543
 
591
544
  # RADD_after_2020
592
545
  def g_radd_after_2020_prep():
593
- from datetime import datetime
594
-
595
546
  radd = ee.ImageCollection("projects/radar-wur/raddalert/v1")
596
547
 
597
548
  radd_date = (
@@ -600,9 +551,8 @@ def g_radd_after_2020_prep():
600
551
  # date of availability
601
552
  start_year = 21 ## (starts 2019 in Africa, then 2020 for S America and Asia: https://data.globalforestwatch.org/datasets/gfw::deforestation-alerts-radd/about)
602
553
 
603
- current_year = (
604
- datetime.now().year % 100
605
- ) # NB the % 100 part gets last two digits needed
554
+ # Use pre-calculated current year (avoids repeated datetime calls)
555
+ current_year = CURRENT_YEAR_2DIGIT
606
556
  start = start_year * 1000
607
557
  end = current_year * 1000 + 365
608
558
  return (
@@ -610,13 +560,11 @@ def g_radd_after_2020_prep():
610
560
  .updateMask(radd_date.lte(end))
611
561
  .gt(0)
612
562
  .rename("RADD_after_2020")
613
- )
563
+ ).selfMask()
614
564
 
615
565
 
616
566
  # RADD_before_2020
617
567
  def g_radd_before_2020_prep():
618
- from datetime import datetime
619
-
620
568
  radd = ee.ImageCollection("projects/radar-wur/raddalert/v1")
621
569
 
622
570
  radd_date = (
@@ -625,8 +573,6 @@ def g_radd_before_2020_prep():
625
573
  # date of availability
626
574
  start_year = 19 ## (starts 2019 in Africa, then 2020 for S America and Asia: https://data.globalforestwatch.org/datasets/gfw::deforestation-alerts-radd/about)
627
575
 
628
- # current_year = datetime.now().year % 100 # NB the % 100 part gets last two digits needed
629
-
630
576
  start = start_year * 1000
631
577
  end = 20 * 1000 + 365
632
578
  return (
@@ -634,7 +580,7 @@ def g_radd_before_2020_prep():
634
580
  .updateMask(radd_date.lte(end))
635
581
  .gt(0)
636
582
  .rename("RADD_before_2020")
637
- )
583
+ ).selfMask()
638
584
 
639
585
 
640
586
  # # DIST_after_2020
@@ -662,25 +608,35 @@ def g_radd_before_2020_prep():
662
608
  # TMF_deg_before_2020
663
609
  def g_tmf_deg_before_2020_prep():
664
610
  tmf_deg = ee.ImageCollection("projects/JRC/TMF/v1_2024/DegradationYear").mosaic()
665
- return (tmf_deg.lte(2020)).And(tmf_deg.gte(2000)).rename("TMF_deg_before_2020")
611
+ return (
612
+ (tmf_deg.lte(2020))
613
+ .And(tmf_deg.gte(2000))
614
+ .rename("TMF_deg_before_2020")
615
+ .selfMask()
616
+ )
666
617
 
667
618
 
668
619
  # TMF_deg_after_2020
669
620
  def g_tmf_deg_after_2020_prep():
670
621
  tmf_deg = ee.ImageCollection("projects/JRC/TMF/v1_2024/DegradationYear").mosaic()
671
- return tmf_deg.gt(2020).rename("TMF_deg_after_2020")
622
+ return tmf_deg.gt(2020).rename("TMF_deg_after_2020").selfMask()
672
623
 
673
624
 
674
625
  # tmf_def_before_2020
675
626
  def g_tmf_def_before_2020_prep():
676
627
  tmf_def = ee.ImageCollection("projects/JRC/TMF/v1_2024/DeforestationYear").mosaic()
677
- return (tmf_def.lte(2020)).And(tmf_def.gte(2000)).rename("TMF_def_before_2020")
628
+ return (
629
+ (tmf_def.lte(2020))
630
+ .And(tmf_def.gte(2000))
631
+ .rename("TMF_def_before_2020")
632
+ .selfMask()
633
+ )
678
634
 
679
635
 
680
636
  # tmf_def_after_2020
681
637
  def g_tmf_def_after_2020_prep():
682
638
  tmf_def = ee.ImageCollection("projects/JRC/TMF/v1_2024/DeforestationYear").mosaic()
683
- return tmf_def.gt(2020).rename("TMF_def_after_2020")
639
+ return tmf_def.gt(2020).rename("TMF_def_after_2020").selfMask()
684
640
 
685
641
 
686
642
  # GFC_loss_before_2020 (loss within 10 percent cover; includes 2020; correct for version 11)
@@ -690,7 +646,7 @@ def g_glad_gfc_loss_before_2020_prep():
690
646
  gfc_loss = (
691
647
  gfc.select(["lossyear"]).lte(20).And(gfc.select(["treecover2000"]).gt(10))
692
648
  )
693
- return gfc_loss.rename("GFC_loss_before_2020")
649
+ return gfc_loss.rename("GFC_loss_before_2020").selfMask()
694
650
 
695
651
 
696
652
  # GFC_loss_after_2020 (loss within 10 percent cover; correct for version 11)
@@ -698,7 +654,7 @@ def g_glad_gfc_loss_after_2020_prep():
698
654
  # Load the Global Forest Change dataset
699
655
  gfc = ee.Image("UMD/hansen/global_forest_change_2024_v1_12")
700
656
  gfc_loss = gfc.select(["lossyear"]).gt(20).And(gfc.select(["treecover2000"]).gt(10))
701
- return gfc_loss.rename("GFC_loss_after_2020")
657
+ return gfc_loss.rename("GFC_loss_after_2020").selfMask()
702
658
 
703
659
 
704
660
  # MODIS_fire_before_2020
@@ -714,14 +670,15 @@ def g_modis_fire_before_2020_prep():
714
670
  .select(["BurnDate"])
715
671
  .gte(0)
716
672
  .rename("MODIS_fire_before_2020")
717
- )
673
+ ).selfMask()
718
674
 
719
675
 
720
676
  # MODIS_fire_after_2020
721
677
  def g_modis_fire_after_2020_prep():
722
678
  modis_fire = ee.ImageCollection("MODIS/061/MCD64A1")
723
679
  start_year = 2021
724
- end_year = datetime.now().year
680
+ # Use pre-calculated current year (avoids repeated datetime calls)
681
+ end_year = CURRENT_YEAR - 1 # Use year - 1 to ensure data availability
725
682
  date_st = str(start_year) + "-01-01"
726
683
  date_ed = str(end_year) + "-12-31"
727
684
  return (
@@ -730,7 +687,7 @@ def g_modis_fire_after_2020_prep():
730
687
  .select(["BurnDate"])
731
688
  .gte(0)
732
689
  .rename("MODIS_fire_after_2020")
733
- )
690
+ ).selfMask()
734
691
 
735
692
 
736
693
  # ESA_fire_before_2020
@@ -746,7 +703,7 @@ def g_esa_fire_before_2020_prep():
746
703
  .select(["BurnDate"])
747
704
  .gte(0)
748
705
  .rename("ESA_fire_before_2020")
749
- )
706
+ ).selfMask()
750
707
 
751
708
 
752
709
  #########################logging concessions
@@ -792,7 +749,7 @@ def g_logging_concessions_before_2020_prep():
792
749
  ]
793
750
  ).mosaic()
794
751
 
795
- return logging_concessions_binary.rename("GFW_logging_before_2020")
752
+ return logging_concessions_binary.rename("GFW_logging_before_2020").selfMask()
796
753
 
797
754
 
798
755
  #########################national datasets
@@ -810,7 +767,7 @@ def g_logging_concessions_before_2020_prep():
810
767
  def nbr_terraclass_amz20_primary_prep():
811
768
  tcamz20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_amz_2020")
812
769
  tcamz20_f = tcamz20.eq(1)
813
- return tcamz20_f.rename("nBR_INPE_TC_primary_forest_Amazon_2020")
770
+ return tcamz20_f.rename("nBR_INPE_TC_primary_forest_Amazon_2020").selfMask()
814
771
 
815
772
 
816
773
  # [Official NFMS dataset] Brazilian Forest Service dataset on natural forest cover from PRODES and TerraClass data, base year 2022
@@ -824,7 +781,7 @@ def nbr_bfs_ptn_f20_prep():
824
781
  bfs_fptn20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_ptn_2020")
825
782
 
826
783
  bfs_fptn20_binary = ee.Image().paint(bfs_fptn20, 1)
827
- return bfs_fptn20_binary.rename("nBR_BFS_primary_forest_Pantanal_2020")
784
+ return bfs_fptn20_binary.rename("nBR_BFS_primary_forest_Pantanal_2020").selfMask()
828
785
 
829
786
 
830
787
  # Caatinga - filtered with QGIS because the original geodatabase is too large to export as a shapefile (GEE accepted format)
@@ -832,35 +789,39 @@ def nbr_bfs_ptn_f20_prep():
832
789
  def nbr_bfs_caat_f20_prep():
833
790
  bfs_fcaat20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_caat_2020")
834
791
  bfs_fcaat20_binary = ee.Image().paint(bfs_fcaat20, 1)
835
- return bfs_fcaat20_binary.rename("nBR_BFS_primary_forest_Caatinga_2020")
792
+ return bfs_fcaat20_binary.rename("nBR_BFS_primary_forest_Caatinga_2020").selfMask()
836
793
 
837
794
 
838
795
  # Atlantic Forest - filtered with QGIS because the original geodatabase is too large to export as a shapefile (GEE accepted format)
839
796
  def nbr_bfs_atlf_f20_prep():
840
797
  bfs_fatlf20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_atlf_2020")
841
798
  bfs_fatlf20_binary = ee.Image().paint(bfs_fatlf20, 1)
842
- return bfs_fatlf20_binary.rename("nBR_BFS_primary_forest_AtlanticForest_2020")
799
+ return bfs_fatlf20_binary.rename(
800
+ "nBR_BFS_primary_forest_AtlanticForest_2020"
801
+ ).selfMask()
843
802
 
844
803
 
845
804
  # Pampa - filtered in QGIS to save some storage space
846
805
  def nbr_bfs_pmp_f20_prep():
847
806
  bfs_fpmp20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_pmp_2020")
848
807
  bfs_fpmp20_binary = ee.Image().paint(bfs_fpmp20, 1)
849
- return bfs_fpmp20_binary.rename("nBR_BFS_primary_forest_Pampa_2020")
808
+ return bfs_fpmp20_binary.rename("nBR_BFS_primary_forest_Pampa_2020").selfMask()
850
809
 
851
810
 
852
811
  ##########################secondary forests###############################################
853
812
  def nbr_terraclass_amz20_secondary_prep():
854
813
  tcamz20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_amz_2020")
855
814
  tcamz20_f = tcamz20.eq(2)
856
- return tcamz20_f.rename("nBR_INPE_TC_secondary_forest_Amazon_2020")
815
+ return tcamz20_f.rename("nBR_INPE_TC_secondary_forest_Amazon_2020").selfMask()
857
816
 
858
817
 
859
818
  # Cerrado - filtered with QGIS because the original geodatabase is too large to export as a shapefile (GEE accepted format)
860
819
  def nbr_bfs_cer_f20_prep():
861
820
  bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_cerr_2020")
862
821
  bfs_fcer20_binary = ee.Image().paint(bfs_fcer20, 1)
863
- return bfs_fcer20_binary.rename("nBR_BFS_primary_and_secondary_forest_Cerrado_2020")
822
+ return bfs_fcer20_binary.rename(
823
+ "nBR_BFS_primary_and_secondary_forest_Cerrado_2020"
824
+ ).selfMask()
864
825
 
865
826
 
866
827
  # %%
@@ -879,7 +840,9 @@ def nbr_mapbiomasc9_f20_prep():
879
840
  .Or(mapbiomasc9_20.eq(6))
880
841
  .Or(mapbiomasc9_20.eq(49))
881
842
  )
882
- return mapbiomasc9_20_forest.rename("nBR_MapBiomas_col9_forest_Brazil_2020")
843
+ return mapbiomasc9_20_forest.rename(
844
+ "nBR_MapBiomas_col9_forest_Brazil_2020"
845
+ ).selfMask()
883
846
 
884
847
 
885
848
  # ### ########################NBR plantation forest in 2020:#######################################
@@ -890,7 +853,7 @@ def nbr_mapbiomasc9_f20_prep():
890
853
  def nbr_terraclass_amz20_silv_prep():
891
854
  tcamz20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_amz_2020")
892
855
  tcamz20_silviculture = tcamz20.eq(9)
893
- return tcamz20_silviculture.rename("nBR_INPE_TCsilviculture_Amazon_2020")
856
+ return tcamz20_silviculture.rename("nBR_INPE_TCsilviculture_Amazon_2020").selfMask()
894
857
 
895
858
 
896
859
  # [Official NFMS dataset] INPE/EMBRAPA TerraClass land use/cover in the Cerrado biome, 2020
@@ -899,7 +862,9 @@ def nbr_terraclass_amz20_silv_prep():
899
862
  def nbr_terraclass_silv_cer20_prep():
900
863
  tccer20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_cer_2020")
901
864
  tccer20_silviculture = tccer20.eq(9)
902
- return tccer20_silviculture.rename("nBR_INPE_TCsilviculture_Cerrado_2020")
865
+ return tccer20_silviculture.rename(
866
+ "nBR_INPE_TCsilviculture_Cerrado_2020"
867
+ ).selfMask()
903
868
 
904
869
 
905
870
  # [non-official dataset by MapBiomas multisector initiative]
@@ -913,7 +878,7 @@ def nbr_mapbiomasc9_silv20_prep():
913
878
  mapbiomasc9_20_silviculture = mapbiomasc9_20.eq(9)
914
879
  return mapbiomasc9_20_silviculture.rename(
915
880
  "nBR_MapBiomas_col9_silviculture_Brazil_2020"
916
- )
881
+ ).selfMask()
917
882
 
918
883
 
919
884
  ################ ### NBR Disturbances before 2020:########################################
@@ -958,8 +923,9 @@ def nbr_prodes_before_2020_prep():
958
923
  prodes_before_20_mask = prodes.remap(
959
924
  prodes_before_20_dn, [1] * len(prodes_before_20_dn)
960
925
  ) # .eq(1)
961
- prodes_before_20 = prodes_before_20_mask.selfMask()
962
- return prodes_before_20.rename("nBR_PRODES_deforestation_Brazil_before_2020")
926
+ return prodes_before_20_mask.rename(
927
+ "nBR_PRODES_deforestation_Brazil_before_2020"
928
+ ).selfMask()
963
929
 
964
930
 
965
931
  ## Caution: 1) includes deforestation and conversion of other wooded land and grassland
@@ -984,7 +950,9 @@ def nbr_deter_amazon_before_2020_prep():
984
950
  ).filter(ee.Filter.lt("formatted_date", ee.Date("2020-12-31")))
985
951
 
986
952
  deter_deg_binary = ee.Image().paint(deter_deg, 1)
987
- return deter_deg_binary.rename("nBR_DETER_forestdegradation_Amazon_before_2020")
953
+ return deter_deg_binary.rename(
954
+ "nBR_DETER_forestdegradation_Amazon_before_2020"
955
+ ).selfMask()
988
956
 
989
957
 
990
958
  ################ ### NBR Disturbances after 2020:########################################
@@ -1001,7 +969,9 @@ def nbr_prodes_after_2020_prep():
1001
969
  prodes_after_20_dn, [1] * len(prodes_after_20_dn)
1002
970
  ) # .eq(1)
1003
971
  prodes_after_20 = prodes_after_20_mask.selfMask()
1004
- return prodes_after_20.rename("nBR_PRODES_deforestation_Brazil_after_2020")
972
+ return prodes_after_20.rename(
973
+ "nBR_PRODES_deforestation_Brazil_after_2020"
974
+ ).selfMask()
1005
975
 
1006
976
 
1007
977
  # %%
@@ -1023,7 +993,9 @@ def nbr_deter_amazon_after_2020_prep():
1023
993
  ).filter(ee.Filter.gt("formatted_date", ee.Date("2021-01-01")))
1024
994
 
1025
995
  deter_deg_binary = ee.Image().paint(deter_deg, 1)
1026
- return deter_deg_binary.rename("nBR_DETER_forestdegradation_Amazon_after_2020")
996
+ return deter_deg_binary.rename(
997
+ "nBR_DETER_forestdegradation_Amazon_after_2020"
998
+ ).selfMask()
1027
999
 
1028
1000
 
1029
1001
  # ########################## NBR commodities - permanent/perennial crops in 2020:###############################
@@ -1037,7 +1009,7 @@ def nbr_terraclass_amz_cer20_pc_prep():
1037
1009
  tccer20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_cer_2020")
1038
1010
  tccer20_pc = tccer20.eq(12).Or(tccer20.eq(13))
1039
1011
  tc_pc = ee.ImageCollection([tcamz20_pc, tccer20_pc]).mosaic()
1040
- return tc_pc.rename("nBR_INPE_TCamz_cer_perennial_2020")
1012
+ return tc_pc.rename("nBR_INPE_TCamz_cer_perennial_2020").selfMask()
1041
1013
 
1042
1014
 
1043
1015
  # [non-official dataset by MapBiomas multisector initiative]
@@ -1049,7 +1021,7 @@ def nbr_mapbiomasc9_cof_prep():
1049
1021
  "projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1"
1050
1022
  ).select("classification_2020")
1051
1023
  mapbiomasc9_20_coffee = mapbiomasc9_20.eq(46)
1052
- return mapbiomasc9_20_coffee.rename("nBR_MapBiomas_col9_coffee_2020")
1024
+ return mapbiomasc9_20_coffee.rename("nBR_MapBiomas_col9_coffee_2020").selfMask()
1053
1025
 
1054
1026
 
1055
1027
  # [non-official dataset by MapBiomas multisector initiative]
@@ -1061,7 +1033,7 @@ def nbr_mapbiomasc9_po_prep():
1061
1033
  "projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1"
1062
1034
  ).select("classification_2020")
1063
1035
  mapbiomasc9_20_palm = mapbiomasc9_20.eq(35)
1064
- return mapbiomasc9_20_palm.rename("nBR_MapBiomas_col9_palmoil_2020")
1036
+ return mapbiomasc9_20_palm.rename("nBR_MapBiomas_col9_palmoil_2020").selfMask()
1065
1037
 
1066
1038
 
1067
1039
  # [non-official dataset by MapBiomas multisector initiative]
@@ -1073,7 +1045,7 @@ def nbr_mapbiomasc9_pc_prep():
1073
1045
  "projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1"
1074
1046
  ).select("classification_2020")
1075
1047
  mapbiomasc9_20_pc = mapbiomasc9_20.eq(35).Or(mapbiomasc9_20.eq(46))
1076
- return mapbiomasc9_20_pc.rename("nBR_MapBiomas_col9_pc_2020")
1048
+ return mapbiomasc9_20_pc.rename("nBR_MapBiomas_col9_pc_2020").selfMask()
1077
1049
 
1078
1050
 
1079
1051
  # ######################## NBR commodities - annual crops in 2020:##############################
@@ -1089,7 +1061,7 @@ def nbr_terraclass_amz_cer20_ac_prep():
1089
1061
  tccer20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_cer_2020")
1090
1062
  tccer20_ac = tccer20.eq(14).Or(tccer20.eq(15))
1091
1063
  tc_ac = ee.ImageCollection([tcamz20_ac, tccer20_ac]).mosaic()
1092
- return tc_ac.rename("nBR_INPE_TCamz_cer_annual_2020")
1064
+ return tc_ac.rename("nBR_INPE_TCamz_cer_annual_2020").selfMask()
1093
1065
 
1094
1066
 
1095
1067
  # [non-official dataset by MapBiomas multisector initiative]
@@ -1101,7 +1073,7 @@ def nbr_mapbiomasc9_soy_prep():
1101
1073
  "projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1"
1102
1074
  ).select("classification_2020")
1103
1075
  mapbiomasc9_20_soy = mapbiomasc9_20.eq(39)
1104
- return mapbiomasc9_20_soy.rename("nBR_MapBiomas_col9_soy_2020")
1076
+ return mapbiomasc9_20_soy.rename("nBR_MapBiomas_col9_soy_2020").selfMask()
1105
1077
 
1106
1078
 
1107
1079
  # [non-official dataset by MapBiomas multisector initiative]
@@ -1121,7 +1093,7 @@ def nbr_mapbiomasc9_ac_prep():
1121
1093
  .Or(mapbiomasc9_20.eq(40))
1122
1094
  .Or(mapbiomasc9_20.eq(62))
1123
1095
  )
1124
- return mapbiomasc9_20_ac.rename("nBR_MapBiomas_col9_annual_crops_2020")
1096
+ return mapbiomasc9_20_ac.rename("nBR_MapBiomas_col9_annual_crops_2020").selfMask()
1125
1097
 
1126
1098
 
1127
1099
  # ################################### NBR commodities - pasture/livestock in 2020:##############################
@@ -1134,7 +1106,7 @@ def nbr_mapbiomasc9_ac_prep():
1134
1106
  def nbr_terraclass_amz20_pasture_prep():
1135
1107
  tcamz20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_amz_2020")
1136
1108
  tcamz20_pasture = tcamz20.eq(10).Or(tcamz20.eq(11))
1137
- return tcamz20_pasture.rename("nBR_INPE_TCamz_pasture_2020")
1109
+ return tcamz20_pasture.rename("nBR_INPE_TCamz_pasture_2020").selfMask()
1138
1110
 
1139
1111
 
1140
1112
  # %%
@@ -1146,7 +1118,7 @@ def nbr_terraclass_amz20_pasture_prep():
1146
1118
  def nbr_terraclass_cer20_ac_prep():
1147
1119
  tccer20 = ee.Image("projects/ee-whisp/assets/NBR/terraclass_cer_2020")
1148
1120
  tccer20_pasture = tccer20.eq(11)
1149
- return tccer20_pasture.rename("nBR_INPE_TCcer_pasture_2020")
1121
+ return tccer20_pasture.rename("nBR_INPE_TCcer_pasture_2020").selfMask()
1150
1122
 
1151
1123
 
1152
1124
  # %%
@@ -1159,7 +1131,7 @@ def nbr_mapbiomasc9_pasture_prep():
1159
1131
  "projects/mapbiomas-public/assets/brazil/lulc/collection9/mapbiomas_collection90_integration_v1"
1160
1132
  ).select("classification_2020")
1161
1133
  mapbiomasc9_20_pasture = mapbiomasc9_20.eq(15)
1162
- return mapbiomasc9_20_pasture.rename("nBR_MapBiomas_col9_pasture_2020")
1134
+ return mapbiomasc9_20_pasture.rename("nBR_MapBiomas_col9_pasture_2020").selfMask()
1163
1135
 
1164
1136
 
1165
1137
  ###################################################################
@@ -1169,13 +1141,13 @@ def nbr_mapbiomasc9_pasture_prep():
1169
1141
  def nco_ideam_forest_2020_prep():
1170
1142
  ideam_forest_raw = ee.Image("projects/ee-whisp/assets/nCO/ideam_2020_geo")
1171
1143
  ideam_forest = ideam_forest_raw.eq(1) # get forest class
1172
- return ideam_forest.rename("nCO_ideam_forest_2020")
1144
+ return ideam_forest.rename("nCO_ideam_forest_2020").selfMask()
1173
1145
 
1174
1146
 
1175
1147
  def nco_ideam_eufo_commission_2020_prep():
1176
1148
  ideam_agroforest_raw = ee.Image("projects/ee-whisp/assets/nCO/ideam_2020_geo_EUFO")
1177
1149
  ideam_agroforest = ideam_agroforest_raw.eq(4) # get forest class
1178
- return ideam_agroforest.rename("nCO_ideam_eufo_commission_2020")
1150
+ return ideam_agroforest.rename("nCO_ideam_eufo_commission_2020").selfMask()
1179
1151
 
1180
1152
 
1181
1153
  # Cocoa_bnetd
@@ -1185,47 +1157,138 @@ def nci_ocs2020_prep():
1185
1157
  .select("classification")
1186
1158
  .eq(9)
1187
1159
  .rename("nCI_Cocoa_bnetd")
1188
- ) # cocoa from national land cover map for Côte d'Ivoire
1160
+ ).selfMask() # cocoa from national land cover map for Côte d'Ivoire
1161
+
1162
+
1163
+ # ============================================================================
1164
+ # CONTEXT BANDS (Administrative boundaries and water mask)
1165
+ # ============================================================================
1166
+
1167
+
1168
+ def g_gaul_admin_code():
1169
+ """
1170
+ GAUL 2024 Level 1 administrative boundary codes (500m resolution).
1171
+ Used for spatial context and administrative aggregation.
1172
+
1173
+ Returns
1174
+ -------
1175
+ ee.Image
1176
+ Image with its band renamed to 'admin_code' (no cast is applied here; the asset's native data type is kept)
1177
+ """
1178
+ admin_image = ee.Image(
1179
+ "projects/ee-andyarnellgee/assets/gaul_2024_level_1_code_500m"
1180
+ )
1181
+ # Rename the band only; no cast is applied here, so the asset's native data type is kept
1182
+ return admin_image.rename("admin_code")
1183
+
1184
+
1185
+ def g_water_mask_prep():
1186
+ """
1187
+ Water mask from JRC/USGS combined dataset.
1188
+ Used to identify water bodies for downstream filtering and context.
1189
+
1190
+ This function only self-masks and renames the band; it does not multiply by pixel area itself.
1191
+
1192
+ Returns
1193
+ -------
1194
+ ee.Image
1195
+ Self-masked water mask image renamed to the water_flag name from config_runtime (In_waterbody)
1196
+ """
1197
+ from openforis_whisp.parameters.config_runtime import water_flag
1198
+
1199
+ water_mask_image = ee.Image("projects/ee-andyarnellgee/assets/water_mask_jrc_usgs")
1200
+ return water_mask_image.selfMask().rename(water_flag)
1189
1201
 
1190
1202
 
1191
1203
  ###Combining datasets
1192
1204
 
1193
1205
 
1194
- def combine_datasets(national_codes=None):
1195
- """Combines datasets into a single multiband image, with fallback if assets are missing."""
1196
- img_combined = ee.Image(1).rename(geometry_area_column)
1206
+ def combine_datasets(
1207
+ national_codes=None,
1208
+ validate_bands=False,
1209
+ include_context_bands=True,
1210
+ auto_recovery=False,
1211
+ ):
1212
+ """
1213
+ Combines datasets into a single multiband image, with fallback if assets are missing.
1214
+
1215
+ Parameters
1216
+ ----------
1217
+ national_codes : list, optional
1218
+ List of ISO2 country codes to include national datasets
1219
+ validate_bands : bool, optional
1220
+ If True, validates band names with a slow .getInfo() call (default: False)
1221
+ Only enable for debugging. Normal operation relies on exception handling.
1222
+ include_context_bands : bool, optional
1223
+ If True (default), includes context bands (admin_code, water_flag) in the output.
1224
+ Set to False when using stats.py implementations that compile datasets differently.
1225
+ auto_recovery : bool, optional
1226
+ If True, automatically enables validate_bands when an error is detected
1227
+ during initial assembly (default: False). This allows graceful recovery from missing/broken datasets.
1197
1228
 
1198
- # Combine images directly
1199
- for img in [func() for func in list_functions(national_codes=national_codes)]:
1229
+ Returns
1230
+ -------
1231
+ ee.Image
1232
+ Combined multiband image with all datasets (and optionally context bands)
1233
+ """
1234
+ # Step 1: Combine all main dataset images
1235
+ all_images = [ee.Image(1).rename(geometry_area_column)]
1236
+ for func in list_functions(national_codes=national_codes):
1200
1237
  try:
1201
- img_combined = img_combined.addBands(img)
1238
+ all_images.append(func())
1202
1239
  except ee.EEException as e:
1203
- # logger.error(f"Error adding image: {e}")
1204
- print(f"Error adding image: {e}")
1205
-
1206
- try:
1207
- # Attempt to print band names to check for errors
1208
- # print(img_combined.bandNames().getInfo())
1209
- img_combined.bandNames().getInfo()
1240
+ print(f"Error loading image: {e}")
1210
1241
 
1211
- except ee.EEException as e:
1212
- # logger.error(f"Error printing band names: {e}")
1213
- # logger.info("Running code for filtering to only valid datasets due to error in input")
1214
- print("using valid datasets filter due to error in input")
1215
- # Validate images
1216
- images_to_test = [
1217
- func() for func in list_functions(national_codes=national_codes)
1218
- ]
1219
- valid_imgs = keep_valid_images(images_to_test) # Validate images
1242
+ img_combined = ee.Image.cat(all_images)
1220
1243
 
1221
- # Retry combining images after validation
1222
- img_combined = ee.Image(1).rename(geometry_area_column)
1223
- for img in valid_imgs:
1224
- img_combined = img_combined.addBands(img)
1244
+ # Step 2: Determine if validation needed
1245
+ should_validate = validate_bands
1246
+ if auto_recovery and not validate_bands:
1247
+ try:
1248
+ # Fast error detection: batch check main + context bands in one call
1249
+ bands_to_check = [img_combined.bandNames().get(0)]
1250
+ if include_context_bands:
1251
+ admin_image = g_gaul_admin_code()
1252
+ water_mask = g_water_mask_prep()
1253
+ bands_to_check.extend(
1254
+ [admin_image.bandNames().get(0), water_mask.bandNames().get(0)]
1255
+ )
1256
+ ee.List(bands_to_check).getInfo() # trigger error if any band is invalid
1257
+ except ee.EEException as e:
1258
+ print(f"Error detected, enabling recovery mode: {str(e)[:80]}...")
1259
+ should_validate = True
1225
1260
 
1261
+ # Step 3: Validate and recover if needed
1262
+ if should_validate:
1263
+ try:
1264
+ img_combined.bandNames().getInfo() # check all bands
1265
+ except ee.EEException as e:
1266
+ print("Using valid datasets filter due to error in validation")
1267
+ valid_imgs = keep_valid_images(
1268
+ [func() for func in list_functions(national_codes=national_codes)]
1269
+ )
1270
+ all_images_retry = [ee.Image(1).rename(geometry_area_column)]
1271
+ all_images_retry.extend(valid_imgs)
1272
+ img_combined = ee.Image.cat(all_images_retry)
1273
+
1274
+ # Step 4: Multiply main datasets by pixel area
1226
1275
  img_combined = img_combined.multiply(ee.Image.pixelArea())
1227
- print("Whisp multiband image compiled")
1228
1276
 
1277
+ # Step 5: Add context bands (admin_code and In_waterbody) after the pixel-area multiplication, so they are not area-scaled
1278
+ if include_context_bands:
1279
+ for band_func, band_name in [
1280
+ (g_gaul_admin_code, "admin_code"),
1281
+ (g_water_mask_prep, "In_waterbody"),
1282
+ ]:
1283
+ try:
1284
+ band_img = band_func()
1285
+ if should_validate:
1286
+ band_img.bandNames().getInfo()
1287
+ img_combined = img_combined.addBands(band_img)
1288
+ except ee.EEException as e:
1289
+ print(f"Warning: Could not add {band_name} band: {e}")
1290
+
1291
+ print("Whisp multiband image compiled")
1229
1292
  return img_combined
1230
1293
 
1231
1294
 
@@ -1245,9 +1308,12 @@ def combine_datasets(national_codes=None):
1245
1308
  def list_functions(national_codes=None):
1246
1309
  """
1247
1310
  Returns a list of functions that end with "_prep" and either:
1248
- - Start with "g_" (global/regional products)
1311
+ - Start with "g_" (global/regional products, excluding context bands)
1249
1312
  - Start with any provided national code prefix (nXX_)
1250
1313
 
1314
+ Context band functions (g_gaul_admin_code, g_water_mask_prep) are handled
1315
+ separately and excluded from this list to avoid duplication.
1316
+
1251
1317
  Args:
1252
1318
  national_codes: List of ISO2 country codes (without the 'n' prefix)
1253
1319
  """
@@ -1258,15 +1324,19 @@ def list_functions(national_codes=None):
1258
1324
  if national_codes is None:
1259
1325
  national_codes = []
1260
1326
 
1327
+ # Context band functions that are handled separately
1328
+ context_functions = {"g_gaul_admin_code", "g_water_mask_prep"}
1329
+
1261
1330
  # Create prefixes list with proper formatting ('n' + code + '_')
1262
1331
  allowed_prefixes = ["g_"] + [f"n{code.lower()}_" for code in national_codes]
1263
1332
 
1264
- # Filter functions in a single pass
1333
+ # Filter functions in a single pass, excluding context band functions
1265
1334
  functions = [
1266
1335
  func
1267
1336
  for name, func in inspect.getmembers(current_module, inspect.isfunction)
1268
1337
  if name.endswith("_prep")
1269
1338
  and any(name.startswith(prefix) for prefix in allowed_prefixes)
1339
+ and name not in context_functions
1270
1340
  ]
1271
1341
 
1272
1342
  return functions
@@ -1350,3 +1420,6 @@ def combine_custom_bands(custom_images, custom_bands_info):
1350
1420
  custom_ee_image = custom_ee_image.multiply(ee.Image.pixelArea())
1351
1421
 
1352
1422
  return custom_ee_image # Only return the image
1423
+
1424
+
1425
+ # %%