openforis-whisp 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,752 @@
1
+ import ee
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from .datasets import combine_datasets
5
+ import json
6
+ import country_converter as coco
7
+ from openforis_whisp.parameters.config_runtime import (
8
+ percent_or_ha,
9
+ plot_id_column,
10
+ geo_id_column,
11
+ geometry_type_column,
12
+ geometry_area_column,
13
+ geometry_area_column_formatting,
14
+ centroid_x_coord_column,
15
+ centroid_y_coord_column,
16
+ iso3_country_column,
17
+ iso2_country_column,
18
+ admin_1_column,
19
+ stats_unit_type_column,
20
+ stats_area_columns_formatting,
21
+ stats_percent_columns_formatting,
22
+ water_flag,
23
+ )
24
+ from .data_conversion import (
25
+ convert_ee_to_df,
26
+ convert_geojson_to_ee,
27
+ convert_ee_to_geojson,
28
+ # convert_csv_to_geojson,
29
+ convert_df_to_geojson,
30
+ ) # copied functions from whisp-api and geemap (accessed 2024) to avoid dependency
31
+ from .reformat import validate_dataframe_using_lookups
32
+
33
+ # NB functions that include "formatted" in the name apply a schema for validation and reformatting of the output dataframe. The schema is created from lookup tables.
34
+
35
+
36
def whisp_formatted_stats_geojson_to_df(
    input_geojson_filepath: Path | str,
    external_id_column=None,  # This variable is expected to be a string or None
    remove_geom=False,
) -> pd.DataFrame:
    """
    Main function for most users.
    Converts a GeoJSON file to a pandas DataFrame containing Whisp stats for the input ROI.
    Output df is validated against a panderas schema (created on the fly from the two lookup CSVs).

    This function first converts the provided GeoJSON file into an Earth Engine FeatureCollection.
    It then processes the FeatureCollection to extract relevant Whisp statistics,
    returning a structured DataFrame that aligns with the expected schema.

    If `external_id_column` is provided, it will be used to link external identifiers
    from the input GeoJSON to the output DataFrame.

    Parameters
    ----------
    input_geojson_filepath : Path | str
        The filepath to the GeoJSON of the ROI to analyze.
    external_id_column : str, optional
        The column in the GeoJSON containing external IDs to be preserved in the output DataFrame in the external_id column.
    remove_geom : bool, default=False
        If True, the geometry of the GeoJSON is removed from the output DataFrame.

    Returns
    -------
    df_stats : pd.DataFrame
        The DataFrame containing the Whisp stats for the input ROI.
    """
    feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))

    return whisp_formatted_stats_ee_to_df(
        feature_collection, external_id_column, remove_geom
    )
72
+
73
+
74
def whisp_formatted_stats_geojson_to_geojson(
    input_geojson_filepath,
    output_geojson_filepath,
    external_id_column=None,
    geo_column: str = "geo",
):
    """
    Run the formatted Whisp analysis on a GeoJSON file and save the result as GeoJSON.

    Parameters
    ----------
    input_geojson_filepath : str
        The filepath to the input GeoJSON file.
    output_geojson_filepath : str
        The filepath to save the output GeoJSON file.
    external_id_column : str, optional
        The name of the column containing external IDs, by default None.
    geo_column : str, optional
        The name of the column containing GeoJSON geometries, by default "geo".

    Returns
    -------
    None
    """
    # Run the schema-validated analysis, then serialise the result to disk.
    stats_df = whisp_formatted_stats_geojson_to_df(
        input_geojson_filepath=input_geojson_filepath,
        external_id_column=external_id_column,
    )
    convert_df_to_geojson(stats_df, output_geojson_filepath, geo_column)

    print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")
106
+
107
+
108
def whisp_formatted_stats_ee_to_geojson(
    feature_collection: ee.FeatureCollection,
    output_geojson_filepath: str,
    external_id_column=None,  # This variable is expected to be a string or None
    geo_column: str = "geo",
):
    """
    Convert an Earth Engine FeatureCollection to a GeoJSON file containing Whisp stats.

    Parameters
    ----------
    feature_collection : ee.FeatureCollection
        The feature collection of the ROI to analyze.
    output_geojson_filepath : str
        The filepath to save the output GeoJSON file.
    external_id_column : str, optional
        The name of the column containing external IDs, by default None.
    geo_column : str, optional
        The name of the column containing GeoJSON geometries, by default "geo".

    Returns
    -------
    None
    """
    # Convert ee feature collection to a pandas dataframe
    df_stats = whisp_formatted_stats_ee_to_df(feature_collection, external_id_column)

    # Convert the df to GeoJSON
    convert_df_to_geojson(df_stats, output_geojson_filepath, geo_column)

    print(f"GeoJSON with Whisp stats saved to {output_geojson_filepath}")
141
+
142
+
143
def whisp_formatted_stats_ee_to_df(
    feature_collection: ee.FeatureCollection,
    external_id_column=None,  # This variable is expected to be a string or None
    remove_geom=False,
) -> pd.DataFrame:
    """
    Compute Whisp stats for a FeatureCollection and validate them against the lookup schema.

    Parameters
    ----------
    feature_collection : ee.FeatureCollection
        The feature collection of the ROI to analyze.
    external_id_column : str, optional
        Column holding external IDs to carry through, by default None.
    remove_geom : bool, default=False
        If True, drop the geometry from the output DataFrame.

    Returns
    -------
    validated_df : pd.DataFrame
        The validated dataframe containing the Whisp stats for the input ROI.
    """
    # Raw (unvalidated) stats from the GEE processing step.
    raw_stats_df = whisp_stats_ee_to_df(
        feature_collection, external_id_column, remove_geom
    )

    # Validate/reformat using the schema built on the fly from the lookup tables.
    return validate_dataframe_using_lookups(raw_stats_df)
164
+
165
+
166
+ ### functions without additional formatting below (i.e., raw output from GEE processing without schema validation step)
167
+
168
+
169
def whisp_stats_geojson_to_df(
    input_geojson_filepath: Path | str,
    external_id_column=None,  # This variable is expected to be a string or None
    remove_geom=False,
) -> pd.DataFrame:
    """
    Compute (unvalidated) Whisp stats for a GeoJSON file and return them as a DataFrame.

    Parameters
    ----------
    input_geojson_filepath : Path | str
        The filepath to the GeoJSON of the ROI to analyze.
    external_id_column : str, optional
        Column holding external IDs to carry through, by default None.
    remove_geom : bool, default=False
        If True, drop the geometry from the output DataFrame.

    Returns
    -------
    df_stats : pd.DataFrame
        The dataframe containing the Whisp stats for the input ROI.
    """
    roi_fc = convert_geojson_to_ee(str(input_geojson_filepath))
    return whisp_stats_ee_to_df(roi_fc, external_id_column, remove_geom)
189
+
190
+
191
def whisp_stats_geojson_to_ee(
    input_geojson_filepath: Path | str,
    external_id_column=None,  # This variable is expected to be a string or None
) -> ee.FeatureCollection:
    """
    Compute Whisp stats for a GeoJSON file and return them as an Earth Engine FeatureCollection.

    Parameters
    ----------
    input_geojson_filepath : Path | str
        The filepath to the GeoJSON of the ROI to analyze.
    external_id_column : str, optional
        The name of the column containing external IDs, by default None.

    Returns
    -------
    ee.FeatureCollection
        The feature collection with Whisp stats attached for the input ROI.
    """
    feature_collection = convert_geojson_to_ee(str(input_geojson_filepath))

    return whisp_stats_ee_to_ee(feature_collection, external_id_column)
210
+
211
+
212
def whisp_stats_geojson_to_geojson(
    input_geojson_filepath, output_geojson_filepath, external_id_column=None
):
    """
    Run the Whisp analysis on a GeoJSON file and write the results out as GeoJSON.

    Parameters
    ----------
    input_geojson_filepath : str
        The filepath to the input GeoJSON file.
    output_geojson_filepath : str
        The filepath to save the output GeoJSON file.
    external_id_column : str, optional
        The name of the external ID column, by default None.

    Returns
    -------
    None
    """
    # Build an Earth Engine FeatureCollection from the input file and
    # attach the Whisp statistics to it.
    roi_fc = convert_geojson_to_ee(input_geojson_filepath)
    stats_fc = whisp_stats_ee_to_ee(roi_fc, external_id_column)

    # Serialise the enriched collection and write it to disk.
    geojson_payload = convert_ee_to_geojson(stats_fc)
    with open(output_geojson_filepath, "w") as out_file:
        json.dump(geojson_payload, out_file, indent=2)
245
+
246
+
247
def whisp_stats_geojson_to_drive(
    input_geojson_filepath: Path | str,
    external_id_column=None,  # This variable is expected to be a string or None
):
    """
    Run the Whisp analysis on a GeoJSON file and export the results to Google Drive.

    Parameters
    ----------
    input_geojson_filepath : Path | str
        The filepath to the GeoJSON of the ROI to analyze.
    external_id_column : str, optional
        The name of the external ID column, by default None.

    Returns
    -------
    Message showing location of file in Google Drive
    """
    try:
        geojson_path = Path(input_geojson_filepath)
        if not geojson_path.exists():
            raise FileNotFoundError(f"File {geojson_path} does not exist.")

        # convert_geojson_to_ee comes from data_conversion.py
        roi_fc = convert_geojson_to_ee(str(geojson_path))

        return whisp_stats_ee_to_drive(roi_fc, external_id_column)

    except Exception as e:
        print(f"An error occurred: {e}")
274
+
275
+
276
def whisp_stats_ee_to_ee(feature_collection, external_id_column):
    """
    Process a feature collection to get statistics for each feature.

    Parameters:
        feature_collection (ee.FeatureCollection): The input feature collection.
        external_id_column (str): The name of the external ID column to check,
            or None to skip external-ID handling.

    Returns:
        ee.FeatureCollection: The output feature collection with statistics and
        a sequential plot ID added to each feature.

    Raises:
        ValueError: If `external_id_column` is not present on every feature.
    """

    if external_id_column is not None:
        try:
            # Check if external_id_column is a property in feature_collection (server-side)
            def check_column_exists(feature):
                return ee.Algorithms.If(
                    feature.propertyNames().contains(external_id_column),
                    feature,
                    ee.Feature(
                        None
                    ),  # Return an empty feature if the column does not exist
                )

            feature_collection_with_check = feature_collection.map(check_column_exists)
            size_fc = feature_collection.size()
            valid_feature_count = feature_collection_with_check.filter(
                ee.Filter.notNull([external_id_column])
            ).size()

            # Raise an error if the column does not exist in any feature
            if valid_feature_count.neq(size_fc).getInfo():
                raise ValueError(
                    f"The column '{external_id_column}' is not a property throughout the feature collection."
                )

            # Set the geo_id_column
            feature_collection = feature_collection.map(
                lambda feature: feature.set(
                    geo_id_column, ee.String(feature.get(external_id_column))
                )
            )

        except ValueError:
            # Bug fix: this deliberate validation error was previously caught by
            # the generic handler below and only printed; re-raise so callers
            # actually see that the external ID column is invalid.
            raise
        except Exception as e:
            # Handle unexpected (e.g. Earth Engine) errors and provide a helpful message
            print(
                f"An error occurred when trying to set the external_id_column: {external_id_column}. Error: {e}"
            )

    fc = get_stats(feature_collection)

    return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
328
+
329
+
330
def whisp_stats_ee_to_df(
    feature_collection: ee.FeatureCollection,
    external_id_column=None,  # This variable is expected to be a string or None
    remove_geom=False,
) -> pd.DataFrame:
    """
    Convert a Google Earth Engine FeatureCollection to a pandas DataFrame and convert ISO3 to ISO2 country codes.

    Parameters
    ----------
    feature_collection : ee.FeatureCollection
        The input FeatureCollection to analyze.
    external_id_column : str, optional
        The name of the external ID column, by default None.
    remove_geom : bool, optional
        Whether to remove the geometry column, by default False.

    Returns
    -------
    df_stats : pd.DataFrame
        The dataframe containing the Whisp stats for the input ROI.
        An empty DataFrame is returned if either conversion step fails.
    """
    try:
        df_stats = convert_ee_to_df(
            ee_object=whisp_stats_ee_to_ee(feature_collection, external_id_column),
            remove_geom=remove_geom,
        )
    except Exception as e:
        print(f"An error occurred during the conversion from EE to DataFrame: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error

    try:
        df_stats = convert_iso3_to_iso2(
            df=df_stats,
            iso3_column=iso3_country_column,
            iso2_column=iso2_country_column,
        )
    except Exception as e:
        print(f"An error occurred during the ISO3 to ISO2 conversion: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error

    return df_stats
372
+
373
+
374
def whisp_stats_ee_to_drive(
    feature_collection: ee.FeatureCollection, external_id_column=None
):
    """
    Export Whisp statistics for a FeatureCollection to Google Drive as a CSV.

    Parameters
    ----------
    feature_collection : ee.FeatureCollection
        The feature collection of the ROI to analyze.
    external_id_column : str, optional
        The name of the external ID column, by default None.
    """
    try:
        export_task = ee.batch.Export.table.toDrive(
            collection=whisp_stats_ee_to_ee(feature_collection, external_id_column),
            description="whisp_output_table",
            fileFormat="CSV",
        )
        export_task.start()
        print(
            "Exporting to Google Drive: 'whisp_results/whisp_output_table.csv'. To track progress: https://code.earthengine.google.com/tasks"
        )
    except Exception as e:
        print(f"An error occurred during the export: {e}")
391
+
392
+
393
+ #### main stats functions
394
+
395
+ # Get stats for a feature or feature collection
396
def get_stats(feature_or_feature_col):
    """
    Compute Whisp statistics for an ee.Feature or an ee.FeatureCollection.

    Parameters:
        feature_or_feature_col: An ee.Feature or ee.FeatureCollection.

    Returns:
        ee.FeatureCollection: The input with Whisp statistics attached.

    Raises:
        TypeError: If the input is neither an ee.Feature nor an ee.FeatureCollection.
    """
    if isinstance(feature_or_feature_col, ee.Feature):
        # Bug fix: get_stats_feature requires the combined image as a second
        # argument; the previous single-argument call raised a TypeError.
        img_combined = combine_datasets()
        return ee.FeatureCollection(
            [get_stats_feature(feature_or_feature_col, img_combined)]
        )
    if isinstance(feature_or_feature_col, ee.FeatureCollection):
        return get_stats_fc(feature_or_feature_col)
    # Bug fix: previously an error *string* was returned, which would fail
    # confusingly downstream; raise instead so the caller sees the problem.
    raise TypeError("Check inputs: not an ee.Feature or ee.FeatureCollection")
408
+
409
+
410
+ # Get statistics for a feature collection
411
def get_stats_fc(feature_col):
    """Compute Whisp statistics for every feature in a FeatureCollection."""
    # Build the combined analysis image once and reuse it for every feature.
    combined_image = combine_datasets()  # imported function

    return ee.FeatureCollection(
        feature_col.map(lambda feature: get_stats_feature(feature, combined_image))
    )
421
+
422
+
423
+ # Get statistics for a single feature
424
def get_stats_feature(feature, img_combined):
    """
    Compute Whisp statistics for a single feature.

    Sums each band of `img_combined` over the feature's geometry, converts the
    sums to hectares and percentages, attaches location/type/water-flag info,
    and sets the resulting properties on the feature. Whether hectare or
    percentage values are used is controlled by the module-level
    `percent_or_ha` setting.

    Parameters:
        feature (ee.Feature): The feature to analyze.
        img_combined (ee.Image): The combined multi-band analysis image
            (from combine_datasets()).

    Returns:
        ee.ComputedObject: The input feature with statistics properties set
        (an ee.Algorithms.If result resolving to an ee.Feature).
    """

    # Sum every band of the combined image over the feature's geometry.
    reduce = img_combined.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=feature.geometry(),
        scale=10,
        maxPixels=1e10,
        tileScale=8,
    )

    # Get basic feature information (country, admin area, geometry type,
    # centroid coordinates, water flag)
    feature_info = get_type_and_location(feature)

    # add statistics unit type (e.g., percentage or hectares) to dictionary
    stats_unit_type = ee.Dictionary({stats_unit_type_column: percent_or_ha})

    # Convert each summed value from m^2 to hectares (divide by 10,000),
    # formatted per the configured precision.
    reduce_ha = reduce.map(
        lambda key, val: divide_and_format(ee.Number(val), ee.Number(10000))
    )

    # Get value for hectares (the total geometry area, used as the
    # denominator for percentages below)
    area_ha = ee.Number(ee.Dictionary(reduce_ha).get(geometry_area_column))

    # Apply the function to each value in the dictionary using map()
    reduce_percent = reduce_ha.map(
        lambda key, val: percent_and_format(ee.Number(val), area_ha)
    )

    # Reformat the hectare statistics
    reducer_stats_ha = reduce_ha.set(
        geometry_area_column, area_ha.format(geometry_area_column_formatting)
    )  # area ha (formatted)

    # Reformat the percentage statistics (area column stays in hectares)
    reducer_stats_percent = reduce_percent.set(
        geometry_area_column, area_ha.format(geometry_area_column_formatting)
    )  # area ha (formatted)

    # Add country info onto hectare analysis results
    properties_ha = feature_info.combine(ee.Dictionary(reducer_stats_ha)).combine(
        stats_unit_type
    )

    # Add country info onto percentage analysis results
    properties_percent = feature_info.combine(
        ee.Dictionary(reducer_stats_percent)
    ).combine(stats_unit_type)

    # Choose whether to use hectares or percentage based on the `percent_or_ha` variable
    out_feature = ee.Algorithms.If(
        percent_or_ha == "ha",
        feature.set(properties_ha),  # .setGeometry(None),
        feature.set(properties_percent),  # .setGeometry(None),
    )

    return out_feature
483
+
484
+
485
+ # Get basic feature information - uses admin and water datasets in gee.
486
def get_type_and_location(feature):
    """Extracts basic feature information including country, admin area, geometry type, coordinates, and water flags."""

    # Centroid of the feature's geometry (1 m max error).
    plot_centroid = feature.geometry().centroid(1)

    # Country / admin-1 lookup from the geoBoundaries dataset.
    boundary_info = ee.Dictionary(get_geoboundaries_info(plot_centroid))
    country_dict = ee.Dictionary({iso3_country_column: boundary_info.get("shapeGroup")})
    admin_1_dict = ee.Dictionary(
        {admin_1_column: boundary_info.get("shapeName")}
    )  # Administrative level 1 (if available)

    # Water flag: samples the combined ocean/inland-water layer at the centroid.
    water_image = water_flag_all_prep()
    water_dict = value_at_point_flag(
        point=plot_centroid,
        image=water_image,
        band_name=water_flag,
        output_name=water_flag,
    )

    # Geometry type of the feature (e.g. Point / Polygon).
    type_dict = ee.Dictionary({geometry_type_column: feature.geometry().type()})

    # Centroid coordinates as separate x/y entries.
    xy = plot_centroid.coordinates()
    coords_dict = ee.Dictionary(
        {
            centroid_x_coord_column: xy.get(0),  # Longitude
            centroid_y_coord_column: xy.get(1),  # Latitude
        }
    )

    # Merge everything into one property dictionary.
    return (
        country_dict.combine(admin_1_dict)
        .combine(type_dict)
        .combine(coords_dict)
        .combine(water_dict)
    )
527
+
528
+
529
+ # Define a function to divide each value by 10,000 and format it with one decimal place
530
def divide_and_format(val, unit):
    """Divide *val* by *unit* and return it rounded per the configured area formatting."""
    scaled = ee.Number(val).divide(ee.Number(unit))
    # Round by formatting to a string, then parse back to a number.
    as_text = ee.Number(scaled).format(stats_area_columns_formatting)
    return ee.Number(ee.Number.parse(as_text))
539
+
540
+
541
+ # Define a function to divide by total area of geometry and multiply by 100
542
def percent_and_format(val, area_ha):
    """Express *val* as a percentage of *area_ha*, rounded per the configured percent formatting."""
    ratio = ee.Number(val).divide(area_ha).multiply(ee.Number(100))
    # Round by formatting to a string, then parse back to a number.
    as_text = ee.Number(ratio).format(stats_percent_columns_formatting)
    return ee.Number(ee.Number.parse(as_text))
550
+
551
+
552
+ # geoboundaries - admin units from a freqently updated database, allows commercial use (CC BY 4.0 DEED) (disputed territories may need checking)
553
def get_geoboundaries_info(geometry):
    """Look up the geoBoundaries ADM1 unit intersecting *geometry*; fall back to "Unknown"."""
    # Note: this is the ADM1 (first-level admin) collection, whose shapeGroup
    # property carries the country code.
    adm1_boundaries = ee.FeatureCollection("WM/geoLab/geoBoundaries/600/ADM1")
    intersecting = adm1_boundaries.filterBounds(geometry)
    fallback = ee.Dictionary({"shapeGroup": "Unknown", "shapeName": "Unknown"})
    return ee.Algorithms.If(
        intersecting.size().gt(0),
        intersecting.first().toDictionary().select(["shapeGroup", "shapeName"]),
        fallback,
    )
564
+
565
+
566
+ #####
566
+ # water flag - to flag plots that may be erroneous (i.e., where errors may have occurred in their creation / translation, so that they fall in either the ocean or inland water).
568
def usgs_gsv_ocean_prep():  # TODO: for speed, export this image as an asset at the same resolution as JRC
    """Build a binary "ocean" image (value 1 outside the global shoreline vector)."""
    # Load the three global shoreline vector (GSV) collections.
    mainlands = ee.FeatureCollection(
        "projects/sat-io/open-datasets/shoreline/mainlands"
    )
    big_islands = ee.FeatureCollection(
        "projects/sat-io/open-datasets/shoreline/big_islands"
    )
    small_islands = ee.FeatureCollection(
        "projects/sat-io/open-datasets/shoreline/small_islands"
    )

    # Merge them into a single land FeatureCollection.
    land = ee.FeatureCollection([mainlands, big_islands, small_islands]).flatten()

    # Rasterize: start from a constant 1, paint land polygons, then self-mask
    # so only ocean pixels (still 1) remain; rename the band "ocean".
    return ee.Image(1).paint(land).selfMask().rename("ocean")
589
+
590
+
591
def jrc_water_surface_prep():
    """Binary image of permanent inland water from the JRC Global Surface Water transition band."""
    transition = ee.Image("JRC/GSW1_4/GlobalSurfaceWater").select("transition")

    # Map "Permanent", "New Permanent" and "Seasonal to Permanent"
    # (classes 1, 2 and 7) to value 1; all other classes become 0.
    permanent_water = transition.remap([1, 2, 7], [1, 1, 1], 0).unmask()

    # optional - clip to within coast line (not needed currently and extra processing)
    # permanent_water = permanent_water.where(usgs_gsv_ocean_prep(), 0)

    return permanent_water.rename("water_inland")
607
+
608
+
609
def water_flag_all_prep():
    """Combine the ocean and inland-water layers into a single water-flag band."""
    ocean = usgs_gsv_ocean_prep().unmask()
    # Wherever permanent inland water is present, force the flag to 1 as well.
    return ocean.where(jrc_water_surface_prep(), 1).rename(water_flag)
617
+
618
+
619
def value_at_point_flag(point, image, band_name, output_name):
    """Sample an image at the given point and make a dictionary output where the name is defined by output_name parameter"""
    # Sample one pixel at the point and read the requested band's value.
    sampled = image.sample(region=point, scale=30, numPixels=1).first()
    band_value = sampled.get(band_name)

    # Wrap the raw value in a single-entry dictionary keyed by output_name.
    return ee.Dictionary({output_name: band_value})
631
+
632
+
633
def add_id_to_feature_collection(dataset, id_name):
    """
    Adds an incremental (1,2,3 etc) 'id' property to each feature in the given FeatureCollection.

    Args:
    - dataset: ee.FeatureCollection, the FeatureCollection to operate on.
    - id_name: str, the property name to store the sequential id under.

    Returns:
    - dataset_with_id: ee.FeatureCollection, the FeatureCollection with 'id' property added to each feature.
    """
    # Build a server-side mapping from each feature's system:index to a
    # 1-based sequential number.
    system_indexes = dataset.aggregate_array("system:index")
    sequential_ids = ee.List.sequence(1, system_indexes.size())
    id_by_index = ee.Dictionary.fromLists(system_indexes, sequential_ids)

    def _assign_id(feature):
        # Look up this feature's sequential id via its system:index.
        return feature.set(id_name, id_by_index.get(feature.get("system:index")))

    return dataset.map(_assign_id)
667
+
668
+
669
+ # Function to add ID to features
670
def add_id_to_feature(feature, id_name):
    """Copy a feature's system:index into the property named *id_name*."""
    return feature.set(id_name, feature.get("system:index"))
673
+
674
+
675
+ # Function to flag positive values
676
def flag_positive_values(feature, flag_positive):
    """Replace each listed property with "True" when its value is > 0, otherwise "-"."""
    for property_name in flag_positive:
        marker = ee.Algorithms.If(
            ee.Number(feature.get(property_name)).gt(0), "True", "-"
        )
        feature = feature.set(property_name, marker)
    return feature
683
+
684
+
685
+ # Function to exclude properties
686
def copy_properties_and_exclude(feature, exclude_properties_from_output):
    """Return a geometry-only copy of *feature* with all properties copied except the excluded ones."""
    bare_feature = ee.Feature(feature.geometry())
    return bare_feature.copyProperties(
        source=feature, exclude=exclude_properties_from_output
    )
690
+
691
+
692
def ee_image_checker(image):
    """
    Tests if the input is a valid ee.Image.

    Args:
        image: An ee.Image object.

    Returns:
        bool: True if the input is a valid ee.Image, False otherwise.
    """
    try:
        object_type = ee.Algorithms.ObjectType(image).getInfo()
        if object_type == "Image":
            # Force evaluation; an invalid image raises here.
            image.getInfo()
            return True
    except ee.EEException as e:
        print(f"Image validation failed with EEException: {e}")
    except Exception as e:
        print(f"Image validation failed with exception: {e}")
    return False
712
+
713
+
714
def keep_valid_images(image_list):
    """
    Filters a list to return only valid ee.Images.

    Args:
        image_list: List of ee.Image objects.

    Returns:
        list: List of valid ee.Image objects.
    """
    # Idiomatic comprehension instead of a manual append loop (ruff PERF401).
    return [image for image in image_list if ee_image_checker(image)]
729
+
730
+
731
def convert_iso3_to_iso2(df, iso3_column, iso2_column):
    """
    Converts ISO3 country codes to ISO2 codes and adds a new column to the DataFrame.

    Args:
        df (pd.DataFrame): Input DataFrame containing ISO3 country codes.
        iso3_column (str): The column name in the DataFrame with ISO3 country codes.
        iso2_column (str): The new column name to store ISO2 country codes.

    Returns:
        pd.DataFrame: Updated DataFrame with the new ISO2 column.
    """
    # Uses the module-level `import country_converter as coco`; the previous
    # duplicate function-level import was redundant and has been removed.
    # Falsy (empty/"Unknown"-style missing) codes are labelled rather than converted.
    df[iso2_column] = df[iso3_column].apply(
        lambda x: coco.convert(names=x, to="ISO2")
        if x
        else "not found (disputed territory)"
    )

    return df