RadGEEToolbox 1.6.10__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2623 @@
1
+ import ee
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+
6
+ class GenericCollection:
7
+ """
8
+ Represents a user-defined RadGEEToolbox class collection of any ee.ImageCollection from Google Earth Engine (GEE).
9
+
10
+ This class enables simplified definition, filtering, masking, and processing of generic geospatial imagery.
11
+ It supports multiple spatial and temporal filters, and caching for efficient computation. It also includes utilities for cloud masking,
12
+ mosaicking, zonal statistics, and transect analysis.
13
+
14
+ Initialization can be done by providing filtering parameters or directly passing in a pre-filtered GEE collection.
15
+
16
+ Inspect the documentation or source code for details on the methods and properties available.
17
+
18
+ Args:
19
+ start_date (str): Start date in 'YYYY-MM-dd' format. Required unless `collection` is provided.
20
+ end_date (str): End date in 'YYYY-MM-dd' format. Required unless `collection` is provided.
21
+ boundary (ee.Geometry, optional): A geometry for filtering to images that intersect with the boundary shape. Overrides `tile_path` and `tile_row` if provided.
22
+ collection (ee.ImageCollection, optional): A pre-filtered Landsat ee.ImageCollection object to be converted to a GenericCollection object. Overrides all other filters.
23
+
24
+ Attributes:
25
+ collection (ee.ImageCollection): The filtered or user-supplied image collection converted to an ee.ImageCollection object.
26
+
27
+ Raises:
28
+ ValueError: Raised if required filter parameters are missing, or if both `collection` and other filters are provided.
29
+
30
+ Note:
31
+ See full usage examples in the documentation or notebooks:
32
+ https://github.com/radwinskis/RadGEEToolbox/tree/main/Example%20Notebooks
33
+
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ collection=None,
39
+ start_date=None,
40
+ end_date=None,
41
+ boundary=None,
42
+ _dates_list=None
43
+ ):
44
+ if collection is None:
45
+ raise ValueError(
46
+ "The required `collection` argument has not been provided. Please specify an input ee.ImageCollection."
47
+ )
48
+
49
+ if isinstance(collection, GenericCollection):
50
+ base_collection = collection.collection
51
+ else:
52
+ # Otherwise, assume it's a valid ee.ImageCollection
53
+ base_collection = collection
54
+
55
+ if (start_date is not None and end_date is None) or \
56
+ (start_date is None and end_date is not None):
57
+ raise ValueError("Please provide both start_date and end_date, or provide neither for entire collection")
58
+
59
+ self.collection = base_collection
60
+ self.start_date = start_date
61
+ self.end_date = end_date
62
+ self.boundary = boundary
63
+
64
+ if self.start_date and self.end_date:
65
+ if self.boundary:
66
+ self.collection = self.get_boundary_and_date_filtered_collection()
67
+ else:
68
+ self.collection = self.get_filtered_collection()
69
+ elif self.boundary:
70
+ self.collection = self.get_boundary_filtered_collection()
71
+ else:
72
+ self.collection = self.get_generic_collection()
73
+
74
+ self._dates_list = _dates_list
75
+ self._dates = None
76
+ self._geometry_masked_collection = None
77
+ self._geometry_masked_out_collection = None
78
+ self._median = None
79
+ self._monthly_median = None
80
+ self._monthly_mean = None
81
+ self._monthly_sum = None
82
+ self._monthly_max = None
83
+ self._monthly_min = None
84
+ self._mean = None
85
+ self._max = None
86
+ self._min = None
87
+ self._MosaicByDate = None
88
+ self._PixelAreaSumCollection = None
89
+ self._daily_aggregate_collection = None
90
+
91
@staticmethod
def image_dater(image):
    """
    Adds the image acquisition date to the image properties as 'Date_Filter'.

    Args:
        image (ee.Image): Input image.

    Returns:
        ee.Image: The image with a 'Date_Filter' property holding the date
        formatted as a 'YYYY-MM-dd' string.
    """
    # The formatted date is a string, so it is cast as ee.String (not ee.Number).
    date = ee.String(image.date().format("YYYY-MM-dd"))
    return image.set({"Date_Filter": date})
104
+
105
+
106
@staticmethod
def anomaly_fn(image, geometry, band_name=None, anomaly_band_name=None, replace=True, scale=30):
    """
    Calculates the anomaly of a single band relative to its regional mean.

    The mean of the selected band is computed over `geometry` with a mean reducer,
    turned into a constant image, and subtracted from the band.

    Args:
        image (ee.Image): Image containing the band to process.
        geometry (ee.Geometry): Region over which the mean value is computed.
        band_name (str, optional): Name of the band to process. If not provided, the first band of the image is used.
        anomaly_band_name (str, optional): Name given to the output anomaly band. If not provided, the
            anomaly band keeps the name of the processed band.
        replace (bool, optional): If True (default), return only the anomaly band with the properties of the
            input image copied onto it. If False, add the anomaly band to the input image with overwrite
            enabled; when `anomaly_band_name` is not given the anomaly band therefore replaces the
            processed band because both share the same name.
        scale (int, optional): Scale in meters used for the mean reduction. Defaults to 30.

    Returns:
        ee.Image: The anomaly image (replace=True) or the input image with the anomaly band added (replace=False).
    """
    if not band_name:
        band_name = ee.String(image.bandNames().get(0))

    image_to_process = image.select([band_name])

    # Regional mean of the band, converted to a constant image for per-pixel subtraction.
    mean_image = image_to_process.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=geometry,
        scale=scale,
        maxPixels=1e13
    ).toImage()

    anomaly_image = image_to_process.subtract(mean_image)

    # band_name is always set at this point, so it is the fallback output name.
    output_name = band_name if anomaly_band_name is None else anomaly_band_name
    anomaly_image = anomaly_image.rename(output_name)

    if replace:
        # copyProperties returns a generic element; cast back to ee.Image for callers.
        return ee.Image(anomaly_image.copyProperties(image))
    return image.addBands(anomaly_image, overwrite=True)
158
+
159
@staticmethod
def mask_via_band_fn(image, band_to_mask, band_for_mask, threshold, mask_above=False, add_band_to_original_image=False):
    """
    Masks one band of an image using a threshold applied to another band of the same image.

    Example use case is masking vegetation from an image when targeting land pixels.

    Args:
        image (ee.Image): Input image containing both the target and the reference band.
        band_to_mask (str): Name of the band which will be masked (target band).
        band_for_mask (str): Name of the band the threshold is applied to (reference band).
        threshold (float): Threshold compared against the reference band.
        mask_above (bool): If True, pixels whose reference value is above the threshold are masked
            (values <= threshold are kept); if False, pixels below the threshold are masked
            (values >= threshold are kept).
        add_band_to_original_image (bool): If True, the masked band is written back into the input
            image (overwriting the band of the same name). If False, only the masked band is
            returned, with the properties of the input image copied onto it.

    Returns:
        ee.Image: masked ee.Image
    """
    target = image.select(band_to_mask)
    reference = image.select(band_for_mask)

    # updateMask keeps pixels where the mask is non-zero.
    if mask_above:
        keep = reference.lte(threshold)
    else:
        keep = reference.gte(threshold)

    masked = target.updateMask(keep).rename(band_to_mask)

    if add_band_to_original_image:
        return image.addBands(masked, overwrite=True)
    return ee.Image(masked.copyProperties(image))
186
+
187
@staticmethod
def mask_via_singleband_image_fn(image_to_mask, image_for_mask, threshold, band_name_to_mask=None, band_name_for_mask=None, mask_above=True):
    """
    Masks a band of a target image using a threshold applied to a band of a separate reference image.

    Designed for the case where the target and reference bands are in separate images.
    Example use case is masking vegetation from an image when targeting land pixels.

    Args:
        image_to_mask (ee.Image): Image which will be masked (target image). If multiband, only one band is masked.
        image_for_mask (ee.Image): Image the threshold is applied to (reference image). If multiband, only one band is used.
        threshold (float): Threshold compared against the reference band.
        band_name_to_mask (str, optional): Name of the band in image_to_mask to be masked. If None, the first band is used.
        band_name_for_mask (str, optional): Name of the band in image_for_mask used for masking. If None, the first band is used.
        mask_above (bool): If True, masks pixels whose reference value is above the threshold
            (values <= threshold are kept); if False, masks pixels below the threshold
            (values >= threshold are kept).

    Returns:
        ee.Image: The masked band, carrying the properties of image_to_mask.
    """
    if band_name_to_mask is None:
        band_to_mask = ee.String(image_to_mask.bandNames().get(0))
    else:
        band_to_mask = ee.String(band_name_to_mask)

    if band_name_for_mask is None:
        band_for_mask = ee.String(image_for_mask.bandNames().get(0))
    else:
        band_for_mask = ee.String(band_name_for_mask)

    band_to_mask_image = image_to_mask.select(band_to_mask)
    band_for_mask_image = image_for_mask.select(band_for_mask)

    # updateMask keeps pixels where the mask is non-zero, so masking pixels ABOVE the
    # threshold means keeping those at or below it (same convention as mask_via_band_fn).
    if mask_above:
        mask = band_for_mask_image.lte(threshold)
    else:
        mask = band_for_mask_image.gte(threshold)

    masked = band_to_mask_image.updateMask(mask).rename(band_to_mask)
    # copyProperties returns a generic element; cast back to ee.Image for callers.
    return ee.Image(masked.copyProperties(image_to_mask))
222
+
223
@staticmethod
def band_rename_fn(image, current_band_name, new_band_name):
    """Renames one band of an ee.Image (single- or multi-band).

    The band called `current_band_name` is renamed to `new_band_name`; all other
    band names are left as they are. If the band does not exist, the image is
    returned with its band names unchanged.

    Args:
        image (ee.Image): The input image (can be multiband).
        current_band_name (str): The existing band name to rename.
        new_band_name (str): The desired new band name.

    Returns:
        ee.Image: The image with the band renamed (or unchanged if not found).
    """
    source = ee.Image(image)
    old_name = ee.String(current_band_name)
    replacement = ee.String(new_band_name)
    names = source.bandNames()

    def _swap(name):
        # Substitute only the matching name; every other name passes through.
        return ee.String(
            ee.Algorithms.If(ee.String(name).equals(old_name), replacement, name)
        )

    renamed = source.rename(ee.List(names.map(_swap)))

    # Server-side choice between the renamed image and the untouched one.
    result = ee.Image(ee.Algorithms.If(names.contains(old_name), renamed, source))
    return result.copyProperties(source)
258
+
259
@staticmethod
def PixelAreaSum(
    image, band_name, geometry, threshold=-1, scale=30, maxPixels=1e12
):
    """
    Calculates the summed area of pixels of interest (at or above a threshold) inside a geometry
    and stores the value as an image property named after the band. If several band names are
    provided, the area is calculated for each band and stored as a separate property per band.

    NOTE: The resulting value has units of square meters.

    Args:
        image (ee.Image): input ee.Image
        band_name (string or list of strings): name of band(s) for calculating area. Pass multiple band names as a list (or tuple) of strings.
        geometry (ee.Geometry): ee.Geometry object denoting the region of the area calculation
        threshold (float): pixels with a band value below this threshold are excluded (defaults to -1). The same
            threshold is applied to all bands; when using several bands, best practice is to mask the bands
            beforehand and leave the threshold at its default.
        scale (int): scale of the reduction in meters (defaults to 30)
        maxPixels (int): maximum number of pixels for the reduction

    Returns:
        ee.Image: the input image with the area (square meters) stored as a property named after each band
    """
    # Always work with a server-side list; a single name is wrapped in a list.
    if isinstance(band_name, (list, tuple)):
        bands = ee.List(list(band_name))
    else:
        bands = ee.List([band_name])

    # Image whose pixel values are the area of each pixel in square meters.
    area_image = ee.Image.pixelArea()

    def calculate_and_set_area(band, img_accumulator):
        # Explicitly cast inputs to the expected types.
        img_accumulator = ee.Image(img_accumulator)
        band = ee.String(band)

        # Pixels at or above the threshold in the current band are counted.
        mask = img_accumulator.select(band).gte(threshold)

        # The area image is masked directly rather than being added to the input image and
        # selected back by name, which would pick the wrong band if the input already has
        # a band called "area".
        stats = (
            area_image.updateMask(mask)
            .rename(band)  # name the area band after the band, so it is the dictionary key
            .reduceRegion(
                reducer=ee.Reducer.sum(),
                geometry=geometry,
                scale=scale,
                maxPixels=maxPixels,
            )
        )
        reduced_area = stats.get(band)
        # Fall back to 0 when the reduction produced no usable value.
        area_value = ee.Algorithms.If(reduced_area, reduced_area, 0)

        return img_accumulator.set(band, area_value)

    # Fold over the bands, threading the image through as the accumulator.
    return ee.Image(bands.iterate(calculate_and_set_area, image))
319
+
320
def PixelAreaSumCollection(
    self, band_name, geometry, threshold=-1, scale=30, maxPixels=1e12, output_type='ImageCollection', area_data_export_path=None
):
    """
    Calculates the summed area of pixels of interest (at or above a threshold) within a geometry
    for every image of the collection, storing the value as an image property named after the band.
    Optionally exports the area data to a CSV file.

    The result is cached on the instance and reused only when the method is called again with the
    same band name(s), geometry object, threshold, scale and maxPixels; otherwise it is recomputed.

    NOTE: The resulting value has units of square meters.

    Args:
        band_name (string or list of strings): name of band(s) for calculating area. Pass multiple band names as a list of strings.
        geometry (ee.Geometry): ee.Geometry object denoting the region of the area calculation
        threshold (float): pixels with a band value below this threshold are excluded (defaults to -1).
        scale (int): scale of the reduction in meters (defaults to 30)
        maxPixels (int): maximum number of pixels for the reduction
        output_type (str): 'ImageCollection' to return an ee.ImageCollection, 'GenericCollection' to return a GenericCollection object (defaults to 'ImageCollection')
        area_data_export_path (str, optional): If provided, the area data is saved to a CSV file at this path ('.csv' is appended when missing).

    Returns:
        ee.ImageCollection or GenericCollection: images with the area (square meters) stored as a property named after each band.

    Raises:
        ValueError: If `output_type` is not 'ImageCollection' or 'GenericCollection'.
    """
    # Reject an invalid output type before doing any work.
    if output_type not in ('ImageCollection', 'GenericCollection'):
        raise ValueError("output_type must be 'ImageCollection' or 'GenericCollection'")

    # The cache is only valid for the arguments it was computed with.
    band_key = tuple(band_name) if isinstance(band_name, (list, tuple)) else band_name
    request = (band_key, threshold, scale, maxPixels)
    cached = getattr(self, '_PixelAreaSumCollection', None)
    cache_is_valid = (
        cached is not None
        and getattr(self, '_PixelAreaSumCollection_args', None) == request
        and getattr(self, '_PixelAreaSumCollection_geometry', None) is geometry
    )

    if not cache_is_valid:
        cached = self.collection.map(
            lambda image: GenericCollection.PixelAreaSum(
                image,
                band_name=band_name,
                geometry=geometry,
                threshold=threshold,
                scale=scale,
                maxPixels=maxPixels,
            )
        )
        self._PixelAreaSumCollection = cached
        self._PixelAreaSumCollection_args = request
        self._PixelAreaSumCollection_geometry = geometry

    if area_data_export_path:
        # Append the extension only when it is missing, so "out.csv" is not turned into "out.csv.csv".
        export_path = area_data_export_path
        if not export_path.lower().endswith('.csv'):
            export_path += '.csv'
        GenericCollection(collection=cached).ExportProperties(property_names=band_name, file_path=export_path)

    if output_type == 'ImageCollection':
        return cached
    return GenericCollection(collection=cached)
370
+
371
+ def combine(self, other):
372
+ """
373
+ Combines the current GenericCollection with another GenericCollection, using the `combine` method.
374
+
375
+ Args:
376
+ other (GenericCollection): Another GenericCollection to combine with current collection.
377
+
378
+ Returns:
379
+ GenericCollection: A new GenericCollection containing images from both collections.
380
+ """
381
+ # Checking if 'other' is an instance of GenericCollection
382
+ if not isinstance(other, GenericCollection):
383
+ raise ValueError("The 'other' parameter must be an instance of GenericCollection.")
384
+
385
+ # Merging the collections using the .combine() method
386
+ merged_collection = self.collection.combine(other.collection)
387
+ return GenericCollection(collection=merged_collection)
388
+
389
def merge(self, collections=None, multiband_collection=None, date_key='Date_Filter'):
    """
    Merge many singleband GenericCollection products into the parent collection,
    or merge a single multiband collection with the parent collection,
    pairing images by exact `date_key` value and returning one multiband image per date.

    Only dates present in both collections are kept (inner join). Bands whose names already
    exist in the parent image are overwritten.

    NOTE: if you want to merge two multiband collections, use the `combine` method instead.

    Args:
        collections (list): List of singleband GenericCollection objects to merge with the parent collection;
            the first band of each is added to every matching parent image.
        multiband_collection (GenericCollection, optional): A multiband collection to merge with the parent;
            all of its bands are added. When provided it takes precedence over `collections`.
        date_key (str): image property key used for exact pairing (default 'Date_Filter')

    Returns:
        GenericCollection: parent with the extra bands attached (one image per matched date)

    Raises:
        ValueError: If `multiband_collection` is given but is not a GenericCollection, or if
            `collections` is not a non-empty list of GenericCollection objects.
    """

    def _join_on_date(primary, secondary, pick_bands):
        """Inner-join two collections on `date_key` and add the picked secondary bands to each primary image."""
        date_match = ee.Filter.equals(leftField=date_key, rightField=date_key)
        paired = ee.Join.inner().apply(primary, secondary, date_match)

        def _attach(feature):
            feature = ee.Feature(feature)
            parent = ee.Image(feature.get('primary'))
            extra = pick_bands(ee.Image(feature.get('secondary')))
            # Overwrite on band-name collision.
            merged = parent.addBands(extra, None, True)
            # Keep parent properties and the date key.
            merged = merged.copyProperties(parent, parent.propertyNames())
            merged = merged.set(date_key, parent.get(date_key))
            return ee.Image(merged)

        return ee.ImageCollection(paired.map(_attach))

    # A multiband collection takes precedence over `collections`, as documented.
    if multiband_collection is not None:
        if not isinstance(multiband_collection, GenericCollection):
            raise ValueError("`multiband_collection` must be a GenericCollection object.")
        joined = _join_on_date(self.collection, multiband_collection.collection, lambda img: img)
        return GenericCollection(collection=joined)

    # Preferred path: merge many singleband products into the parent.
    if not isinstance(collections, list) or len(collections) == 0:
        raise ValueError("Provide a non-empty list of GenericCollection objects in `collections`.")
    if not all(isinstance(extra, GenericCollection) for extra in collections):
        raise ValueError("All items in `collections` must be GenericCollection objects.")

    def _first_band(img):
        # Singleband product assumed; its first band name is resolved server-side.
        return img.select([ee.String(img.bandNames().get(0))])

    result = self.collection
    for extra in collections:
        result = _join_on_date(result, extra.collection, _first_band)

    return GenericCollection(collection=result)
454
+
455
@property
def dates_list(self):
    """
    Server-side (GEE) list of the 'Date_Filter' values of the collection.

    The list is built on first access and cached for later use.

    Returns:
        ee.List: Server-side ee.List of dates.
    """
    cached = self._dates_list
    if cached is None:
        cached = self.collection.aggregate_array("Date_Filter")
        self._dates_list = cached
    return cached
467
+
468
@property
def dates(self):
    """
    Client-side, readable and indexable list of the collection dates.

    The server-side list is built (and cached) if needed, then fetched with
    `getInfo()` on first access; the fetched list is cached as well.

    Returns:
        list: list of date strings.
    """
    server_dates = self._dates_list
    if server_dates is None:
        server_dates = self.collection.aggregate_array("Date_Filter")
        self._dates_list = server_dates

    if self._dates is None:
        self._dates = server_dates.getInfo()
    return self._dates
483
+
484
def ExportProperties(self, property_names, file_path=None):
    """
    Fetches the specified properties from each image in the collection, returns them as a pandas DataFrame and optionally saves the result to a csv file.

    Args:
        property_names (list or str): A property name or list of property names to retrieve. The 'Date_Filter' property is always included to provide temporal context.
        file_path (str, optional): If provided, the resulting DataFrame is saved to a CSV file at this path ('.csv' is appended when missing). Defaults to None.

    Returns:
        pd.DataFrame: A pandas DataFrame containing the requested properties for each image, sorted chronologically by 'Date_Filter'.
    """
    # Ensure property_names is a list for consistent processing
    if isinstance(property_names, str):
        property_names = [property_names]

    # De-duplicate while keeping a stable order ('Date_Filter' first, then the caller's order).
    # A set would make the order differ from run to run.
    all_properties_to_fetch = list(dict.fromkeys(['Date_Filter', *property_names]))

    def create_feature_with_properties(image):
        """Store the requested image properties on a geometry-less ee.Feature."""
        return ee.Feature(None, image.toDictionary(all_properties_to_fetch))

    # Map over the server-side collection; each feature holds the properties of one image.
    feature_collection = ee.FeatureCollection(
        self.collection.map(create_feature_with_properties)
    )

    # ee_to_df is defined elsewhere in the class; it is expected to fetch the features
    # and convert them to a pandas DataFrame.
    df = GenericCollection.ee_to_df(feature_collection, columns=all_properties_to_fetch)

    # Sort by date for a clean, chronological output.
    if 'Date_Filter' in df.columns:
        df = df.sort_values(by='Date_Filter').reset_index(drop=True)

    if file_path:
        # Append the extension only when it is missing.
        if not file_path.lower().endswith('.csv'):
            file_path += '.csv'
        df.to_csv(file_path, index=True)
        print(f"Properties saved to {file_path}")

    return df
537
+
538
def get_generic_collection(self):
    """
    Prepares the collection when neither dates nor a boundary were supplied.

    Every image is tagged with a 'Date_Filter' property and the collection is sorted by it.
    No spatial or temporal filtering is applied.

    Returns:
        ee.ImageCollection: Date-tagged, chronologically sorted image collection.
    """
    dated = self.collection.map(GenericCollection.image_dater)
    return dated.sort("Date_Filter")
551
+
552
def get_filtered_collection(self):
    """
    Prepares the collection when only a date range was supplied.

    The collection is filtered from `start_date` up to one day after `end_date`
    (so the end date itself is included), every image is tagged with a
    'Date_Filter' property, and the collection is sorted by it.

    Returns:
        ee.ImageCollection: Date-filtered, date-tagged, chronologically sorted image collection.
    """
    range_start = ee.Date(self.start_date)
    # filterDate treats its end as exclusive, hence the one-day advance.
    range_end = ee.Date(self.end_date).advance(1, 'day')

    in_range = self.collection.filterDate(range_start, range_end)
    dated = in_range.map(GenericCollection.image_dater)
    return dated.sort("Date_Filter")
566
+
567
def get_boundary_filtered_collection(self):
    """
    Prepares the collection when only a boundary was supplied.

    The collection is filtered to images intersecting `boundary`, every image is
    tagged with a 'Date_Filter' property, and the collection is sorted by it.

    Returns:
        ee.ImageCollection: Boundary-filtered, date-tagged, chronologically sorted image collection.
    """
    in_bounds = self.collection.filterBounds(self.boundary)
    dated = in_bounds.map(GenericCollection.image_dater)
    return dated.sort("Date_Filter")
582
+
583
def get_boundary_and_date_filtered_collection(self):
    """
    Prepares the collection when both a date range and a boundary were supplied.

    The collection is filtered from `start_date` up to one day after `end_date`
    (so the end date itself is included), then to images intersecting `boundary`;
    every image is tagged with a 'Date_Filter' property and the collection is sorted by it.

    Returns:
        ee.ImageCollection: Date- and boundary-filtered, date-tagged, chronologically sorted image collection.
    """
    range_start = ee.Date(self.start_date)
    # filterDate treats its end as exclusive, hence the one-day advance.
    range_end = ee.Date(self.end_date).advance(1, 'day')

    in_range = self.collection.filterDate(range_start, range_end)
    in_bounds = in_range.filterBounds(self.boundary)
    dated = in_bounds.map(GenericCollection.image_dater)
    return dated.sort("Date_Filter")
599
+
600
@property
def median(self):
    """
    Median image of the whole collection, computed on first access and cached afterwards.

    Returns:
        ee.Image: median image from entire collection.
    """
    cached = self._median
    if cached is None:
        cached = self.collection.median()
        self._median = cached
    return cached
612
+
613
@property
def mean(self):
    """
    Mean image of the whole collection, computed on first access and cached afterwards.

    Returns:
        ee.Image: mean image from entire collection.
    """
    cached = self._mean
    if cached is None:
        cached = self.collection.mean()
        self._mean = cached
    return cached
626
+
627
@property
def max(self):
    """
    Max image of the whole collection, computed on first access and cached afterwards.

    Returns:
        ee.Image: max image from entire collection.
    """
    cached = self._max
    if cached is None:
        cached = self.collection.max()
        self._max = cached
    return cached
639
+
640
@property
def min(self):
    """
    Min image of the whole collection, computed on first access and cached afterwards.

    Returns:
        ee.Image: min image from entire collection.
    """
    cached = self._min
    if cached is None:
        cached = self.collection.min()
        self._min = cached
    return cached
652
+
653
def _monthly_composites(self, reduce_month):
    """Build one composite image per calendar month spanned by the collection.

    Shared implementation behind the ``monthly_*_collection`` properties, which
    differ only in how a month of images is collapsed into a single image.

    Every composite is tagged with:
        - 'system:time_start': the first day of its month, in milliseconds.
        - 'month' / 'year': calendar month and year of the composite.
        - 'Date_Filter': the first day of its month formatted 'YYYY-MM-dd'.
        - 'image_count': how many source images fell inside the month.

    Args:
        reduce_month (callable): Receives the ee.ImageCollection holding one
            month of images and returns the composite ee.Image, e.g.
            ``lambda images: images.median()``.

    Returns:
        GenericCollection: The monthly composites, excluding every month whose
        'image_count' is 0.
    """
    collection = self.collection
    # Every composite is reprojected to the projection of the first image.
    target_proj = collection.first().projection()

    # Earliest and latest 'system:time_start' found in the collection.
    time_extent = collection.reduceColumns(ee.Reducer.minMax(), ["system:time_start"])
    first_observed = ee.Date(time_extent.get('min'))
    last_observed = ee.Date(time_extent.get('max'))

    # Snap the start back to day 1 so that every generated date is a month boundary.
    first_month = ee.Date.fromYMD(first_observed.get('year'), first_observed.get('month'), 1)

    # The difference is rounded so the sequence below is built from a whole number.
    month_span = last_observed.difference(first_month, 'month').round()
    month_starts = ee.List.sequence(0, month_span).map(
        lambda offset: first_month.advance(offset, 'month')
    )

    def composite_for_month(month_start):
        month_start = ee.Date(month_start)
        # The end of the filter window is exclusive, so the first day of the
        # following month closes the current one.
        images_in_month = collection.filterDate(month_start, month_start.advance(1, 'month'))
        return reduce_month(images_in_month).set({
            'system:time_start': month_start.millis(),
            'month': month_start.get('month'),
            'year': month_start.get('year'),
            'Date_Filter': month_start.format('YYYY-MM-dd'),
            'image_count': images_in_month.size()
        }).reproject(target_proj)

    composites = ee.ImageCollection.fromImages(month_starts.map(composite_for_month))

    # Months that matched no images would otherwise leave empty composites behind.
    return GenericCollection(collection=composites.filter(ee.Filter.gt('image_count', 0)))

@property
def monthly_median_collection(self):
    """Monthly median composites of the collection, computed once and cached.

    For each calendar month between the earliest and latest image, the
    per-band median of that month's images is taken. Each composite has
    'system:time_start' set to the first day of its month plus 'month',
    'year', 'Date_Filter' and 'image_count' properties. Months with no
    images are excluded.

    Returns:
        GenericCollection: A new GenericCollection object with monthly median composites.
    """
    if self._monthly_median is None:
        self._monthly_median = self._monthly_composites(lambda images: images.median())
    return self._monthly_median

@property
def monthly_mean_collection(self):
    """Monthly mean composites of the collection, computed once and cached.

    For each calendar month between the earliest and latest image, the
    per-band mean of that month's images is taken. Each composite has
    'system:time_start' set to the first day of its month plus 'month',
    'year', 'Date_Filter' and 'image_count' properties. Months with no
    images are excluded.

    Returns:
        GenericCollection: A new GenericCollection object with monthly mean composites.
    """
    if self._monthly_mean is None:
        self._monthly_mean = self._monthly_composites(lambda images: images.mean())
    return self._monthly_mean

@property
def monthly_sum_collection(self):
    """Monthly sum composites of the collection, computed once and cached.

    For each calendar month between the earliest and latest image, the
    per-band sum of that month's images is taken. Each composite has
    'system:time_start' set to the first day of its month plus 'month',
    'year', 'Date_Filter' and 'image_count' properties. Months with no
    images are excluded.

    Returns:
        GenericCollection: A new GenericCollection object with monthly sum composites.
    """
    if self._monthly_sum is None:
        self._monthly_sum = self._monthly_composites(lambda images: images.sum())
    return self._monthly_sum

@property
def monthly_max_collection(self):
    """Monthly max composites of the collection, computed once and cached.

    For each calendar month between the earliest and latest image, the
    per-band maximum of that month's images is taken. Each composite has
    'system:time_start' set to the first day of its month plus 'month',
    'year', 'Date_Filter' and 'image_count' properties. Months with no
    images are excluded.

    Returns:
        GenericCollection: A new GenericCollection object with monthly max composites.
    """
    if self._monthly_max is None:
        self._monthly_max = self._monthly_composites(lambda images: images.max())
    return self._monthly_max

@property
def monthly_min_collection(self):
    """Monthly min composites of the collection, computed once and cached.

    For each calendar month between the earliest and latest image, the
    per-band minimum of that month's images is taken. Each composite has
    'system:time_start' set to the first day of its month plus 'month',
    'year', 'Date_Filter' and 'image_count' properties. Months with no
    images are excluded.

    Returns:
        GenericCollection: A new GenericCollection object with monthly min composites.
    """
    if self._monthly_min is None:
        self._monthly_min = self._monthly_composites(lambda images: images.min())
    return self._monthly_min
1067
+
1068
@property
def daily_aggregate_collection_from_properties(self):
    """
    Daily-summed version of the collection, grouped by the 'Date_Filter' property.

    Images that share a 'Date_Filter' value are summed into a single image, which
    suits collections with several images per day (e.g., 3-hour SMAP data). Each
    daily image carries the 'system:time_start' of that day's first image, its
    'Date_Filter' value and an 'images_summed' count. The result is built on first
    access and cached on the object. Server-side friendly.

    NOTE: This function sums all bands.

    Returns:
        GenericCollection: GenericCollection image collection with daily summed imagery.
    """
    if self._daily_aggregate_collection is not None:
        return self._daily_aggregate_collection

    source = self.collection

    def append_daily_sum(day, accumulated):
        # iterate() hands both arguments over untyped, so cast before use.
        day = ee.String(day)
        same_day = source.filter(ee.Filter.eq("Date_Filter", day))

        # The summed image does not keep 'Date_Filter', so metadata is re-attached:
        # the timestamp from the day's first image, then the grouping key and count.
        summed = (
            same_day.sum()
            .copyProperties(ee.Image(same_day.first()), ["system:time_start"])
            .set("Date_Filter", day)
            .set('images_summed', same_day.size())
        )
        return ee.List(accumulated).add(summed)

    # Unique dates present on the collection's images.
    unique_days = source.aggregate_array("Date_Filter").distinct()

    # Fold over the dates, growing a list with one summed image per date.
    daily_images = unique_days.iterate(append_daily_sum, ee.List([]))

    self._daily_aggregate_collection = GenericCollection(
        collection=ee.ImageCollection.fromImages(daily_images)
    )
    return self._daily_aggregate_collection
1136
+
1137
def daily_aggregate_collection(self, method='algorithmic'):
    """
    Sums all images that share a date into one image per day.

    Intended for collections holding several images per day (e.g., 3-hour SMAP
    data) that must be reduced to daily totals. Images are grouped by their
    'Date_Filter' property; each daily image keeps the 'system:time_start' of the
    day's first image and gains an 'images_summed' count. Dates without any image
    contribute nothing to the result. Server-side friendly.

    Args:
        method (str, optional): How the list of dates to visit is produced.
            - 'algorithmic' (default): every day from self.start_date through
              self.end_date is generated without reading the collection.
              Requires start/end dates to be set on the object.
            - 'aggregate': the distinct 'Date_Filter' values are collected from
              the collection itself. This can cause a 'User memory limit
              exceeded' error on very large collections.

    Returns:
        GenericCollection: A new GenericCollection image collection with daily summed imagery.

    Raises:
        ValueError: If 'algorithmic' is requested while self.start_date or
            self.end_date is not set, or if `method` is not a recognised option.
    """
    source = self.collection

    if method == 'aggregate':
        # Original, memory-intensive route: read every 'Date_Filter' value.
        days_to_visit = source.aggregate_array("Date_Filter").distinct()
    elif method == 'algorithmic':
        if not (self.start_date and self.end_date):
            raise ValueError(
                "The 'algorithmic' method requires start_date and end_date to be "
                "set on the GenericCollection object. Initialize the object "
                "with start_date and end_date, or use method='aggregate'."
            )
        first_day = ee.Date(self.start_date)
        day_span = ee.Date(self.end_date).difference(first_day, 'day').round()

        # Day offsets 0..day_span, each turned into a 'YYYY-MM-dd' string.
        days_to_visit = ee.List.sequence(0, day_span).map(
            lambda offset: first_day.advance(offset, 'day').format('YYYY-MM-dd')
        )
    else:
        raise ValueError(f"Unknown method '{method}'. Must be 'algorithmic' or 'aggregate'.")

    def append_daily_sum(day, accumulated):
        # iterate() hands both arguments over untyped, so cast before use.
        day = ee.String(day)
        accumulated = ee.List(accumulated)

        same_day = source.filter(ee.Filter.eq("Date_Filter", day))
        n_images = same_day.size()

        # Summed image for the day, with the first image's timestamp plus
        # the grouping key and the number of images that went into it.
        summed = (
            same_day.sum()
            .copyProperties(ee.Image(same_day.first()), ["system:time_start"])
            .set("Date_Filter", day)
            .set('images_summed', n_images)
        )

        # Only extend the list when the day actually had images; otherwise
        # hand the accumulator back untouched.
        return ee.Algorithms.If(n_images.gt(0), accumulated.add(summed), accumulated)

    daily_images = days_to_visit.iterate(append_daily_sum, ee.List([]))

    return GenericCollection(collection=ee.ImageCollection.fromImages(daily_images))
1249
+
1250
def daily_aggregate_collection_via_join(self, method='algorithmic'):
    """
    Aggregates (sums) collection images that share the same date based on a join approach.

    A feature collection holding one feature per candidate date is joined to the
    image collection on the 'Date_Filter' property with ee.Join.saveAll, and the
    images matched to each date are summed into one daily image. Each daily image
    takes 'system:time_start' from the first matched image and is tagged with
    'Date_Filter' and 'images_summed'.

    Args:
        method (str): How the list of candidate dates is produced.
            - 'algorithmic' (default): every day from self.start_date through
              self.end_date is generated without reading the collection.
              Requires start/end dates to be set on the object.
            - 'aggregate': the distinct 'Date_Filter' values are collected from
              the collection's images.
            Neither option calls getInfo().

    Returns:
        Image Collection (GenericCollection): The daily aggregated image collection as a GenericCollection object type.

    Raises:
        ValueError: If 'algorithmic' is requested while self.start_date or
            self.end_date is not set, or if `method` is not a recognised option.
    """
    input_collection = self.collection

    if method == 'algorithmic':
        if not self.start_date or not self.end_date:
            raise ValueError(
                "The 'algorithmic' method requires start_date and end_date to be "
                "set on the GenericCollection object. Initialize the object "
                "with start_date and end_date, or use method='aggregate'."
            )

        start_date = ee.Date(self.start_date)
        end_date = ee.Date(self.end_date)
        num_days = end_date.difference(start_date, 'day').round()

        # Server-side list of 'YYYY-MM-dd' strings, one per day offset.
        distinct_dates = ee.List.sequence(0, num_days).map(
            lambda n: start_date.advance(n, 'day').format('YYYY-MM-dd')
        )

    elif method == 'aggregate':
        distinct_dates = input_collection.aggregate_array("Date_Filter").distinct()

    else:
        raise ValueError(f"Unknown method '{method}'. Must be 'algorithmic' or 'aggregate'.")

    # One geometry-less feature per candidate date: the primary side of the join.
    dummy_dates_fc = ee.FeatureCollection(
        distinct_dates.map(
            lambda date_str: ee.Feature(None, {'Date_Filter': ee.String(date_str)})
        )
    )

    # Attach to each date feature, under 'matches', the images with the same 'Date_Filter'.
    date_filter = ee.Filter.equals(leftField='Date_Filter', rightField='Date_Filter')
    joined_fc = ee.Join.saveAll(matchesKey='matches').apply(
        dummy_dates_fc, input_collection, date_filter
    )

    def sum_daily_images(feature_with_matches):
        images_list = ee.List(feature_with_matches.get('matches'))
        image_count = images_list.size()

        image_collection_for_day = ee.ImageCollection.fromImages(images_list)
        first_image = ee.Image(image_collection_for_day.first())

        # Cast back to ee.Image after copyProperties, as export_daily_sum_to_asset does.
        daily_sum = ee.Image(
            image_collection_for_day.sum().copyProperties(first_image, ["system:time_start"])
        ).set(
            'Date_Filter', feature_with_matches.get('Date_Filter'),
            'images_summed', image_count
        )

        # A date without images yields null, which the map call below drops.
        return ee.Algorithms.If(image_count.gt(0), daily_sum, None)

    # The drop-nulls flag is passed positionally: NOTE(review) the keyword appears
    # to have been named opt_dropNulls in older earthengine-api releases and
    # dropNulls in newer ones - confirm against the supported versions.
    image_collection = joined_fc.map(sum_daily_images, True)

    # Explicitly cast to an ImageCollection to avoid client-side confusion
    final_collection = ee.ImageCollection(image_collection)

    # NOTE(review): with method='algorithmic', distinct_dates holds every calendar
    # day in the range, including days that produced no image - confirm that
    # _dates_list is meant to receive that full list.
    return GenericCollection(
        collection=final_collection,
        start_date=self.start_date,
        end_date=self.end_date,
        boundary=self.boundary,
        _dates_list=distinct_dates
    )
1335
+
1336
def export_daily_sum_to_asset(
    self,
    asset_collection_path,
    region,
    scale,
    filename_prefix="",
    crs=None,
    max_pixels=int(1e13),
    description_prefix="export"
):
    """
    Exports a daily-summed (aggregated) collection to a GEE Asset Collection.

    Intended for a collection with sub-daily data (e.g., 3-hourly). One small,
    independent export task is started per day, and each task sums *only* the
    images whose 'Date_Filter' equals that day, so no task has to recompute the
    whole collection. Days inside the date range that hold no images are not
    queued, mirroring daily_aggregate_collection, which also skips empty days.

    It requires self.start_date and self.end_date to be set on the
    GenericCollection object.

    Args:
        asset_collection_path (str): The path to the asset collection. It is
            created as an ImageCollection asset if looking it up fails.
        region (ee.Geometry): The region to export.
        scale (int): The scale of the export.
        filename_prefix (str, optional): Prefix placed before the date in each
            asset name and task description. Defaults to "", i.e. blank.
        crs (str, optional): The coordinate reference system. Only passed to
            the export when provided. Defaults to None.
        max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
        description_prefix (str, optional): The description prefix. Defaults to "export".

    Returns:
        None: (queues export tasks)

    Raises:
        ValueError: If self.start_date or self.end_date is not set.
    """
    # This is the *original* 3-hourly (or sub-daily) collection
    original_collection = self.collection

    if not self.start_date or not self.end_date:
        raise ValueError(
            "export_daily_sum_to_asset requires start_date and end_date "
            "to be set on the GenericCollection object."
        )

    # --- 1. Calendar days in the range, as a client-side list ---
    start_date = ee.Date(self.start_date)
    end_date = ee.Date(self.end_date)
    num_days = end_date.difference(start_date, 'day').round()

    # Lowercase 'dd' is day of month.
    date_list = ee.List.sequence(0, num_days).map(
        lambda n: start_date.advance(n, 'day').format('YYYY-MM-dd')
    ).getInfo()

    # --- 2. Drop days that have no images ---
    # A single histogram request reports how many images carry each 'Date_Filter'
    # value. If that request fails, every calendar day is queued as before.
    try:
        images_per_day = original_collection.aggregate_histogram('Date_Filter').getInfo()
    except ee.EEException:
        images_per_day = None
    if images_per_day is not None:
        date_list = [day for day in date_list if images_per_day.get(day, 0) > 0]

    # --- 3. Create Asset Collection (if needed) ---
    try:
        ee.data.getAsset(asset_collection_path)
    except ee.EEException:
        print(f"Creating new asset collection: {asset_collection_path}")
        ee.data.createAsset({'type': 'ImageCollection'}, asset_collection_path)

    print(f"Queuing {len(date_list)} small, daily-sum export tasks...")

    # --- 4. One small task per remaining day ---
    for date_str in date_list:
        # Only this day's images take part in the sum.
        daily_images = original_collection.filter(
            ee.Filter.eq('Date_Filter', date_str)
        )

        # The day's first image supplies the timestamp of the summed image.
        first_image = daily_images.first()
        daily_sum = ee.Image(
            daily_images.sum().copyProperties(first_image, ["system:time_start"])
        )
        daily_sum = daily_sum.set(
            'Date_Filter', date_str,
            'images_summed', daily_images.size()
        )

        asset_id = asset_collection_path + "/" + filename_prefix + date_str
        desc = description_prefix + "_" + filename_prefix + date_str

        params = {
            'image': daily_sum,
            'description': desc,
            'assetId': asset_id,
            'region': region,
            'scale': scale,
            'maxPixels': max_pixels
        }
        if crs:
            params['crs'] = crs

        # Start the server-side export task
        ee.batch.Export.image.toAsset(**params).start()

    print("All", len(date_list), "export tasks queued to", asset_collection_path)
1444
+
1445
def smap_flux_to_mm(self):
    """
    Scales every image of the collection by 10800 and tags the bands with '_mm'.

    The factor 10800 is the number of seconds in a 3-hour step
    (3 hours * 60 min/hr * 60 sec/min). The method is intended for a
    daily-summed SMAP flux collection (kg/m²/s), where the product is a
    daily total in mm/day under the assumption that 1 kg/m² equals 1 mm of water.

    Each output band keeps its original name with the suffix '_mm' appended,
    and all properties of the source image are copied onto the result.

    Returns:
        GenericCollection: A new collection holding the scaled images. The start date,
        end date, boundary and cached dates list of this object are passed along.
    """
    seconds_per_step = 10800

    def _with_mm_suffix(name):
        # Band names arrive as generic server-side objects; cast before concatenating.
        return ee.String(name).cat('_mm')

    def _scale_image(img):
        suffixed_names = img.bandNames().map(_with_mm_suffix)
        scaled = img.multiply(seconds_per_step).rename(suffixed_names)
        # multiply() does not carry over the image properties, so copy them back.
        return scaled.copyProperties(img, img.propertyNames())

    return GenericCollection(
        collection=self.collection.map(_scale_image),
        start_date=self.start_date,
        end_date=self.end_date,
        boundary=self.boundary,
        _dates_list=self._dates_list,  # reuse the already-cached dates
    )
1479
+
1480
def mask_to_polygon(self, polygon):
    """
    Masks every image of the collection so that only pixels inside a polygon remain.

    The result is cached on the object. The cache is reused only when the method is
    called again with the very same polygon object; a different polygon triggers a
    fresh computation (previously the first result was returned for every polygon).

    Args:
        polygon (ee.Geometry): ee.Geometry polygon or shape used to mask image collection.

    Returns:
        GenericCollection: masked GenericCollection image collection

    """
    cached = self._geometry_masked_collection
    # getattr() keeps this working even if __init__ never defined the companion attribute.
    cached_for = getattr(self, "_geometry_masked_polygon", None)

    if cached is None or cached_for is not polygon:
        # A constant image clipped to the polygon is valid only inside the polygon.
        mask = ee.Image.constant(1).clip(polygon)

        # Update the mask of each image in the collection
        masked_collection = self.collection.map(lambda img: img.updateMask(mask))

        cached = GenericCollection(collection=masked_collection)
        self._geometry_masked_collection = cached
        # Remember which polygon produced the cached result.
        self._geometry_masked_polygon = polygon

    return cached
1505
+
1506
def mask_out_polygon(self, polygon):
    """
    Masks every image of the collection so that pixels inside a polygon are removed.

    The result is cached on the object. The cache is reused only when the method is
    called again with the very same polygon object; a different polygon triggers a
    fresh computation (previously the first result was returned for every polygon).

    Args:
        polygon (ee.Geometry): ee.Geometry polygon or shape used to mask image collection.

    Returns:
        GenericCollection: masked GenericCollection image collection

    """
    cached = self._geometry_masked_out_collection
    # getattr() keeps this working even if __init__ never defined the companion attribute.
    cached_for = getattr(self, "_geometry_masked_out_polygon", None)

    if cached is None or cached_for is not polygon:
        # Start from an all-ones image and paint the polygon interior with 0,
        # so that updateMask() drops exactly the pixels inside the polygon.
        full_mask = ee.Image.constant(1)
        area = full_mask.paint(polygon, 0)

        # Update the mask of each image in the collection
        masked_collection = self.collection.map(lambda img: img.updateMask(area))

        cached = GenericCollection(collection=masked_collection)
        self._geometry_masked_out_collection = cached
        # Remember which polygon produced the cached result.
        self._geometry_masked_out_polygon = polygon

    return cached
1534
+
1535
+
1536
def binary_mask(self, threshold=None, band_name=None, classify_above_threshold=True, mask_zeros=False):
    """
    Creates a binary (0/1) image collection by thresholding one band of every image.

    With `classify_above_threshold=True` a pixel becomes 1 when its value is greater than
    or equal to `threshold`; otherwise it becomes 1 when its value is less than or equal
    to `threshold`. All other pixels become 0. The output band keeps the input band name.

    If the images are singleband, the band name is determined automatically from the
    first image. If multiple bands are available, the band name must be specified.

    Args:
        threshold (float, optional): The threshold value for creating the binary mask. Required; defaults to None only so that a missing value can be reported.
        band_name (str, optional): The name of the band to use for masking. Defaults to None.
        classify_above_threshold (bool, optional): If True, pixels at or above the threshold are classified as 1. If False, pixels at or below the threshold are classified as 1. Defaults to True.
        mask_zeros (bool, optional): If True, pixels with a value of 0 after the binary classification are masked out in the output. Useful for classifications. Defaults to False.

    Returns:
        GenericCollection: GenericCollection singleband image collection with binary masks applied.

    Raises:
        ValueError: If `threshold` is None, the collection is empty, the first image has no
            bands, or `band_name` is None while the first image has more than one band.
    """
    # Validate the purely client-side argument first, before any server round trip.
    if threshold is None:
        raise ValueError("Threshold must be specified for binary masking.")
    if self.collection.size().eq(0).getInfo():
        raise ValueError("The collection is empty. Cannot create a binary mask.")
    if band_name is None:
        # One request for the full list instead of several size()/get() requests.
        available_bands = self.collection.first().bandNames().getInfo()
        if not available_bands:
            raise ValueError("No bands available in the collection.")
        if len(available_bands) > 1:
            raise ValueError("Multiple bands available, please specify a band name.")
        band_name = available_bands[0]

    def _classify(image):
        band = image.select(band_name)
        if classify_above_threshold:
            binary = band.gte(threshold)
        else:
            binary = band.lte(threshold)
        binary = binary.rename(band_name)
        if mask_zeros:
            # Mask by the binary result itself so that exactly the 0-valued output
            # pixels are removed. Masking by "original value > 0" (the previous
            # behaviour) dropped valid 1-pixels whenever the threshold was not positive.
            binary = binary.selfMask()
        return binary.copyProperties(image)

    return GenericCollection(collection=self.collection.map(_classify))
1584
+
1585
def anomaly(self, geometry, band_name=None, anomaly_band_name=None, replace=True):
    """
    Calculates an anomaly image for each image in the collection.

    Every image is passed to `GenericCollection.anomaly_fn` together with the arguments
    below. As documented for this method, the anomaly is the deviation of the pixel
    values of the chosen band from the mean value of that band over `geometry`.

    Args:
        geometry (ee.Geometry): The geometry for image reduction to define the mean value to be used for anomaly calculation.
        band_name (str, optional): Name of the band to calculate the anomaly for. If not provided, the first band of the first image in the collection is used (a notice is printed when that image has several bands).
        anomaly_band_name (str, optional): A string representing the band name to be used for the output anomaly band. Forwarded unchanged to `anomaly_fn`.
        replace (bool, optional): A boolean indicating whether to replace the original band with the anomaly band. If True, the output image will only contain the anomaly band. If False, the output image will retain all original bands and add the anomaly band. Default is True.

    Returns:
        GenericCollection: A GenericCollection where each image represents the anomaly (deviation from
        the mean) for the chosen band.

    Raises:
        ValueError: If the collection is empty, or `band_name` is None and the first image has no bands.
    """
    if self.collection.size().eq(0).getInfo():
        raise ValueError("The collection is empty.")
    if band_name is None:
        # Fetch the band list once instead of issuing separate size()/get() requests.
        available_bands = self.collection.first().bandNames().getInfo()
        if not available_bands:
            raise ValueError("No bands available in the collection.")
        if len(available_bands) > 1:
            print("Multiple bands available, will be using the first band in the collection for anomaly calculation. Please specify a band name if you wish to use a different band.")
        band_name = available_bands[0]

    col = self.collection.map(
        lambda image: GenericCollection.anomaly_fn(
            image,
            geometry=geometry,
            band_name=band_name,
            anomaly_band_name=anomaly_band_name,
            replace=replace,
        )
    )
    return GenericCollection(collection=col)
1619
+
1620
def mask_via_band(self, band_to_mask, band_for_mask, threshold=-1, mask_above=True, add_band_to_original_image=False):
    """
    Masks one band of every image using another band of the same image.

    Each image is handed to `GenericCollection.mask_via_band_fn` with the arguments
    below; this method only checks that the collection is not empty and wraps the
    mapped result. An example use case is masking vegetation from an image when
    targeting land pixels.

    Args:
        band_to_mask (str): name of the band which will be masked (target image)
        band_for_mask (str): name of the band to use for the mask (band you want to remove/mask from target image)
        threshold (float): threshold applied to `band_for_mask`; defaults to -1 assuming input band is already classified (masked to pixels of interest).
        mask_above (bool): if True, masks pixels above the threshold; if False, masks pixels below the threshold. Defaults to True.
        add_band_to_original_image (bool): forwarded unchanged to `mask_via_band_fn`. Defaults to False.

    Returns:
        GenericCollection: A new GenericCollection with the specified band masked to pixels excluding from `band_for_mask`.

    Raises:
        ValueError: If the collection is empty.
    """
    is_empty = self.collection.size().eq(0).getInfo()
    if is_empty:
        raise ValueError("The collection is empty.")

    def _mask_single_image(img):
        return GenericCollection.mask_via_band_fn(
            img,
            band_to_mask=band_to_mask,
            band_for_mask=band_for_mask,
            threshold=threshold,
            mask_above=mask_above,
            add_band_to_original_image=add_band_to_original_image,
        )

    masked = self.collection.map(_mask_single_image)
    return GenericCollection(collection=masked)
1648
+
1649
def mask_via_singleband_image(self, image_collection_for_mask, band_name_to_mask, band_name_for_mask, threshold=-1, mask_above=False, add_band_to_original_image=False):
    """
    Masks a band of this collection using a band from a second collection.

    Images from the two collections are paired by an exact match of the 'Date_Filter'
    property (inner join). For each pair the mask band is added to the target image and
    the pair is processed by `GenericCollection.mask_via_band_fn`. The output images
    contain only `band_name_to_mask` and carry the properties of the target image.

    Args:
        image_collection_for_mask (GenericCollection): GenericCollection image collection to use for masking (source of pixels that will be used to mask the parent image collection)
        band_name_to_mask (str): name of the band which will be masked (target image)
        band_name_for_mask (str): name of the band to use for the mask (band which contains pixels the user wants to remove/mask from target image)
        threshold (float): threshold value where pixels less (or more, depending on `mask_above`) than threshold will be masked; defaults to -1.
        mask_above (bool): if True, masks pixels above the threshold; if False, masks pixels below the threshold
        add_band_to_original_image (bool): forwarded to `mask_via_band_fn`.
            NOTE(review): the output is reduced to `band_name_to_mask` afterwards, so this flag
            does not appear to change the returned images - confirm whether that is intended.

    Returns:
        GenericCollection: A new GenericCollection with the specified band masked to pixels excluding from `band_for_mask`.

    Raises:
        ValueError: If `image_collection_for_mask` is not a GenericCollection, if either
            collection is empty, or if the two collections differ in size.
    """
    # Cheap client-side type check first, before any server round trip.
    if not isinstance(image_collection_for_mask, GenericCollection):
        raise ValueError("image_collection_for_mask must be a GenericCollection object.")

    # One size request per collection; the emptiness checks reuse these numbers.
    size1 = self.collection.size().getInfo()
    if size1 == 0:
        raise ValueError("The collection is empty.")
    size2 = image_collection_for_mask.collection.size().getInfo()
    if size2 == 0:
        # Checked before the size comparison, otherwise this message could never be reached.
        raise ValueError("Warning: One of the input collections is empty.")
    if size1 != size2:
        raise ValueError(f"Warning: Collections have different sizes ({size1} vs {size2}). Please ensure both collections have the same number of images and matching dates.")

    # Pair by exact Date_Filter property
    primary = self.collection.select([band_name_to_mask])
    secondary = image_collection_for_mask.collection.select([band_name_for_mask])
    join = ee.Join.inner()
    flt = ee.Filter.equals(leftField='Date_Filter', rightField='Date_Filter')
    paired = join.apply(primary, secondary, flt)

    def _map_pair(f):
        # An inner join yields features holding the matched images as
        # 'primary' and 'secondary' properties.
        f = ee.Feature(f)
        prim = ee.Image(f.get('primary'))
        sec = ee.Image(f.get('secondary'))

        merged = prim.addBands(sec.select([band_name_for_mask]))

        out = GenericCollection.mask_via_band_fn(
            merged,
            band_to_mask=band_name_to_mask,
            band_for_mask=band_name_for_mask,
            threshold=threshold,
            mask_above=mask_above,
            add_band_to_original_image=add_band_to_original_image
        )

        # guarantee single band + keep properties
        out = ee.Image(out).select([band_name_to_mask]).copyProperties(prim, prim.propertyNames())
        out = out.set('Date_Filter', prim.get('Date_Filter'))
        # copyProperties()/set() return a generic element; cast back to an image.
        return ee.Image(out)

    col = ee.ImageCollection(paired.map(_map_pair))
    return GenericCollection(collection=col)
1708
+
1709
def band_rename(self, current_band_name, new_band_name):
    """Returns a new GenericCollection in which one band has been renamed.

    The first image of the collection is inspected to make sure it contains
    `current_band_name`. Every image is then passed through `band_rename_fn`
    (defined elsewhere in this class), which performs the per-image rename; how an
    image lacking the band is treated is decided by that helper.

    Args:
        current_band_name (str): The existing band name to rename.
        new_band_name (str): The desired new band name.

    Returns:
        GenericCollection: A GenericCollection built from the renamed images.

    Raises:
        ValueError: If the first image does not contain `current_band_name`.
    """
    # Only the first image is checked; this costs a single round trip.
    first_image_bands = self.collection.first().bandNames()
    band_present = first_image_bands.contains(current_band_name).getInfo()
    if not band_present:
        raise ValueError(f"Band '{current_band_name}' does not exist in the collection.")

    def _rename_in_image(image):
        return self.band_rename_fn(image, current_band_name, new_band_name)

    return GenericCollection(collection=self.collection.map(_rename_in_image))
1733
+
1734
def image_grab(self, img_selector):
    """
    Returns the image stored at a given position of the collection.

    Handy for fetching the latest image or for stepping through the imagery one by one.

    Args:
        img_selector: index of the image to select ("grab") from the collection.

    Returns:
        ee.Image: ee.Image of selected image
    """
    source = self.collection
    # Materialise the collection as a server-side list so it can be indexed.
    as_list = source.toList(source.size())
    return ee.Image(as_list.get(img_selector))
1751
+
1752
def custom_image_grab(self, img_col, img_selector):
    """
    Returns the image stored at a given position of an arbitrary ee.ImageCollection.

    Unlike `image_grab`, the collection to read from is supplied by the caller;
    the collection held by this object is not used.

    Args:
        img_col: ee.ImageCollection with same dates as another GenericCollection image collection object.
        img_selector: index of the image to select from `img_col`.

    Returns:
        ee.Image: ee.Image of selected image
    """
    # Materialise the collection as a server-side list so it can be indexed.
    as_list = img_col.toList(img_col.size())
    selected = as_list.get(img_selector)
    return ee.Image(selected)
1770
+
1771
def image_pick(self, img_date):
    """
    Returns the first image whose 'Date_Filter' property equals the given date.

    If several images share the date, only the first of them is returned, so the
    result is ambiguous for collections holding multiple images per date.

    Args:
        img_date: date (str) of image to select in format of 'YYYY-MM-dd'

    Returns:
        ee.Image: ee.Image of selected image
    """
    same_date = ee.Filter.eq("Date_Filter", img_date)
    matches = self.collection.filter(same_date)
    return matches.first()
1783
+
1784
def CollectionStitch(self, img_col2):
    """
    Mosaics two GenericCollection objects which share image dates.

    Mosaics are only formed for dates where both image collections have images.
    For every such date the image of this (primary) collection is mosaiced with the
    first image of the same date from `img_col2`. Image properties, 'Date_Filter' and
    'system:time_start' are taken from the primary image. Server-side friendly.

    The dates are read directly from the collections with `aggregate_array`, so the
    method does not depend on the dates cache having been populated beforehand.

    Args:
        img_col2: secondary GenericCollection image collection to be mosaiced with the primary image collection

    Returns:
        GenericCollection: GenericCollection image collection
    """
    # Dates of the primary collection, computed server-side.
    primary_dates = self.collection.aggregate_array("Date_Filter")

    # Keep only secondary images whose date also occurs in the primary collection ...
    filtered_col2 = img_col2.collection.filter(
        ee.Filter.inList("Date_Filter", primary_dates)
    )
    # ... and only primary images whose date survived in the secondary collection.
    filtered_col1 = self.collection.filter(
        ee.Filter.inList(
            "Date_Filter", filtered_col2.aggregate_array("Date_Filter")
        )
    )

    def mosaic_images(img):
        # Get the date of the image
        date = img.get("Date_Filter")

        # Get the corresponding image from filtered_col2
        img2 = filtered_col2.filter(ee.Filter.equals("Date_Filter", date)).first()

        # Create a mosaic of the two images (secondary image listed last)
        mosaic = ee.ImageCollection.fromImages([img, img2]).mosaic()

        # mosaic() yields a new image, so re-attach the primary image's metadata.
        mosaic = (
            mosaic.copyProperties(img)
            .set("Date_Filter", date)
            .set("system:time_start", img.get("system:time_start"))
        )

        return mosaic

    new_col = filtered_col1.map(mosaic_images)

    return GenericCollection(collection=new_col)
1836
+
1837
@property
def MosaicByDate(self):
    """
    Property that mosaics the images of the collection which share the same date.

    One mosaic is built per distinct 'Date_Filter' value. Each mosaic receives the
    'Date_Filter' value of its date and the 'system:time_start' property of the first
    image found for that date. The result is computed once and cached on the object.
    Server-side friendly.

    NOTE: if images were removed from the collection by earlier filtering, a mosaic may be composed of only one image.

    Returns:
        GenericCollection: GenericCollection image collection with one mosaiced image per date
    """
    # Serve the cached result when it has already been built.
    if self._MosaicByDate is not None:
        return self._MosaicByDate

    source = self.collection
    copied_properties = ["system:time_start"]

    def _append_mosaic_for_date(day, accumulated):
        # iterate() hands the accumulator over as a generic object; cast it back.
        accumulated = ee.List(accumulated)
        same_day = source.filter(ee.Filter.eq("Date_Filter", day))

        # First image of the day, used as the source of the time stamp.
        same_day_list = same_day.toList(same_day.size())
        leading_image = ee.Image(same_day_list.get(0))

        day_mosaic = same_day.mosaic().set("Date_Filter", day)
        day_mosaic = day_mosaic.copyProperties(leading_image, copied_properties)

        return accumulated.add(day_mosaic)

    unique_days = source.aggregate_array("Date_Filter").distinct()

    # Fold over the dates, collecting one mosaic per date in a server-side list.
    mosaics = unique_days.iterate(_append_mosaic_for_date, ee.List([]))

    self._MosaicByDate = GenericCollection(
        collection=ee.ImageCollection.fromImages(mosaics)
    )
    return self._MosaicByDate
1894
+
1895
@staticmethod
def ee_to_df(
    ee_object, columns=None, remove_geom=True, sort_columns=False, **kwargs
):
    """
    Converts an ee.FeatureCollection to pandas dataframe. Adapted from the geemap package (https://geemap.org/common/#geemap.common.ee_to_df)

    A single ee.Feature is accepted as well and is wrapped in a one-element collection.
    Errors raised while computing the features propagate unchanged, so their original
    type and traceback stay available to the caller.

    Args:
        ee_object (ee.FeatureCollection): ee.FeatureCollection.
        columns (list): List of column names to keep. Defaults to None (keep all).
        remove_geom (bool): Whether to remove the geometry column. Defaults to True.
        sort_columns (bool): Whether to sort the column names. Defaults to False.
        kwargs: Additional arguments passed to ee.data.computeFeatures.

    Raises:
        TypeError: ee_object must be an ee.FeatureCollection

    Returns:
        pd.DataFrame: pandas DataFrame
    """
    if isinstance(ee_object, ee.Feature):
        ee_object = ee.FeatureCollection([ee_object])

    if not isinstance(ee_object, ee.FeatureCollection):
        raise TypeError("ee_object must be an ee.FeatureCollection")

    if remove_geom:
        # The property names are only needed to rebuild geometry-free features,
        # so the round trip is skipped when the geometry is kept.
        # Note: the names are taken from the first feature only.
        property_names = ee_object.first().propertyNames().sort().getInfo()
        data = ee_object.map(
            lambda f: ee.Feature(None, f.toDictionary(property_names))
        )
    else:
        data = ee_object

    kwargs["expression"] = data
    kwargs["fileFormat"] = "PANDAS_DATAFRAME"

    df = ee.data.computeFeatures(kwargs)

    if isinstance(columns, list):
        df = df[columns]

    if remove_geom and ("geo" in df.columns):
        df = df.drop(columns=["geo"])

    if sort_columns:
        df = df.reindex(sorted(df.columns), axis=1)

    return df
1947
+
1948
@staticmethod
def extract_transect(
    image,
    line,
    reducer="mean",
    n_segments=100,
    dist_interval=None,
    scale=None,
    crs=None,
    crsTransform=None,
    tileScale=1.0,
    to_pandas=False,
    **kwargs,
):
    """
    Extracts transect from an image. Adapted from the geemap package (https://geemap.org/common/#geemap.common.extract_transect).

    The line is cut into segments, every segment is tagged with its start distance along
    the line, and the image is reduced over each segment with `reduceRegions`.

    Args:
        image (ee.Image): The image to extract transect from.
        line (ee.Geometry.LineString): The LineString used to extract transect from an image.
        reducer (str | ee.Reducer, optional): Name of an ee.Reducer factory without arguments, e.g., 'mean', 'median', 'min', 'max', 'stdDev'; an already constructed ee.Reducer is used as is. Defaults to "mean".
        n_segments (int, optional): The number of segments that the LineString will be split into. Defaults to 100.
        dist_interval (float, optional): The distance interval used for splitting the LineString. If specified, the n_segments parameter will be ignored. Defaults to None.
        scale (float, optional): A nominal scale in meters of the projection to work in. Defaults to None.
        crs (ee.Projection, optional): The projection to work in. If unspecified, the projection of the image's first band is used. If specified in addition to scale, rescaled to the specified scale. Defaults to None.
        crsTransform (list, optional): The list of CRS transform values. This is a row-major ordering of the 3x2 transform matrix. This option is mutually exclusive with 'scale', and will replace any transform already set on the projection. Defaults to None.
        tileScale (float, optional): A scaling factor used to reduce aggregation tile size; using a larger tileScale (e.g. 2 or 4) may enable computations that run out of memory with the default. Defaults to 1.
        to_pandas (bool, optional): Whether to convert the result to a pandas dataframe. Default to False.
        **kwargs: Accepted for backward compatibility; not used.

    Raises:
        TypeError: If the geometry type is not LineString.
        ValueError: If `reducer` is a string that does not name an ee.Reducer factory.

    Returns:
        ee.FeatureCollection: The FeatureCollection containing the transect with distance and reducer values
        (a pandas DataFrame when `to_pandas` is True).
    """
    geom_type = line.type().getInfo()
    if geom_type != "LineString":
        raise TypeError("The geometry type must be LineString.")

    if isinstance(reducer, str):
        # Look the factory up by name instead of eval()-ing a string built from the argument.
        reducer_factory = getattr(ee.Reducer, reducer, None)
        if not callable(reducer_factory):
            raise ValueError(f"Unknown ee.Reducer name: {reducer!r}")
        reducer = reducer_factory()

    # Tolerance for the geometry operations: one fifth of the image's nominal scale.
    maxError = image.projection().nominalScale().divide(5)

    length = line.length(maxError)
    if dist_interval is None:
        dist_interval = length.divide(n_segments)

    distances = ee.List.sequence(0, length, dist_interval)
    lines = line.cutLines(distances, maxError).geometries()

    def set_dist_attr(l):
        # Each element is a [segment geometry, start distance] pair produced by zip().
        l = ee.List(l)
        geom = ee.Geometry(l.get(0))
        distance = ee.Number(l.get(1))
        geom = ee.Geometry.LineString(geom.coordinates())
        return ee.Feature(geom, {"distance": distance})

    lines = lines.zip(distances).map(set_dist_attr)
    lines = ee.FeatureCollection(lines)

    transect = image.reduceRegions(
        **{
            "collection": ee.FeatureCollection(lines),
            "reducer": reducer,
            "scale": scale,
            "crs": crs,
            "crsTransform": crsTransform,
            "tileScale": tileScale,
        }
    )

    if to_pandas:
        return GenericCollection.ee_to_df(transect)
    return transect
2026
+
2027
@staticmethod
def transect(
    image,
    lines,
    line_names,
    reducer="mean",
    n_segments=None,
    dist_interval=30,
    to_pandas=True,
):
    """
    Computes the values along a transect for each line in a list of lines.

    Builds on `extract_transect`. For every line the column named after the reducer is
    taken from the transect table; if that column is missing, a message is printed and
    a column of NaNs is used instead. The columns of the resulting table are named
    after `line_names`.

    Args:
        image (ee.Image): ee.Image object to use for calculating transect values.
        lines (list): List of ee.Geometry.LineString objects (a single line is accepted too).
        line_names (list of strings): List of line string names (a single string is accepted too).
        reducer (str): The ee reducer to use. Defaults to 'mean'.
        n_segments (int): The number of segments that the LineString will be split into. If specified, the dist_interval parameter is ignored. Defaults to None.
        dist_interval (float): The distance interval in meters used for splitting the LineString. Only used when n_segments is None. Defaults to 30.
        to_pandas (bool): Whether to convert the result to a pandas dataframe. Defaults to True.

    Returns:
        pd.DataFrame or ee.FeatureCollection: With `to_pandas=True`, a DataFrame with one
        column per line. With `to_pandas=False`, a single ee.FeatureCollection holding the
        transect features of all lines, each tagged with a 'transect_name' property.
    """
    # Accept a single line / single name as a convenience.
    if not isinstance(lines, list):
        lines = [lines]
    if isinstance(line_names, str):
        line_names = [line_names]

    # n_segments takes precedence over dist_interval when it is given.
    if n_segments is None:
        sampling = {"dist_interval": dist_interval}
    else:
        sampling = {"n_segments": n_segments}

    if not to_pandas:
        # Server-side result: tag the features of every line and merge them.
        def _tagger(name):
            return lambda feature: feature.set("transect_name", name)

        tagged = [
            GenericCollection.extract_transect(
                image=image, line=line, reducer=reducer, to_pandas=False, **sampling
            ).map(_tagger(name))
            for name, line in zip(line_names, lines)
        ]
        return ee.FeatureCollection(tagged).flatten()

    value_columns = []
    longest = 0  # row count of the longest column gathered so far
    for i, line in enumerate(lines):
        transect_data = GenericCollection.extract_transect(
            image=image, line=line, reducer=reducer, to_pandas=True, **sampling
        )
        if reducer in transect_data.columns:
            # The output column is named after the reducer, not always 'mean'.
            column = transect_data[[reducer]]
        else:
            print(
                f"{reducer} column not found in transect data for line {line_names[i]}"
            )
            # Placeholder as long as the longest column seen so far.
            column = pd.Series([np.nan] * max(longest, transect_data.shape[0]))
        longest = max(longest, len(column))
        value_columns.append(column)

    if value_columns:
        transects_df = pd.concat(value_columns, axis=1)
    else:
        transects_df = pd.DataFrame()

    transects_df.columns = line_names

    return transects_df
2108
+
2109
def transect_iterator(
    self,
    lines,
    line_names,
    reducer="mean",
    dist_interval=30,
    n_segments=None,
    scale=30,
    processing_mode='aggregated',
    save_folder_path=None,
    sampling_method='line',
    point_buffer_radius=15
):
    """
    Computes and returns pixel values along transects for each image in a collection.

    This function generates time-series data along one or more lines, and
    supports two different geometric sampling methods ('line' and 'buffered_point').

    There are two processing modes available, aggregated and iterative:
    - 'aggregated' (default; suggested): Server-side processing. All results are
    fetched in a single request and returned as a dictionary of pandas DataFrames.
    - 'iterative': Client-side loop that processes one image at a time.
    Kept for backward compatibility (effectively deprecated). Returns None and saves individual CSVs.
    In this mode only `lines`, `line_names`, `reducer`, `n_segments` and `dist_interval`
    are forwarded to `GenericCollection.transect`; `scale`, `sampling_method` and
    `point_buffer_radius` are not used.

    Args:
        lines (list): A list of one or more ee.Geometry.LineString objects that
            define the transects.
        line_names (list): A list of string names for each transect. The length
            of this list must match the length of the `lines` list.
        reducer (str, optional): The name of the ee.Reducer to apply at each
            transect point (e.g., 'mean', 'median', 'first'). Defaults to 'mean'.
        dist_interval (float, optional): The distance interval in meters for
            sampling points along each transect. Will be overridden if `n_segments` is provided.
            Defaults to 30 (a falsy value also falls back to 30). Recommended to increase
            this value when using the 'line' sampling method, or else you may get blank rows.
        n_segments (int, optional): The number of equal-length segments to split
            each transect line into for sampling. This parameter overrides `dist_interval`.
            Defaults to None.
        scale (int, optional): The nominal scale in meters for the reduction,
            which should typically match the pixel resolution of the imagery.
            Defaults to 30.
        processing_mode (str, optional): 'aggregated' (default) or 'iterative'; see above.
        save_folder_path (str, optional): Folder in which CSV files are written. The
            folder and file name are joined with the platform path separator, so a
            trailing slash is optional. The behavior depends on the `processing_mode`:
            - In 'aggregated' mode, one CSV is saved for each transect,
              containing all dates. (e.g., 'MyTransect_transects.csv').
            - In 'iterative' mode (where this argument is required), one CSV is saved
              for each date, containing all transects. (e.g., '2022-06-15_transects.csv').
        sampling_method (str, optional): The geometric method used for sampling.
            - 'line' (default): Reduces all pixels intersecting each small line
              segment. This can produce blank rows if `dist_interval` is too small
              relative to the `scale`.
            - 'buffered_point': Reduces all pixels within a buffer around the
              centroid of each line segment.
            Any other value is treated as 'line'.
        point_buffer_radius (int, optional): The radius in meters for the buffer
            when `sampling_method` is 'buffered_point'. Defaults to 15.

    Returns:
        dict or None:
            - If `processing_mode` is 'aggregated', returns a dictionary where each
              key is a transect name and each value is a pandas DataFrame. In the
              DataFrame, the index is the distance along the transect and each
              column represents an image date. An empty dict is returned when no
              transect data was produced.
            - If `processing_mode` is 'iterative', returns None as it saves
              files directly.

    Raises:
        ValueError: If `lines` and `line_names` have different lengths, if an
            unknown reducer (aggregated mode) or processing mode is specified, or if
            `save_folder_path` is missing in 'iterative' mode.
    """
    # Imported locally so the block does not depend on a module-level import of os.
    import os

    # Validating inputs
    if len(lines) != len(line_names):
        raise ValueError("'lines' and 'line_names' must have the same number of elements.")

    ### Current, server-side processing method ###
    if processing_mode == 'aggregated':
        # Validating reducer type
        try:
            ee_reducer = getattr(ee.Reducer, reducer)()
        except AttributeError:
            raise ValueError(f"Unknown reducer: '{reducer}'.")

        # `sampling_method` is a plain Python value, so the choice of geometry is
        # made client-side instead of shipping an ee.Algorithms.If to the server.
        use_buffered_points = sampling_method == 'buffered_point'

        ### Function to extract transects for a single image
        def get_transects_for_image(image):
            image_date = image.get('Date_Filter')
            # maxError depends only on the image, so it is computed once per image
            # and shared by length(), cutLines() and centroid() for consistency.
            maxError = image.projection().nominalScale().divide(5)
            # List that accumulates one FeatureCollection per transect line
            all_transects_for_image = ee.List([])

            for line, line_name in zip(lines, line_names):
                length = line.length(maxError)
                # n_segments (when truthy) overrides dist_interval; resolved client-side
                # so that no `length.divide(None)` expression is ever constructed.
                if n_segments:
                    effective_dist_interval = length.divide(n_segments)
                else:
                    effective_dist_interval = dist_interval or 30  # Defaults to 30 if both are None
                # Distances along the line at which it is cut into segments
                distances = ee.List.sequence(0, length, effective_dist_interval)
                cut_lines_geoms = line.cutLines(distances, maxError).geometries()

                # Builds one feature per [geometry, distance] pair
                def set_dist_attr(l):
                    geom_segment = ee.Geometry(ee.List(l).get(0))
                    distance = ee.Number(ee.List(l).get(1))
                    if use_buffered_points:
                        # Sample a buffer around the segment centroid instead of the segment itself
                        geom_segment = geom_segment.centroid(maxError).buffer(point_buffer_radius)
                    return ee.Feature(geom_segment, {'distance': distance})

                # zip() pairs each cut segment with its distance value
                line_features = ee.FeatureCollection(cut_lines_geoms.zip(distances).map(set_dist_attr))
                # Reducing the image over the features to get transect values
                transect_fc = image.reduceRegions(
                    collection=line_features, reducer=ee_reducer, scale=scale
                )

                # Tag every feature with the image date and the transect it belongs to
                def set_props(feature):
                    return feature.set({'image_date': image_date, 'transect_name': line_name})

                all_transects_for_image = all_transects_for_image.add(transect_fc.map(set_props))

            # Merge the per-line FeatureCollections into a single FeatureCollection
            return ee.FeatureCollection(all_transects_for_image).flatten()

        # Map the function over the entire image collection and flatten the results
        results_fc = ee.FeatureCollection(self.collection.map(get_transects_for_image)).flatten()
        # Convert the results to a pandas DataFrame
        df = GenericCollection.ee_to_df(results_fc, remove_geom=True)
        if df.empty:
            print("Warning: No transect data was generated.")
            return {}

        # One pivot table (distance x date) per transect name
        output_dfs = {}
        for name in sorted(df['transect_name'].unique()):
            transect_df = df[df['transect_name'] == name]
            pivot_df = transect_df.pivot(index='distance', columns='image_date', values=reducer)
            pivot_df.columns.name = 'Date'
            output_dfs[name] = pivot_df

        # Optionally save each transect DataFrame to CSV
        if save_folder_path:
            for transect_name, transect_df in output_dfs.items():
                # Keep only characters that are safe in a file name
                safe_filename = "".join(x for x in transect_name if x.isalnum() or x in "._-")
                file_path = os.path.join(save_folder_path, f"{safe_filename}_transects.csv")
                transect_df.to_csv(file_path)
                print(f"Saved transect data to {file_path}")

        return output_dfs

    ### old, deprecated iterative client-side processing method ###
    elif processing_mode == 'iterative':
        if not save_folder_path:
            raise ValueError("`save_folder_path` is required for 'iterative' processing mode.")

        image_collection_dates = self.dates
        for i, date in enumerate(image_collection_dates):
            # Best-effort: a failure on one image is reported and the loop continues
            try:
                print(f"Processing image {i+1}/{len(image_collection_dates)}: {date}")
                image = self.image_grab(i)
                transects_df = GenericCollection.transect(
                    image, lines, line_names, reducer, n_segments, dist_interval, to_pandas=True
                )
                transects_df.to_csv(os.path.join(save_folder_path, f"{date}_transects.csv"))
                print(f"{date}_transects saved to csv")
            except Exception as e:
                print(f"An error occurred while processing image {i+1}: {e}")
    else:
        raise ValueError("`processing_mode` must be 'iterative' or 'aggregated'.")
2308
+
2309
+ @staticmethod
2310
+ def extract_zonal_stats_from_buffer(
2311
+ image,
2312
+ coordinates,
2313
+ buffer_size=1,
2314
+ reducer_type="mean",
2315
+ scale=30,
2316
+ tileScale=1,
2317
+ coordinate_names=None,
2318
+ ):
2319
+ """
2320
+ Function to extract spatial statistics from an image for a list or single set of (long, lat) coordinates, providing individual statistics for each location.
2321
+ A radial buffer is applied around each coordinate to extract the statistics, which defaults to 1 meter.
2322
+ The function returns a pandas DataFrame with the statistics for each coordinate.
2323
+
2324
+ NOTE: Be sure the coordinates are provided as longitude, latitude (x, y) tuples!
2325
+
2326
+ Args:
2327
+ image (ee.Image): The image from which to extract statistics. Should be single-band.
2328
+ coordinates (list or tuple): A single (lon, lat) tuple or a list of (lon, lat) tuples.
2329
+ buffer_size (int, optional): The radial buffer size in meters. Defaults to 1.
2330
+ reducer_type (str, optional): The ee.Reducer to use ('mean', 'median', 'min', etc.). Defaults to 'mean'.
2331
+ scale (int, optional): The scale in meters for the reduction. Defaults to 30.
2332
+ tileScale (int, optional): The tile scale factor. Defaults to 1.
2333
+ coordinate_names (list, optional): A list of names for the coordinates.
2334
+
2335
+ Returns:
2336
+ pd.DataFrame: A pandas DataFrame with the image's 'Date_Filter' as the index and a
2337
+ column for each coordinate location.
2338
+ """
2339
+ if isinstance(coordinates, tuple) and len(coordinates) == 2:
2340
+ coordinates = [coordinates]
2341
+ elif not (
2342
+ isinstance(coordinates, list)
2343
+ and all(isinstance(coord, tuple) and len(coord) == 2 for coord in coordinates)
2344
+ ):
2345
+ raise ValueError(
2346
+ "Coordinates must be a list of tuples with two elements each (longitude, latitude)."
2347
+ )
2348
+
2349
+ if coordinate_names is not None:
2350
+ if not isinstance(coordinate_names, list) or not all(
2351
+ isinstance(name, str) for name in coordinate_names
2352
+ ):
2353
+ raise ValueError("coordinate_names must be a list of strings.")
2354
+ if len(coordinate_names) != len(coordinates):
2355
+ raise ValueError(
2356
+ "coordinate_names must have the same length as the coordinates list."
2357
+ )
2358
+ else:
2359
+ coordinate_names = [f"Location {i+1}" for i in range(len(coordinates))]
2360
+
2361
+ image_date = image.get('Date_Filter')
2362
+
2363
+ points = [
2364
+ ee.Feature(
2365
+ ee.Geometry.Point(coord).buffer(buffer_size),
2366
+ {"location_name": str(name)},
2367
+ )
2368
+ for coord, name in zip(coordinates, coordinate_names)
2369
+ ]
2370
+ features = ee.FeatureCollection(points)
2371
+
2372
+ try:
2373
+ reducer = getattr(ee.Reducer, reducer_type)()
2374
+ except AttributeError:
2375
+ raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")
2376
+
2377
+ stats_fc = image.reduceRegions(
2378
+ collection=features,
2379
+ reducer=reducer,
2380
+ scale=scale,
2381
+ tileScale=tileScale,
2382
+ )
2383
+
2384
+ df = GenericCollection.ee_to_df(stats_fc, remove_geom=True)
2385
+
2386
+ if df.empty:
2387
+ print("Warning: No results returned. The points may not intersect the image.")
2388
+ empty_df = pd.DataFrame(columns=coordinate_names)
2389
+ empty_df.index.name = 'Date'
2390
+ return empty_df
2391
+
2392
+ if reducer_type not in df.columns:
2393
+ print(f"Warning: Reducer type '{reducer_type}' not found in results. Returning raw data.")
2394
+ return df
2395
+
2396
+ pivot_df = df.pivot(columns='location_name', values=reducer_type)
2397
+ pivot_df['Date'] = image_date.getInfo() # .getInfo() is needed here as it's a server object
2398
+ pivot_df = pivot_df.set_index('Date')
2399
+ return pivot_df
2400
+
2401
def iterate_zonal_stats(
    self,
    geometries,
    band=None,
    reducer_type="mean",
    scale=30,
    geometry_names=None,
    buffer_size=1,
    tileScale=1,
    dates=None,
    file_path=None
):
    """
    Extracts a spatial statistic (mean by default) for each geometry or coordinate, for every image in the collection.

    When coordinates are provided, a radial buffer is applied around each coordinate, where the size of the
    buffer is determined by the `buffer_size` argument (defaults to 1 meter).
    The function returns a pandas DataFrame with the statistic for each geometry and date, or, when
    `file_path` is given, writes that table to a .csv file instead.

    Args:
        geometries (ee.Geometry, ee.Feature, ee.FeatureCollection, list, or tuple): Input geometries for which to extract statistics. Can be a single ee.Geometry, an ee.Feature, an ee.FeatureCollection, a single (lon, lat) tuple, a list of (lon, lat) tuples, or a list of ee.Geometry objects. Be careful to NOT provide coordinates as (lat, lon)!
        band (str, optional): The name of the band to use for statistics. If None, the first band of the first image is used. Defaults to None.
        reducer_type (str, optional): The ee.Reducer to use, e.g., 'mean', 'median', 'max', 'sum'. Defaults to 'mean'.
        scale (int, optional): Pixel scale in meters for the reduction. Defaults to 30.
        geometry_names (list, optional): A list of string names for the geometries. If provided, must match the number of geometries. Ignored (with a warning) for ee.Feature / ee.FeatureCollection input. Defaults to None.
        buffer_size (int, optional): Radial buffer in meters around coordinates. Defaults to 1.
        tileScale (int, optional): A scaling factor to reduce aggregation tile size. Defaults to 1.
        dates (list, optional): A list of date strings ('YYYY-MM-DD') matched against each image's 'Date_Filter' property, such that only images from these dates are included. Defaults to None, which uses all dates in the collection.
        file_path (str, optional): File path to save the output CSV. A trailing '.csv' is optional.

    Returns:
        pd.DataFrame or None: A pandas DataFrame with dates as the index and geometry names
            as columns. Returns None when `file_path` is provided (the table is written to CSV instead).

    Raises:
        ValueError: If input parameters are invalid or no results are found.
        TypeError: If geometries input type is unsupported.
    """
    # Restrict the collection to a single band (the requested one, or the first band)
    img_collection_obj = self
    if band:
        img_collection_obj = GenericCollection(collection=img_collection_obj.collection.select(band))
    else:
        first_image = img_collection_obj.image_grab(0)
        first_band = first_image.bandNames().get(0)
        img_collection_obj = GenericCollection(collection=img_collection_obj.collection.select([first_band]))
    # Filter by dates if provided. The filter is applied to the band-selected
    # collection (not self.collection) so the band selection above is preserved.
    if dates:
        img_collection_obj = GenericCollection(
            collection=img_collection_obj.collection.filter(ee.Filter.inList('Date_Filter', dates))
        )

    # Gives every feature a 'geo_name': the existing 'geo_name', else 'name',
    # else 'system:index', else the literal 'unnamed_geometry'.
    def set_standard_name(feature):
        has_geo_name = feature.get('geo_name')
        has_name = feature.get('name')
        has_index = feature.get('system:index')
        new_name = ee.Algorithms.If(
            has_geo_name, has_geo_name,
            ee.Algorithms.If(has_name, has_name,
            ee.Algorithms.If(has_index, has_index, 'unnamed_geometry')))
        return feature.set({'geo_name': new_name})

    # Normalize every supported input type into an ee.FeatureCollection
    if isinstance(geometries, (ee.FeatureCollection, ee.Feature)):
        features = ee.FeatureCollection(geometries)
        if geometry_names:
            print("Warning: 'geometry_names' are ignored when the input is an ee.Feature or ee.FeatureCollection.")

    elif isinstance(geometries, ee.Geometry):
        name = geometry_names[0] if (geometry_names and geometry_names[0]) else 'unnamed_geometry'
        features = ee.FeatureCollection([ee.Feature(geometries).set('geo_name', name)])

    elif isinstance(geometries, list):
        if not geometries:  # Handle empty list case
            raise ValueError("'geometries' list cannot be empty.")

        # Case: List of coordinates -> buffered points
        if all(isinstance(i, tuple) for i in geometries):
            if geometry_names is None:
                geometry_names = [f"Location_{i+1}" for i in range(len(geometries))]
            elif len(geometry_names) != len(geometries):
                raise ValueError("geometry_names must have the same length as the coordinates list.")
            points = [
                ee.Feature(ee.Geometry.Point(coord).buffer(buffer_size), {'geo_name': str(name)})
                for coord, name in zip(geometries, geometry_names)
            ]
            features = ee.FeatureCollection(points)

        # Case: List of Geometries
        elif all(isinstance(i, ee.Geometry) for i in geometries):
            if geometry_names is None:
                geometry_names = [f"Geometry_{i+1}" for i in range(len(geometries))]
            elif len(geometry_names) != len(geometries):
                raise ValueError("geometry_names must have the same length as the geometries list.")
            geom_features = [
                ee.Feature(geom).set({'geo_name': str(name)})
                for geom, name in zip(geometries, geometry_names)
            ]
            features = ee.FeatureCollection(geom_features)

        else:
            raise TypeError("Input list must be a list of (lon, lat) tuples OR a list of ee.Geometry objects.")

    elif isinstance(geometries, tuple) and len(geometries) == 2:
        # Single (lon, lat) coordinate
        name = geometry_names[0] if geometry_names else 'Location_1'
        features = ee.FeatureCollection([
            ee.Feature(ee.Geometry.Point(geometries).buffer(buffer_size), {'geo_name': name})
        ])
    else:
        raise TypeError("Unsupported type for 'geometries'.")

    features = features.map(set_standard_name)

    try:
        reducer = getattr(ee.Reducer, reducer_type)()
    except AttributeError:
        raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")

    def calculate_stats_for_image(image):
        image_date = image.get('Date_Filter')
        stats_fc = image.reduceRegions(
            collection=features, reducer=reducer, scale=scale, tileScale=tileScale
        )

        # Features whose reduction produced no output get the sentinel -9999 so
        # that every feature carries the reducer property; sentinel rows are
        # discarded client-side below.
        def guarantee_reducer_property(f):
            has_property = f.propertyNames().contains(reducer_type)
            return ee.Algorithms.If(has_property, f, f.set(reducer_type, -9999))
        fixed_stats_fc = stats_fc.map(guarantee_reducer_property)

        return fixed_stats_fc.map(lambda f: f.set('image_date', image_date))

    results_fc = ee.FeatureCollection(img_collection_obj.collection.map(calculate_stats_for_image)).flatten()
    df = GenericCollection.ee_to_df(results_fc, remove_geom=True)

    # Checking for issues
    if df.empty:
        raise ValueError("No results found for the given parameters. Check if the geometries intersect with the images, if the dates filter is too restrictive, or if the provided bands are empty.")
    if reducer_type not in df.columns:
        print(f"Warning: Reducer '{reducer_type}' not found in results.")

    # Drop nulls and sentinel rows, reporting how many results were discarded
    initial_rows = len(df)
    df = df.dropna(subset=[reducer_type])
    df = df[df[reducer_type] != -9999]
    dropped_rows = initial_rows - len(df)
    if dropped_rows > 0:
        print(f"Warning: Discarded {dropped_rows} results due to failed reductions (e.g., no valid pixels in geometry).")

    # Reshape DataFrame to have dates as index and geometry names as columns
    pivot_df = df.pivot(index='image_date', columns='geo_name', values=reducer_type)
    pivot_df.index.name = 'Date'
    if file_path:
        # Strip a trailing .csv so the extension is appended exactly once
        if file_path.endswith('.csv'):
            file_path = file_path[:-4]
        pivot_df.to_csv(f"{file_path}.csv")
        print(f"Zonal stats saved to {file_path}.csv")
        return
    return pivot_df
2565
+
2566
def export_to_asset_collection(
    self,
    asset_collection_path,
    region,
    scale,
    dates=None,
    filename_prefix="",
    crs=None,
    max_pixels=int(1e13),
    description_prefix="export"
):
    """
    Queues one Earth Engine asset export task per date of this collection.

    An attempt is made to create the destination ImageCollection asset first; any
    error raised by that attempt (for example because the asset already exists) is
    ignored. For every date, the first image whose 'Date_Filter' property equals the
    date is exported to `<asset_collection_path>/<filename_prefix><date>`.

    Args:
        asset_collection_path (str): The path to the asset collection.
        region (ee.Geometry): The region to export.
        scale (int): The scale of the export.
        dates (list, optional): The date strings to export. Defaults to None, which uses `self.dates`.
        filename_prefix (str, optional): Prefix placed before the date in each asset name. Defaults to "".
        crs (str, optional): The coordinate reference system. Only passed to the export when truthy. Defaults to None.
        max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
        description_prefix (str, optional): Prefix of each task description. Defaults to "export".

    Returns:
        None: (queues export tasks)
    """
    source_collection = self.collection
    export_dates = self.dates if dates is None else dates

    # Best-effort creation of the destination collection; failures are deliberately ignored.
    try:
        ee.data.createAsset({'type': 'ImageCollection'}, asset_collection_path)
    except Exception:
        pass

    for date_str in export_dates:
        # First image of the collection that carries this date
        matching = source_collection.filter(ee.Filter.eq('Date_Filter', date_str))
        dated_image = ee.Image(matching.first())
        name_suffix = filename_prefix + date_str

        export_args = dict(
            image=dated_image,
            description=description_prefix + "_" + name_suffix,
            assetId=asset_collection_path + "/" + name_suffix,
            region=region,
            scale=scale,
            maxPixels=max_pixels,
        )
        # The crs key is only supplied when the caller asked for one
        if crs:
            export_args['crs'] = crs

        ee.batch.Export.image.toAsset(**export_args).start()

    print("Queued", len(export_dates), "export tasks to", asset_collection_path)
2621
+
2622
+
2623
+