RadGEEToolbox 1.7.4__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -2893,200 +2893,197 @@ class GenericCollection:
2893
2893
  lines,
2894
2894
  line_names,
2895
2895
  reducer="mean",
2896
- dist_interval=30,
2896
+ dist_interval=90,
2897
2897
  n_segments=None,
2898
2898
  scale=30,
2899
2899
  processing_mode='aggregated',
2900
2900
  save_folder_path=None,
2901
2901
  sampling_method='line',
2902
- point_buffer_radius=15
2902
+ point_buffer_radius=15,
2903
+ batch_size=10
2903
2904
  ):
2904
2905
  """
2905
- Computes and returns pixel values along transects for each image in a collection.
2906
-
2907
- This iterative function generates time-series data along one or more lines, and
2908
- supports two different geometric sampling methods ('line' and 'buffered_point')
2909
- for maximum flexibility and performance.
2910
-
2911
- There are two processing modes available, aggregated and iterative:
2912
- - 'aggregated' (default; suggested): Fast, server-side processing. Fetches all results
2913
- in a single request. Highly recommended. Returns a dictionary of pandas DataFrames.
2914
- - 'iterative': Slower, client-side loop that processes one image at a time.
2915
- Kept for backward compatibility (effectively depreciated). Returns None and saves individual CSVs.
2916
- This method is not recommended unless absolutely necessary, as it is less efficient and may be subject to client-side timeouts.
2917
-
2906
+ Computes and returns pixel values along transects. Provide a list of ee.Geometry.LineString objects and corresponding names, and the function will compute the specified reducer value
2907
+ at regular intervals along each line for all images in the collection. Use `dist_interval` or `n_segments` to control sampling resolution. The user can choose between 'aggregated' mode (returns a dictionary of DataFrames) or 'iterative' mode (saves individual CSVs for each transect).
2908
+ Alter `sampling_method` to sample directly along the line or via buffered points along the line. Buffered points can help capture more representative pixel values in heterogeneous landscapes, and the buffer radius can be adjusted via `point_buffer_radius`.
2909
+
2918
2910
  Args:
2919
- lines (list): A list of one or more ee.Geometry.LineString objects that
2920
- define the transects.
2921
- line_names (list): A list of string names for each transect. The length
2922
- of this list must match the length of the `lines` list.
2923
- reducer (str, optional): The name of the ee.Reducer to apply at each
2924
- transect point (e.g., 'mean', 'median', 'first'). Defaults to 'mean'.
2925
- dist_interval (float, optional): The distance interval in meters for
2926
- sampling points along each transect. Will be overridden if `n_segments` is provided.
2927
- Defaults to 30. Recommended to increase this value when using the
2928
- 'line' processing method, or else you may get blank rows.
2929
- n_segments (int, optional): The number of equal-length segments to split
2930
- each transect line into for sampling. This parameter overrides `dist_interval`.
2931
- Defaults to None.
2932
- scale (int, optional): The nominal scale in meters for the reduction,
2933
- which should typically match the pixel resolution of the imagery.
2934
- Defaults to 30.
2935
- processing_mode (str, optional): The method for processing the collection.
2936
- - 'aggregated' (default): Fast, server-side processing. Fetches all
2937
- results in a single request. Highly recommended. Returns a dictionary
2938
- of pandas DataFrames.
2939
- - 'iterative': Slower, client-side loop that processes one image at a
2940
- time. Kept for backward compatibility. Returns None and saves
2941
- individual CSVs.
2942
- save_folder_path (str, optional): If provided, the function will save the
2943
- resulting transect data to CSV files. The behavior depends on the
2944
- `processing_mode`:
2945
- - In 'aggregated' mode, one CSV is saved for each transect,
2946
- containing all dates. (e.g., 'MyTransect_transects.csv').
2947
- - In 'iterative' mode, one CSV is saved for each date,
2948
- containing all transects. (e.g., '2022-06-15_transects.csv').
2949
- sampling_method (str, optional): The geometric method used for sampling.
2950
- - 'line' (default): Reduces all pixels intersecting each small line
2951
- segment. This can be unreliable and produce blank rows if
2952
- `dist_interval` is too small relative to the `scale`.
2953
- - 'buffered_point': Reduces all pixels within a buffer around the
2954
- midpoint of each line segment. This method is more robust and
2955
- reliably avoids blank rows, but may not reduce all pixels along a line segment.
2956
- point_buffer_radius (int, optional): The radius in meters for the buffer
2957
- when `sampling_method` is 'buffered_point'. Defaults to 15.
2911
+ lines (list): List of ee.Geometry.LineString objects.
2912
+ line_names (list): List of string names for each transect.
2913
+ reducer (str, optional): Reducer name. Defaults to 'mean'.
2914
+ dist_interval (float, optional): Distance interval in meters. Defaults to 90.
2915
+ n_segments (int, optional): Number of segments (overrides dist_interval).
2916
+ scale (int, optional): Scale in meters. Defaults to 30.
2917
+ processing_mode (str, optional): 'aggregated' or 'iterative'.
2918
+ save_folder_path (str, optional): Path to save CSVs.
2919
+ sampling_method (str, optional): 'line' or 'buffered_point'.
2920
+ point_buffer_radius (int, optional): Buffer radius if using 'buffered_point'.
2921
+ batch_size (int, optional): Images per request in 'aggregated' mode. Defaults to 10. Lower the value if you encounter a 'Too many aggregations' error.
2958
2922
 
2959
2923
  Returns:
2960
- dict or None:
2961
- - If `processing_mode` is 'aggregated', returns a dictionary where each
2962
- key is a transect name and each value is a pandas DataFrame. In the
2963
- DataFrame, the index is the distance along the transect and each
2964
- column represents an image date. Optionally saves CSV files if
2965
- `save_folder_path` is provided.
2966
- - If `processing_mode` is 'iterative', returns None as it saves
2967
- files directly.
2968
-
2969
- Raises:
2970
- ValueError: If `lines` and `line_names` have different lengths, or if
2971
- an unknown reducer or processing mode is specified.
2924
+ dict or None: Dictionary of DataFrames (aggregated) or None (iterative).
2972
2925
  """
2973
- # Validating inputs
2974
2926
  if len(lines) != len(line_names):
2975
2927
  raise ValueError("'lines' and 'line_names' must have the same number of elements.")
2976
- ### Current, server-side processing method ###
2928
+
2929
+ first_img = self.collection.first()
2930
+ bands = first_img.bandNames().getInfo()
2931
+ is_multiband = len(bands) > 1
2932
+
2933
+ # Setup robust dictionary for handling masked/zero values
2934
+ default_val = -9999
2935
+ dummy_dict = ee.Dictionary.fromLists(bands, ee.List.repeat(default_val, len(bands)))
2936
+
2937
+ if is_multiband:
2938
+ reducer_cols = [f"{b}_{reducer}" for b in bands]
2939
+ clean_names = bands
2940
+ rename_keys = bands
2941
+ rename_vals = reducer_cols
2942
+ else:
2943
+ reducer_cols = [reducer]
2944
+ clean_names = [bands[0]]
2945
+ rename_keys = bands
2946
+ rename_vals = reducer_cols
2947
+
2948
+ print("Pre-computing transect geometries from input LineString(s)...")
2949
+
2950
+ master_transect_fc = ee.FeatureCollection([])
2951
+ geom_error = 1.0
2952
+
2953
+ for i, line in enumerate(lines):
2954
+ line_name = line_names[i]
2955
+ length = line.length(geom_error)
2956
+
2957
+ eff_interval = length.divide(n_segments) if n_segments else dist_interval
2958
+
2959
+ distances = ee.List.sequence(0, length, eff_interval)
2960
+ cut_lines = line.cutLines(distances, geom_error).geometries()
2961
+
2962
+ def create_feature(l):
2963
+ geom = ee.Geometry(ee.List(l).get(0))
2964
+ dist = ee.Number(ee.List(l).get(1))
2965
+
2966
+ final_geom = ee.Algorithms.If(
2967
+ ee.String(sampling_method).equals('buffered_point'),
2968
+ geom.centroid(geom_error).buffer(point_buffer_radius),
2969
+ geom
2970
+ )
2971
+
2972
+ return ee.Feature(ee.Geometry(final_geom), {
2973
+ 'transect_name': line_name,
2974
+ 'distance': dist
2975
+ })
2976
+
2977
+ line_fc = ee.FeatureCollection(cut_lines.zip(distances).map(create_feature))
2978
+ master_transect_fc = master_transect_fc.merge(line_fc)
2979
+
2980
+ try:
2981
+ ee_reducer = getattr(ee.Reducer, reducer)()
2982
+ except AttributeError:
2983
+ raise ValueError(f"Unknown reducer: '{reducer}'.")
2984
+
2985
+ def process_image(image):
2986
+ date_val = image.get('Date_Filter')
2987
+
2988
+ # Map over points (Slower but Robust)
2989
+ def reduce_point(f):
2990
+ stats = image.reduceRegion(
2991
+ reducer=ee_reducer,
2992
+ geometry=f.geometry(),
2993
+ scale=scale,
2994
+ maxPixels=1e13
2995
+ )
2996
+ # Combine with defaults (preserves 0, handles masked)
2997
+ safe_stats = dummy_dict.combine(stats, overwrite=True)
2998
+ # Rename keys to match expected outputs (e.g. 'ndvi' -> 'ndvi_mean')
2999
+ final_stats = safe_stats.rename(rename_keys, rename_vals)
3000
+
3001
+ return f.set(final_stats).set({'image_date': date_val})
3002
+
3003
+ return master_transect_fc.map(reduce_point)
3004
+
3005
+ export_cols = ['transect_name', 'distance', 'image_date'] + reducer_cols
3006
+
2977
3007
  if processing_mode == 'aggregated':
2978
- # Validating reducer type
2979
- try:
2980
- ee_reducer = getattr(ee.Reducer, reducer)()
2981
- except AttributeError:
2982
- raise ValueError(f"Unknown reducer: '{reducer}'.")
2983
- ### Function to extract transects for a single image
2984
- def get_transects_for_image(image):
2985
- image_date = image.get('Date_Filter')
2986
- # Initialize an empty list to hold all transect FeatureCollections
2987
- all_transects_for_image = ee.List([])
2988
- # Looping through each line and processing
2989
- for i, line in enumerate(lines):
2990
- # Index line and name
2991
- line_name = line_names[i]
2992
- # Determine maxError based on image projection, used for geometry operations
2993
- maxError = image.projection().nominalScale().divide(5)
2994
- # Calculate effective distance interval
2995
- length = line.length(maxError) # using maxError here ensures consistency with cutLines
2996
- # Determine effective distance interval based on n_segments or dist_interval
2997
- effective_dist_interval = ee.Algorithms.If(
2998
- n_segments,
2999
- length.divide(n_segments),
3000
- dist_interval or 30 # Defaults to 30 if both are None
3001
- )
3002
- # Generate distances along the line(s) for segmentation
3003
- distances = ee.List.sequence(0, length, effective_dist_interval)
3004
- # Segmenting the line into smaller lines at the specified distances
3005
- cut_lines_geoms = line.cutLines(distances, maxError).geometries()
3006
- # Function to create features with distance attributes
3007
- # Adjusted to ensure consistent return types
3008
- def set_dist_attr(l):
3009
- # l is a list: [geometry, distance]
3010
- # Extracting geometry portion of line
3011
- geom_segment = ee.Geometry(ee.List(l).get(0))
3012
- # Extracting distance value for attribute
3013
- distance = ee.Number(ee.List(l).get(1))
3014
- ### Determine final geometry based on sampling method
3015
- # If the sampling method is 'buffered_point',
3016
- # create a buffered point feature at the centroid of each segment,
3017
- # otherwise create a line feature
3018
- final_feature = ee.Algorithms.If(
3019
- ee.String(sampling_method).equals('buffered_point'),
3020
- # True Case: Create the buffered point feature
3021
- ee.Feature(
3022
- geom_segment.centroid(maxError).buffer(point_buffer_radius),
3023
- {'distance': distance}
3024
- ),
3025
- # False Case: Create the line segment feature
3026
- ee.Feature(geom_segment, {'distance': distance})
3027
- )
3028
- # Return either the line segment feature or the buffered point feature
3029
- return final_feature
3030
- # Creating a FeatureCollection of the cut lines with distance attributes
3031
- # Using map to apply the set_dist_attr function to each cut line geometry
3032
- line_features = ee.FeatureCollection(cut_lines_geoms.zip(distances).map(set_dist_attr))
3033
- # Reducing the image over the line features to get transect values
3034
- transect_fc = image.reduceRegions(
3035
- collection=line_features, reducer=ee_reducer, scale=scale
3036
- )
3037
- # Adding image date and line name properties to each feature
3038
- def set_props(feature):
3039
- return feature.set({'image_date': image_date, 'transect_name': line_name})
3040
- # Append to the list of all transects for this image
3041
- all_transects_for_image = all_transects_for_image.add(transect_fc.map(set_props))
3042
- # Combine all transect FeatureCollections into a single FeatureCollection and flatten
3043
- # Flatten is used to merge the list of FeatureCollections into one
3044
- return ee.FeatureCollection(all_transects_for_image).flatten()
3045
- # Map the function over the entire image collection and flatten the results
3046
- results_fc = ee.FeatureCollection(self.collection.map(get_transects_for_image)).flatten()
3047
- # Convert the results to a pandas DataFrame
3048
- df = GenericCollection.ee_to_df(results_fc, remove_geom=True)
3049
- # Check if the DataFrame is empty
3050
- if df.empty:
3051
- print("Warning: No transect data was generated.")
3008
+ collection_size = self.collection.size().getInfo()
3009
+ print(f"Starting batch process of {collection_size} images...")
3010
+
3011
+ dfs = []
3012
+ for i in range(0, collection_size, batch_size):
3013
+ print(f" Processing image {i} to {min(i + batch_size, collection_size)}...")
3014
+
3015
+ batch_col = ee.ImageCollection(self.collection.toList(batch_size, i))
3016
+ results_fc = batch_col.map(process_image).flatten()
3017
+
3018
+ # Dynamic Class Call for ee_to_df
3019
+ df_batch = self.__class__.ee_to_df(results_fc, columns=export_cols, remove_geom=True)
3020
+
3021
+ if not df_batch.empty:
3022
+ dfs.append(df_batch)
3023
+
3024
+ if not dfs:
3025
+ print("Warning: No transect data generated.")
3052
3026
  return {}
3053
- # Initialize dictionary to hold output DataFrames for each transect
3027
+
3028
+ df = pd.concat(dfs, ignore_index=True)
3029
+
3030
+ # Post-Process & Split
3054
3031
  output_dfs = {}
3055
- # Loop through each unique transect name and create a pivot table
3032
+ for col in reducer_cols:
3033
+ df[col] = pd.to_numeric(df[col], errors='coerce')
3034
+ df[col] = df[col].replace(-9999, np.nan)
3035
+
3056
3036
  for name in sorted(df['transect_name'].unique()):
3057
- transect_df = df[df['transect_name'] == name]
3058
- pivot_df = transect_df.pivot(index='distance', columns='image_date', values=reducer)
3059
- pivot_df.columns.name = 'Date'
3060
- output_dfs[name] = pivot_df
3061
- # Optionally save each transect DataFrame to CSV
3062
- if save_folder_path:
3063
- for transect_name, transect_df in output_dfs.items():
3064
- safe_filename = "".join(x for x in transect_name if x.isalnum() or x in "._-")
3065
- file_path = f"{save_folder_path}{safe_filename}_transects.csv"
3066
- transect_df.to_csv(file_path)
3067
- print(f"Saved transect data to {file_path}")
3068
-
3037
+ line_df = df[df['transect_name'] == name]
3038
+
3039
+ for raw_col, band_name in zip(reducer_cols, clean_names):
3040
+ try:
3041
+ # Safety drop for duplicates
3042
+ line_df_clean = line_df.drop_duplicates(subset=['distance', 'image_date'])
3043
+
3044
+ pivot = line_df_clean.pivot(index='distance', columns='image_date', values=raw_col)
3045
+ pivot.columns.name = 'Date'
3046
+ key = f"{name}_{band_name}"
3047
+ output_dfs[key] = pivot
3048
+
3049
+ if save_folder_path:
3050
+ safe_key = "".join(x for x in key if x.isalnum() or x in "._-")
3051
+ fname = f"{save_folder_path}{safe_key}_transects.csv"
3052
+ pivot.to_csv(fname)
3053
+ print(f"Saved: {fname}")
3054
+ except Exception as e:
3055
+ print(f"Skipping pivot for {name}/{band_name}: {e}")
3056
+
3069
3057
  return output_dfs
3070
3058
 
3071
- ### old, depreciated iterative client-side processing method ###
3072
3059
  elif processing_mode == 'iterative':
3073
3060
  if not save_folder_path:
3074
- raise ValueError("`save_folder_path` is required for 'iterative' processing mode.")
3061
+ raise ValueError("save_folder_path is required for iterative mode.")
3075
3062
 
3076
3063
  image_collection_dates = self.dates
3077
3064
  for i, date in enumerate(image_collection_dates):
3078
3065
  try:
3079
3066
  print(f"Processing image {i+1}/{len(image_collection_dates)}: {date}")
3080
- image = self.image_grab(i)
3081
- transects_df = GenericCollection.transect(
3082
- image, lines, line_names, reducer, n_segments, dist_interval, to_pandas=True
3083
- )
3084
- transects_df.to_csv(f"{save_folder_path}{date}_transects.csv")
3085
- print(f"{date}_transects saved to csv")
3067
+ image_list = self.collection.toList(self.collection.size())
3068
+ image = ee.Image(image_list.get(i))
3069
+
3070
+ fc_result = process_image(image)
3071
+ df = self.__class__.ee_to_df(fc_result, columns=export_cols, remove_geom=True)
3072
+
3073
+ if not df.empty:
3074
+ for col in reducer_cols:
3075
+ df[col] = pd.to_numeric(df[col], errors='coerce')
3076
+ df[col] = df[col].replace(-9999, np.nan)
3077
+
3078
+ fname = f"{save_folder_path}{date}_transects.csv"
3079
+ df.to_csv(fname, index=False)
3080
+ print(f"Saved: {fname}")
3081
+ else:
3082
+ print(f"Skipping {date}: No data.")
3086
3083
  except Exception as e:
3087
- print(f"An error occurred while processing image {i+1}: {e}")
3084
+ print(f"Error processing {date}: {e}")
3088
3085
  else:
3089
- raise ValueError("`processing_mode` must be 'iterative' or 'aggregated'.")
3086
+ raise ValueError("processing_mode must be 'iterative' or 'aggregated'.")
3090
3087
 
3091
3088
  @staticmethod
3092
3089
  def extract_zonal_stats_from_buffer(
@@ -3190,7 +3187,8 @@ class GenericCollection:
3190
3187
  buffer_size=1,
3191
3188
  tileScale=1,
3192
3189
  dates=None,
3193
- file_path=None
3190
+ file_path=None,
3191
+ unweighted=False
3194
3192
  ):
3195
3193
  """
3196
3194
  Iterates over a collection of images and extracts spatial statistics (defaults to mean) for a given list of geometries or coordinates. Individual statistics are calculated for each geometry or coordinate provided.
@@ -3207,6 +3205,7 @@ class GenericCollection:
3207
3205
  tileScale (int, optional): A scaling factor to reduce aggregation tile size. Defaults to 1.
3208
3206
  dates (list, optional): A list of date strings ('YYYY-MM-DD') for filtering the collection, such that only images from these dates are included for zonal statistic retrieval. Defaults to None, which uses all dates in the collection.
3209
3207
  file_path (str, optional): File path to save the output CSV.
3208
+ unweighted (bool, optional): If True, uses unweighted statistics when applicable (e.g., for 'mean'). Defaults to False.
3210
3209
 
3211
3210
  Returns:
3212
3211
  pd.DataFrame or None: A pandas DataFrame with dates as the index and coordinate names
@@ -3313,6 +3312,9 @@ class GenericCollection:
3313
3312
  reducer = getattr(ee.Reducer, reducer_type)()
3314
3313
  except AttributeError:
3315
3314
  raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")
3315
+
3316
+ if unweighted:
3317
+ reducer = reducer.unweighted()
3316
3318
 
3317
3319
  # Define the function to map over the image collection
3318
3320
  def calculate_stats_for_image(image):
@@ -3374,6 +3376,394 @@ class GenericCollection:
3374
3376
  print(f"Zonal stats saved to {file_path}.csv")
3375
3377
  return
3376
3378
  return pivot_df
3379
+
3380
+ def multiband_zonal_stats(
3381
+ self,
3382
+ geometry,
3383
+ bands,
3384
+ reducer_types,
3385
+ scale=30,
3386
+ geometry_name='geom',
3387
+ dates=None,
3388
+ include_area=False,
3389
+ file_path=None,
3390
+ unweighted=False
3391
+ ):
3392
+ """
3393
+ Calculates zonal statistics for multiple bands over a single geometry for each image in the collection.
3394
+ Allows for specifying different reducers for different bands. Optionally includes the geometry area.
3395
+
3396
+ Args:
3397
+ geometry (ee.Geometry or ee.Feature): The single geometry to calculate statistics for.
3398
+ bands (list of str): A list of band names to include in the analysis.
3399
+ reducer_types (str or list of str): A single reducer name (e.g., 'mean') to apply to all bands,
3400
+ or a list of reducer names matching the length of the 'bands' list to apply specific reducers
3401
+ to specific bands.
3402
+ scale (int, optional): The scale in meters for the reduction. Defaults to 30.
3403
+ geometry_name (str, optional): A name for the geometry, used in column naming. Defaults to 'geom'.
3404
+ dates (list of str, optional): A list of date strings ('YYYY-MM-DD') to filter the collection.
3405
+ Defaults to None (processes all images).
3406
+ include_area (bool, optional): If True, adds a column with the area of the geometry in square meters.
3407
+ Defaults to False.
3408
+ file_path (str, optional): If provided, saves the resulting DataFrame to a CSV file at this path.
3409
+ unweighted (bool, optional): If True, uses unweighted statistics when applicable (e.g., for 'mean'). Defaults to False.
3410
+
3411
+ Returns:
3412
+ pd.DataFrame: A pandas DataFrame indexed by Date, with columns named as '{band}_{geometry_name}_{reducer}'.
3413
+ """
3414
+ # 1. Input Validation and Setup
3415
+ if not isinstance(geometry, (ee.Geometry, ee.Feature)):
3416
+ raise ValueError("The `geometry` argument must be an ee.Geometry or ee.Feature.")
3417
+
3418
+ region = geometry.geometry() if isinstance(geometry, ee.Feature) else geometry
3419
+
3420
+ if isinstance(bands, str):
3421
+ bands = [bands]
3422
+ if not isinstance(bands, list):
3423
+ raise ValueError("The `bands` argument must be a string or a list of strings.")
3424
+
3425
+ # Handle reducer_types (str vs list)
3426
+ if isinstance(reducer_types, str):
3427
+ reducers_list = [reducer_types] * len(bands)
3428
+ elif isinstance(reducer_types, list):
3429
+ if len(reducer_types) != len(bands):
3430
+ raise ValueError("If `reducer_types` is a list, it must have the same length as `bands`.")
3431
+ reducers_list = reducer_types
3432
+ else:
3433
+ raise ValueError("`reducer_types` must be a string or a list of strings.")
3434
+
3435
+ # 2. Filter Collection
3436
+ processing_col = self.collection
3437
+
3438
+ if dates:
3439
+ processing_col = processing_col.filter(ee.Filter.inList('Date_Filter', dates))
3440
+
3441
+ processing_col = processing_col.select(bands)
3442
+
3443
+ # 3. Pre-calculate Area (if requested)
3444
+ area_val = None
3445
+ area_col_name = f"{geometry_name}_area_m2"
3446
+ if include_area:
3447
+ # Calculate geodesic area in square meters with maxError of 1m
3448
+ area_val = region.area(1)
3449
+
3450
+ # 4. Define the Reduction Logic
3451
+ def calculate_multiband_stats(image):
3452
+ # Base feature with date property
3453
+ date_val = image.get('Date_Filter')
3454
+ feature = ee.Feature(None, {'Date': date_val})
3455
+
3456
+ # If requested, add the static area value to every feature
3457
+ if include_area:
3458
+ feature = feature.set(area_col_name, area_val)
3459
+
3460
+ unique_reducers = list(set(reducers_list))
3461
+
3462
+ # OPTIMIZED PATH: Single reducer type for all bands
3463
+ if len(unique_reducers) == 1:
3464
+ r_type = unique_reducers[0]
3465
+ try:
3466
+ reducer = getattr(ee.Reducer, r_type)()
3467
+ except AttributeError:
3468
+ reducer = ee.Reducer.mean()
3469
+
3470
+ if unweighted:
3471
+ reducer = reducer.unweighted()
3472
+
3473
+ stats = image.reduceRegion(
3474
+ reducer=reducer,
3475
+ geometry=region,
3476
+ scale=scale,
3477
+ maxPixels=1e13
3478
+ )
3479
+
3480
+ for band in bands:
3481
+ col_name = f"{band}_{geometry_name}_{r_type}"
3482
+ val = stats.get(band)
3483
+ feature = feature.set(col_name, val)
3484
+
3485
+ # ITERATIVE PATH: Different reducers for different bands
3486
+ else:
3487
+ for band, r_type in zip(bands, reducers_list):
3488
+ try:
3489
+ reducer = getattr(ee.Reducer, r_type)()
3490
+ except AttributeError:
3491
+ reducer = ee.Reducer.mean()
3492
+
3493
+ if unweighted:
3494
+ reducer = reducer.unweighted()
3495
+
3496
+ stats = image.select(band).reduceRegion(
3497
+ reducer=reducer,
3498
+ geometry=region,
3499
+ scale=scale,
3500
+ maxPixels=1e13
3501
+ )
3502
+
3503
+ val = stats.get(band)
3504
+ col_name = f"{band}_{geometry_name}_{r_type}"
3505
+ feature = feature.set(col_name, val)
3506
+
3507
+ return feature
3508
+
3509
+ # 5. Execute Server-Side Mapping (with explicit Cast)
3510
+ results_fc = ee.FeatureCollection(processing_col.map(calculate_multiband_stats))
3511
+
3512
+ # 6. Client-Side Conversion
3513
+ try:
3514
+ df = GenericCollection.ee_to_df(results_fc, remove_geom=True)
3515
+ except Exception as e:
3516
+ raise RuntimeError(f"Failed to convert Earth Engine results to DataFrame. Error: {e}")
3517
+
3518
+ if df.empty:
3519
+ print("Warning: No results returned. Check if the geometry intersects the imagery or if dates are valid.")
3520
+ return pd.DataFrame()
3521
+
3522
+ # 7. Formatting & Reordering
3523
+ if 'Date' in df.columns:
3524
+ df['Date'] = pd.to_datetime(df['Date'])
3525
+ df = df.sort_values('Date').set_index('Date')
3526
+
3527
+ # Construct the expected column names in the exact order of the input lists
3528
+ expected_order = [f"{band}_{geometry_name}_{r_type}" for band, r_type in zip(bands, reducers_list)]
3529
+
3530
+ # If area was included, append it to the END of the list
3531
+ if include_area:
3532
+ expected_order.append(area_col_name)
3533
+
3534
+ # Reindex the DataFrame to match this order.
3535
+ existing_cols = [c for c in expected_order if c in df.columns]
3536
+ df = df[existing_cols]
3537
+
3538
+ # 8. Export (Optional)
3539
+ if file_path:
3540
+ if not file_path.lower().endswith('.csv'):
3541
+ file_path += '.csv'
3542
+ try:
3543
+ df.to_csv(file_path)
3544
+ print(f"Multiband zonal stats saved to {file_path}")
3545
+ except Exception as e:
3546
+ print(f"Error saving file to {file_path}: {e}")
3547
+
3548
+ return df
3549
+
3550
+ def sample(
3551
+ self,
3552
+ locations,
3553
+ band=None,
3554
+ scale=None,
3555
+ location_names=None,
3556
+ dates=None,
3557
+ file_path=None,
3558
+ tileScale=1
3559
+ ):
3560
+ """
3561
+ Extracts time-series pixel values for a list of locations.
3562
+
3563
+
3564
+ Args:
3565
+ locations (list, tuple, ee.Geometry, or ee.FeatureCollection): Input points.
3566
+ band (str, optional): The name of the band to sample. Defaults to the first band.
3567
+ scale (int, optional): Scale in meters. Defaults to 30 if None.
3568
+ location_names (list of str, optional): Custom names for locations.
3569
+ dates (list, optional): Date filter ['YYYY-MM-DD'].
3570
+ file_path (str, optional): CSV export path.
3571
+ tileScale (int, optional): Aggregation tile scale. Defaults to 1.
3572
+
3573
+ Returns:
3574
+ pd.DataFrame (or CSV if file_path is provided): DataFrame indexed by Date, columns by Location.
3575
+ """
3576
+ col = self.collection
3577
+ if dates:
3578
+ col = col.filter(ee.Filter.inList('Date_Filter', dates))
3579
+
3580
+ first_img = col.first()
3581
+ available_bands = first_img.bandNames().getInfo()
3582
+
3583
+ if band:
3584
+ if band not in available_bands:
3585
+ raise ValueError(f"Band '{band}' not found. Available: {available_bands}")
3586
+ target_band = band
3587
+ else:
3588
+ target_band = available_bands[0]
3589
+
3590
+ processing_col = col.select([target_band])
3591
+
3592
+ def set_name(f):
3593
+ name = ee.Algorithms.If(
3594
+ f.get('geo_name'), f.get('geo_name'),
3595
+ ee.Algorithms.If(f.get('name'), f.get('name'),
3596
+ ee.Algorithms.If(f.get('system:index'), f.get('system:index'), 'unnamed'))
3597
+ )
3598
+ return f.set('geo_name', name)
3599
+
3600
+ if isinstance(locations, (ee.FeatureCollection, ee.Feature)):
3601
+ features = ee.FeatureCollection(locations)
3602
+ elif isinstance(locations, ee.Geometry):
3603
+ lbl = location_names[0] if (location_names and location_names[0]) else 'Point_1'
3604
+ features = ee.FeatureCollection([ee.Feature(locations).set('geo_name', lbl)])
3605
+ elif isinstance(locations, tuple) and len(locations) == 2:
3606
+ lbl = location_names[0] if location_names else 'Location_1'
3607
+ features = ee.FeatureCollection([ee.Feature(ee.Geometry.Point(locations), {'geo_name': lbl})])
3608
+ elif isinstance(locations, list):
3609
+ if all(isinstance(i, tuple) for i in locations):
3610
+ names = location_names if location_names else [f"Loc_{i+1}" for i in range(len(locations))]
3611
+ features = ee.FeatureCollection([
3612
+ ee.Feature(ee.Geometry.Point(p), {'geo_name': str(n)}) for p, n in zip(locations, names)
3613
+ ])
3614
+ elif all(isinstance(i, ee.Geometry) for i in locations):
3615
+ names = location_names if location_names else [f"Geom_{i+1}" for i in range(len(locations))]
3616
+ features = ee.FeatureCollection([
3617
+ ee.Feature(g, {'geo_name': str(n)}) for g, n in zip(locations, names)
3618
+ ])
3619
+ else:
3620
+ raise ValueError("List must contain (lon, lat) tuples or ee.Geometry objects.")
3621
+ else:
3622
+ raise TypeError("Invalid locations input.")
3623
+
3624
+ features = features.map(set_name)
3625
+
3626
+
3627
+ def sample_image(img):
3628
+ date = img.get('Date_Filter')
3629
+ use_scale = scale if scale is not None else 30
3630
+
3631
+
3632
+ default_dict = ee.Dictionary({target_band: -9999})
3633
+
3634
+ def extract_point(f):
3635
+ stats = img.reduceRegion(
3636
+ reducer=ee.Reducer.first(),
3637
+ geometry=f.geometry(),
3638
+ scale=use_scale,
3639
+ tileScale=tileScale
3640
+ )
3641
+
3642
+ # Combine dictionaries.
3643
+ # If stats has 'target_band' (even if 0), it overwrites -9999.
3644
+ # If stats is empty (masked), -9999 remains.
3645
+ safe_stats = default_dict.combine(stats, overwrite=True)
3646
+ val = safe_stats.get(target_band)
3647
+
3648
+ return f.set({
3649
+ target_band: val,
3650
+ 'image_date': date
3651
+ })
3652
+
3653
+ return features.map(extract_point)
3654
+
3655
+ # Flatten the results
3656
+ flat_results = processing_col.map(sample_image).flatten()
3657
+
3658
+ df = GenericCollection.ee_to_df(
3659
+ flat_results,
3660
+ columns=['image_date', 'geo_name', target_band],
3661
+ remove_geom=True
3662
+ )
3663
+
3664
+ if df.empty:
3665
+ print("Warning: No data returned.")
3666
+ return pd.DataFrame()
3667
+
3668
+ # 6. Clean and Pivot
3669
+ df[target_band] = pd.to_numeric(df[target_band], errors='coerce')
3670
+
3671
+ # Filter out ONLY the sentinel value (-9999), preserving 0.
3672
+ df = df[df[target_band] != -9999]
3673
+
3674
+ if df.empty:
3675
+ print(f"Warning: All data points were masked (NoData) for band '{target_band}'.")
3676
+ return pd.DataFrame()
3677
+
3678
+ pivot_df = df.pivot(index='image_date', columns='geo_name', values=target_band)
3679
+ pivot_df.index.name = 'Date'
3680
+ pivot_df.columns.name = None
3681
+ pivot_df = pivot_df.reset_index()
3682
+
3683
+ if file_path:
3684
+ if not file_path.lower().endswith('.csv'):
3685
+ file_path += '.csv'
3686
+ pivot_df.to_csv(file_path, index=False)
3687
+ print(f"Sampled data saved to {file_path}")
3688
+ return None
3689
+
3690
+ return pivot_df
3691
+
3692
+ def multiband_sample(
3693
+ self,
3694
+ location,
3695
+ scale=30,
3696
+ file_path=None
3697
+ ):
3698
+ """
3699
+ Extracts ALL band values for a SINGLE location across the entire collection.
3700
+
3701
+ Args:
3702
+ location (tuple or ee.Geometry): A single (lon, lat) tuple OR ee.Geometry.
3703
+ scale (int, optional): Scale in meters. Defaults to 30.
3704
+ file_path (str, optional): Path to save CSV.
3705
+
3706
+ Returns:
3707
+ pd.DataFrame: DataFrame indexed by Date, with columns for each Band.
3708
+ """
3709
+ if isinstance(location, tuple) and len(location) == 2:
3710
+ geom = ee.Geometry.Point(location)
3711
+ elif isinstance(location, ee.Geometry):
3712
+ geom = location
3713
+ else:
3714
+ raise ValueError("Location must be a single (lon, lat) tuple or ee.Geometry.")
3715
+
3716
+ first_img = self.collection.first()
3717
+ band_names = first_img.bandNames()
3718
+
3719
+ # Create a dictionary of {band_name: -9999}
3720
+ # fill missing values so the Feature structure is consistent
3721
+ dummy_values = ee.List.repeat(-9999, band_names.length())
3722
+ default_dict = ee.Dictionary.fromLists(band_names, dummy_values)
3723
+
3724
+ def get_all_bands(img):
3725
+ date = img.get('Date_Filter')
3726
+
3727
+ # reduceRegion returns a Dictionary.
3728
+ # If a pixel is masked, that band key is missing from 'stats'.
3729
+ stats = img.reduceRegion(
3730
+ reducer=ee.Reducer.first(),
3731
+ geometry=geom,
3732
+ scale=scale,
3733
+ maxPixels=1e13
3734
+ )
3735
+
3736
+ # Combine stats with defaults.
3737
+ # overwrite=True means real data (stats) overwrites the -9999 defaults.
3738
+ complete_stats = default_dict.combine(stats, overwrite=True)
3739
+
3740
+ return ee.Feature(None, complete_stats).set('Date', date)
3741
+
3742
+ fc = ee.FeatureCollection(self.collection.map(get_all_bands))
3743
+
3744
+ df = GenericCollection.ee_to_df(fc, remove_geom=True)
3745
+
3746
+ if df.empty:
3747
+ print("Warning: No data found.")
3748
+ return pd.DataFrame()
3749
+
3750
+ # 6. Cleanup
3751
+ if 'Date' in df.columns:
3752
+ df['Date'] = pd.to_datetime(df['Date'])
3753
+ df = df.set_index('Date').sort_index()
3754
+
3755
+ # Replace our sentinel -9999 with proper NaNs
3756
+ df = df.replace(-9999, np.nan)
3757
+
3758
+ # 7. Export
3759
+ if file_path:
3760
+ if not file_path.lower().endswith('.csv'):
3761
+ file_path += '.csv'
3762
+ df.to_csv(file_path)
3763
+ print(f"Multiband sample saved to {file_path}")
3764
+ return None
3765
+
3766
+ return df
3377
3767
 
3378
3768
  def export_to_asset_collection(
3379
3769
  self,
@@ -3384,7 +3774,8 @@ class GenericCollection:
3384
3774
  filename_prefix="",
3385
3775
  crs=None,
3386
3776
  max_pixels=int(1e13),
3387
- description_prefix="export"
3777
+ description_prefix="export",
3778
+ overwrite=False
3388
3779
  ):
3389
3780
  """
3390
3781
  Exports an image collection to a Google Earth Engine asset collection. The asset collection will be created if it does not already exist,
@@ -3399,6 +3790,7 @@ class GenericCollection:
3399
3790
  crs (str, optional): The coordinate reference system. Defaults to None, which will use the image's CRS.
3400
3791
  max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
3401
3792
  description_prefix (str, optional): The description prefix. Defaults to "export".
3793
+ overwrite (bool, optional): Whether to overwrite existing assets. Defaults to False.
3402
3794
 
3403
3795
  Returns:
3404
3796
  None: (queues export tasks)
@@ -3416,6 +3808,14 @@ class GenericCollection:
3416
3808
  asset_id = asset_collection_path + "/" + filename_prefix + date_str
3417
3809
  desc = description_prefix + "_" + filename_prefix + date_str
3418
3810
 
3811
+ if overwrite:
3812
+ try:
3813
+ ee.data.deleteAsset(asset_id)
3814
+ print(f"Overwriting: Deleted existing asset {asset_id}")
3815
+ except ee.EEException:
3816
+ # Asset does not exist, so nothing to delete. Proceed safely.
3817
+ pass
3818
+
3419
3819
  params = {
3420
3820
  'image': img,
3421
3821
  'description': desc,