RadGEEToolbox 1.7.4__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3538,7 +3538,7 @@ class Sentinel2Collection:
3538
3538
  daily_col = ee.ImageCollection.fromImages(daily_list)
3539
3539
 
3540
3540
  # Create the mosaic
3541
- mosaic = daily_col.mosaic().setDefaultProjection(img.projection())
3541
+ mosaic = daily_col.mosaic().setDefaultProjection(img.select([0]).projection())
3542
3542
 
3543
3543
  # Calculate means for Sentinel-2 specific props
3544
3544
  cloud_pct = daily_col.aggregate_mean("CLOUDY_PIXEL_PERCENTAGE")
@@ -3551,7 +3551,8 @@ class Sentinel2Collection:
3551
3551
  "SENSING_ORBIT_DIRECTION",
3552
3552
  "MISSION_ID",
3553
3553
  "PLATFORM_IDENTIFIER",
3554
- "system:time_start"
3554
+ "system:time_start",
3555
+ "Date_Filter"
3555
3556
  ]
3556
3557
 
3557
3558
  # Return mosaic with properties set
@@ -3793,200 +3794,197 @@ class Sentinel2Collection:
3793
3794
  lines,
3794
3795
  line_names,
3795
3796
  reducer="mean",
3796
- dist_interval= 10,
3797
+ dist_interval=30,
3797
3798
  n_segments=None,
3798
3799
  scale=10,
3799
3800
  processing_mode='aggregated',
3800
3801
  save_folder_path=None,
3801
3802
  sampling_method='line',
3802
- point_buffer_radius=5
3803
+ point_buffer_radius=15,
3804
+ batch_size=10
3803
3805
  ):
3804
3806
  """
3805
- Computes and returns pixel values along transects for each image in a collection.
3806
-
3807
- This iterative function generates time-series data along one or more lines, and
3808
- supports two different geometric sampling methods ('line' and 'buffered_point')
3809
- for maximum flexibility and performance.
3810
-
3811
- There are two processing modes available, aggregated and iterative:
3812
- - 'aggregated' (default; suggested): Fast, server-side processing. Fetches all results
3813
- in a single request. Highly recommended. Returns a dictionary of pandas DataFrames.
3814
- - 'iterative': Slower, client-side loop that processes one image at a time.
3815
- Kept for backward compatibility (effectively depreciated). Returns None and saves individual CSVs.
3816
- This method is not recommended unless absolutely necessary, as it is less efficient and may be subject to client-side timeouts.
3817
-
3807
+ Computes and returns pixel values along transects. Provide a list of ee.Geometry.LineString objects and corresponding names, and the function will compute the specified reducer value
3808
+ at regular intervals along each line for all images in the collection. Use `dist_interval` or `n_segments` to control sampling resolution. The user can choose between 'aggregated' mode (returns a dictionary of DataFrames) or 'iterative' mode (saves individual CSVs for each transect).
3809
+ Alter `sampling_method` to sample directly along the line or via buffered points along the line. Buffered points can help capture more representative pixel values in heterogeneous landscapes, and the buffer radius can be adjusted via `point_buffer_radius`.
3810
+
3818
3811
  Args:
3819
- lines (list): A list of one or more ee.Geometry.LineString objects that
3820
- define the transects.
3821
- line_names (list): A list of string names for each transect. The length
3822
- of this list must match the length of the `lines` list.
3823
- reducer (str, optional): The name of the ee.Reducer to apply at each
3824
- transect point (e.g., 'mean', 'median', 'first'). Defaults to 'mean'.
3825
- dist_interval (float, optional): The distance interval in meters for
3826
- sampling points along each transect. Will be overridden if `n_segments` is provided.
3827
- Defaults to 10. Recommended to increase this value when using the
3828
- 'line' processing method, or else you may get blank rows.
3829
- n_segments (int, optional): The number of equal-length segments to split
3830
- each transect line into for sampling. This parameter overrides `dist_interval`.
3831
- Defaults to None.
3832
- scale (int, optional): The nominal scale in meters for the reduction,
3833
- which should typically match the pixel resolution of the imagery.
3834
- Defaults to 10.
3835
- processing_mode (str, optional): The method for processing the collection.
3836
- - 'aggregated' (default): Fast, server-side processing. Fetches all
3837
- results in a single request. Highly recommended. Returns a dictionary
3838
- of pandas DataFrames.
3839
- - 'iterative': Slower, client-side loop that processes one image at a
3840
- time. Kept for backward compatibility. Returns None and saves
3841
- individual CSVs.
3842
- save_folder_path (str, optional): If provided, the function will save the
3843
- resulting transect data to CSV files. The behavior depends on the
3844
- `processing_mode`:
3845
- - In 'aggregated' mode, one CSV is saved for each transect,
3846
- containing all dates. (e.g., 'MyTransect_transects.csv').
3847
- - In 'iterative' mode, one CSV is saved for each date,
3848
- containing all transects. (e.g., '2022-06-15_transects.csv').
3849
- sampling_method (str, optional): The geometric method used for sampling.
3850
- - 'line' (default): Reduces all pixels intersecting each small line
3851
- segment. This can be unreliable and produce blank rows if
3852
- `dist_interval` is too small relative to the `scale`.
3853
- - 'buffered_point': Reduces all pixels within a buffer around the
3854
- midpoint of each line segment. This method is more robust and
3855
- reliably avoids blank rows, but may not reduce all pixels along a line segment.
3856
- point_buffer_radius (int, optional): The radius in meters for the buffer
3857
- when `sampling_method` is 'buffered_point'. Defaults to 5.
3812
+ lines (list): List of ee.Geometry.LineString objects.
3813
+ line_names (list): List of string names for each transect.
3814
+ reducer (str, optional): Reducer name. Defaults to 'mean'.
3815
+ dist_interval (float, optional): Distance interval in meters. Defaults to 30.
3816
+ n_segments (int, optional): Number of segments (overrides dist_interval).
3817
+ scale (int, optional): Scale in meters. Defaults to 10.
3818
+ processing_mode (str, optional): 'aggregated' or 'iterative'.
3819
+ save_folder_path (str, optional): Path to save CSVs.
3820
+ sampling_method (str, optional): 'line' or 'buffered_point'.
3821
+ point_buffer_radius (int, optional): Buffer radius if using 'buffered_point'.
3822
+ batch_size (int, optional): Images per request in 'aggregated' mode. Defaults to 10. Lower the value if you encounter a 'Too many aggregations' error.
3858
3823
 
3859
3824
  Returns:
3860
- dict or None:
3861
- - If `processing_mode` is 'aggregated', returns a dictionary where each
3862
- key is a transect name and each value is a pandas DataFrame. In the
3863
- DataFrame, the index is the distance along the transect and each
3864
- column represents an image date. Optionally saves CSV files if
3865
- `save_folder_path` is provided.
3866
- - If `processing_mode` is 'iterative', returns None as it saves
3867
- files directly.
3868
-
3869
- Raises:
3870
- ValueError: If `lines` and `line_names` have different lengths, or if
3871
- an unknown reducer or processing mode is specified.
3825
+ dict or None: Dictionary of DataFrames (aggregated) or None (iterative).
3872
3826
  """
3873
- # Validating inputs
3874
3827
  if len(lines) != len(line_names):
3875
3828
  raise ValueError("'lines' and 'line_names' must have the same number of elements.")
3876
- ### Current, server-side processing method ###
3829
+
3830
+ first_img = self.collection.first()
3831
+ bands = first_img.bandNames().getInfo()
3832
+ is_multiband = len(bands) > 1
3833
+
3834
+ # Setup robust dictionary for handling masked/zero values
3835
+ default_val = -9999
3836
+ dummy_dict = ee.Dictionary.fromLists(bands, ee.List.repeat(default_val, len(bands)))
3837
+
3838
+ if is_multiband:
3839
+ reducer_cols = [f"{b}_{reducer}" for b in bands]
3840
+ clean_names = bands
3841
+ rename_keys = bands
3842
+ rename_vals = reducer_cols
3843
+ else:
3844
+ reducer_cols = [reducer]
3845
+ clean_names = [bands[0]]
3846
+ rename_keys = bands
3847
+ rename_vals = reducer_cols
3848
+
3849
+ print("Pre-computing transect geometries from input LineString(s)...")
3850
+
3851
+ master_transect_fc = ee.FeatureCollection([])
3852
+ geom_error = 1.0
3853
+
3854
+ for i, line in enumerate(lines):
3855
+ line_name = line_names[i]
3856
+ length = line.length(geom_error)
3857
+
3858
+ eff_interval = length.divide(n_segments) if n_segments else dist_interval
3859
+
3860
+ distances = ee.List.sequence(0, length, eff_interval)
3861
+ cut_lines = line.cutLines(distances, geom_error).geometries()
3862
+
3863
+ def create_feature(l):
3864
+ geom = ee.Geometry(ee.List(l).get(0))
3865
+ dist = ee.Number(ee.List(l).get(1))
3866
+
3867
+ final_geom = ee.Algorithms.If(
3868
+ ee.String(sampling_method).equals('buffered_point'),
3869
+ geom.centroid(geom_error).buffer(point_buffer_radius),
3870
+ geom
3871
+ )
3872
+
3873
+ return ee.Feature(ee.Geometry(final_geom), {
3874
+ 'transect_name': line_name,
3875
+ 'distance': dist
3876
+ })
3877
+
3878
+ line_fc = ee.FeatureCollection(cut_lines.zip(distances).map(create_feature))
3879
+ master_transect_fc = master_transect_fc.merge(line_fc)
3880
+
3881
+ try:
3882
+ ee_reducer = getattr(ee.Reducer, reducer)()
3883
+ except AttributeError:
3884
+ raise ValueError(f"Unknown reducer: '{reducer}'.")
3885
+
3886
+ def process_image(image):
3887
+ date_val = image.get('Date_Filter')
3888
+
3889
+ # Map over points (Slower but Robust)
3890
+ def reduce_point(f):
3891
+ stats = image.reduceRegion(
3892
+ reducer=ee_reducer,
3893
+ geometry=f.geometry(),
3894
+ scale=scale,
3895
+ maxPixels=1e13
3896
+ )
3897
+ # Combine with defaults (preserves 0, handles masked)
3898
+ safe_stats = dummy_dict.combine(stats, overwrite=True)
3899
+ # Rename keys to match expected outputs (e.g. 'ndvi' -> 'ndvi_mean')
3900
+ final_stats = safe_stats.rename(rename_keys, rename_vals)
3901
+
3902
+ return f.set(final_stats).set({'image_date': date_val})
3903
+
3904
+ return master_transect_fc.map(reduce_point)
3905
+
3906
+ export_cols = ['transect_name', 'distance', 'image_date'] + reducer_cols
3907
+
3877
3908
  if processing_mode == 'aggregated':
3878
- # Validating reducer type
3879
- try:
3880
- ee_reducer = getattr(ee.Reducer, reducer)()
3881
- except AttributeError:
3882
- raise ValueError(f"Unknown reducer: '{reducer}'.")
3883
- ### Function to extract transects for a single image
3884
- def get_transects_for_image(image):
3885
- image_date = image.get('Date_Filter')
3886
- # Initialize an empty list to hold all transect FeatureCollections
3887
- all_transects_for_image = ee.List([])
3888
- # Looping through each line and processing
3889
- for i, line in enumerate(lines):
3890
- # Index line and name
3891
- line_name = line_names[i]
3892
- # Determine maxError based on image projection, used for geometry operations
3893
- maxError = image.projection().nominalScale().divide(5)
3894
- # Calculate effective distance interval
3895
- length = line.length(maxError) # using maxError here ensures consistency with cutLines
3896
- # Determine effective distance interval based on n_segments or dist_interval
3897
- effective_dist_interval = ee.Algorithms.If(
3898
- n_segments,
3899
- length.divide(n_segments),
3900
- dist_interval or 30 # Defaults to 30 if both are None
3901
- )
3902
- # Generate distances along the line(s) for segmentation
3903
- distances = ee.List.sequence(0, length, effective_dist_interval)
3904
- # Segmenting the line into smaller lines at the specified distances
3905
- cut_lines_geoms = line.cutLines(distances, maxError).geometries()
3906
- # Function to create features with distance attributes
3907
- # Adjusted to ensure consistent return types
3908
- def set_dist_attr(l):
3909
- # l is a list: [geometry, distance]
3910
- # Extracting geometry portion of line
3911
- geom_segment = ee.Geometry(ee.List(l).get(0))
3912
- # Extracting distance value for attribute
3913
- distance = ee.Number(ee.List(l).get(1))
3914
- ### Determine final geometry based on sampling method
3915
- # If the sampling method is 'buffered_point',
3916
- # create a buffered point feature at the centroid of each segment,
3917
- # otherwise create a line feature
3918
- final_feature = ee.Algorithms.If(
3919
- ee.String(sampling_method).equals('buffered_point'),
3920
- # True Case: Create the buffered point feature
3921
- ee.Feature(
3922
- geom_segment.centroid(maxError).buffer(point_buffer_radius),
3923
- {'distance': distance}
3924
- ),
3925
- # False Case: Create the line segment feature
3926
- ee.Feature(geom_segment, {'distance': distance})
3927
- )
3928
- # Return either the line segment feature or the buffered point feature
3929
- return final_feature
3930
- # Creating a FeatureCollection of the cut lines with distance attributes
3931
- # Using map to apply the set_dist_attr function to each cut line geometry
3932
- line_features = ee.FeatureCollection(cut_lines_geoms.zip(distances).map(set_dist_attr))
3933
- # Reducing the image over the line features to get transect values
3934
- transect_fc = image.reduceRegions(
3935
- collection=line_features, reducer=ee_reducer, scale=scale
3936
- )
3937
- # Adding image date and line name properties to each feature
3938
- def set_props(feature):
3939
- return feature.set({'image_date': image_date, 'transect_name': line_name})
3940
- # Append to the list of all transects for this image
3941
- all_transects_for_image = all_transects_for_image.add(transect_fc.map(set_props))
3942
- # Combine all transect FeatureCollections into a single FeatureCollection and flatten
3943
- # Flatten is used to merge the list of FeatureCollections into one
3944
- return ee.FeatureCollection(all_transects_for_image).flatten()
3945
- # Map the function over the entire image collection and flatten the results
3946
- results_fc = ee.FeatureCollection(self.collection.map(get_transects_for_image)).flatten()
3947
- # Convert the results to a pandas DataFrame
3948
- df = Sentinel2Collection.ee_to_df(results_fc, remove_geom=True)
3949
- # Check if the DataFrame is empty
3950
- if df.empty:
3951
- print("Warning: No transect data was generated.")
3909
+ collection_size = self.collection.size().getInfo()
3910
+ print(f"Starting batch process of {collection_size} images...")
3911
+
3912
+ dfs = []
3913
+ for i in range(0, collection_size, batch_size):
3914
+ print(f" Processing image {i} to {min(i + batch_size, collection_size)}...")
3915
+
3916
+ batch_col = ee.ImageCollection(self.collection.toList(batch_size, i))
3917
+ results_fc = batch_col.map(process_image).flatten()
3918
+
3919
+ # Dynamic Class Call for ee_to_df
3920
+ df_batch = self.__class__.ee_to_df(results_fc, columns=export_cols, remove_geom=True)
3921
+
3922
+ if not df_batch.empty:
3923
+ dfs.append(df_batch)
3924
+
3925
+ if not dfs:
3926
+ print("Warning: No transect data generated.")
3952
3927
  return {}
3953
- # Initialize dictionary to hold output DataFrames for each transect
3928
+
3929
+ df = pd.concat(dfs, ignore_index=True)
3930
+
3931
+ # Post-Process & Split
3954
3932
  output_dfs = {}
3955
- # Loop through each unique transect name and create a pivot table
3933
+ for col in reducer_cols:
3934
+ df[col] = pd.to_numeric(df[col], errors='coerce')
3935
+ df[col] = df[col].replace(-9999, np.nan)
3936
+
3956
3937
  for name in sorted(df['transect_name'].unique()):
3957
- transect_df = df[df['transect_name'] == name]
3958
- pivot_df = transect_df.pivot(index='distance', columns='image_date', values=reducer)
3959
- pivot_df.columns.name = 'Date'
3960
- output_dfs[name] = pivot_df
3961
- # Optionally save each transect DataFrame to CSV
3962
- if save_folder_path:
3963
- for transect_name, transect_df in output_dfs.items():
3964
- safe_filename = "".join(x for x in transect_name if x.isalnum() or x in "._-")
3965
- file_path = f"{save_folder_path}{safe_filename}_transects.csv"
3966
- transect_df.to_csv(file_path)
3967
- print(f"Saved transect data to {file_path}")
3968
-
3938
+ line_df = df[df['transect_name'] == name]
3939
+
3940
+ for raw_col, band_name in zip(reducer_cols, clean_names):
3941
+ try:
3942
+ # Safety drop for duplicates
3943
+ line_df_clean = line_df.drop_duplicates(subset=['distance', 'image_date'])
3944
+
3945
+ pivot = line_df_clean.pivot(index='distance', columns='image_date', values=raw_col)
3946
+ pivot.columns.name = 'Date'
3947
+ key = f"{name}_{band_name}"
3948
+ output_dfs[key] = pivot
3949
+
3950
+ if save_folder_path:
3951
+ safe_key = "".join(x for x in key if x.isalnum() or x in "._-")
3952
+ fname = f"{save_folder_path}{safe_key}_transects.csv"
3953
+ pivot.to_csv(fname)
3954
+ print(f"Saved: {fname}")
3955
+ except Exception as e:
3956
+ print(f"Skipping pivot for {name}/{band_name}: {e}")
3957
+
3969
3958
  return output_dfs
3970
3959
 
3971
- ### old, depreciated iterative client-side processing method ###
3972
3960
  elif processing_mode == 'iterative':
3973
3961
  if not save_folder_path:
3974
- raise ValueError("`save_folder_path` is required for 'iterative' processing mode.")
3962
+ raise ValueError("save_folder_path is required for iterative mode.")
3975
3963
 
3976
3964
  image_collection_dates = self.dates
3977
3965
  for i, date in enumerate(image_collection_dates):
3978
3966
  try:
3979
3967
  print(f"Processing image {i+1}/{len(image_collection_dates)}: {date}")
3980
- image = self.image_grab(i)
3981
- transects_df = Sentinel2Collection.transect(
3982
- image, lines, line_names, reducer, n_segments, dist_interval, to_pandas=True
3983
- )
3984
- transects_df.to_csv(f"{save_folder_path}{date}_transects.csv")
3985
- print(f"{date}_transects saved to csv")
3968
+ image_list = self.collection.toList(self.collection.size())
3969
+ image = ee.Image(image_list.get(i))
3970
+
3971
+ fc_result = process_image(image)
3972
+ df = self.__class__.ee_to_df(fc_result, columns=export_cols, remove_geom=True)
3973
+
3974
+ if not df.empty:
3975
+ for col in reducer_cols:
3976
+ df[col] = pd.to_numeric(df[col], errors='coerce')
3977
+ df[col] = df[col].replace(-9999, np.nan)
3978
+
3979
+ fname = f"{save_folder_path}{date}_transects.csv"
3980
+ df.to_csv(fname, index=False)
3981
+ print(f"Saved: {fname}")
3982
+ else:
3983
+ print(f"Skipping {date}: No data.")
3986
3984
  except Exception as e:
3987
- print(f"An error occurred while processing image {i+1}: {e}")
3985
+ print(f"Error processing {date}: {e}")
3988
3986
  else:
3989
- raise ValueError("`processing_mode` must be 'iterative' or 'aggregated'.")
3987
+ raise ValueError("processing_mode must be 'iterative' or 'aggregated'.")
3990
3988
 
3991
3989
  @staticmethod
3992
3990
  def extract_zonal_stats_from_buffer(
@@ -4090,7 +4088,8 @@ class Sentinel2Collection:
4090
4088
  buffer_size=1,
4091
4089
  tileScale=1,
4092
4090
  dates=None,
4093
- file_path=None
4091
+ file_path=None,
4092
+ unweighted=False
4094
4093
  ):
4095
4094
  """
4096
4095
  Iterates over a collection of images and extracts spatial statistics (defaults to mean) for a given list of geometries or coordinates. Individual statistics are calculated for each geometry or coordinate provided.
@@ -4109,6 +4108,7 @@ class Sentinel2Collection:
4109
4108
  tileScale (int, optional): A scaling factor to reduce aggregation tile size. Defaults to 1.
4110
4109
  dates (list, optional): A list of date strings ('YYYY-MM-DD') for filtering the collection, such that only images from these dates are included for zonal statistic retrieval. Defaults to None, which uses all dates in the collection.
4111
4110
  file_path (str, optional): File path to save the output CSV.
4111
+ unweighted (bool, optional): Whether to use an unweighted reducer. Defaults to False.
4112
4112
 
4113
4113
  Returns:
4114
4114
  pd.DataFrame or None: A pandas DataFrame with dates as the index and coordinate names
@@ -4215,6 +4215,9 @@ class Sentinel2Collection:
4215
4215
  reducer = getattr(ee.Reducer, reducer_type)()
4216
4216
  except AttributeError:
4217
4217
  raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")
4218
+
4219
+ if unweighted:
4220
+ reducer = reducer.unweighted()
4218
4221
 
4219
4222
  # Define the function to map over the image collection
4220
4223
  def calculate_stats_for_image(image):
@@ -4276,6 +4279,394 @@ class Sentinel2Collection:
4276
4279
  print(f"Zonal stats saved to {file_path}.csv")
4277
4280
  return
4278
4281
  return pivot_df
4282
+
4283
+ def multiband_zonal_stats(
4284
+ self,
4285
+ geometry,
4286
+ bands,
4287
+ reducer_types,
4288
+ scale=30,
4289
+ geometry_name='geom',
4290
+ dates=None,
4291
+ include_area=False,
4292
+ file_path=None,
4293
+ unweighted=False
4294
+ ):
4295
+ """
4296
+ Calculates zonal statistics for multiple bands over a single geometry for each image in the collection.
4297
+ Allows for specifying different reducers for different bands. Optionally includes the geometry area.
4298
+
4299
+ Args:
4300
+ geometry (ee.Geometry or ee.Feature): The single geometry to calculate statistics for.
4301
+ bands (list of str): A list of band names to include in the analysis.
4302
+ reducer_types (str or list of str): A single reducer name (e.g., 'mean') to apply to all bands,
4303
+ or a list of reducer names matching the length of the 'bands' list to apply specific reducers
4304
+ to specific bands.
4305
+ scale (int, optional): The scale in meters for the reduction. Defaults to 30.
4306
+ geometry_name (str, optional): A name for the geometry, used in column naming. Defaults to 'geom'.
4307
+ dates (list of str, optional): A list of date strings ('YYYY-MM-DD') to filter the collection.
4308
+ Defaults to None (processes all images).
4309
+ include_area (bool, optional): If True, adds a column with the area of the geometry in square meters.
4310
+ Defaults to False.
4311
+ file_path (str, optional): If provided, saves the resulting DataFrame to a CSV file at this path.
4312
+ unweighted (bool, optional): Whether to use unweighted reducers. Defaults to False.
4313
+
4314
+ Returns:
4315
+ pd.DataFrame: A pandas DataFrame indexed by Date, with columns named as '{band}_{geometry_name}_{reducer}'.
4316
+ """
4317
+ # 1. Input Validation and Setup
4318
+ if not isinstance(geometry, (ee.Geometry, ee.Feature)):
4319
+ raise ValueError("The `geometry` argument must be an ee.Geometry or ee.Feature.")
4320
+
4321
+ region = geometry.geometry() if isinstance(geometry, ee.Feature) else geometry
4322
+
4323
+ if isinstance(bands, str):
4324
+ bands = [bands]
4325
+ if not isinstance(bands, list):
4326
+ raise ValueError("The `bands` argument must be a string or a list of strings.")
4327
+
4328
+ # Handle reducer_types (str vs list)
4329
+ if isinstance(reducer_types, str):
4330
+ reducers_list = [reducer_types] * len(bands)
4331
+ elif isinstance(reducer_types, list):
4332
+ if len(reducer_types) != len(bands):
4333
+ raise ValueError("If `reducer_types` is a list, it must have the same length as `bands`.")
4334
+ reducers_list = reducer_types
4335
+ else:
4336
+ raise ValueError("`reducer_types` must be a string or a list of strings.")
4337
+
4338
+ # 2. Filter Collection
4339
+ processing_col = self.collection
4340
+
4341
+ if dates:
4342
+ processing_col = processing_col.filter(ee.Filter.inList('Date_Filter', dates))
4343
+
4344
+ processing_col = processing_col.select(bands)
4345
+
4346
+ # 3. Pre-calculate Area (if requested)
4347
+ area_val = None
4348
+ area_col_name = f"{geometry_name}_area_m2"
4349
+ if include_area:
4350
+ # Calculate geodesic area in square meters with maxError of 1m
4351
+ area_val = region.area(1)
4352
+
4353
+ # 4. Define the Reduction Logic
4354
+ def calculate_multiband_stats(image):
4355
+ # Base feature with date property
4356
+ date_val = image.get('Date_Filter')
4357
+ feature = ee.Feature(None, {'Date': date_val})
4358
+
4359
+ # If requested, add the static area value to every feature
4360
+ if include_area:
4361
+ feature = feature.set(area_col_name, area_val)
4362
+
4363
+ unique_reducers = list(set(reducers_list))
4364
+
4365
+ # OPTIMIZED PATH: Single reducer type for all bands
4366
+ if len(unique_reducers) == 1:
4367
+ r_type = unique_reducers[0]
4368
+ try:
4369
+ reducer = getattr(ee.Reducer, r_type)()
4370
+ except AttributeError:
4371
+ reducer = ee.Reducer.mean()
4372
+
4373
+ if unweighted:
4374
+ reducer = reducer.unweighted()
4375
+
4376
+ stats = image.reduceRegion(
4377
+ reducer=reducer,
4378
+ geometry=region,
4379
+ scale=scale,
4380
+ maxPixels=1e13
4381
+ )
4382
+
4383
+ for band in bands:
4384
+ col_name = f"{band}_{geometry_name}_{r_type}"
4385
+ val = stats.get(band)
4386
+ feature = feature.set(col_name, val)
4387
+
4388
+ # ITERATIVE PATH: Different reducers for different bands
4389
+ else:
4390
+ for band, r_type in zip(bands, reducers_list):
4391
+ try:
4392
+ reducer = getattr(ee.Reducer, r_type)()
4393
+ except AttributeError:
4394
+ reducer = ee.Reducer.mean()
4395
+
4396
+ if unweighted:
4397
+ reducer = reducer.unweighted()
4398
+
4399
+ stats = image.select(band).reduceRegion(
4400
+ reducer=reducer,
4401
+ geometry=region,
4402
+ scale=scale,
4403
+ maxPixels=1e13
4404
+ )
4405
+
4406
+ val = stats.get(band)
4407
+ col_name = f"{band}_{geometry_name}_{r_type}"
4408
+ feature = feature.set(col_name, val)
4409
+
4410
+ return feature
4411
+
4412
+ # 5. Execute Server-Side Mapping (with explicit Cast)
4413
+ results_fc = ee.FeatureCollection(processing_col.map(calculate_multiband_stats))
4414
+
4415
+ # 6. Client-Side Conversion
4416
+ try:
4417
+ df = Sentinel2Collection.ee_to_df(results_fc, remove_geom=True)
4418
+ except Exception as e:
4419
+ raise RuntimeError(f"Failed to convert Earth Engine results to DataFrame. Error: {e}")
4420
+
4421
+ if df.empty:
4422
+ print("Warning: No results returned. Check if the geometry intersects the imagery or if dates are valid.")
4423
+ return pd.DataFrame()
4424
+
4425
+ # 7. Formatting & Reordering
4426
+ if 'Date' in df.columns:
4427
+ df['Date'] = pd.to_datetime(df['Date'])
4428
+ df = df.sort_values('Date').set_index('Date')
4429
+
4430
+ # Construct the expected column names in the exact order of the input lists
4431
+ expected_order = [f"{band}_{geometry_name}_{r_type}" for band, r_type in zip(bands, reducers_list)]
4432
+
4433
+ # If area was included, append it to the END of the list
4434
+ if include_area:
4435
+ expected_order.append(area_col_name)
4436
+
4437
+ # Reindex the DataFrame to match this order.
4438
+ existing_cols = [c for c in expected_order if c in df.columns]
4439
+ df = df[existing_cols]
4440
+
4441
+ # 8. Export (Optional)
4442
+ if file_path:
4443
+ if not file_path.lower().endswith('.csv'):
4444
+ file_path += '.csv'
4445
+ try:
4446
+ df.to_csv(file_path)
4447
+ print(f"Multiband zonal stats saved to {file_path}")
4448
+ except Exception as e:
4449
+ print(f"Error saving file to {file_path}: {e}")
4450
+
4451
+ return df
4452
+
4453
+ def sample(
4454
+ self,
4455
+ locations,
4456
+ band=None,
4457
+ scale=None,
4458
+ location_names=None,
4459
+ dates=None,
4460
+ file_path=None,
4461
+ tileScale=1
4462
+ ):
4463
+ """
4464
+ Extracts time-series pixel values for a list of locations.
4465
+
4466
+
4467
+ Args:
4468
+ locations (list, tuple, ee.Geometry, or ee.FeatureCollection): Input points.
4469
+ band (str, optional): The name of the band to sample. Defaults to the first band.
4470
+ scale (int, optional): Scale in meters. Defaults to 30 if None.
4471
+ location_names (list of str, optional): Custom names for locations.
4472
+ dates (list, optional): Date filter ['YYYY-MM-DD'].
4473
+ file_path (str, optional): CSV export path.
4474
+ tileScale (int, optional): Aggregation tile scale. Defaults to 1.
4475
+
4476
+ Returns:
4477
+ pd.DataFrame (or CSV if file_path is provided): DataFrame indexed by Date, columns by Location.
4478
+ """
4479
+ col = self.collection
4480
+ if dates:
4481
+ col = col.filter(ee.Filter.inList('Date_Filter', dates))
4482
+
4483
+ first_img = col.first()
4484
+ available_bands = first_img.bandNames().getInfo()
4485
+
4486
+ if band:
4487
+ if band not in available_bands:
4488
+ raise ValueError(f"Band '{band}' not found. Available: {available_bands}")
4489
+ target_band = band
4490
+ else:
4491
+ target_band = available_bands[0]
4492
+
4493
+ processing_col = col.select([target_band])
4494
+
4495
+ def set_name(f):
4496
+ name = ee.Algorithms.If(
4497
+ f.get('geo_name'), f.get('geo_name'),
4498
+ ee.Algorithms.If(f.get('name'), f.get('name'),
4499
+ ee.Algorithms.If(f.get('system:index'), f.get('system:index'), 'unnamed'))
4500
+ )
4501
+ return f.set('geo_name', name)
4502
+
4503
+ if isinstance(locations, (ee.FeatureCollection, ee.Feature)):
4504
+ features = ee.FeatureCollection(locations)
4505
+ elif isinstance(locations, ee.Geometry):
4506
+ lbl = location_names[0] if (location_names and location_names[0]) else 'Point_1'
4507
+ features = ee.FeatureCollection([ee.Feature(locations).set('geo_name', lbl)])
4508
+ elif isinstance(locations, tuple) and len(locations) == 2:
4509
+ lbl = location_names[0] if location_names else 'Location_1'
4510
+ features = ee.FeatureCollection([ee.Feature(ee.Geometry.Point(locations), {'geo_name': lbl})])
4511
+ elif isinstance(locations, list):
4512
+ if all(isinstance(i, tuple) for i in locations):
4513
+ names = location_names if location_names else [f"Loc_{i+1}" for i in range(len(locations))]
4514
+ features = ee.FeatureCollection([
4515
+ ee.Feature(ee.Geometry.Point(p), {'geo_name': str(n)}) for p, n in zip(locations, names)
4516
+ ])
4517
+ elif all(isinstance(i, ee.Geometry) for i in locations):
4518
+ names = location_names if location_names else [f"Geom_{i+1}" for i in range(len(locations))]
4519
+ features = ee.FeatureCollection([
4520
+ ee.Feature(g, {'geo_name': str(n)}) for g, n in zip(locations, names)
4521
+ ])
4522
+ else:
4523
+ raise ValueError("List must contain (lon, lat) tuples or ee.Geometry objects.")
4524
+ else:
4525
+ raise TypeError("Invalid locations input.")
4526
+
4527
+ features = features.map(set_name)
4528
+
4529
+
4530
+ def sample_image(img):
4531
+ date = img.get('Date_Filter')
4532
+ use_scale = scale if scale is not None else 30
4533
+
4534
+
4535
+ default_dict = ee.Dictionary({target_band: -9999})
4536
+
4537
+ def extract_point(f):
4538
+ stats = img.reduceRegion(
4539
+ reducer=ee.Reducer.first(),
4540
+ geometry=f.geometry(),
4541
+ scale=use_scale,
4542
+ tileScale=tileScale
4543
+ )
4544
+
4545
+ # Combine dictionaries.
4546
+ # If stats has 'target_band' (even if 0), it overwrites -9999.
4547
+ # If stats is empty (masked), -9999 remains.
4548
+ safe_stats = default_dict.combine(stats, overwrite=True)
4549
+ val = safe_stats.get(target_band)
4550
+
4551
+ return f.set({
4552
+ target_band: val,
4553
+ 'image_date': date
4554
+ })
4555
+
4556
+ return features.map(extract_point)
4557
+
4558
+ # Flatten the results
4559
+ flat_results = processing_col.map(sample_image).flatten()
4560
+
4561
+ df = Sentinel2Collection.ee_to_df(
4562
+ flat_results,
4563
+ columns=['image_date', 'geo_name', target_band],
4564
+ remove_geom=True
4565
+ )
4566
+
4567
+ if df.empty:
4568
+ print("Warning: No data returned.")
4569
+ return pd.DataFrame()
4570
+
4571
+ # 6. Clean and Pivot
4572
+ df[target_band] = pd.to_numeric(df[target_band], errors='coerce')
4573
+
4574
+ # Filter out ONLY the sentinel value (-9999), preserving 0.
4575
+ df = df[df[target_band] != -9999]
4576
+
4577
+ if df.empty:
4578
+ print(f"Warning: All data points were masked (NoData) for band '{target_band}'.")
4579
+ return pd.DataFrame()
4580
+
4581
+ pivot_df = df.pivot(index='image_date', columns='geo_name', values=target_band)
4582
+ pivot_df.index.name = 'Date'
4583
+ pivot_df.columns.name = None
4584
+ pivot_df = pivot_df.reset_index()
4585
+
4586
+ if file_path:
4587
+ if not file_path.lower().endswith('.csv'):
4588
+ file_path += '.csv'
4589
+ pivot_df.to_csv(file_path, index=False)
4590
+ print(f"Sampled data saved to {file_path}")
4591
+ return None
4592
+
4593
+ return pivot_df
4594
+
4595
+ def multiband_sample(
4596
+ self,
4597
+ location,
4598
+ scale=30,
4599
+ file_path=None
4600
+ ):
4601
+ """
4602
+ Extracts ALL band values for a SINGLE location across the entire collection.
4603
+
4604
+ Args:
4605
+ location (tuple or ee.Geometry): A single (lon, lat) tuple OR ee.Geometry.
4606
+ scale (int, optional): Scale in meters. Defaults to 30.
4607
+ file_path (str, optional): Path to save CSV.
4608
+
4609
+ Returns:
4610
+ pd.DataFrame: DataFrame indexed by Date, with columns for each Band.
4611
+ """
4612
+ if isinstance(location, tuple) and len(location) == 2:
4613
+ geom = ee.Geometry.Point(location)
4614
+ elif isinstance(location, ee.Geometry):
4615
+ geom = location
4616
+ else:
4617
+ raise ValueError("Location must be a single (lon, lat) tuple or ee.Geometry.")
4618
+
4619
+ first_img = self.collection.first()
4620
+ band_names = first_img.bandNames()
4621
+
4622
+ # Create a dictionary of {band_name: -9999}
4623
+ # fill missing values so the Feature structure is consistent
4624
+ dummy_values = ee.List.repeat(-9999, band_names.length())
4625
+ default_dict = ee.Dictionary.fromLists(band_names, dummy_values)
4626
+
4627
+ def get_all_bands(img):
4628
+ date = img.get('Date_Filter')
4629
+
4630
+ # reduceRegion returns a Dictionary.
4631
+ # If a pixel is masked, that band key is missing from 'stats'.
4632
+ stats = img.reduceRegion(
4633
+ reducer=ee.Reducer.first(),
4634
+ geometry=geom,
4635
+ scale=scale,
4636
+ maxPixels=1e13
4637
+ )
4638
+
4639
+ # Combine stats with defaults.
4640
+ # overwrite=True means real data (stats) overwrites the -9999 defaults.
4641
+ complete_stats = default_dict.combine(stats, overwrite=True)
4642
+
4643
+ return ee.Feature(None, complete_stats).set('Date', date)
4644
+
4645
+ fc = ee.FeatureCollection(self.collection.map(get_all_bands))
4646
+
4647
+ df = Sentinel2Collection.ee_to_df(fc, remove_geom=True)
4648
+
4649
+ if df.empty:
4650
+ print("Warning: No data found.")
4651
+ return pd.DataFrame()
4652
+
4653
+ # 6. Cleanup
4654
+ if 'Date' in df.columns:
4655
+ df['Date'] = pd.to_datetime(df['Date'])
4656
+ df = df.set_index('Date').sort_index()
4657
+
4658
+ # Replace our sentinel -9999 with proper NaNs
4659
+ df = df.replace(-9999, np.nan)
4660
+
4661
+ # 7. Export
4662
+ if file_path:
4663
+ if not file_path.lower().endswith('.csv'):
4664
+ file_path += '.csv'
4665
+ df.to_csv(file_path)
4666
+ print(f"Multiband sample saved to {file_path}")
4667
+ return None
4668
+
4669
+ return df
4279
4670
 
4280
4671
  def export_to_asset_collection(
4281
4672
  self,
@@ -4286,7 +4677,8 @@ class Sentinel2Collection:
4286
4677
  filename_prefix="",
4287
4678
  crs=None,
4288
4679
  max_pixels=int(1e13),
4289
- description_prefix="export"
4680
+ description_prefix="export",
4681
+ overwrite=False
4290
4682
  ):
4291
4683
  """
4292
4684
  Exports an image collection to a Google Earth Engine asset collection. The asset collection will be created if it does not already exist,
@@ -4301,6 +4693,7 @@ class Sentinel2Collection:
4301
4693
  crs (str, optional): The coordinate reference system. Defaults to None, which will use the image's CRS.
4302
4694
  max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
4303
4695
  description_prefix (str, optional): The description prefix. Defaults to "export".
4696
+ overwrite (bool, optional): Whether to overwrite existing assets. Defaults to False.
4304
4697
 
4305
4698
  Returns:
4306
4699
  None: (queues export tasks)
@@ -4318,6 +4711,14 @@ class Sentinel2Collection:
4318
4711
  asset_id = asset_collection_path + "/" + filename_prefix + date_str
4319
4712
  desc = description_prefix + "_" + filename_prefix + date_str
4320
4713
 
4714
+ if overwrite:
4715
+ try:
4716
+ ee.data.deleteAsset(asset_id)
4717
+ print(f"Overwriting: Deleted existing asset {asset_id}")
4718
+ except ee.EEException:
4719
+ # Asset does not exist, so nothing to delete. Proceed safely.
4720
+ pass
4721
+
4321
4722
  params = {
4322
4723
  'image': img,
4323
4724
  'description': desc,