RadGEEToolbox 1.7.4__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -10,19 +10,24 @@ _LS_SCALE = 0.0000275
10
10
  _LS_OFFSET = -0.2
11
11
 
12
12
  def _scale_landsat_sr(img):
13
- """
14
- Converts Landsat C2 SR DN values to reflectance values for SR_B1..SR_B7 (overwrite bands).
15
-
16
- Args:
17
- img (ee.Image): Input Landsat image without scaled bands.
18
-
19
- Returns:
20
- ee.Image: Image with scaled reflectance bands.
21
- """
22
13
  img = ee.Image(img)
14
+
15
+ # Get the sensor from the image metadata
16
+ # This matches the logic you already use in _landsat_selector
17
+ sensor = img.get('SPACECRAFT_ID')
18
+
19
+ available_bands = img.bandNames()
20
+
21
+ target_bands = ee.List(_LS_SR_BANDS).filter(ee.Filter.inList('item', available_bands))
22
+
23
23
  is_scaled = ee.Algorithms.IsEqual(img.get('rgt:scaled'), 'landsat_sr')
24
- scaled = img.select(_LS_SR_BANDS).multiply(_LS_SCALE).add(_LS_OFFSET)
24
+
25
+ # Apply scaling factors (0.0000275 and -0.2) only to existing SR bands
26
+ scaled = img.select(target_bands).multiply(_LS_SCALE).add(_LS_OFFSET)
27
+ scaled = scaled.max(0.0001) # Ensure reflectance values are not negative after scaling
28
+
25
29
  out = img.addBands(scaled, None, True).set('rgt:scaled', 'landsat_sr')
30
+
26
31
  return ee.Image(ee.Algorithms.If(is_scaled, img, out))
27
32
 
28
33
  class LandsatCollection:
@@ -4347,200 +4352,197 @@ class LandsatCollection:
4347
4352
  lines,
4348
4353
  line_names,
4349
4354
  reducer="mean",
4350
- dist_interval=30,
4355
+ dist_interval=90,
4351
4356
  n_segments=None,
4352
4357
  scale=30,
4353
4358
  processing_mode='aggregated',
4354
4359
  save_folder_path=None,
4355
4360
  sampling_method='line',
4356
- point_buffer_radius=15
4361
+ point_buffer_radius=15,
4362
+ batch_size=10
4357
4363
  ):
4358
4364
  """
4359
- Computes and returns pixel values along transects for each image in a collection.
4360
-
4361
- This iterative function generates time-series data along one or more lines, and
4362
- supports two different geometric sampling methods ('line' and 'buffered_point')
4363
- for maximum flexibility and performance.
4364
-
4365
- There are two processing modes available, aggregated and iterative:
4366
- - 'aggregated' (default; suggested): Fast, server-side processing. Fetches all results
4367
- in a single request. Highly recommended. Returns a dictionary of pandas DataFrames.
4368
- - 'iterative': Slower, client-side loop that processes one image at a time.
4369
- Kept for backward compatibility (effectively depreciated). Returns None and saves individual CSVs.
4370
- This method is not recommended unless absolutely necessary, as it is less efficient and may be subject to client-side timeouts.
4371
-
4365
+ Computes and returns pixel values along transects. Provide a list of ee.Geometry.LineString objects and corresponding names, and the function will compute the specified reducer value
4366
+ at regular intervals along each line for all images in the collection. Use `dist_interval` or `n_segments` to control sampling resolution. The user can choose between 'aggregated' mode (returns a dictionary of DataFrames) or 'iterative' mode (saves individual CSVs for each transect).
4367
+ Alter `sampling_method` to sample directly along the line or via buffered points along the line. Buffered points can help capture more representative pixel values in heterogeneous landscapes, and the buffer radius can be adjusted via `point_buffer_radius`.
4368
+
4372
4369
  Args:
4373
- lines (list): A list of one or more ee.Geometry.LineString objects that
4374
- define the transects.
4375
- line_names (list): A list of string names for each transect. The length
4376
- of this list must match the length of the `lines` list.
4377
- reducer (str, optional): The name of the ee.Reducer to apply at each
4378
- transect point (e.g., 'mean', 'median', 'first'). Defaults to 'mean'.
4379
- dist_interval (float, optional): The distance interval in meters for
4380
- sampling points along each transect. Will be overridden if `n_segments` is provided.
4381
- Defaults to 30. Recommended to increase this value when using the
4382
- 'line' processing method, or else you may get blank rows.
4383
- n_segments (int, optional): The number of equal-length segments to split
4384
- each transect line into for sampling. This parameter overrides `dist_interval`.
4385
- Defaults to None.
4386
- scale (int, optional): The nominal scale in meters for the reduction,
4387
- which should typically match the pixel resolution of the imagery.
4388
- Defaults to 30.
4389
- processing_mode (str, optional): The method for processing the collection.
4390
- - 'aggregated' (default): Fast, server-side processing. Fetches all
4391
- results in a single request. Highly recommended. Returns a dictionary
4392
- of pandas DataFrames.
4393
- - 'iterative': Slower, client-side loop that processes one image at a
4394
- time. Kept for backward compatibility. Returns None and saves
4395
- individual CSVs.
4396
- save_folder_path (str, optional): If provided, the function will save the
4397
- resulting transect data to CSV files. The behavior depends on the
4398
- `processing_mode`:
4399
- - In 'aggregated' mode, one CSV is saved for each transect,
4400
- containing all dates. (e.g., 'MyTransect_transects.csv').
4401
- - In 'iterative' mode, one CSV is saved for each date,
4402
- containing all transects. (e.g., '2022-06-15_transects.csv').
4403
- sampling_method (str, optional): The geometric method used for sampling.
4404
- - 'line' (default): Reduces all pixels intersecting each small line
4405
- segment. This can be unreliable and produce blank rows if
4406
- `dist_interval` is too small relative to the `scale`.
4407
- - 'buffered_point': Reduces all pixels within a buffer around the
4408
- midpoint of each line segment. This method is more robust and
4409
- reliably avoids blank rows, but may not reduce all pixels along a line segment.
4410
- point_buffer_radius (int, optional): The radius in meters for the buffer
4411
- when `sampling_method` is 'buffered_point'. Defaults to 15.
4370
+ lines (list): List of ee.Geometry.LineString objects.
4371
+ line_names (list): List of string names for each transect.
4372
+ reducer (str, optional): Reducer name. Defaults to 'mean'.
4373
+ dist_interval (float, optional): Distance interval in meters. Defaults to 90.
4374
+ n_segments (int, optional): Number of segments (overrides dist_interval).
4375
+ scale (int, optional): Scale in meters. Defaults to 30.
4376
+ processing_mode (str, optional): 'aggregated' or 'iterative'.
4377
+ save_folder_path (str, optional): Path to save CSVs.
4378
+ sampling_method (str, optional): 'line' or 'buffered_point'.
4379
+ point_buffer_radius (int, optional): Buffer radius if using 'buffered_point'.
4380
+ batch_size (int, optional): Images per request in 'aggregated' mode. Defaults to 10. Lower the value if you encounter a 'Too many aggregations' error.
4412
4381
 
4413
4382
  Returns:
4414
- dict or None:
4415
- - If `processing_mode` is 'aggregated', returns a dictionary where each
4416
- key is a transect name and each value is a pandas DataFrame. In the
4417
- DataFrame, the index is the distance along the transect and each
4418
- column represents an image date. Optionally saves CSV files if
4419
- `save_folder_path` is provided.
4420
- - If `processing_mode` is 'iterative', returns None as it saves
4421
- files directly.
4422
-
4423
- Raises:
4424
- ValueError: If `lines` and `line_names` have different lengths, or if
4425
- an unknown reducer or processing mode is specified.
4383
+ dict or None: Dictionary of DataFrames (aggregated) or None (iterative).
4426
4384
  """
4427
- # Validating inputs
4428
4385
  if len(lines) != len(line_names):
4429
4386
  raise ValueError("'lines' and 'line_names' must have the same number of elements.")
4430
- ### Current, server-side processing method ###
4387
+
4388
+ first_img = self.collection.first()
4389
+ bands = first_img.bandNames().getInfo()
4390
+ is_multiband = len(bands) > 1
4391
+
4392
+ # Setup robust dictionary for handling masked/zero values
4393
+ default_val = -9999
4394
+ dummy_dict = ee.Dictionary.fromLists(bands, ee.List.repeat(default_val, len(bands)))
4395
+
4396
+ if is_multiband:
4397
+ reducer_cols = [f"{b}_{reducer}" for b in bands]
4398
+ clean_names = bands
4399
+ rename_keys = bands
4400
+ rename_vals = reducer_cols
4401
+ else:
4402
+ reducer_cols = [reducer]
4403
+ clean_names = [bands[0]]
4404
+ rename_keys = bands
4405
+ rename_vals = reducer_cols
4406
+
4407
+ print("Pre-computing transect geometries from input LineString(s)...")
4408
+
4409
+ master_transect_fc = ee.FeatureCollection([])
4410
+ geom_error = 1.0
4411
+
4412
+ for i, line in enumerate(lines):
4413
+ line_name = line_names[i]
4414
+ length = line.length(geom_error)
4415
+
4416
+ eff_interval = length.divide(n_segments) if n_segments else dist_interval
4417
+
4418
+ distances = ee.List.sequence(0, length, eff_interval)
4419
+ cut_lines = line.cutLines(distances, geom_error).geometries()
4420
+
4421
+ def create_feature(l):
4422
+ geom = ee.Geometry(ee.List(l).get(0))
4423
+ dist = ee.Number(ee.List(l).get(1))
4424
+
4425
+ final_geom = ee.Algorithms.If(
4426
+ ee.String(sampling_method).equals('buffered_point'),
4427
+ geom.centroid(geom_error).buffer(point_buffer_radius),
4428
+ geom
4429
+ )
4430
+
4431
+ return ee.Feature(ee.Geometry(final_geom), {
4432
+ 'transect_name': line_name,
4433
+ 'distance': dist
4434
+ })
4435
+
4436
+ line_fc = ee.FeatureCollection(cut_lines.zip(distances).map(create_feature))
4437
+ master_transect_fc = master_transect_fc.merge(line_fc)
4438
+
4439
+ try:
4440
+ ee_reducer = getattr(ee.Reducer, reducer)()
4441
+ except AttributeError:
4442
+ raise ValueError(f"Unknown reducer: '{reducer}'.")
4443
+
4444
+ def process_image(image):
4445
+ date_val = image.get('Date_Filter')
4446
+
4447
+ # Map over points (Slower but Robust)
4448
+ def reduce_point(f):
4449
+ stats = image.reduceRegion(
4450
+ reducer=ee_reducer,
4451
+ geometry=f.geometry(),
4452
+ scale=scale,
4453
+ maxPixels=1e13
4454
+ )
4455
+ # Combine with defaults (preserves 0, handles masked)
4456
+ safe_stats = dummy_dict.combine(stats, overwrite=True)
4457
+ # Rename keys to match expected outputs (e.g. 'ndvi' -> 'ndvi_mean')
4458
+ final_stats = safe_stats.rename(rename_keys, rename_vals)
4459
+
4460
+ return f.set(final_stats).set({'image_date': date_val})
4461
+
4462
+ return master_transect_fc.map(reduce_point)
4463
+
4464
+ export_cols = ['transect_name', 'distance', 'image_date'] + reducer_cols
4465
+
4431
4466
  if processing_mode == 'aggregated':
4432
- # Validating reducer type
4433
- try:
4434
- ee_reducer = getattr(ee.Reducer, reducer)()
4435
- except AttributeError:
4436
- raise ValueError(f"Unknown reducer: '{reducer}'.")
4437
- ### Function to extract transects for a single image
4438
- def get_transects_for_image(image):
4439
- image_date = image.get('Date_Filter')
4440
- # Initialize an empty list to hold all transect FeatureCollections
4441
- all_transects_for_image = ee.List([])
4442
- # Looping through each line and processing
4443
- for i, line in enumerate(lines):
4444
- # Index line and name
4445
- line_name = line_names[i]
4446
- # Determine maxError based on image projection, used for geometry operations
4447
- maxError = image.projection().nominalScale().divide(5)
4448
- # Calculate effective distance interval
4449
- length = line.length(maxError) # using maxError here ensures consistency with cutLines
4450
- # Determine effective distance interval based on n_segments or dist_interval
4451
- effective_dist_interval = ee.Algorithms.If(
4452
- n_segments,
4453
- length.divide(n_segments),
4454
- dist_interval or 30 # Defaults to 30 if both are None
4455
- )
4456
- # Generate distances along the line(s) for segmentation
4457
- distances = ee.List.sequence(0, length, effective_dist_interval)
4458
- # Segmenting the line into smaller lines at the specified distances
4459
- cut_lines_geoms = line.cutLines(distances, maxError).geometries()
4460
- # Function to create features with distance attributes
4461
- # Adjusted to ensure consistent return types
4462
- def set_dist_attr(l):
4463
- # l is a list: [geometry, distance]
4464
- # Extracting geometry portion of line
4465
- geom_segment = ee.Geometry(ee.List(l).get(0))
4466
- # Extracting distance value for attribute
4467
- distance = ee.Number(ee.List(l).get(1))
4468
- ### Determine final geometry based on sampling method
4469
- # If the sampling method is 'buffered_point',
4470
- # create a buffered point feature at the centroid of each segment,
4471
- # otherwise create a line feature
4472
- final_feature = ee.Algorithms.If(
4473
- ee.String(sampling_method).equals('buffered_point'),
4474
- # True Case: Create the buffered point feature
4475
- ee.Feature(
4476
- geom_segment.centroid(maxError).buffer(point_buffer_radius),
4477
- {'distance': distance}
4478
- ),
4479
- # False Case: Create the line segment feature
4480
- ee.Feature(geom_segment, {'distance': distance})
4481
- )
4482
- # Return either the line segment feature or the buffered point feature
4483
- return final_feature
4484
- # Creating a FeatureCollection of the cut lines with distance attributes
4485
- # Using map to apply the set_dist_attr function to each cut line geometry
4486
- line_features = ee.FeatureCollection(cut_lines_geoms.zip(distances).map(set_dist_attr))
4487
- # Reducing the image over the line features to get transect values
4488
- transect_fc = image.reduceRegions(
4489
- collection=line_features, reducer=ee_reducer, scale=scale
4490
- )
4491
- # Adding image date and line name properties to each feature
4492
- def set_props(feature):
4493
- return feature.set({'image_date': image_date, 'transect_name': line_name})
4494
- # Append to the list of all transects for this image
4495
- all_transects_for_image = all_transects_for_image.add(transect_fc.map(set_props))
4496
- # Combine all transect FeatureCollections into a single FeatureCollection and flatten
4497
- # Flatten is used to merge the list of FeatureCollections into one
4498
- return ee.FeatureCollection(all_transects_for_image).flatten()
4499
- # Map the function over the entire image collection and flatten the results
4500
- results_fc = ee.FeatureCollection(self.collection.map(get_transects_for_image)).flatten()
4501
- # Convert the results to a pandas DataFrame
4502
- df = LandsatCollection.ee_to_df(results_fc, remove_geom=True)
4503
- # Check if the DataFrame is empty
4504
- if df.empty:
4505
- print("Warning: No transect data was generated.")
4467
+ collection_size = self.collection.size().getInfo()
4468
+ print(f"Starting batch process of {collection_size} images...")
4469
+
4470
+ dfs = []
4471
+ for i in range(0, collection_size, batch_size):
4472
+ print(f" Processing image {i} to {min(i + batch_size, collection_size)}...")
4473
+
4474
+ batch_col = ee.ImageCollection(self.collection.toList(batch_size, i))
4475
+ results_fc = batch_col.map(process_image).flatten()
4476
+
4477
+ # Dynamic Class Call for ee_to_df
4478
+ df_batch = self.__class__.ee_to_df(results_fc, columns=export_cols, remove_geom=True)
4479
+
4480
+ if not df_batch.empty:
4481
+ dfs.append(df_batch)
4482
+
4483
+ if not dfs:
4484
+ print("Warning: No transect data generated.")
4506
4485
  return {}
4507
- # Initialize dictionary to hold output DataFrames for each transect
4486
+
4487
+ df = pd.concat(dfs, ignore_index=True)
4488
+
4489
+ # Post-Process & Split
4508
4490
  output_dfs = {}
4509
- # Loop through each unique transect name and create a pivot table
4491
+ for col in reducer_cols:
4492
+ df[col] = pd.to_numeric(df[col], errors='coerce')
4493
+ df[col] = df[col].replace(-9999, np.nan)
4494
+
4510
4495
  for name in sorted(df['transect_name'].unique()):
4511
- transect_df = df[df['transect_name'] == name]
4512
- pivot_df = transect_df.pivot(index='distance', columns='image_date', values=reducer)
4513
- pivot_df.columns.name = 'Date'
4514
- output_dfs[name] = pivot_df
4515
- # Optionally save each transect DataFrame to CSV
4516
- if save_folder_path:
4517
- for transect_name, transect_df in output_dfs.items():
4518
- safe_filename = "".join(x for x in transect_name if x.isalnum() or x in "._-")
4519
- file_path = f"{save_folder_path}{safe_filename}_transects.csv"
4520
- transect_df.to_csv(file_path)
4521
- print(f"Saved transect data to {file_path}")
4522
-
4496
+ line_df = df[df['transect_name'] == name]
4497
+
4498
+ for raw_col, band_name in zip(reducer_cols, clean_names):
4499
+ try:
4500
+ # Safety drop for duplicates
4501
+ line_df_clean = line_df.drop_duplicates(subset=['distance', 'image_date'])
4502
+
4503
+ pivot = line_df_clean.pivot(index='distance', columns='image_date', values=raw_col)
4504
+ pivot.columns.name = 'Date'
4505
+ key = f"{name}_{band_name}"
4506
+ output_dfs[key] = pivot
4507
+
4508
+ if save_folder_path:
4509
+ safe_key = "".join(x for x in key if x.isalnum() or x in "._-")
4510
+ fname = f"{save_folder_path}{safe_key}_transects.csv"
4511
+ pivot.to_csv(fname)
4512
+ print(f"Saved: {fname}")
4513
+ except Exception as e:
4514
+ print(f"Skipping pivot for {name}/{band_name}: {e}")
4515
+
4523
4516
  return output_dfs
4524
4517
 
4525
- ### old, depreciated iterative client-side processing method ###
4526
4518
  elif processing_mode == 'iterative':
4527
4519
  if not save_folder_path:
4528
- raise ValueError("`save_folder_path` is required for 'iterative' processing mode.")
4520
+ raise ValueError("save_folder_path is required for iterative mode.")
4529
4521
 
4530
4522
  image_collection_dates = self.dates
4531
4523
  for i, date in enumerate(image_collection_dates):
4532
4524
  try:
4533
4525
  print(f"Processing image {i+1}/{len(image_collection_dates)}: {date}")
4534
- image = self.image_grab(i)
4535
- transects_df = LandsatCollection.transect(
4536
- image, lines, line_names, reducer, n_segments, dist_interval, to_pandas=True
4537
- )
4538
- transects_df.to_csv(f"{save_folder_path}{date}_transects.csv")
4539
- print(f"{date}_transects saved to csv")
4526
+ image_list = self.collection.toList(self.collection.size())
4527
+ image = ee.Image(image_list.get(i))
4528
+
4529
+ fc_result = process_image(image)
4530
+ df = self.__class__.ee_to_df(fc_result, columns=export_cols, remove_geom=True)
4531
+
4532
+ if not df.empty:
4533
+ for col in reducer_cols:
4534
+ df[col] = pd.to_numeric(df[col], errors='coerce')
4535
+ df[col] = df[col].replace(-9999, np.nan)
4536
+
4537
+ fname = f"{save_folder_path}{date}_transects.csv"
4538
+ df.to_csv(fname, index=False)
4539
+ print(f"Saved: {fname}")
4540
+ else:
4541
+ print(f"Skipping {date}: No data.")
4540
4542
  except Exception as e:
4541
- print(f"An error occurred while processing image {i+1}: {e}")
4543
+ print(f"Error processing {date}: {e}")
4542
4544
  else:
4543
- raise ValueError("`processing_mode` must be 'iterative' or 'aggregated'.")
4545
+ raise ValueError("processing_mode must be 'iterative' or 'aggregated'.")
4544
4546
 
4545
4547
  @staticmethod
4546
4548
  def extract_zonal_stats_from_buffer(
@@ -4644,7 +4646,8 @@ class LandsatCollection:
4644
4646
  buffer_size=1,
4645
4647
  tileScale=1,
4646
4648
  dates=None,
4647
- file_path=None
4649
+ file_path=None,
4650
+ unweighted=False
4648
4651
  ):
4649
4652
  """
4650
4653
  Iterates over a collection of images and extracts spatial statistics (defaults to mean) for a given list of geometries or coordinates. Individual statistics are calculated for each geometry or coordinate provided.
@@ -4663,6 +4666,7 @@ class LandsatCollection:
4663
4666
  tileScale (int, optional): A scaling factor to reduce aggregation tile size. Defaults to 1.
4664
4667
  dates (list, optional): A list of date strings ('YYYY-MM-DD') for filtering the collection, such that only images from these dates are included for zonal statistic retrieval. Defaults to None, which uses all dates in the collection.
4665
4668
  file_path (str, optional): File path to save the output CSV.
4669
+ unweighted (bool, optional): Whether to use unweighted reducer. Defaults to False.
4666
4670
 
4667
4671
  Returns:
4668
4672
  pd.DataFrame or None: A pandas DataFrame with dates as the index and coordinate names
@@ -4769,6 +4773,9 @@ class LandsatCollection:
4769
4773
  reducer = getattr(ee.Reducer, reducer_type)()
4770
4774
  except AttributeError:
4771
4775
  raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")
4776
+
4777
+ if unweighted:
4778
+ reducer = reducer.unweighted()
4772
4779
 
4773
4780
  # Define the function to map over the image collection
4774
4781
  def calculate_stats_for_image(image):
@@ -4830,6 +4837,394 @@ class LandsatCollection:
4830
4837
  print(f"Zonal stats saved to {file_path}.csv")
4831
4838
  return
4832
4839
  return pivot_df
4840
+
4841
+ def multiband_zonal_stats(
4842
+ self,
4843
+ geometry,
4844
+ bands,
4845
+ reducer_types,
4846
+ scale=30,
4847
+ geometry_name='geom',
4848
+ dates=None,
4849
+ include_area=False,
4850
+ file_path=None,
4851
+ unweighted=False
4852
+ ):
4853
+ """
4854
+ Calculates zonal statistics for multiple bands over a single geometry for each image in the collection.
4855
+ Allows for specifying different reducers for different bands. Optionally includes the geometry area.
4856
+
4857
+ Args:
4858
+ geometry (ee.Geometry or ee.Feature): The single geometry to calculate statistics for.
4859
+ bands (list of str): A list of band names to include in the analysis.
4860
+ reducer_types (str or list of str): A single reducer name (e.g., 'mean') to apply to all bands,
4861
+ or a list of reducer names matching the length of the 'bands' list to apply specific reducers
4862
+ to specific bands.
4863
+ scale (int, optional): The scale in meters for the reduction. Defaults to 30.
4864
+ geometry_name (str, optional): A name for the geometry, used in column naming. Defaults to 'geom'.
4865
+ dates (list of str, optional): A list of date strings ('YYYY-MM-DD') to filter the collection.
4866
+ Defaults to None (processes all images).
4867
+ include_area (bool, optional): If True, adds a column with the area of the geometry in square meters.
4868
+ Defaults to False.
4869
+ file_path (str, optional): If provided, saves the resulting DataFrame to a CSV file at this path.
4870
+ unweighted (bool, optional): Whether to use unweighted reducers. Defaults to False.
4871
+
4872
+ Returns:
4873
+ pd.DataFrame: A pandas DataFrame indexed by Date, with columns named as '{band}_{geometry_name}_{reducer}'.
4874
+ """
4875
+ # 1. Input Validation and Setup
4876
+ if not isinstance(geometry, (ee.Geometry, ee.Feature)):
4877
+ raise ValueError("The `geometry` argument must be an ee.Geometry or ee.Feature.")
4878
+
4879
+ region = geometry.geometry() if isinstance(geometry, ee.Feature) else geometry
4880
+
4881
+ if isinstance(bands, str):
4882
+ bands = [bands]
4883
+ if not isinstance(bands, list):
4884
+ raise ValueError("The `bands` argument must be a string or a list of strings.")
4885
+
4886
+ # Handle reducer_types (str vs list)
4887
+ if isinstance(reducer_types, str):
4888
+ reducers_list = [reducer_types] * len(bands)
4889
+ elif isinstance(reducer_types, list):
4890
+ if len(reducer_types) != len(bands):
4891
+ raise ValueError("If `reducer_types` is a list, it must have the same length as `bands`.")
4892
+ reducers_list = reducer_types
4893
+ else:
4894
+ raise ValueError("`reducer_types` must be a string or a list of strings.")
4895
+
4896
+ # 2. Filter Collection
4897
+ processing_col = self.collection
4898
+
4899
+ if dates:
4900
+ processing_col = processing_col.filter(ee.Filter.inList('Date_Filter', dates))
4901
+
4902
+ processing_col = processing_col.select(bands)
4903
+
4904
+ # 3. Pre-calculate Area (if requested)
4905
+ area_val = None
4906
+ area_col_name = f"{geometry_name}_area_m2"
4907
+ if include_area:
4908
+ # Calculate geodesic area in square meters with maxError of 1m
4909
+ area_val = region.area(1)
4910
+
4911
+ # 4. Define the Reduction Logic
4912
+ def calculate_multiband_stats(image):
4913
+ # Base feature with date property
4914
+ date_val = image.get('Date_Filter')
4915
+ feature = ee.Feature(None, {'Date': date_val})
4916
+
4917
+ # If requested, add the static area value to every feature
4918
+ if include_area:
4919
+ feature = feature.set(area_col_name, area_val)
4920
+
4921
+ unique_reducers = list(set(reducers_list))
4922
+
4923
+ # OPTIMIZED PATH: Single reducer type for all bands
4924
+ if len(unique_reducers) == 1:
4925
+ r_type = unique_reducers[0]
4926
+ try:
4927
+ reducer = getattr(ee.Reducer, r_type)()
4928
+ except AttributeError:
4929
+ reducer = ee.Reducer.mean()
4930
+
4931
+ if unweighted:
4932
+ reducer = reducer.unweighted()
4933
+
4934
+ stats = image.reduceRegion(
4935
+ reducer=reducer,
4936
+ geometry=region,
4937
+ scale=scale,
4938
+ maxPixels=1e13
4939
+ )
4940
+
4941
+ for band in bands:
4942
+ col_name = f"{band}_{geometry_name}_{r_type}"
4943
+ val = stats.get(band)
4944
+ feature = feature.set(col_name, val)
4945
+
4946
+ # ITERATIVE PATH: Different reducers for different bands
4947
+ else:
4948
+ for band, r_type in zip(bands, reducers_list):
4949
+ try:
4950
+ reducer = getattr(ee.Reducer, r_type)()
4951
+ except AttributeError:
4952
+ reducer = ee.Reducer.mean()
4953
+
4954
+ if unweighted:
4955
+ reducer = reducer.unweighted()
4956
+
4957
+ stats = image.select(band).reduceRegion(
4958
+ reducer=reducer,
4959
+ geometry=region,
4960
+ scale=scale,
4961
+ maxPixels=1e13
4962
+ )
4963
+
4964
+ val = stats.get(band)
4965
+ col_name = f"{band}_{geometry_name}_{r_type}"
4966
+ feature = feature.set(col_name, val)
4967
+
4968
+ return feature
4969
+
4970
+ # 5. Execute Server-Side Mapping (with explicit Cast)
4971
+ results_fc = ee.FeatureCollection(processing_col.map(calculate_multiband_stats))
4972
+
4973
+ # 6. Client-Side Conversion
4974
+ try:
4975
+ df = LandsatCollection.ee_to_df(results_fc, remove_geom=True)
4976
+ except Exception as e:
4977
+ raise RuntimeError(f"Failed to convert Earth Engine results to DataFrame. Error: {e}")
4978
+
4979
+ if df.empty:
4980
+ print("Warning: No results returned. Check if the geometry intersects the imagery or if dates are valid.")
4981
+ return pd.DataFrame()
4982
+
4983
+ # 7. Formatting & Reordering
4984
+ if 'Date' in df.columns:
4985
+ df['Date'] = pd.to_datetime(df['Date'])
4986
+ df = df.sort_values('Date').set_index('Date')
4987
+
4988
+ # Construct the expected column names in the exact order of the input lists
4989
+ expected_order = [f"{band}_{geometry_name}_{r_type}" for band, r_type in zip(bands, reducers_list)]
4990
+
4991
+ # If area was included, append it to the END of the list
4992
+ if include_area:
4993
+ expected_order.append(area_col_name)
4994
+
4995
+ # Reindex the DataFrame to match this order.
4996
+ existing_cols = [c for c in expected_order if c in df.columns]
4997
+ df = df[existing_cols]
4998
+
4999
+ # 8. Export (Optional)
5000
+ if file_path:
5001
+ if not file_path.lower().endswith('.csv'):
5002
+ file_path += '.csv'
5003
+ try:
5004
+ df.to_csv(file_path)
5005
+ print(f"Multiband zonal stats saved to {file_path}")
5006
+ except Exception as e:
5007
+ print(f"Error saving file to {file_path}: {e}")
5008
+
5009
+ return df
5010
+
5011
+ def sample(
5012
+ self,
5013
+ locations,
5014
+ band=None,
5015
+ scale=None,
5016
+ location_names=None,
5017
+ dates=None,
5018
+ file_path=None,
5019
+ tileScale=1
5020
+ ):
5021
+ """
5022
+ Extracts time-series pixel values for a list of locations.
5023
+
5024
+
5025
+ Args:
5026
+ locations (list, tuple, ee.Geometry, or ee.FeatureCollection): Input points.
5027
+ band (str, optional): The name of the band to sample. Defaults to the first band.
5028
+ scale (int, optional): Scale in meters. Defaults to 30 if None.
5029
+ location_names (list of str, optional): Custom names for locations.
5030
+ dates (list, optional): Date filter ['YYYY-MM-DD'].
5031
+ file_path (str, optional): CSV export path.
5032
+ tileScale (int, optional): Aggregation tile scale. Defaults to 1.
5033
+
5034
+ Returns:
5035
+ pd.DataFrame (or CSV if file_path is provided): DataFrame indexed by Date, columns by Location.
5036
+ """
5037
+ col = self.collection
5038
+ if dates:
5039
+ col = col.filter(ee.Filter.inList('Date_Filter', dates))
5040
+
5041
+ first_img = col.first()
5042
+ available_bands = first_img.bandNames().getInfo()
5043
+
5044
+ if band:
5045
+ if band not in available_bands:
5046
+ raise ValueError(f"Band '{band}' not found. Available: {available_bands}")
5047
+ target_band = band
5048
+ else:
5049
+ target_band = available_bands[0]
5050
+
5051
+ processing_col = col.select([target_band])
5052
+
5053
+ def set_name(f):
5054
+ name = ee.Algorithms.If(
5055
+ f.get('geo_name'), f.get('geo_name'),
5056
+ ee.Algorithms.If(f.get('name'), f.get('name'),
5057
+ ee.Algorithms.If(f.get('system:index'), f.get('system:index'), 'unnamed'))
5058
+ )
5059
+ return f.set('geo_name', name)
5060
+
5061
+ if isinstance(locations, (ee.FeatureCollection, ee.Feature)):
5062
+ features = ee.FeatureCollection(locations)
5063
+ elif isinstance(locations, ee.Geometry):
5064
+ lbl = location_names[0] if (location_names and location_names[0]) else 'Point_1'
5065
+ features = ee.FeatureCollection([ee.Feature(locations).set('geo_name', lbl)])
5066
+ elif isinstance(locations, tuple) and len(locations) == 2:
5067
+ lbl = location_names[0] if location_names else 'Location_1'
5068
+ features = ee.FeatureCollection([ee.Feature(ee.Geometry.Point(locations), {'geo_name': lbl})])
5069
+ elif isinstance(locations, list):
5070
+ if all(isinstance(i, tuple) for i in locations):
5071
+ names = location_names if location_names else [f"Loc_{i+1}" for i in range(len(locations))]
5072
+ features = ee.FeatureCollection([
5073
+ ee.Feature(ee.Geometry.Point(p), {'geo_name': str(n)}) for p, n in zip(locations, names)
5074
+ ])
5075
+ elif all(isinstance(i, ee.Geometry) for i in locations):
5076
+ names = location_names if location_names else [f"Geom_{i+1}" for i in range(len(locations))]
5077
+ features = ee.FeatureCollection([
5078
+ ee.Feature(g, {'geo_name': str(n)}) for g, n in zip(locations, names)
5079
+ ])
5080
+ else:
5081
+ raise ValueError("List must contain (lon, lat) tuples or ee.Geometry objects.")
5082
+ else:
5083
+ raise TypeError("Invalid locations input.")
5084
+
5085
+ features = features.map(set_name)
5086
+
5087
+
5088
+ def sample_image(img):
5089
+ date = img.get('Date_Filter')
5090
+ use_scale = scale if scale is not None else 30
5091
+
5092
+
5093
+ default_dict = ee.Dictionary({target_band: -9999})
5094
+
5095
+ def extract_point(f):
5096
+ stats = img.reduceRegion(
5097
+ reducer=ee.Reducer.first(),
5098
+ geometry=f.geometry(),
5099
+ scale=use_scale,
5100
+ tileScale=tileScale
5101
+ )
5102
+
5103
+ # Combine dictionaries.
5104
+ # If stats has 'target_band' (even if 0), it overwrites -9999.
5105
+ # If stats is empty (masked), -9999 remains.
5106
+ safe_stats = default_dict.combine(stats, overwrite=True)
5107
+ val = safe_stats.get(target_band)
5108
+
5109
+ return f.set({
5110
+ target_band: val,
5111
+ 'image_date': date
5112
+ })
5113
+
5114
+ return features.map(extract_point)
5115
+
5116
+ # Flatten the results
5117
+ flat_results = processing_col.map(sample_image).flatten()
5118
+
5119
+ df = LandsatCollection.ee_to_df(
5120
+ flat_results,
5121
+ columns=['image_date', 'geo_name', target_band],
5122
+ remove_geom=True
5123
+ )
5124
+
5125
+ if df.empty:
5126
+ print("Warning: No data returned.")
5127
+ return pd.DataFrame()
5128
+
5129
+ # 6. Clean and Pivot
5130
+ df[target_band] = pd.to_numeric(df[target_band], errors='coerce')
5131
+
5132
+ # Filter out ONLY the sentinel value (-9999), preserving 0.
5133
+ df = df[df[target_band] != -9999]
5134
+
5135
+ if df.empty:
5136
+ print(f"Warning: All data points were masked (NoData) for band '{target_band}'.")
5137
+ return pd.DataFrame()
5138
+
5139
+ pivot_df = df.pivot(index='image_date', columns='geo_name', values=target_band)
5140
+ pivot_df.index.name = 'Date'
5141
+ pivot_df.columns.name = None
5142
+ pivot_df = pivot_df.reset_index()
5143
+
5144
+ if file_path:
5145
+ if not file_path.lower().endswith('.csv'):
5146
+ file_path += '.csv'
5147
+ pivot_df.to_csv(file_path, index=False)
5148
+ print(f"Sampled data saved to {file_path}")
5149
+ return None
5150
+
5151
+ return pivot_df
5152
+
5153
+ def multiband_sample(
5154
+ self,
5155
+ location,
5156
+ scale=30,
5157
+ file_path=None
5158
+ ):
5159
+ """
5160
+ Extracts ALL band values for a SINGLE location across the entire collection.
5161
+
5162
+ Args:
5163
+ location (tuple or ee.Geometry): A single (lon, lat) tuple OR ee.Geometry.
5164
+ scale (int, optional): Scale in meters. Defaults to 30.
5165
+ file_path (str, optional): Path to save CSV.
5166
+
5167
+ Returns:
5168
+ pd.DataFrame: DataFrame indexed by Date, with columns for each Band.
5169
+ """
5170
+ if isinstance(location, tuple) and len(location) == 2:
5171
+ geom = ee.Geometry.Point(location)
5172
+ elif isinstance(location, ee.Geometry):
5173
+ geom = location
5174
+ else:
5175
+ raise ValueError("Location must be a single (lon, lat) tuple or ee.Geometry.")
5176
+
5177
+ first_img = self.collection.first()
5178
+ band_names = first_img.bandNames()
5179
+
5180
+ # Create a dictionary of {band_name: -9999}
5181
+ # fill missing values so the Feature structure is consistent
5182
+ dummy_values = ee.List.repeat(-9999, band_names.length())
5183
+ default_dict = ee.Dictionary.fromLists(band_names, dummy_values)
5184
+
5185
+ def get_all_bands(img):
5186
+ date = img.get('Date_Filter')
5187
+
5188
+ # reduceRegion returns a Dictionary.
5189
+ # If a pixel is masked, that band key is missing from 'stats'.
5190
+ stats = img.reduceRegion(
5191
+ reducer=ee.Reducer.first(),
5192
+ geometry=geom,
5193
+ scale=scale,
5194
+ maxPixels=1e13
5195
+ )
5196
+
5197
+ # Combine stats with defaults.
5198
+ # overwrite=True means real data (stats) overwrites the -9999 defaults.
5199
+ complete_stats = default_dict.combine(stats, overwrite=True)
5200
+
5201
+ return ee.Feature(None, complete_stats).set('Date', date)
5202
+
5203
+ fc = ee.FeatureCollection(self.collection.map(get_all_bands))
5204
+
5205
+ df = LandsatCollection.ee_to_df(fc, remove_geom=True)
5206
+
5207
+ if df.empty:
5208
+ print("Warning: No data found.")
5209
+ return pd.DataFrame()
5210
+
5211
+ # 6. Cleanup
5212
+ if 'Date' in df.columns:
5213
+ df['Date'] = pd.to_datetime(df['Date'])
5214
+ df = df.set_index('Date').sort_index()
5215
+
5216
+ # Replace our sentinel -9999 with proper NaNs
5217
+ df = df.replace(-9999, np.nan)
5218
+
5219
+ # 7. Export
5220
+ if file_path:
5221
+ if not file_path.lower().endswith('.csv'):
5222
+ file_path += '.csv'
5223
+ df.to_csv(file_path)
5224
+ print(f"Multiband sample saved to {file_path}")
5225
+ return None
5226
+
5227
+ return df
4833
5228
 
4834
5229
  def export_to_asset_collection(
4835
5230
  self,
@@ -4840,7 +5235,8 @@ class LandsatCollection:
4840
5235
  filename_prefix="",
4841
5236
  crs=None,
4842
5237
  max_pixels=int(1e13),
4843
- description_prefix="export"
5238
+ description_prefix="export",
5239
+ overwrite=False
4844
5240
  ):
4845
5241
  """
4846
5242
  Exports an image collection to a Google Earth Engine asset collection. The asset collection will be created if it does not already exist,
@@ -4855,10 +5251,12 @@ class LandsatCollection:
4855
5251
  crs (str, optional): The coordinate reference system. Defaults to None, which will use the image's CRS.
4856
5252
  max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
4857
5253
  description_prefix (str, optional): The description prefix. Defaults to "export".
5254
+ overwrite (bool, optional): Whether to overwrite existing assets. Defaults to False.
4858
5255
 
4859
5256
  Returns:
4860
5257
  None: (queues export tasks)
4861
5258
  """
5259
+ overwrite = overwrite
4862
5260
  ic = self.collection
4863
5261
  if dates is None:
4864
5262
  dates = self.dates
@@ -4872,6 +5270,14 @@ class LandsatCollection:
4872
5270
  asset_id = asset_collection_path + "/" + filename_prefix + date_str
4873
5271
  desc = description_prefix + "_" + filename_prefix + date_str
4874
5272
 
5273
+ if overwrite:
5274
+ try:
5275
+ ee.data.deleteAsset(asset_id)
5276
+ print(f"Overwriting: Deleted existing asset {asset_id}")
5277
+ except ee.EEException:
5278
+ # Asset does not exist, so nothing to delete. Proceed safely.
5279
+ pass
5280
+
4875
5281
  params = {
4876
5282
  'image': img,
4877
5283
  'description': desc,