RadGEEToolbox 1.7.4__py3-none-any.whl → 1.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4347,200 +4347,197 @@ class LandsatCollection:
4347
4347
  lines,
4348
4348
  line_names,
4349
4349
  reducer="mean",
4350
- dist_interval=30,
4350
+ dist_interval=90,
4351
4351
  n_segments=None,
4352
4352
  scale=30,
4353
4353
  processing_mode='aggregated',
4354
4354
  save_folder_path=None,
4355
4355
  sampling_method='line',
4356
- point_buffer_radius=15
4356
+ point_buffer_radius=15,
4357
+ batch_size=10
4357
4358
  ):
4358
4359
  """
4359
- Computes and returns pixel values along transects for each image in a collection.
4360
-
4361
- This iterative function generates time-series data along one or more lines, and
4362
- supports two different geometric sampling methods ('line' and 'buffered_point')
4363
- for maximum flexibility and performance.
4364
-
4365
- There are two processing modes available, aggregated and iterative:
4366
- - 'aggregated' (default; suggested): Fast, server-side processing. Fetches all results
4367
- in a single request. Highly recommended. Returns a dictionary of pandas DataFrames.
4368
- - 'iterative': Slower, client-side loop that processes one image at a time.
4369
- Kept for backward compatibility (effectively depreciated). Returns None and saves individual CSVs.
4370
- This method is not recommended unless absolutely necessary, as it is less efficient and may be subject to client-side timeouts.
4371
-
4360
+ Computes and returns pixel values along transects. Provide a list of ee.Geometry.LineString objects and corresponding names, and the function will compute the specified reducer value
4361
+ at regular intervals along each line for all images in the collection. Use `dist_interval` or `n_segments` to control sampling resolution. The user can choose between 'aggregated' mode (returns a dictionary of DataFrames) or 'iterative' mode (saves individual CSVs for each transect).
4362
+ Alter `sampling_method` to sample directly along the line or via buffered points along the line. Buffered points can help capture more representative pixel values in heterogeneous landscapes, and the buffer radius can be adjusted via `point_buffer_radius`.
4363
+
4372
4364
  Args:
4373
- lines (list): A list of one or more ee.Geometry.LineString objects that
4374
- define the transects.
4375
- line_names (list): A list of string names for each transect. The length
4376
- of this list must match the length of the `lines` list.
4377
- reducer (str, optional): The name of the ee.Reducer to apply at each
4378
- transect point (e.g., 'mean', 'median', 'first'). Defaults to 'mean'.
4379
- dist_interval (float, optional): The distance interval in meters for
4380
- sampling points along each transect. Will be overridden if `n_segments` is provided.
4381
- Defaults to 30. Recommended to increase this value when using the
4382
- 'line' processing method, or else you may get blank rows.
4383
- n_segments (int, optional): The number of equal-length segments to split
4384
- each transect line into for sampling. This parameter overrides `dist_interval`.
4385
- Defaults to None.
4386
- scale (int, optional): The nominal scale in meters for the reduction,
4387
- which should typically match the pixel resolution of the imagery.
4388
- Defaults to 30.
4389
- processing_mode (str, optional): The method for processing the collection.
4390
- - 'aggregated' (default): Fast, server-side processing. Fetches all
4391
- results in a single request. Highly recommended. Returns a dictionary
4392
- of pandas DataFrames.
4393
- - 'iterative': Slower, client-side loop that processes one image at a
4394
- time. Kept for backward compatibility. Returns None and saves
4395
- individual CSVs.
4396
- save_folder_path (str, optional): If provided, the function will save the
4397
- resulting transect data to CSV files. The behavior depends on the
4398
- `processing_mode`:
4399
- - In 'aggregated' mode, one CSV is saved for each transect,
4400
- containing all dates. (e.g., 'MyTransect_transects.csv').
4401
- - In 'iterative' mode, one CSV is saved for each date,
4402
- containing all transects. (e.g., '2022-06-15_transects.csv').
4403
- sampling_method (str, optional): The geometric method used for sampling.
4404
- - 'line' (default): Reduces all pixels intersecting each small line
4405
- segment. This can be unreliable and produce blank rows if
4406
- `dist_interval` is too small relative to the `scale`.
4407
- - 'buffered_point': Reduces all pixels within a buffer around the
4408
- midpoint of each line segment. This method is more robust and
4409
- reliably avoids blank rows, but may not reduce all pixels along a line segment.
4410
- point_buffer_radius (int, optional): The radius in meters for the buffer
4411
- when `sampling_method` is 'buffered_point'. Defaults to 15.
4365
+ lines (list): List of ee.Geometry.LineString objects.
4366
+ line_names (list): List of string names for each transect.
4367
+ reducer (str, optional): Reducer name. Defaults to 'mean'.
4368
+ dist_interval (float, optional): Distance interval in meters. Defaults to 90.
4369
+ n_segments (int, optional): Number of segments (overrides dist_interval).
4370
+ scale (int, optional): Scale in meters. Defaults to 30.
4371
+ processing_mode (str, optional): 'aggregated' or 'iterative'.
4372
+ save_folder_path (str, optional): Path to save CSVs.
4373
+ sampling_method (str, optional): 'line' or 'buffered_point'.
4374
+ point_buffer_radius (int, optional): Buffer radius if using 'buffered_point'.
4375
+ batch_size (int, optional): Images per request in 'aggregated' mode. Defaults to 10. Lower the value if you encounter a 'Too many aggregations' error.
4412
4376
 
4413
4377
  Returns:
4414
- dict or None:
4415
- - If `processing_mode` is 'aggregated', returns a dictionary where each
4416
- key is a transect name and each value is a pandas DataFrame. In the
4417
- DataFrame, the index is the distance along the transect and each
4418
- column represents an image date. Optionally saves CSV files if
4419
- `save_folder_path` is provided.
4420
- - If `processing_mode` is 'iterative', returns None as it saves
4421
- files directly.
4422
-
4423
- Raises:
4424
- ValueError: If `lines` and `line_names` have different lengths, or if
4425
- an unknown reducer or processing mode is specified.
4378
+ dict or None: Dictionary of DataFrames (aggregated) or None (iterative).
4426
4379
  """
4427
- # Validating inputs
4428
4380
  if len(lines) != len(line_names):
4429
4381
  raise ValueError("'lines' and 'line_names' must have the same number of elements.")
4430
- ### Current, server-side processing method ###
4382
+
4383
+ first_img = self.collection.first()
4384
+ bands = first_img.bandNames().getInfo()
4385
+ is_multiband = len(bands) > 1
4386
+
4387
+ # Setup robust dictionary for handling masked/zero values
4388
+ default_val = -9999
4389
+ dummy_dict = ee.Dictionary.fromLists(bands, ee.List.repeat(default_val, len(bands)))
4390
+
4391
+ if is_multiband:
4392
+ reducer_cols = [f"{b}_{reducer}" for b in bands]
4393
+ clean_names = bands
4394
+ rename_keys = bands
4395
+ rename_vals = reducer_cols
4396
+ else:
4397
+ reducer_cols = [reducer]
4398
+ clean_names = [bands[0]]
4399
+ rename_keys = bands
4400
+ rename_vals = reducer_cols
4401
+
4402
+ print("Pre-computing transect geometries from input LineString(s)...")
4403
+
4404
+ master_transect_fc = ee.FeatureCollection([])
4405
+ geom_error = 1.0
4406
+
4407
+ for i, line in enumerate(lines):
4408
+ line_name = line_names[i]
4409
+ length = line.length(geom_error)
4410
+
4411
+ eff_interval = length.divide(n_segments) if n_segments else dist_interval
4412
+
4413
+ distances = ee.List.sequence(0, length, eff_interval)
4414
+ cut_lines = line.cutLines(distances, geom_error).geometries()
4415
+
4416
+ def create_feature(l):
4417
+ geom = ee.Geometry(ee.List(l).get(0))
4418
+ dist = ee.Number(ee.List(l).get(1))
4419
+
4420
+ final_geom = ee.Algorithms.If(
4421
+ ee.String(sampling_method).equals('buffered_point'),
4422
+ geom.centroid(geom_error).buffer(point_buffer_radius),
4423
+ geom
4424
+ )
4425
+
4426
+ return ee.Feature(ee.Geometry(final_geom), {
4427
+ 'transect_name': line_name,
4428
+ 'distance': dist
4429
+ })
4430
+
4431
+ line_fc = ee.FeatureCollection(cut_lines.zip(distances).map(create_feature))
4432
+ master_transect_fc = master_transect_fc.merge(line_fc)
4433
+
4434
+ try:
4435
+ ee_reducer = getattr(ee.Reducer, reducer)()
4436
+ except AttributeError:
4437
+ raise ValueError(f"Unknown reducer: '{reducer}'.")
4438
+
4439
+ def process_image(image):
4440
+ date_val = image.get('Date_Filter')
4441
+
4442
+ # Map over points (Slower but Robust)
4443
+ def reduce_point(f):
4444
+ stats = image.reduceRegion(
4445
+ reducer=ee_reducer,
4446
+ geometry=f.geometry(),
4447
+ scale=scale,
4448
+ maxPixels=1e13
4449
+ )
4450
+ # Combine with defaults (preserves 0, handles masked)
4451
+ safe_stats = dummy_dict.combine(stats, overwrite=True)
4452
+ # Rename keys to match expected outputs (e.g. 'ndvi' -> 'ndvi_mean')
4453
+ final_stats = safe_stats.rename(rename_keys, rename_vals)
4454
+
4455
+ return f.set(final_stats).set({'image_date': date_val})
4456
+
4457
+ return master_transect_fc.map(reduce_point)
4458
+
4459
+ export_cols = ['transect_name', 'distance', 'image_date'] + reducer_cols
4460
+
4431
4461
  if processing_mode == 'aggregated':
4432
- # Validating reducer type
4433
- try:
4434
- ee_reducer = getattr(ee.Reducer, reducer)()
4435
- except AttributeError:
4436
- raise ValueError(f"Unknown reducer: '{reducer}'.")
4437
- ### Function to extract transects for a single image
4438
- def get_transects_for_image(image):
4439
- image_date = image.get('Date_Filter')
4440
- # Initialize an empty list to hold all transect FeatureCollections
4441
- all_transects_for_image = ee.List([])
4442
- # Looping through each line and processing
4443
- for i, line in enumerate(lines):
4444
- # Index line and name
4445
- line_name = line_names[i]
4446
- # Determine maxError based on image projection, used for geometry operations
4447
- maxError = image.projection().nominalScale().divide(5)
4448
- # Calculate effective distance interval
4449
- length = line.length(maxError) # using maxError here ensures consistency with cutLines
4450
- # Determine effective distance interval based on n_segments or dist_interval
4451
- effective_dist_interval = ee.Algorithms.If(
4452
- n_segments,
4453
- length.divide(n_segments),
4454
- dist_interval or 30 # Defaults to 30 if both are None
4455
- )
4456
- # Generate distances along the line(s) for segmentation
4457
- distances = ee.List.sequence(0, length, effective_dist_interval)
4458
- # Segmenting the line into smaller lines at the specified distances
4459
- cut_lines_geoms = line.cutLines(distances, maxError).geometries()
4460
- # Function to create features with distance attributes
4461
- # Adjusted to ensure consistent return types
4462
- def set_dist_attr(l):
4463
- # l is a list: [geometry, distance]
4464
- # Extracting geometry portion of line
4465
- geom_segment = ee.Geometry(ee.List(l).get(0))
4466
- # Extracting distance value for attribute
4467
- distance = ee.Number(ee.List(l).get(1))
4468
- ### Determine final geometry based on sampling method
4469
- # If the sampling method is 'buffered_point',
4470
- # create a buffered point feature at the centroid of each segment,
4471
- # otherwise create a line feature
4472
- final_feature = ee.Algorithms.If(
4473
- ee.String(sampling_method).equals('buffered_point'),
4474
- # True Case: Create the buffered point feature
4475
- ee.Feature(
4476
- geom_segment.centroid(maxError).buffer(point_buffer_radius),
4477
- {'distance': distance}
4478
- ),
4479
- # False Case: Create the line segment feature
4480
- ee.Feature(geom_segment, {'distance': distance})
4481
- )
4482
- # Return either the line segment feature or the buffered point feature
4483
- return final_feature
4484
- # Creating a FeatureCollection of the cut lines with distance attributes
4485
- # Using map to apply the set_dist_attr function to each cut line geometry
4486
- line_features = ee.FeatureCollection(cut_lines_geoms.zip(distances).map(set_dist_attr))
4487
- # Reducing the image over the line features to get transect values
4488
- transect_fc = image.reduceRegions(
4489
- collection=line_features, reducer=ee_reducer, scale=scale
4490
- )
4491
- # Adding image date and line name properties to each feature
4492
- def set_props(feature):
4493
- return feature.set({'image_date': image_date, 'transect_name': line_name})
4494
- # Append to the list of all transects for this image
4495
- all_transects_for_image = all_transects_for_image.add(transect_fc.map(set_props))
4496
- # Combine all transect FeatureCollections into a single FeatureCollection and flatten
4497
- # Flatten is used to merge the list of FeatureCollections into one
4498
- return ee.FeatureCollection(all_transects_for_image).flatten()
4499
- # Map the function over the entire image collection and flatten the results
4500
- results_fc = ee.FeatureCollection(self.collection.map(get_transects_for_image)).flatten()
4501
- # Convert the results to a pandas DataFrame
4502
- df = LandsatCollection.ee_to_df(results_fc, remove_geom=True)
4503
- # Check if the DataFrame is empty
4504
- if df.empty:
4505
- print("Warning: No transect data was generated.")
4462
+ collection_size = self.collection.size().getInfo()
4463
+ print(f"Starting batch process of {collection_size} images...")
4464
+
4465
+ dfs = []
4466
+ for i in range(0, collection_size, batch_size):
4467
+ print(f" Processing image {i} to {min(i + batch_size, collection_size)}...")
4468
+
4469
+ batch_col = ee.ImageCollection(self.collection.toList(batch_size, i))
4470
+ results_fc = batch_col.map(process_image).flatten()
4471
+
4472
+ # Dynamic Class Call for ee_to_df
4473
+ df_batch = self.__class__.ee_to_df(results_fc, columns=export_cols, remove_geom=True)
4474
+
4475
+ if not df_batch.empty:
4476
+ dfs.append(df_batch)
4477
+
4478
+ if not dfs:
4479
+ print("Warning: No transect data generated.")
4506
4480
  return {}
4507
- # Initialize dictionary to hold output DataFrames for each transect
4481
+
4482
+ df = pd.concat(dfs, ignore_index=True)
4483
+
4484
+ # Post-Process & Split
4508
4485
  output_dfs = {}
4509
- # Loop through each unique transect name and create a pivot table
4486
+ for col in reducer_cols:
4487
+ df[col] = pd.to_numeric(df[col], errors='coerce')
4488
+ df[col] = df[col].replace(-9999, np.nan)
4489
+
4510
4490
  for name in sorted(df['transect_name'].unique()):
4511
- transect_df = df[df['transect_name'] == name]
4512
- pivot_df = transect_df.pivot(index='distance', columns='image_date', values=reducer)
4513
- pivot_df.columns.name = 'Date'
4514
- output_dfs[name] = pivot_df
4515
- # Optionally save each transect DataFrame to CSV
4516
- if save_folder_path:
4517
- for transect_name, transect_df in output_dfs.items():
4518
- safe_filename = "".join(x for x in transect_name if x.isalnum() or x in "._-")
4519
- file_path = f"{save_folder_path}{safe_filename}_transects.csv"
4520
- transect_df.to_csv(file_path)
4521
- print(f"Saved transect data to {file_path}")
4522
-
4491
+ line_df = df[df['transect_name'] == name]
4492
+
4493
+ for raw_col, band_name in zip(reducer_cols, clean_names):
4494
+ try:
4495
+ # Safety drop for duplicates
4496
+ line_df_clean = line_df.drop_duplicates(subset=['distance', 'image_date'])
4497
+
4498
+ pivot = line_df_clean.pivot(index='distance', columns='image_date', values=raw_col)
4499
+ pivot.columns.name = 'Date'
4500
+ key = f"{name}_{band_name}"
4501
+ output_dfs[key] = pivot
4502
+
4503
+ if save_folder_path:
4504
+ safe_key = "".join(x for x in key if x.isalnum() or x in "._-")
4505
+ fname = f"{save_folder_path}{safe_key}_transects.csv"
4506
+ pivot.to_csv(fname)
4507
+ print(f"Saved: {fname}")
4508
+ except Exception as e:
4509
+ print(f"Skipping pivot for {name}/{band_name}: {e}")
4510
+
4523
4511
  return output_dfs
4524
4512
 
4525
- ### old, depreciated iterative client-side processing method ###
4526
4513
  elif processing_mode == 'iterative':
4527
4514
  if not save_folder_path:
4528
- raise ValueError("`save_folder_path` is required for 'iterative' processing mode.")
4515
+ raise ValueError("save_folder_path is required for iterative mode.")
4529
4516
 
4530
4517
  image_collection_dates = self.dates
4531
4518
  for i, date in enumerate(image_collection_dates):
4532
4519
  try:
4533
4520
  print(f"Processing image {i+1}/{len(image_collection_dates)}: {date}")
4534
- image = self.image_grab(i)
4535
- transects_df = LandsatCollection.transect(
4536
- image, lines, line_names, reducer, n_segments, dist_interval, to_pandas=True
4537
- )
4538
- transects_df.to_csv(f"{save_folder_path}{date}_transects.csv")
4539
- print(f"{date}_transects saved to csv")
4521
+ image_list = self.collection.toList(self.collection.size())
4522
+ image = ee.Image(image_list.get(i))
4523
+
4524
+ fc_result = process_image(image)
4525
+ df = self.__class__.ee_to_df(fc_result, columns=export_cols, remove_geom=True)
4526
+
4527
+ if not df.empty:
4528
+ for col in reducer_cols:
4529
+ df[col] = pd.to_numeric(df[col], errors='coerce')
4530
+ df[col] = df[col].replace(-9999, np.nan)
4531
+
4532
+ fname = f"{save_folder_path}{date}_transects.csv"
4533
+ df.to_csv(fname, index=False)
4534
+ print(f"Saved: {fname}")
4535
+ else:
4536
+ print(f"Skipping {date}: No data.")
4540
4537
  except Exception as e:
4541
- print(f"An error occurred while processing image {i+1}: {e}")
4538
+ print(f"Error processing {date}: {e}")
4542
4539
  else:
4543
- raise ValueError("`processing_mode` must be 'iterative' or 'aggregated'.")
4540
+ raise ValueError("processing_mode must be 'iterative' or 'aggregated'.")
4544
4541
 
4545
4542
  @staticmethod
4546
4543
  def extract_zonal_stats_from_buffer(
@@ -4644,7 +4641,8 @@ class LandsatCollection:
4644
4641
  buffer_size=1,
4645
4642
  tileScale=1,
4646
4643
  dates=None,
4647
- file_path=None
4644
+ file_path=None,
4645
+ unweighted=False
4648
4646
  ):
4649
4647
  """
4650
4648
  Iterates over a collection of images and extracts spatial statistics (defaults to mean) for a given list of geometries or coordinates. Individual statistics are calculated for each geometry or coordinate provided.
@@ -4663,6 +4661,7 @@ class LandsatCollection:
4663
4661
  tileScale (int, optional): A scaling factor to reduce aggregation tile size. Defaults to 1.
4664
4662
  dates (list, optional): A list of date strings ('YYYY-MM-DD') for filtering the collection, such that only images from these dates are included for zonal statistic retrieval. Defaults to None, which uses all dates in the collection.
4665
4663
  file_path (str, optional): File path to save the output CSV.
4664
+ unweighted (bool, optional): Whether to use unweighted reducer. Defaults to False.
4666
4665
 
4667
4666
  Returns:
4668
4667
  pd.DataFrame or None: A pandas DataFrame with dates as the index and coordinate names
@@ -4769,6 +4768,9 @@ class LandsatCollection:
4769
4768
  reducer = getattr(ee.Reducer, reducer_type)()
4770
4769
  except AttributeError:
4771
4770
  raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")
4771
+
4772
+ if unweighted:
4773
+ reducer = reducer.unweighted()
4772
4774
 
4773
4775
  # Define the function to map over the image collection
4774
4776
  def calculate_stats_for_image(image):
@@ -4830,6 +4832,394 @@ class LandsatCollection:
4830
4832
  print(f"Zonal stats saved to {file_path}.csv")
4831
4833
  return
4832
4834
  return pivot_df
4835
+
4836
def multiband_zonal_stats(
    self,
    geometry,
    bands,
    reducer_types,
    scale=30,
    geometry_name='geom',
    dates=None,
    include_area=False,
    file_path=None,
    unweighted=False
):
    """
    Calculates zonal statistics for multiple bands over a single geometry for each image in the collection.
    Allows for specifying different reducers for different bands. Optionally includes the geometry area.

    Args:
        geometry (ee.Geometry or ee.Feature): The single geometry to calculate statistics for.
        bands (str or list of str): A band name, or a list of band names, to include in the analysis.
        reducer_types (str or list of str): A single reducer name (e.g., 'mean') to apply to all bands,
            or a list of reducer names matching the length of the 'bands' list to apply specific reducers
            to specific bands. Each name must be an attribute of `ee.Reducer`.
        scale (int, optional): The scale in meters for the reduction. Defaults to 30.
        geometry_name (str, optional): A name for the geometry, used in column naming. Defaults to 'geom'.
        dates (list of str, optional): A list of date strings ('YYYY-MM-DD') to filter the collection.
            Defaults to None (processes all images).
        include_area (bool, optional): If True, adds a column with the area of the geometry in square meters.
            Defaults to False.
        file_path (str, optional): If provided, saves the resulting DataFrame to a CSV file at this path
            ('.csv' is appended when missing).
        unweighted (bool, optional): Whether to use unweighted reducers. Defaults to False.

    Returns:
        pd.DataFrame: A pandas DataFrame indexed by Date, with columns named as '{band}_{geometry_name}_{reducer}'.
            An empty DataFrame is returned when no results come back.

    Raises:
        ValueError: If `geometry`, `bands` or `reducer_types` have an unsupported type, if the
            `reducer_types` list length differs from `bands`, or if a reducer name is not an
            attribute of `ee.Reducer`.
        RuntimeError: If the Earth Engine results cannot be converted to a DataFrame.
    """
    # 1. Input Validation and Setup
    if not isinstance(geometry, (ee.Geometry, ee.Feature)):
        raise ValueError("The `geometry` argument must be an ee.Geometry or ee.Feature.")

    region = geometry.geometry() if isinstance(geometry, ee.Feature) else geometry

    if isinstance(bands, str):
        bands = [bands]
    if not isinstance(bands, list):
        raise ValueError("The `bands` argument must be a string or a list of strings.")

    # Handle reducer_types (str vs list)
    if isinstance(reducer_types, str):
        reducers_list = [reducer_types] * len(bands)
    elif isinstance(reducer_types, list):
        if len(reducer_types) != len(bands):
            raise ValueError("If `reducer_types` is a list, it must have the same length as `bands`.")
        reducers_list = reducer_types
    else:
        raise ValueError("`reducer_types` must be a string or a list of strings.")

    # Build each distinct reducer once, up front. An unknown name is an error:
    # silently substituting a mean would produce a column labelled with the
    # requested (bogus) reducer name but holding mean values.
    reducers_by_name = {}
    for r_type in reducers_list:
        if r_type in reducers_by_name:
            continue
        try:
            built = getattr(ee.Reducer, r_type)()
        except AttributeError:
            raise ValueError(f"Unknown reducer type: '{r_type}'.") from None
        if unweighted:
            built = built.unweighted()
        reducers_by_name[r_type] = built

    # 2. Filter Collection
    processing_col = self.collection

    if dates:
        processing_col = processing_col.filter(ee.Filter.inList('Date_Filter', dates))

    processing_col = processing_col.select(bands)

    # 3. Pre-calculate Area (if requested)
    area_val = None
    area_col_name = f"{geometry_name}_area_m2"
    if include_area:
        # Area of the region with a maxError of 1
        area_val = region.area(1)

    # A single distinct reducer lets all bands be reduced in one reduceRegion call.
    single_reducer = len(reducers_by_name) == 1

    # 4. Define the Reduction Logic
    def calculate_multiband_stats(image):
        # Base feature with date property
        feature = ee.Feature(None, {'Date': image.get('Date_Filter')})

        # If requested, add the static area value to every feature
        if include_area:
            feature = feature.set(area_col_name, area_val)

        if single_reducer:
            # OPTIMIZED PATH: one reduction covering every selected band
            r_type = reducers_list[0]
            stats = image.reduceRegion(
                reducer=reducers_by_name[r_type],
                geometry=region,
                scale=scale,
                maxPixels=1e13
            )
            for band in bands:
                feature = feature.set(f"{band}_{geometry_name}_{r_type}", stats.get(band))
        else:
            # ITERATIVE PATH: one reduction per (band, reducer) pair
            for band, r_type in zip(bands, reducers_list):
                stats = image.select(band).reduceRegion(
                    reducer=reducers_by_name[r_type],
                    geometry=region,
                    scale=scale,
                    maxPixels=1e13
                )
                feature = feature.set(f"{band}_{geometry_name}_{r_type}", stats.get(band))

        return feature

    # 5. Execute Server-Side Mapping (with explicit Cast)
    results_fc = ee.FeatureCollection(processing_col.map(calculate_multiband_stats))

    # 6. Client-Side Conversion
    try:
        df = LandsatCollection.ee_to_df(results_fc, remove_geom=True)
    except Exception as e:
        raise RuntimeError(f"Failed to convert Earth Engine results to DataFrame. Error: {e}") from e

    if df.empty:
        print("Warning: No results returned. Check if the geometry intersects the imagery or if dates are valid.")
        return pd.DataFrame()

    # 7. Formatting & Reordering
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values('Date').set_index('Date')

    # Construct the expected column names in the exact order of the input lists
    expected_order = [f"{band}_{geometry_name}_{r_type}" for band, r_type in zip(bands, reducers_list)]

    # If area was included, append it to the END of the list
    if include_area:
        expected_order.append(area_col_name)

    # Keep only the expected columns that actually came back, in that order.
    existing_cols = [c for c in expected_order if c in df.columns]
    df = df[existing_cols]

    # 8. Export (Optional)
    if file_path:
        if not file_path.lower().endswith('.csv'):
            file_path += '.csv'
        try:
            df.to_csv(file_path)
            print(f"Multiband zonal stats saved to {file_path}")
        except Exception as e:
            # Best-effort export: report the failure but still return the data.
            print(f"Error saving file to {file_path}: {e}")

    return df
5005
+
5006
def sample(
    self,
    locations,
    band=None,
    scale=None,
    location_names=None,
    dates=None,
    file_path=None,
    tileScale=1
):
    """
    Extracts time-series pixel values for a list of locations.

    Args:
        locations (list, tuple, ee.Geometry, ee.Feature, or ee.FeatureCollection): Input points.
            A list must contain only (lon, lat) tuples or only ee.Geometry objects.
        band (str, optional): The name of the band to sample. Defaults to the first band.
        scale (int, optional): Scale in meters. Defaults to 30 if None.
        location_names (list of str, optional): Custom names for locations. When `locations`
            is a list, at least one name per location must be given.
        dates (list, optional): Date filter ['YYYY-MM-DD'].
        file_path (str, optional): CSV export path ('.csv' is appended when missing).
        tileScale (int, optional): Aggregation tile scale. Defaults to 1.

    Returns:
        pd.DataFrame or None: DataFrame with a 'Date' column and one column per location.
            An empty DataFrame is returned when nothing usable comes back. Returns None
            when `file_path` is provided (the table is written to CSV instead).

    Raises:
        ValueError: If `band` is not present in the first image, if a list of locations
            mixes unsupported element types, or if `location_names` has fewer entries
            than the `locations` list.
        TypeError: If `locations` is of an unsupported type.
    """
    # Fail fast, before any Earth Engine request: pairing names with locations
    # via zip() would otherwise silently drop every location without a name.
    if (
        isinstance(locations, list)
        and isinstance(location_names, (list, tuple))
        and location_names
        and len(location_names) < len(locations)
    ):
        raise ValueError(
            "`location_names` must provide a name for every entry in `locations` "
            f"(got {len(location_names)} names for {len(locations)} locations)."
        )

    col = self.collection
    if dates:
        col = col.filter(ee.Filter.inList('Date_Filter', dates))

    first_img = col.first()
    available_bands = first_img.bandNames().getInfo()

    if band:
        if band not in available_bands:
            raise ValueError(f"Band '{band}' not found. Available: {available_bands}")
        target_band = band
    else:
        target_band = available_bands[0]

    processing_col = col.select([target_band])

    def set_name(f):
        # Label precedence: existing 'geo_name', then 'name', then 'system:index'.
        name = ee.Algorithms.If(
            f.get('geo_name'), f.get('geo_name'),
            ee.Algorithms.If(f.get('name'), f.get('name'),
            ee.Algorithms.If(f.get('system:index'), f.get('system:index'), 'unnamed'))
        )
        return f.set('geo_name', name)

    if isinstance(locations, (ee.FeatureCollection, ee.Feature)):
        features = ee.FeatureCollection(locations)
    elif isinstance(locations, ee.Geometry):
        lbl = location_names[0] if (location_names and location_names[0]) else 'Point_1'
        features = ee.FeatureCollection([ee.Feature(locations).set('geo_name', lbl)])
    elif isinstance(locations, tuple) and len(locations) == 2:
        lbl = location_names[0] if location_names else 'Location_1'
        features = ee.FeatureCollection([ee.Feature(ee.Geometry.Point(locations), {'geo_name': lbl})])
    elif isinstance(locations, list):
        if all(isinstance(i, tuple) for i in locations):
            names = location_names if location_names else [f"Loc_{i+1}" for i in range(len(locations))]
            features = ee.FeatureCollection([
                ee.Feature(ee.Geometry.Point(p), {'geo_name': str(n)}) for p, n in zip(locations, names)
            ])
        elif all(isinstance(i, ee.Geometry) for i in locations):
            names = location_names if location_names else [f"Geom_{i+1}" for i in range(len(locations))]
            features = ee.FeatureCollection([
                ee.Feature(g, {'geo_name': str(n)}) for g, n in zip(locations, names)
            ])
        else:
            raise ValueError("List must contain (lon, lat) tuples or ee.Geometry objects.")
    else:
        raise TypeError("Invalid locations input.")

    features = features.map(set_name)

    # Invariants shared by every image: resolved scale and the sentinel record.
    use_scale = scale if scale is not None else 30
    default_dict = ee.Dictionary({target_band: -9999})

    def sample_image(img):
        date = img.get('Date_Filter')

        def extract_point(f):
            stats = img.reduceRegion(
                reducer=ee.Reducer.first(),
                geometry=f.geometry(),
                scale=use_scale,
                tileScale=tileScale
            )

            # Real values from `stats` (including 0) take precedence over the
            # -9999 placeholder; the placeholder is meant to mark "no data".
            safe_stats = default_dict.combine(stats, overwrite=True)
            val = safe_stats.get(target_band)

            return f.set({
                target_band: val,
                'image_date': date
            })

        return features.map(extract_point)

    # Flatten the per-image feature collections into one table
    flat_results = processing_col.map(sample_image).flatten()

    df = LandsatCollection.ee_to_df(
        flat_results,
        columns=['image_date', 'geo_name', target_band],
        remove_geom=True
    )

    if df.empty:
        print("Warning: No data returned.")
        return pd.DataFrame()

    # Clean and pivot
    df[target_band] = pd.to_numeric(df[target_band], errors='coerce')

    # Filter out ONLY the sentinel value (-9999), preserving 0.
    df = df[df[target_band] != -9999]

    if df.empty:
        print(f"Warning: All data points were masked (NoData) for band '{target_band}'.")
        return pd.DataFrame()

    pivot_df = df.pivot(index='image_date', columns='geo_name', values=target_band)
    pivot_df.index.name = 'Date'
    pivot_df.columns.name = None
    pivot_df = pivot_df.reset_index()

    if file_path:
        if not file_path.lower().endswith('.csv'):
            file_path += '.csv'
        pivot_df.to_csv(file_path, index=False)
        print(f"Sampled data saved to {file_path}")
        return None

    return pivot_df
5147
+
5148
def multiband_sample(
    self,
    location,
    scale=30,
    file_path=None
):
    """
    Extracts ALL band values for a SINGLE location across the entire collection.

    Args:
        location (tuple or ee.Geometry): A single (lon, lat) tuple OR ee.Geometry.
        scale (int, optional): Scale in meters. Defaults to 30.
        file_path (str, optional): Path to save CSV ('.csv' is appended when missing).

    Returns:
        pd.DataFrame or None: DataFrame indexed by Date, with columns for each Band.
            An empty DataFrame is returned when no rows come back. Returns None when
            `file_path` is provided (the table is written to CSV instead).

    Raises:
        ValueError: If `location` is neither a 2-tuple nor an ee.Geometry.
    """
    if isinstance(location, ee.Geometry):
        target_geom = location
    elif isinstance(location, tuple) and len(location) == 2:
        target_geom = ee.Geometry.Point(location)
    else:
        raise ValueError("Location must be a single (lon, lat) tuple or ee.Geometry.")

    # Band names come from the first image of the collection.
    all_bands = self.collection.first().bandNames()

    # {band_name: -9999} placeholder record so every feature carries the same keys.
    placeholder = ee.Dictionary.fromLists(
        all_bands,
        ee.List.repeat(-9999, all_bands.length())
    )

    def get_all_bands(img):
        reduced = img.reduceRegion(
            reducer=ee.Reducer.first(),
            geometry=target_geom,
            scale=scale,
            maxPixels=1e13
        )
        # Values present in `reduced` replace the -9999 placeholders.
        filled = placeholder.combine(reduced, overwrite=True)
        return ee.Feature(None, filled).set('Date', img.get('Date_Filter'))

    per_image_fc = ee.FeatureCollection(self.collection.map(get_all_bands))
    table = LandsatCollection.ee_to_df(per_image_fc, remove_geom=True)

    if table.empty:
        print("Warning: No data found.")
        return pd.DataFrame()

    # Index by parsed date when the column is present.
    if 'Date' in table.columns:
        table['Date'] = pd.to_datetime(table['Date'])
        table = table.set_index('Date').sort_index()

    # Swap the -9999 sentinel for NaN.
    table = table.replace(-9999, np.nan)

    if not file_path:
        return table

    # Export branch: write the CSV and return nothing.
    if not file_path.lower().endswith('.csv'):
        file_path += '.csv'
    table.to_csv(file_path)
    print(f"Multiband sample saved to {file_path}")
    return None
4833
5223
 
4834
5224
  def export_to_asset_collection(
4835
5225
  self,
@@ -4840,7 +5230,8 @@ class LandsatCollection:
4840
5230
  filename_prefix="",
4841
5231
  crs=None,
4842
5232
  max_pixels=int(1e13),
4843
- description_prefix="export"
5233
+ description_prefix="export",
5234
+ overwrite=False
4844
5235
  ):
4845
5236
  """
4846
5237
  Exports an image collection to a Google Earth Engine asset collection. The asset collection will be created if it does not already exist,
@@ -4855,10 +5246,12 @@ class LandsatCollection:
4855
5246
  crs (str, optional): The coordinate reference system. Defaults to None, which will use the image's CRS.
4856
5247
  max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
4857
5248
  description_prefix (str, optional): The description prefix. Defaults to "export".
5249
+ overwrite (bool, optional): Whether to overwrite existing assets. Defaults to False.
4858
5250
 
4859
5251
  Returns:
4860
5252
  None: (queues export tasks)
4861
5253
  """
5254
+ overwrite = overwrite
4862
5255
  ic = self.collection
4863
5256
  if dates is None:
4864
5257
  dates = self.dates
@@ -4872,6 +5265,14 @@ class LandsatCollection:
4872
5265
  asset_id = asset_collection_path + "/" + filename_prefix + date_str
4873
5266
  desc = description_prefix + "_" + filename_prefix + date_str
4874
5267
 
5268
+ if overwrite:
5269
+ try:
5270
+ ee.data.deleteAsset(asset_id)
5271
+ print(f"Overwriting: Deleted existing asset {asset_id}")
5272
+ except ee.EEException:
5273
+ # Asset does not exist, so nothing to delete. Proceed safely.
5274
+ pass
5275
+
4875
5276
  params = {
4876
5277
  'image': img,
4877
5278
  'description': desc,