RadGEEToolbox 1.7.4__py3-none-any.whl → 1.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- RadGEEToolbox/GenericCollection.py +568 -168
- RadGEEToolbox/LandsatCollection.py +569 -168
- RadGEEToolbox/Sentinel1Collection.py +556 -168
- RadGEEToolbox/Sentinel2Collection.py +571 -170
- RadGEEToolbox/__init__.py +1 -1
- {radgeetoolbox-1.7.4.dist-info → radgeetoolbox-1.7.5.dist-info}/METADATA +6 -6
- radgeetoolbox-1.7.5.dist-info/RECORD +14 -0
- {radgeetoolbox-1.7.4.dist-info → radgeetoolbox-1.7.5.dist-info}/WHEEL +1 -1
- radgeetoolbox-1.7.4.dist-info/RECORD +0 -14
- {radgeetoolbox-1.7.4.dist-info → radgeetoolbox-1.7.5.dist-info}/licenses/LICENSE.txt +0 -0
- {radgeetoolbox-1.7.4.dist-info → radgeetoolbox-1.7.5.dist-info}/top_level.txt +0 -0
|
@@ -2893,200 +2893,197 @@ class GenericCollection:
|
|
|
2893
2893
|
lines,
|
|
2894
2894
|
line_names,
|
|
2895
2895
|
reducer="mean",
|
|
2896
|
-
dist_interval=
|
|
2896
|
+
dist_interval=90,
|
|
2897
2897
|
n_segments=None,
|
|
2898
2898
|
scale=30,
|
|
2899
2899
|
processing_mode='aggregated',
|
|
2900
2900
|
save_folder_path=None,
|
|
2901
2901
|
sampling_method='line',
|
|
2902
|
-
point_buffer_radius=15
|
|
2902
|
+
point_buffer_radius=15,
|
|
2903
|
+
batch_size=10
|
|
2903
2904
|
):
|
|
2904
2905
|
"""
|
|
2905
|
-
Computes and returns pixel values along transects
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
|
|
2909
|
-
for maximum flexibility and performance.
|
|
2910
|
-
|
|
2911
|
-
There are two processing modes available, aggregated and iterative:
|
|
2912
|
-
- 'aggregated' (default; suggested): Fast, server-side processing. Fetches all results
|
|
2913
|
-
in a single request. Highly recommended. Returns a dictionary of pandas DataFrames.
|
|
2914
|
-
- 'iterative': Slower, client-side loop that processes one image at a time.
|
|
2915
|
-
Kept for backward compatibility (effectively depreciated). Returns None and saves individual CSVs.
|
|
2916
|
-
This method is not recommended unless absolutely necessary, as it is less efficient and may be subject to client-side timeouts.
|
|
2917
|
-
|
|
2906
|
+
Computes and returns pixel values along transects. Provide a list of ee.Geometry.LineString objects and corresponding names, and the function will compute the specified reducer value
|
|
2907
|
+
at regular intervals along each line for all images in the collection. Use `dist_interval` or `n_segments` to control sampling resolution. The user can choose between 'aggregated' mode (returns a dictionary of DataFrames) or 'iterative' mode (saves individual CSVs for each transect).
|
|
2908
|
+
Alter `sampling_method` to sample directly along the line or via buffered points along the line. Buffered points can help capture more representative pixel values in heterogeneous landscapes, and the buffer radius can be adjusted via `point_buffer_radius`.
|
|
2909
|
+
|
|
2918
2910
|
Args:
|
|
2919
|
-
lines (list):
|
|
2920
|
-
|
|
2921
|
-
|
|
2922
|
-
|
|
2923
|
-
|
|
2924
|
-
|
|
2925
|
-
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
each transect line into for sampling. This parameter overrides `dist_interval`.
|
|
2931
|
-
Defaults to None.
|
|
2932
|
-
scale (int, optional): The nominal scale in meters for the reduction,
|
|
2933
|
-
which should typically match the pixel resolution of the imagery.
|
|
2934
|
-
Defaults to 30.
|
|
2935
|
-
processing_mode (str, optional): The method for processing the collection.
|
|
2936
|
-
- 'aggregated' (default): Fast, server-side processing. Fetches all
|
|
2937
|
-
results in a single request. Highly recommended. Returns a dictionary
|
|
2938
|
-
of pandas DataFrames.
|
|
2939
|
-
- 'iterative': Slower, client-side loop that processes one image at a
|
|
2940
|
-
time. Kept for backward compatibility. Returns None and saves
|
|
2941
|
-
individual CSVs.
|
|
2942
|
-
save_folder_path (str, optional): If provided, the function will save the
|
|
2943
|
-
resulting transect data to CSV files. The behavior depends on the
|
|
2944
|
-
`processing_mode`:
|
|
2945
|
-
- In 'aggregated' mode, one CSV is saved for each transect,
|
|
2946
|
-
containing all dates. (e.g., 'MyTransect_transects.csv').
|
|
2947
|
-
- In 'iterative' mode, one CSV is saved for each date,
|
|
2948
|
-
containing all transects. (e.g., '2022-06-15_transects.csv').
|
|
2949
|
-
sampling_method (str, optional): The geometric method used for sampling.
|
|
2950
|
-
- 'line' (default): Reduces all pixels intersecting each small line
|
|
2951
|
-
segment. This can be unreliable and produce blank rows if
|
|
2952
|
-
`dist_interval` is too small relative to the `scale`.
|
|
2953
|
-
- 'buffered_point': Reduces all pixels within a buffer around the
|
|
2954
|
-
midpoint of each line segment. This method is more robust and
|
|
2955
|
-
reliably avoids blank rows, but may not reduce all pixels along a line segment.
|
|
2956
|
-
point_buffer_radius (int, optional): The radius in meters for the buffer
|
|
2957
|
-
when `sampling_method` is 'buffered_point'. Defaults to 15.
|
|
2911
|
+
lines (list): List of ee.Geometry.LineString objects.
|
|
2912
|
+
line_names (list): List of string names for each transect.
|
|
2913
|
+
reducer (str, optional): Reducer name. Defaults to 'mean'.
|
|
2914
|
+
dist_interval (float, optional): Distance interval in meters. Defaults to 90.
|
|
2915
|
+
n_segments (int, optional): Number of segments (overrides dist_interval).
|
|
2916
|
+
scale (int, optional): Scale in meters. Defaults to 30.
|
|
2917
|
+
processing_mode (str, optional): 'aggregated' or 'iterative'.
|
|
2918
|
+
save_folder_path (str, optional): Path to save CSVs.
|
|
2919
|
+
sampling_method (str, optional): 'line' or 'buffered_point'.
|
|
2920
|
+
point_buffer_radius (int, optional): Buffer radius if using 'buffered_point'.
|
|
2921
|
+
batch_size (int, optional): Images per request in 'aggregated' mode. Defaults to 10. Lower the value if you encounter a 'Too many aggregations' error.
|
|
2958
2922
|
|
|
2959
2923
|
Returns:
|
|
2960
|
-
dict or None:
|
|
2961
|
-
- If `processing_mode` is 'aggregated', returns a dictionary where each
|
|
2962
|
-
key is a transect name and each value is a pandas DataFrame. In the
|
|
2963
|
-
DataFrame, the index is the distance along the transect and each
|
|
2964
|
-
column represents an image date. Optionally saves CSV files if
|
|
2965
|
-
`save_folder_path` is provided.
|
|
2966
|
-
- If `processing_mode` is 'iterative', returns None as it saves
|
|
2967
|
-
files directly.
|
|
2968
|
-
|
|
2969
|
-
Raises:
|
|
2970
|
-
ValueError: If `lines` and `line_names` have different lengths, or if
|
|
2971
|
-
an unknown reducer or processing mode is specified.
|
|
2924
|
+
dict or None: Dictionary of DataFrames (aggregated) or None (iterative).
|
|
2972
2925
|
"""
|
|
2973
|
-
# Validating inputs
|
|
2974
2926
|
if len(lines) != len(line_names):
|
|
2975
2927
|
raise ValueError("'lines' and 'line_names' must have the same number of elements.")
|
|
2976
|
-
|
|
2928
|
+
|
|
2929
|
+
first_img = self.collection.first()
|
|
2930
|
+
bands = first_img.bandNames().getInfo()
|
|
2931
|
+
is_multiband = len(bands) > 1
|
|
2932
|
+
|
|
2933
|
+
# Setup robust dictionary for handling masked/zero values
|
|
2934
|
+
default_val = -9999
|
|
2935
|
+
dummy_dict = ee.Dictionary.fromLists(bands, ee.List.repeat(default_val, len(bands)))
|
|
2936
|
+
|
|
2937
|
+
if is_multiband:
|
|
2938
|
+
reducer_cols = [f"{b}_{reducer}" for b in bands]
|
|
2939
|
+
clean_names = bands
|
|
2940
|
+
rename_keys = bands
|
|
2941
|
+
rename_vals = reducer_cols
|
|
2942
|
+
else:
|
|
2943
|
+
reducer_cols = [reducer]
|
|
2944
|
+
clean_names = [bands[0]]
|
|
2945
|
+
rename_keys = bands
|
|
2946
|
+
rename_vals = reducer_cols
|
|
2947
|
+
|
|
2948
|
+
print("Pre-computing transect geometries from input LineString(s)...")
|
|
2949
|
+
|
|
2950
|
+
master_transect_fc = ee.FeatureCollection([])
|
|
2951
|
+
geom_error = 1.0
|
|
2952
|
+
|
|
2953
|
+
for i, line in enumerate(lines):
|
|
2954
|
+
line_name = line_names[i]
|
|
2955
|
+
length = line.length(geom_error)
|
|
2956
|
+
|
|
2957
|
+
eff_interval = length.divide(n_segments) if n_segments else dist_interval
|
|
2958
|
+
|
|
2959
|
+
distances = ee.List.sequence(0, length, eff_interval)
|
|
2960
|
+
cut_lines = line.cutLines(distances, geom_error).geometries()
|
|
2961
|
+
|
|
2962
|
+
def create_feature(l):
|
|
2963
|
+
geom = ee.Geometry(ee.List(l).get(0))
|
|
2964
|
+
dist = ee.Number(ee.List(l).get(1))
|
|
2965
|
+
|
|
2966
|
+
final_geom = ee.Algorithms.If(
|
|
2967
|
+
ee.String(sampling_method).equals('buffered_point'),
|
|
2968
|
+
geom.centroid(geom_error).buffer(point_buffer_radius),
|
|
2969
|
+
geom
|
|
2970
|
+
)
|
|
2971
|
+
|
|
2972
|
+
return ee.Feature(ee.Geometry(final_geom), {
|
|
2973
|
+
'transect_name': line_name,
|
|
2974
|
+
'distance': dist
|
|
2975
|
+
})
|
|
2976
|
+
|
|
2977
|
+
line_fc = ee.FeatureCollection(cut_lines.zip(distances).map(create_feature))
|
|
2978
|
+
master_transect_fc = master_transect_fc.merge(line_fc)
|
|
2979
|
+
|
|
2980
|
+
try:
|
|
2981
|
+
ee_reducer = getattr(ee.Reducer, reducer)()
|
|
2982
|
+
except AttributeError:
|
|
2983
|
+
raise ValueError(f"Unknown reducer: '{reducer}'.")
|
|
2984
|
+
|
|
2985
|
+
def process_image(image):
|
|
2986
|
+
date_val = image.get('Date_Filter')
|
|
2987
|
+
|
|
2988
|
+
# Map over points (Slower but Robust)
|
|
2989
|
+
def reduce_point(f):
|
|
2990
|
+
stats = image.reduceRegion(
|
|
2991
|
+
reducer=ee_reducer,
|
|
2992
|
+
geometry=f.geometry(),
|
|
2993
|
+
scale=scale,
|
|
2994
|
+
maxPixels=1e13
|
|
2995
|
+
)
|
|
2996
|
+
# Combine with defaults (preserves 0, handles masked)
|
|
2997
|
+
safe_stats = dummy_dict.combine(stats, overwrite=True)
|
|
2998
|
+
# Rename keys to match expected outputs (e.g. 'ndvi' -> 'ndvi_mean')
|
|
2999
|
+
final_stats = safe_stats.rename(rename_keys, rename_vals)
|
|
3000
|
+
|
|
3001
|
+
return f.set(final_stats).set({'image_date': date_val})
|
|
3002
|
+
|
|
3003
|
+
return master_transect_fc.map(reduce_point)
|
|
3004
|
+
|
|
3005
|
+
export_cols = ['transect_name', 'distance', 'image_date'] + reducer_cols
|
|
3006
|
+
|
|
2977
3007
|
if processing_mode == 'aggregated':
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
|
|
2988
|
-
#
|
|
2989
|
-
|
|
2990
|
-
|
|
2991
|
-
|
|
2992
|
-
|
|
2993
|
-
|
|
2994
|
-
|
|
2995
|
-
|
|
2996
|
-
# Determine effective distance interval based on n_segments or dist_interval
|
|
2997
|
-
effective_dist_interval = ee.Algorithms.If(
|
|
2998
|
-
n_segments,
|
|
2999
|
-
length.divide(n_segments),
|
|
3000
|
-
dist_interval or 30 # Defaults to 30 if both are None
|
|
3001
|
-
)
|
|
3002
|
-
# Generate distances along the line(s) for segmentation
|
|
3003
|
-
distances = ee.List.sequence(0, length, effective_dist_interval)
|
|
3004
|
-
# Segmenting the line into smaller lines at the specified distances
|
|
3005
|
-
cut_lines_geoms = line.cutLines(distances, maxError).geometries()
|
|
3006
|
-
# Function to create features with distance attributes
|
|
3007
|
-
# Adjusted to ensure consistent return types
|
|
3008
|
-
def set_dist_attr(l):
|
|
3009
|
-
# l is a list: [geometry, distance]
|
|
3010
|
-
# Extracting geometry portion of line
|
|
3011
|
-
geom_segment = ee.Geometry(ee.List(l).get(0))
|
|
3012
|
-
# Extracting distance value for attribute
|
|
3013
|
-
distance = ee.Number(ee.List(l).get(1))
|
|
3014
|
-
### Determine final geometry based on sampling method
|
|
3015
|
-
# If the sampling method is 'buffered_point',
|
|
3016
|
-
# create a buffered point feature at the centroid of each segment,
|
|
3017
|
-
# otherwise create a line feature
|
|
3018
|
-
final_feature = ee.Algorithms.If(
|
|
3019
|
-
ee.String(sampling_method).equals('buffered_point'),
|
|
3020
|
-
# True Case: Create the buffered point feature
|
|
3021
|
-
ee.Feature(
|
|
3022
|
-
geom_segment.centroid(maxError).buffer(point_buffer_radius),
|
|
3023
|
-
{'distance': distance}
|
|
3024
|
-
),
|
|
3025
|
-
# False Case: Create the line segment feature
|
|
3026
|
-
ee.Feature(geom_segment, {'distance': distance})
|
|
3027
|
-
)
|
|
3028
|
-
# Return either the line segment feature or the buffered point feature
|
|
3029
|
-
return final_feature
|
|
3030
|
-
# Creating a FeatureCollection of the cut lines with distance attributes
|
|
3031
|
-
# Using map to apply the set_dist_attr function to each cut line geometry
|
|
3032
|
-
line_features = ee.FeatureCollection(cut_lines_geoms.zip(distances).map(set_dist_attr))
|
|
3033
|
-
# Reducing the image over the line features to get transect values
|
|
3034
|
-
transect_fc = image.reduceRegions(
|
|
3035
|
-
collection=line_features, reducer=ee_reducer, scale=scale
|
|
3036
|
-
)
|
|
3037
|
-
# Adding image date and line name properties to each feature
|
|
3038
|
-
def set_props(feature):
|
|
3039
|
-
return feature.set({'image_date': image_date, 'transect_name': line_name})
|
|
3040
|
-
# Append to the list of all transects for this image
|
|
3041
|
-
all_transects_for_image = all_transects_for_image.add(transect_fc.map(set_props))
|
|
3042
|
-
# Combine all transect FeatureCollections into a single FeatureCollection and flatten
|
|
3043
|
-
# Flatten is used to merge the list of FeatureCollections into one
|
|
3044
|
-
return ee.FeatureCollection(all_transects_for_image).flatten()
|
|
3045
|
-
# Map the function over the entire image collection and flatten the results
|
|
3046
|
-
results_fc = ee.FeatureCollection(self.collection.map(get_transects_for_image)).flatten()
|
|
3047
|
-
# Convert the results to a pandas DataFrame
|
|
3048
|
-
df = GenericCollection.ee_to_df(results_fc, remove_geom=True)
|
|
3049
|
-
# Check if the DataFrame is empty
|
|
3050
|
-
if df.empty:
|
|
3051
|
-
print("Warning: No transect data was generated.")
|
|
3008
|
+
collection_size = self.collection.size().getInfo()
|
|
3009
|
+
print(f"Starting batch process of {collection_size} images...")
|
|
3010
|
+
|
|
3011
|
+
dfs = []
|
|
3012
|
+
for i in range(0, collection_size, batch_size):
|
|
3013
|
+
print(f" Processing image {i} to {min(i + batch_size, collection_size)}...")
|
|
3014
|
+
|
|
3015
|
+
batch_col = ee.ImageCollection(self.collection.toList(batch_size, i))
|
|
3016
|
+
results_fc = batch_col.map(process_image).flatten()
|
|
3017
|
+
|
|
3018
|
+
# Dynamic Class Call for ee_to_df
|
|
3019
|
+
df_batch = self.__class__.ee_to_df(results_fc, columns=export_cols, remove_geom=True)
|
|
3020
|
+
|
|
3021
|
+
if not df_batch.empty:
|
|
3022
|
+
dfs.append(df_batch)
|
|
3023
|
+
|
|
3024
|
+
if not dfs:
|
|
3025
|
+
print("Warning: No transect data generated.")
|
|
3052
3026
|
return {}
|
|
3053
|
-
|
|
3027
|
+
|
|
3028
|
+
df = pd.concat(dfs, ignore_index=True)
|
|
3029
|
+
|
|
3030
|
+
# Post-Process & Split
|
|
3054
3031
|
output_dfs = {}
|
|
3055
|
-
|
|
3032
|
+
for col in reducer_cols:
|
|
3033
|
+
df[col] = pd.to_numeric(df[col], errors='coerce')
|
|
3034
|
+
df[col] = df[col].replace(-9999, np.nan)
|
|
3035
|
+
|
|
3056
3036
|
for name in sorted(df['transect_name'].unique()):
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
|
|
3063
|
-
|
|
3064
|
-
|
|
3065
|
-
|
|
3066
|
-
|
|
3067
|
-
|
|
3068
|
-
|
|
3037
|
+
line_df = df[df['transect_name'] == name]
|
|
3038
|
+
|
|
3039
|
+
for raw_col, band_name in zip(reducer_cols, clean_names):
|
|
3040
|
+
try:
|
|
3041
|
+
# Safety drop for duplicates
|
|
3042
|
+
line_df_clean = line_df.drop_duplicates(subset=['distance', 'image_date'])
|
|
3043
|
+
|
|
3044
|
+
pivot = line_df_clean.pivot(index='distance', columns='image_date', values=raw_col)
|
|
3045
|
+
pivot.columns.name = 'Date'
|
|
3046
|
+
key = f"{name}_{band_name}"
|
|
3047
|
+
output_dfs[key] = pivot
|
|
3048
|
+
|
|
3049
|
+
if save_folder_path:
|
|
3050
|
+
safe_key = "".join(x for x in key if x.isalnum() or x in "._-")
|
|
3051
|
+
fname = f"{save_folder_path}{safe_key}_transects.csv"
|
|
3052
|
+
pivot.to_csv(fname)
|
|
3053
|
+
print(f"Saved: {fname}")
|
|
3054
|
+
except Exception as e:
|
|
3055
|
+
print(f"Skipping pivot for {name}/{band_name}: {e}")
|
|
3056
|
+
|
|
3069
3057
|
return output_dfs
|
|
3070
3058
|
|
|
3071
|
-
### old, depreciated iterative client-side processing method ###
|
|
3072
3059
|
elif processing_mode == 'iterative':
|
|
3073
3060
|
if not save_folder_path:
|
|
3074
|
-
raise ValueError("
|
|
3061
|
+
raise ValueError("save_folder_path is required for iterative mode.")
|
|
3075
3062
|
|
|
3076
3063
|
image_collection_dates = self.dates
|
|
3077
3064
|
for i, date in enumerate(image_collection_dates):
|
|
3078
3065
|
try:
|
|
3079
3066
|
print(f"Processing image {i+1}/{len(image_collection_dates)}: {date}")
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
|
|
3083
|
-
)
|
|
3084
|
-
|
|
3085
|
-
|
|
3067
|
+
image_list = self.collection.toList(self.collection.size())
|
|
3068
|
+
image = ee.Image(image_list.get(i))
|
|
3069
|
+
|
|
3070
|
+
fc_result = process_image(image)
|
|
3071
|
+
df = self.__class__.ee_to_df(fc_result, columns=export_cols, remove_geom=True)
|
|
3072
|
+
|
|
3073
|
+
if not df.empty:
|
|
3074
|
+
for col in reducer_cols:
|
|
3075
|
+
df[col] = pd.to_numeric(df[col], errors='coerce')
|
|
3076
|
+
df[col] = df[col].replace(-9999, np.nan)
|
|
3077
|
+
|
|
3078
|
+
fname = f"{save_folder_path}{date}_transects.csv"
|
|
3079
|
+
df.to_csv(fname, index=False)
|
|
3080
|
+
print(f"Saved: {fname}")
|
|
3081
|
+
else:
|
|
3082
|
+
print(f"Skipping {date}: No data.")
|
|
3086
3083
|
except Exception as e:
|
|
3087
|
-
print(f"
|
|
3084
|
+
print(f"Error processing {date}: {e}")
|
|
3088
3085
|
else:
|
|
3089
|
-
raise ValueError("
|
|
3086
|
+
raise ValueError("processing_mode must be 'iterative' or 'aggregated'.")
|
|
3090
3087
|
|
|
3091
3088
|
@staticmethod
|
|
3092
3089
|
def extract_zonal_stats_from_buffer(
|
|
@@ -3190,7 +3187,8 @@ class GenericCollection:
|
|
|
3190
3187
|
buffer_size=1,
|
|
3191
3188
|
tileScale=1,
|
|
3192
3189
|
dates=None,
|
|
3193
|
-
file_path=None
|
|
3190
|
+
file_path=None,
|
|
3191
|
+
unweighted=False
|
|
3194
3192
|
):
|
|
3195
3193
|
"""
|
|
3196
3194
|
Iterates over a collection of images and extracts spatial statistics (defaults to mean) for a given list of geometries or coordinates. Individual statistics are calculated for each geometry or coordinate provided.
|
|
@@ -3207,6 +3205,7 @@ class GenericCollection:
|
|
|
3207
3205
|
tileScale (int, optional): A scaling factor to reduce aggregation tile size. Defaults to 1.
|
|
3208
3206
|
dates (list, optional): A list of date strings ('YYYY-MM-DD') for filtering the collection, such that only images from these dates are included for zonal statistic retrieval. Defaults to None, which uses all dates in the collection.
|
|
3209
3207
|
file_path (str, optional): File path to save the output CSV.
|
|
3208
|
+
unweighted (bool, optional): If True, uses unweighted statistics when applicable (e.g., for 'mean'). Defaults to False.
|
|
3210
3209
|
|
|
3211
3210
|
Returns:
|
|
3212
3211
|
pd.DataFrame or None: A pandas DataFrame with dates as the index and coordinate names
|
|
@@ -3313,6 +3312,9 @@ class GenericCollection:
|
|
|
3313
3312
|
reducer = getattr(ee.Reducer, reducer_type)()
|
|
3314
3313
|
except AttributeError:
|
|
3315
3314
|
raise ValueError(f"Unknown reducer_type: '{reducer_type}'.")
|
|
3315
|
+
|
|
3316
|
+
if unweighted:
|
|
3317
|
+
reducer = reducer.unweighted()
|
|
3316
3318
|
|
|
3317
3319
|
# Define the function to map over the image collection
|
|
3318
3320
|
def calculate_stats_for_image(image):
|
|
@@ -3374,6 +3376,394 @@ class GenericCollection:
|
|
|
3374
3376
|
print(f"Zonal stats saved to {file_path}.csv")
|
|
3375
3377
|
return
|
|
3376
3378
|
return pivot_df
|
|
3379
|
+
|
|
3380
|
+
def multiband_zonal_stats(
    self,
    geometry,
    bands,
    reducer_types,
    scale=30,
    geometry_name='geom',
    dates=None,
    include_area=False,
    file_path=None,
    unweighted=False
):
    """
    Calculates zonal statistics for multiple bands over a single geometry for each image in the collection.
    Allows for specifying different reducers for different bands. Optionally includes the geometry area.

    Args:
        geometry (ee.Geometry or ee.Feature): The single geometry to calculate statistics for.
        bands (str or list of str): A band name, or a list of band names, to include in the analysis.
        reducer_types (str or list of str): A single reducer name (e.g., 'mean') to apply to all bands,
            or a list of reducer names matching the length of the 'bands' list to apply specific reducers
            to specific bands. Each name must be an attribute of `ee.Reducer`.
        scale (int, optional): The scale in meters for the reduction. Defaults to 30.
        geometry_name (str, optional): A name for the geometry, used in column naming. Defaults to 'geom'.
        dates (list of str, optional): A list of date strings ('YYYY-MM-DD') to filter the collection
            on its 'Date_Filter' property. Defaults to None (processes all images).
        include_area (bool, optional): If True, adds a column '{geometry_name}_area_m2' with the area of
            the geometry in square meters. Defaults to False.
        file_path (str, optional): If provided, saves the resulting DataFrame to a CSV file at this path
            ('.csv' is appended when missing). A failure to write is reported via print, not raised.
        unweighted (bool, optional): If True, uses unweighted statistics when applicable (e.g., for 'mean'). Defaults to False.

    Returns:
        pd.DataFrame: A pandas DataFrame indexed by Date, with columns named as '{band}_{geometry_name}_{reducer}'
        in the order of the input lists (area column last, when requested). An empty DataFrame is returned
        when Earth Engine yields no rows.

    Raises:
        ValueError: If `geometry`, `bands` or `reducer_types` have an unsupported type, if the list lengths
            differ, or if a reducer name is not an attribute of `ee.Reducer`.
        RuntimeError: If the Earth Engine results cannot be converted to a DataFrame.
    """
    # 1. Input validation and setup
    if not isinstance(geometry, (ee.Geometry, ee.Feature)):
        raise ValueError("The `geometry` argument must be an ee.Geometry or ee.Feature.")

    region = geometry.geometry() if isinstance(geometry, ee.Feature) else geometry

    if isinstance(bands, str):
        bands = [bands]
    if not isinstance(bands, list):
        raise ValueError("The `bands` argument must be a string or a list of strings.")

    # Normalise reducer_types to one reducer name per band
    if isinstance(reducer_types, str):
        reducers_list = [reducer_types] * len(bands)
    elif isinstance(reducer_types, list):
        if len(reducer_types) != len(bands):
            raise ValueError("If `reducer_types` is a list, it must have the same length as `bands`.")
        reducers_list = reducer_types
    else:
        raise ValueError("`reducer_types` must be a string or a list of strings.")

    def _resolve_reducer(r_type):
        # Fail loudly on an unknown name: substituting another reducer would
        # publish its values under a column labelled with the unknown name.
        try:
            resolved = getattr(ee.Reducer, r_type)()
        except AttributeError as err:
            raise ValueError(f"Unknown reducer_type: '{r_type}'.") from err
        return resolved.unweighted() if unweighted else resolved

    # Built once, client-side, before any request is sent; the mapped
    # function below only looks reducers up.
    reducer_objs = {r_type: _resolve_reducer(r_type) for r_type in set(reducers_list)}

    # 2. Filter collection
    processing_col = self.collection

    if dates:
        processing_col = processing_col.filter(ee.Filter.inList('Date_Filter', dates))

    processing_col = processing_col.select(bands)

    # 3. Pre-calculate area (if requested)
    area_val = None
    area_col_name = f"{geometry_name}_area_m2"
    if include_area:
        # Area in square meters, computed with a maxError of 1
        area_val = region.area(1)

    # 4. Define the reduction logic
    def calculate_multiband_stats(image):
        # Base feature (no geometry) carrying the image date
        date_val = image.get('Date_Filter')
        feature = ee.Feature(None, {'Date': date_val})

        # The area is static, so the same value is attached to every feature
        if include_area:
            feature = feature.set(area_col_name, area_val)

        if len(reducer_objs) == 1:
            # Single reducer for all bands: one reduceRegion call per image
            r_type, reducer = next(iter(reducer_objs.items()))
            stats = image.reduceRegion(
                reducer=reducer,
                geometry=region,
                scale=scale,
                maxPixels=1e13
            )
            for band in bands:
                feature = feature.set(f"{band}_{geometry_name}_{r_type}", stats.get(band))
        else:
            # Mixed reducers: one reduceRegion call per (band, reducer) pair
            for band, r_type in zip(bands, reducers_list):
                stats = image.select(band).reduceRegion(
                    reducer=reducer_objs[r_type],
                    geometry=region,
                    scale=scale,
                    maxPixels=1e13
                )
                feature = feature.set(f"{band}_{geometry_name}_{r_type}", stats.get(band))

        return feature

    # 5. Execute server-side mapping (explicit cast to FeatureCollection)
    results_fc = ee.FeatureCollection(processing_col.map(calculate_multiband_stats))

    # 6. Client-side conversion
    try:
        df = GenericCollection.ee_to_df(results_fc, remove_geom=True)
    except Exception as e:
        # Chain the cause so the underlying Earth Engine error stays visible
        raise RuntimeError(f"Failed to convert Earth Engine results to DataFrame. Error: {e}") from e

    if df.empty:
        print("Warning: No results returned. Check if the geometry intersects the imagery or if dates are valid.")
        return pd.DataFrame()

    # 7. Formatting and reordering
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.sort_values('Date').set_index('Date')

    # Expected column names, in the exact order of the input lists
    expected_order = [f"{band}_{geometry_name}_{r_type}" for band, r_type in zip(bands, reducers_list)]

    # The area column, when requested, goes last
    if include_area:
        expected_order.append(area_col_name)

    # Keep only the expected columns that are actually present, in that order
    existing_cols = [c for c in expected_order if c in df.columns]
    df = df[existing_cols]

    # 8. Export (optional)
    if file_path:
        if not file_path.lower().endswith('.csv'):
            file_path += '.csv'
        try:
            df.to_csv(file_path)
            print(f"Multiband zonal stats saved to {file_path}")
        except Exception as e:
            # Best effort: the DataFrame is still returned if the write fails
            print(f"Error saving file to {file_path}: {e}")

    return df
|
|
3549
|
+
|
|
3550
|
+
def sample(
|
|
3551
|
+
self,
|
|
3552
|
+
locations,
|
|
3553
|
+
band=None,
|
|
3554
|
+
scale=None,
|
|
3555
|
+
location_names=None,
|
|
3556
|
+
dates=None,
|
|
3557
|
+
file_path=None,
|
|
3558
|
+
tileScale=1
|
|
3559
|
+
):
|
|
3560
|
+
"""
|
|
3561
|
+
Extracts time-series pixel values for a list of locations.
|
|
3562
|
+
|
|
3563
|
+
|
|
3564
|
+
Args:
|
|
3565
|
+
locations (list, tuple, ee.Geometry, or ee.FeatureCollection): Input points.
|
|
3566
|
+
band (str, optional): The name of the band to sample. Defaults to the first band.
|
|
3567
|
+
scale (int, optional): Scale in meters. Defaults to 30 if None.
|
|
3568
|
+
location_names (list of str, optional): Custom names for locations.
|
|
3569
|
+
dates (list, optional): Date filter ['YYYY-MM-DD'].
|
|
3570
|
+
file_path (str, optional): CSV export path.
|
|
3571
|
+
tileScale (int, optional): Aggregation tile scale. Defaults to 1.
|
|
3572
|
+
|
|
3573
|
+
Returns:
|
|
3574
|
+
pd.DataFrame (or CSV if file_path is provided): DataFrame indexed by Date, columns by Location.
|
|
3575
|
+
"""
|
|
3576
|
+
col = self.collection
|
|
3577
|
+
if dates:
|
|
3578
|
+
col = col.filter(ee.Filter.inList('Date_Filter', dates))
|
|
3579
|
+
|
|
3580
|
+
first_img = col.first()
|
|
3581
|
+
available_bands = first_img.bandNames().getInfo()
|
|
3582
|
+
|
|
3583
|
+
if band:
|
|
3584
|
+
if band not in available_bands:
|
|
3585
|
+
raise ValueError(f"Band '{band}' not found. Available: {available_bands}")
|
|
3586
|
+
target_band = band
|
|
3587
|
+
else:
|
|
3588
|
+
target_band = available_bands[0]
|
|
3589
|
+
|
|
3590
|
+
processing_col = col.select([target_band])
|
|
3591
|
+
|
|
3592
|
+
def set_name(f):
|
|
3593
|
+
name = ee.Algorithms.If(
|
|
3594
|
+
f.get('geo_name'), f.get('geo_name'),
|
|
3595
|
+
ee.Algorithms.If(f.get('name'), f.get('name'),
|
|
3596
|
+
ee.Algorithms.If(f.get('system:index'), f.get('system:index'), 'unnamed'))
|
|
3597
|
+
)
|
|
3598
|
+
return f.set('geo_name', name)
|
|
3599
|
+
|
|
3600
|
+
if isinstance(locations, (ee.FeatureCollection, ee.Feature)):
|
|
3601
|
+
features = ee.FeatureCollection(locations)
|
|
3602
|
+
elif isinstance(locations, ee.Geometry):
|
|
3603
|
+
lbl = location_names[0] if (location_names and location_names[0]) else 'Point_1'
|
|
3604
|
+
features = ee.FeatureCollection([ee.Feature(locations).set('geo_name', lbl)])
|
|
3605
|
+
elif isinstance(locations, tuple) and len(locations) == 2:
|
|
3606
|
+
lbl = location_names[0] if location_names else 'Location_1'
|
|
3607
|
+
features = ee.FeatureCollection([ee.Feature(ee.Geometry.Point(locations), {'geo_name': lbl})])
|
|
3608
|
+
elif isinstance(locations, list):
|
|
3609
|
+
if all(isinstance(i, tuple) for i in locations):
|
|
3610
|
+
names = location_names if location_names else [f"Loc_{i+1}" for i in range(len(locations))]
|
|
3611
|
+
features = ee.FeatureCollection([
|
|
3612
|
+
ee.Feature(ee.Geometry.Point(p), {'geo_name': str(n)}) for p, n in zip(locations, names)
|
|
3613
|
+
])
|
|
3614
|
+
elif all(isinstance(i, ee.Geometry) for i in locations):
|
|
3615
|
+
names = location_names if location_names else [f"Geom_{i+1}" for i in range(len(locations))]
|
|
3616
|
+
features = ee.FeatureCollection([
|
|
3617
|
+
ee.Feature(g, {'geo_name': str(n)}) for g, n in zip(locations, names)
|
|
3618
|
+
])
|
|
3619
|
+
else:
|
|
3620
|
+
raise ValueError("List must contain (lon, lat) tuples or ee.Geometry objects.")
|
|
3621
|
+
else:
|
|
3622
|
+
raise TypeError("Invalid locations input.")
|
|
3623
|
+
|
|
3624
|
+
features = features.map(set_name)
|
|
3625
|
+
|
|
3626
|
+
|
|
3627
|
+
def sample_image(img):
|
|
3628
|
+
date = img.get('Date_Filter')
|
|
3629
|
+
use_scale = scale if scale is not None else 30
|
|
3630
|
+
|
|
3631
|
+
|
|
3632
|
+
default_dict = ee.Dictionary({target_band: -9999})
|
|
3633
|
+
|
|
3634
|
+
def extract_point(f):
|
|
3635
|
+
stats = img.reduceRegion(
|
|
3636
|
+
reducer=ee.Reducer.first(),
|
|
3637
|
+
geometry=f.geometry(),
|
|
3638
|
+
scale=use_scale,
|
|
3639
|
+
tileScale=tileScale
|
|
3640
|
+
)
|
|
3641
|
+
|
|
3642
|
+
# Combine dictionaries.
|
|
3643
|
+
# If stats has 'target_band' (even if 0), it overwrites -9999.
|
|
3644
|
+
# If stats is empty (masked), -9999 remains.
|
|
3645
|
+
safe_stats = default_dict.combine(stats, overwrite=True)
|
|
3646
|
+
val = safe_stats.get(target_band)
|
|
3647
|
+
|
|
3648
|
+
return f.set({
|
|
3649
|
+
target_band: val,
|
|
3650
|
+
'image_date': date
|
|
3651
|
+
})
|
|
3652
|
+
|
|
3653
|
+
return features.map(extract_point)
|
|
3654
|
+
|
|
3655
|
+
# Flatten the results
|
|
3656
|
+
flat_results = processing_col.map(sample_image).flatten()
|
|
3657
|
+
|
|
3658
|
+
df = GenericCollection.ee_to_df(
|
|
3659
|
+
flat_results,
|
|
3660
|
+
columns=['image_date', 'geo_name', target_band],
|
|
3661
|
+
remove_geom=True
|
|
3662
|
+
)
|
|
3663
|
+
|
|
3664
|
+
if df.empty:
|
|
3665
|
+
print("Warning: No data returned.")
|
|
3666
|
+
return pd.DataFrame()
|
|
3667
|
+
|
|
3668
|
+
# 6. Clean and Pivot
|
|
3669
|
+
df[target_band] = pd.to_numeric(df[target_band], errors='coerce')
|
|
3670
|
+
|
|
3671
|
+
# Filter out ONLY the sentinel value (-9999), preserving 0.
|
|
3672
|
+
df = df[df[target_band] != -9999]
|
|
3673
|
+
|
|
3674
|
+
if df.empty:
|
|
3675
|
+
print(f"Warning: All data points were masked (NoData) for band '{target_band}'.")
|
|
3676
|
+
return pd.DataFrame()
|
|
3677
|
+
|
|
3678
|
+
pivot_df = df.pivot(index='image_date', columns='geo_name', values=target_band)
|
|
3679
|
+
pivot_df.index.name = 'Date'
|
|
3680
|
+
pivot_df.columns.name = None
|
|
3681
|
+
pivot_df = pivot_df.reset_index()
|
|
3682
|
+
|
|
3683
|
+
if file_path:
|
|
3684
|
+
if not file_path.lower().endswith('.csv'):
|
|
3685
|
+
file_path += '.csv'
|
|
3686
|
+
pivot_df.to_csv(file_path, index=False)
|
|
3687
|
+
print(f"Sampled data saved to {file_path}")
|
|
3688
|
+
return None
|
|
3689
|
+
|
|
3690
|
+
return pivot_df
|
|
3691
|
+
|
|
3692
|
+
def multiband_sample(
    self,
    location,
    scale=30,
    file_path=None
):
    """
    Samples every band of every image in the collection at one location.

    Each image is reduced with ee.Reducer.first() over the location and the
    per-band results are assembled into a table with one row per image.

    Args:
        location (tuple or ee.Geometry): A single (lon, lat) tuple OR ee.Geometry.
        scale (int, optional): Scale in meters passed to reduceRegion. Defaults to 30.
        file_path (str, optional): Path to save CSV. '.csv' is appended when the
            path does not already end with it.

    Returns:
        pd.DataFrame or None: DataFrame with one column per band, indexed by Date
            (when a 'Date' column is present) and sorted by that index. An empty
            DataFrame is returned when no rows come back. None is returned when
            file_path is given, in which case the table is written to disk instead.

    Raises:
        ValueError: If location is neither a 2-element tuple nor an ee.Geometry.
    """
    # Resolve the sampling geometry from the accepted input types.
    is_lon_lat_pair = isinstance(location, tuple) and len(location) == 2
    if is_lon_lat_pair:
        sample_geom = ee.Geometry.Point(location)
    elif isinstance(location, ee.Geometry):
        sample_geom = location
    else:
        raise ValueError("Location must be a single (lon, lat) tuple or ee.Geometry.")

    # Band names are taken from the first image of the collection.
    bands = self.collection.first().bandNames()

    # {band_name: -9999} placeholder so every output Feature carries the same keys.
    placeholders = ee.Dictionary.fromLists(
        bands,
        ee.List.repeat(-9999, bands.length())
    )

    def image_to_feature(image):
        # Values sampled at the geometry, keyed by band name.
        sampled = image.reduceRegion(
            reducer=ee.Reducer.first(),
            geometry=sample_geom,
            scale=scale,
            maxPixels=1e13
        )
        # Sampled values take precedence; any band absent from the result
        # keeps the -9999 placeholder.
        merged = placeholders.combine(sampled, overwrite=True)
        feature = ee.Feature(None, merged)
        return feature.set('Date', image.get('Date_Filter'))

    sampled_fc = ee.FeatureCollection(self.collection.map(image_to_feature))
    table = GenericCollection.ee_to_df(sampled_fc, remove_geom=True)

    if table.empty:
        print("Warning: No data found.")
        return pd.DataFrame()

    # Use the image date as a chronologically ordered index.
    if 'Date' in table.columns:
        table['Date'] = pd.to_datetime(table['Date'])
        table = table.set_index('Date')
        table = table.sort_index()

    # Swap the -9999 placeholder for real NaNs.
    table = table.replace(-9999, np.nan)

    # No export requested: hand the table back to the caller.
    if not file_path:
        return table

    if not file_path.lower().endswith('.csv'):
        file_path = file_path + '.csv'
    table.to_csv(file_path)
    print(f"Multiband sample saved to {file_path}")
    return None
|
|
3377
3767
|
|
|
3378
3768
|
def export_to_asset_collection(
|
|
3379
3769
|
self,
|
|
@@ -3384,7 +3774,8 @@ class GenericCollection:
|
|
|
3384
3774
|
filename_prefix="",
|
|
3385
3775
|
crs=None,
|
|
3386
3776
|
max_pixels=int(1e13),
|
|
3387
|
-
description_prefix="export"
|
|
3777
|
+
description_prefix="export",
|
|
3778
|
+
overwrite=False
|
|
3388
3779
|
):
|
|
3389
3780
|
"""
|
|
3390
3781
|
Exports an image collection to a Google Earth Engine asset collection. The asset collection will be created if it does not already exist,
|
|
@@ -3399,6 +3790,7 @@ class GenericCollection:
|
|
|
3399
3790
|
crs (str, optional): The coordinate reference system. Defaults to None, which will use the image's CRS.
|
|
3400
3791
|
max_pixels (int, optional): The maximum number of pixels. Defaults to int(1e13).
|
|
3401
3792
|
description_prefix (str, optional): The description prefix. Defaults to "export".
|
|
3793
|
+
overwrite (bool, optional): Whether to overwrite existing assets. Defaults to False.
|
|
3402
3794
|
|
|
3403
3795
|
Returns:
|
|
3404
3796
|
None: (queues export tasks)
|
|
@@ -3416,6 +3808,14 @@ class GenericCollection:
|
|
|
3416
3808
|
asset_id = asset_collection_path + "/" + filename_prefix + date_str
|
|
3417
3809
|
desc = description_prefix + "_" + filename_prefix + date_str
|
|
3418
3810
|
|
|
3811
|
+
if overwrite:
|
|
3812
|
+
try:
|
|
3813
|
+
ee.data.deleteAsset(asset_id)
|
|
3814
|
+
print(f"Overwriting: Deleted existing asset {asset_id}")
|
|
3815
|
+
except ee.EEException:
|
|
3816
|
+
# Asset does not exist, so nothing to delete. Proceed safely.
|
|
3817
|
+
pass
|
|
3818
|
+
|
|
3419
3819
|
params = {
|
|
3420
3820
|
'image': img,
|
|
3421
3821
|
'description': desc,
|