dist-s1-enumerator 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dist_s1_enumerator/__init__.py +57 -0
- dist_s1_enumerator/asf.py +328 -0
- dist_s1_enumerator/constants.py +50 -0
- dist_s1_enumerator/data/jpl_burst_geo.parquet +0 -0
- dist_s1_enumerator/data/mgrs.parquet +0 -0
- dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet +0 -0
- dist_s1_enumerator/dist_enum.py +425 -0
- dist_s1_enumerator/dist_enum_inputs.py +138 -0
- dist_s1_enumerator/exceptions.py +2 -0
- dist_s1_enumerator/mgrs_burst_data.py +170 -0
- dist_s1_enumerator/param_models.py +100 -0
- dist_s1_enumerator/py.typed +0 -0
- dist_s1_enumerator/rtc_s1_io.py +142 -0
- dist_s1_enumerator/tabular_models.py +91 -0
- dist_s1_enumerator-1.0.8.dist-info/METADATA +295 -0
- dist_s1_enumerator-1.0.8.dist-info/RECORD +19 -0
- dist_s1_enumerator-1.0.8.dist-info/WHEEL +5 -0
- dist_s1_enumerator-1.0.8.dist-info/licenses/LICENSE +202 -0
- dist_s1_enumerator-1.0.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
from datetime import datetime, timedelta
|
|
2
|
+
|
|
3
|
+
import geopandas as gpd
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from pandera.pandas import check_input
|
|
6
|
+
from tqdm.auto import tqdm
|
|
7
|
+
|
|
8
|
+
from dist_s1_enumerator.asf import get_rtc_s1_metadata_from_acq_group
|
|
9
|
+
from dist_s1_enumerator.param_models import LookbackStrategyParams
|
|
10
|
+
from dist_s1_enumerator.tabular_models import dist_s1_input_schema, reorder_columns, rtc_s1_schema
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def enumerate_one_dist_s1_product(
    mgrs_tile_id: str,
    track_number: int | list[int],
    post_date: datetime | pd.Timestamp | str,
    lookback_strategy: str = 'multi_window',
    post_date_buffer_days: int = 1,
    max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
    delta_window_days: int = 365,
    delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
    min_pre_imgs_per_burst: int = 1,
    tqdm_enabled: bool = True,
) -> gpd.GeoDataFrame:
    """Enumerate a single product using unique DIST-S1 identifiers.

    The product identifiers are:

    1. MGRS Tile
    2. Track Number
    3. Post-image date (with a buffer)

    Hits the ASF DAAC API to get the necessary pre-/post-image data. Not
    recommended for enumerating large numbers of products over multiple MGRS
    tiles and/or track numbers.

    Parameters
    ----------
    mgrs_tile_id : str
        MGRS tile for DIST-S1 product. Passing a list raises a ``TypeError``.
    track_number : int | list[int]
        Track number(s) for the RTC-S1 pass. A single integer is wrapped in a one-element list.
    post_date : datetime | pd.Timestamp | str
        Approximate date of post-image acquisition; if a string, it should be in the form 'YYYY-MM-DD'.
    lookback_strategy : str, optional
        Lookback strategy to use, by default 'multi_window'. Options are
        'immediate_lookback' or 'multi_window'.
    post_date_buffer_days : int, optional
        Number of days on either side of `post_date` to search for post-image RTC-S1 data, by default 1.
        Must be less than 6.
    max_pre_imgs_per_burst : int | list[int] | tuple[int, ...], optional
        Maximum number of pre-images per burst to include, by default (5, 5, 5).
        If lookback strategy is 'multi_window':
            - each value is the maximum number of pre-images requested for the corresponding lookback window
              (values are paired one-to-one with the lookback days).
            - how a single integer is expanded to one value per window is decided by `LookbackStrategyParams`
              (not visible here; the original documentation states 3 anniversary dates).
        If the lookback strategy is 'immediate_lookback':
            - a single integer is expected; it is the maximum number of pre-images prior to the post-date.
    delta_window_days : int, optional
        Length in days of each pre-image search window. Default is 365 days.
        Each window runs from `post_date - lookback_days - delta_window_days` to `post_date - lookback_days`
        (measured from 5 minutes before the earliest post-image acquisition).
    delta_lookback_days : int | list[int] | tuple[int, ...], optional
        Number of days before the post date at which a pre-image window ends. Default is 365 days.
        If lookback strategy is 'multi_window', one window is searched per lookback value.
        If lookback strategy is 'immediate_lookback', this must be set to 0.
    min_pre_imgs_per_burst : int, optional
        Minimum number of pre-images per burst to include, by default 1. This is for *all* the pre-images.
    tqdm_enabled : bool, optional
        Whether to show the progress bar over the 'multi_window' lookback windows, by default True.

    Returns
    -------
    gpd.GeoDataFrame
        DataFrame containing enumerated DIST-S1 products and the requisite OPERA RTC-S1 inputs and metadata.
        This is used within some of the DIST-S1 workflows to enumerate the requisite pre- and post-image inputs.
        The metadata includes polarization, url, burst_id, etc. Rows carry an `input_category` of 'pre' or
        'post'. When no pre-images are found at all, an empty frame (reordered to the output schema) is returned.

    Raises
    ------
    ValueError
        If `post_date_buffer_days` is 6 or more, if no post-images are found, if `lookback_strategy` is not
        'multi_window' or 'immediate_lookback', or if no pre- or post-images remain after keeping only the
        bursts that appear in both sets with enough pre-images.
    TypeError
        If `track_number` is neither an int nor a list, or if `mgrs_tile_id` is a list.
    """
    # NOTE(review): LookbackStrategyParams is defined elsewhere; presumably it validates/normalizes these values.
    params = LookbackStrategyParams(
        lookback_strategy=lookback_strategy,
        max_pre_imgs_per_burst=max_pre_imgs_per_burst,
        delta_lookback_days=delta_lookback_days,
        min_pre_imgs_per_burst=min_pre_imgs_per_burst,
        delta_window_days=delta_window_days,
    )

    if isinstance(post_date, str):
        post_date = pd.Timestamp(post_date)

    if post_date_buffer_days >= 6:
        raise ValueError('post_date_buffer_days must be less than 6 (S1 pass length) - please check available data')

    if isinstance(track_number, int):
        track_numbers = [track_number]
    elif isinstance(track_number, list):
        track_numbers = track_number
    else:
        raise TypeError('track_number must be a single integer or a list of integers.')

    if isinstance(mgrs_tile_id, list):
        raise TypeError('mgrs_tile_id must be a single string; we are enumerating inputs for a single DIST-S1 product.')

    if isinstance(post_date, pd.Timestamp):
        post_date = post_date.to_pydatetime()

    print(f'Searching for post-images for track {track_number} in MGRS tile {mgrs_tile_id}')
    # The search window must be chronological: it starts `buffer` days before the post date and
    # stops `buffer` days after it.
    post_search_buffer = timedelta(days=post_date_buffer_days)
    df_rtc_post = get_rtc_s1_metadata_from_acq_group(
        [mgrs_tile_id],
        track_numbers=track_numbers,
        start_acq_dt=post_date - post_search_buffer,
        stop_acq_dt=post_date + post_search_buffer,
        # Should take less than 5 minutes for S1 to pass over MGRS tile
        max_variation_seconds=300,
        n_images_per_burst=1,
    )
    if df_rtc_post.empty:
        raise ValueError(f'No RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id}.')

    # Add 5 minutes buffer to ensure we don't include post-images in pre-image set.
    post_date_min = df_rtc_post.acq_dt.min() - pd.Timedelta(seconds=300)
    # Pre-images are only usable when a post-image exists for the same burst and polarization.
    df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()

    if lookback_strategy == 'immediate_lookback':
        print('Searching for pre-images for immediate_lookback products')
        print(
            f'Lookback days {params.delta_lookback_days} and window days {params.delta_window_days} '
            f'with max pre-images per burst {params.max_pre_imgs_per_burst}'
        )
        earliest_lookback = params.delta_window_days + params.delta_lookback_days
        latest_lookback = params.delta_lookback_days
        start_acq_dt = post_date_min - timedelta(days=earliest_lookback)
        stop_acq_dt = post_date_min - timedelta(days=latest_lookback)
        df_rtc_pre = get_rtc_s1_metadata_from_acq_group(
            [mgrs_tile_id],
            track_numbers=track_numbers,
            start_acq_dt=start_acq_dt,
            stop_acq_dt=stop_acq_dt,
            n_images_per_burst=max_pre_imgs_per_burst,
        )
        df_rtc_pre = pd.merge(df_rtc_pre, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner')

    elif lookback_strategy == 'multi_window':
        df_rtc_pre_list = []
        zipped_data = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
        print('Searching for pre-images for multi_window baseline')
        print(
            f'Lookback days {params.delta_lookback_days} and window days {params.delta_window_days} '
            f'with max pre-images per burst {params.max_pre_imgs_per_burst}'
        )
        for delta_lookback_day, max_pre_img_per_burst in tqdm(
            zipped_data,
            desc='Windows',
            dynamic_ncols=True,
            disable=(not tqdm_enabled),
        ):
            earliest_lookback = params.delta_window_days + delta_lookback_day
            latest_lookback = delta_lookback_day
            # Same chronological convention as the immediate_lookback branch: the window starts at the
            # earliest lookback and stops at the latest one.
            start_acq_dt = post_date_min - timedelta(days=earliest_lookback)
            stop_acq_dt = post_date_min - timedelta(days=latest_lookback)
            df_rtc_pre_window = get_rtc_s1_metadata_from_acq_group(
                [mgrs_tile_id],
                track_numbers=track_numbers,
                start_acq_dt=start_acq_dt,
                stop_acq_dt=stop_acq_dt,
                n_images_per_burst=max_pre_img_per_burst,
                polarizations=None,
            )
            df_rtc_pre_window = pd.merge(
                df_rtc_pre_window, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner'
            )

            if not df_rtc_pre_window.empty:
                df_rtc_pre_list.append(df_rtc_pre_window)

        df_rtc_pre = pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()

    else:
        raise ValueError(
            f'Unsupported lookback_strategy: {lookback_strategy}. Expected "multi_window" or "immediate_lookback".'
        )

    if not df_rtc_pre.empty:
        # Drop bursts whose total pre-image count (across all windows) is below the minimum.
        pre_counts = df_rtc_pre.groupby('jpl_burst_id').size()
        burst_ids_with_min_pre_images = pre_counts[pre_counts >= params.min_pre_imgs_per_burst].index.tolist()
        df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(burst_ids_with_min_pre_images)].reset_index(drop=True)

        # Keep only bursts that are present in both the pre- and post-image sets.
        post_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
        pre_burst_ids = df_rtc_pre.jpl_burst_id.unique().tolist()

        final_burst_ids = list(set(post_burst_ids) & set(pre_burst_ids))
        df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)
        df_rtc_post = df_rtc_post[df_rtc_post.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)

        if df_rtc_pre.empty:
            raise ValueError(
                f'Not enough RTC-S1 pre-images found for track {track_number} in MGRS tile {mgrs_tile_id} '
                'with available pre-images.'
            )
        if df_rtc_post.empty:
            raise ValueError(
                f'Not enough RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id} '
                'with available pre-images.'
            )

        df_rtc_pre['input_category'] = 'pre'
        df_rtc_post['input_category'] = 'post'

        df_rtc_product = pd.concat([df_rtc_pre, df_rtc_post], axis=0).reset_index(drop=True)

        # Validation
        dist_s1_input_schema.validate(df_rtc_product)
    else:
        df_rtc_product = gpd.GeoDataFrame()
    df_rtc_product = reorder_columns(df_rtc_product, dist_s1_input_schema)

    return df_rtc_product
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _select_pre_images_in_window(
    df_track: pd.DataFrame,
    df_unique_keys: pd.DataFrame,
    window_start: pd.Timestamp,
    window_stop: pd.Timestamp,
    max_imgs_per_burst: int,
) -> pd.DataFrame:
    """Return the latest pre-images per burst acquired in ``[window_start, window_stop)``.

    Rows of `df_track` inside the window are inner-joined with `df_unique_keys` on burst id and
    polarizations, labelled with ``input_category == 'pre'``, and trimmed to the `max_imgs_per_burst`
    most recent acquisitions of each burst.
    """
    in_window = (df_track.acq_dt >= window_start) & (df_track.acq_dt < window_stop)
    df_pre = pd.merge(
        df_track[in_window].reset_index(drop=True),
        df_unique_keys,
        on=['jpl_burst_id', 'polarizations'],
        how='inner',
    )
    df_pre['input_category'] = 'pre'
    # It is unclear how merging when multiple MGRS tiles are provided will impact order, so sort explicitly:
    # `tail` below then picks the most recent acquisitions of each burst.
    df_pre = df_pre.sort_values(by='acq_dt', ascending=True).reset_index(drop=True)
    return df_pre.groupby('jpl_burst_id').tail(max_imgs_per_burst).reset_index(drop=True)


@check_input(rtc_s1_schema, 0)
def enumerate_dist_s1_products(
    df_rtc_ts: gpd.GeoDataFrame,
    mgrs_tile_ids: list[str],
    lookback_strategy: str = 'multi_window',
    max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
    min_pre_imgs_per_burst: int = 1,
    tqdm_enabled: bool = True,
    delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
    delta_window_days: int = 365,
) -> gpd.GeoDataFrame:
    """
    Enumerate DIST-S1 products from a stack of RTC-S1 metadata and a list of MGRS tiles.

    This function avoids repeated calls to the ASF DAAC API by working from a local stack of RTC-S1 metadata.

    This enumeration finds all the available post-image dates from a given stack of RTC-S1 inputs: every pass
    (`pass_id`) of every acquisition group of every requested MGRS tile is treated as a candidate post-image set,
    latest pass first.

    Parameters
    ----------
    df_rtc_ts : gpd.GeoDataFrame
        RTC-S1 data. Checked against `rtc_s1_schema` by the `check_input` decorator.
    mgrs_tile_ids : list[str]
        List of MGRS tiles to enumerate.
    lookback_strategy : str, optional
        Lookback strategy to use, by default 'multi_window'. Options are
        'immediate_lookback' or 'multi_window'.
    max_pre_imgs_per_burst : int | list[int] | tuple[int, ...], optional
        Maximum number of pre-images per burst to include, by default (5, 5, 5).
        If lookback strategy is 'multi_window':
            - each value is the maximum number of pre-images kept per burst in the corresponding lookback
              window (values are paired one-to-one with the lookback days).
            - how a single integer is expanded to one value per window is decided by `LookbackStrategyParams`
              (not visible here; the original documentation states 3 anniversary dates).
        If the lookback strategy is 'immediate_lookback':
            - a single integer is expected; it is the maximum number of pre-images prior to the post-date.
    min_pre_imgs_per_burst : int, optional
        Minimum number of pre-images per burst to include, by default 1. This is for *all* the pre-images.
    tqdm_enabled : bool, optional
        Whether to enable tqdm progress bars, by default True.
    delta_lookback_days : int | list[int] | tuple[int, ...], optional
        Number of days before the post date at which a pre-image window ends. Default is 365.
        If lookback strategy is 'multi_window', one window is evaluated per lookback value.
        If lookback strategy is 'immediate_lookback', this must be set to 0.
    delta_window_days : int, optional
        Length in days of each pre-image window. Default is 365 days.
        Each window covers `post_date - lookback_days - delta_window_days` (inclusive) to
        `post_date - lookback_days` (exclusive).

    Returns
    -------
    gpd.GeoDataFrame
        DataFrame containing enumerated OPERA RTC-S1 input metadata including polarization, url, burst_id, etc.
        Each row carries an `input_category` ('pre' or 'post') and a `product_id`; rows are sorted by
        `product_id` and then `acq_dt`.

    Raises
    ------
    ValueError
        If `lookback_strategy` is not 'multi_window' or 'immediate_lookback'.
    """
    # NOTE(review): LookbackStrategyParams is defined elsewhere; presumably it validates/normalizes these values.
    params = LookbackStrategyParams(
        lookback_strategy=lookback_strategy,
        max_pre_imgs_per_burst=max_pre_imgs_per_burst,
        delta_lookback_days=delta_lookback_days,
        min_pre_imgs_per_burst=min_pre_imgs_per_burst,
        delta_window_days=delta_window_days,
    )

    # Both strategies reduce to a list of (lookback days, max pre-images per burst) windows;
    # 'immediate_lookback' is simply the single-window case.
    if lookback_strategy == 'immediate_lookback':
        lookback_windows = [(params.delta_lookback_days, max_pre_imgs_per_burst)]
    elif lookback_strategy == 'multi_window':
        lookback_windows = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
    else:
        raise ValueError(
            f'Unsupported lookback_strategy: {lookback_strategy}. '
            'Expected "multi_window" or "immediate_lookback".'
        )
    delta_window_timedelta = pd.Timedelta(params.delta_window_days, unit='D')

    products = []
    product_id = 0
    for mgrs_tile_id in tqdm(mgrs_tile_ids, desc='Enumerate by MGRS tiles', disable=(not tqdm_enabled)):
        df_rtc_ts_tile = df_rtc_ts[df_rtc_ts.mgrs_tile_id == mgrs_tile_id].reset_index(drop=True)
        acq_group_ids_in_tile = df_rtc_ts_tile.acq_group_id_within_mgrs_tile.unique().tolist()
        # Groups are analogs to tracks (except grouped around the equator to ensure a single pass is grouped properly)
        for group_id in acq_group_ids_in_tile:
            df_rtc_ts_tile_track = df_rtc_ts_tile[df_rtc_ts_tile.acq_group_id_within_mgrs_tile == group_id].reset_index(
                drop=True
            )
            # Latest pass is now the first to appear in the list of pass_ids
            pass_ids_unique = sorted(df_rtc_ts_tile_track.pass_id.unique().tolist(), reverse=True)
            # Now traverse over all the passes
            for pass_id in pass_ids_unique:
                # post
                df_rtc_post = df_rtc_ts_tile_track[df_rtc_ts_tile_track.pass_id == pass_id].reset_index(drop=True)
                df_rtc_post['input_category'] = 'post'
                post_date = df_rtc_post.acq_dt.min()
                # Select images that are present in the post-image
                df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()

                # pre: one candidate set per lookback window, keeping only windows that found something
                df_rtc_pre_list = []
                for delta_lookback_day, max_pre_img_per_burst_param in lookback_windows:
                    window_stop = post_date - pd.Timedelta(delta_lookback_day, unit='D')
                    window_start = window_stop - delta_window_timedelta
                    df_rtc_pre = _select_pre_images_in_window(
                        df_rtc_ts_tile_track,
                        df_unique_keys,
                        window_start,
                        window_stop,
                        max_pre_img_per_burst_param,
                    )
                    if not df_rtc_pre.empty:
                        df_rtc_pre_list.append(df_rtc_pre)

                # Without any pre-image no burst can pass the minimum-count filter below,
                # so this pass cannot yield a product.
                if not df_rtc_pre_list:
                    continue

                # product and provenance
                df_rtc_pre_final = pd.concat(df_rtc_pre_list, ignore_index=True)
                df_rtc_product = pd.concat([df_rtc_pre_final, df_rtc_post]).reset_index(drop=True)
                df_rtc_product['product_id'] = product_id

                # Remove bursts that don't have minimum number of pre images
                pre_counts = df_rtc_product[df_rtc_product.input_category == 'pre'].groupby('jpl_burst_id').size()
                burst_ids_with_min_pre_images = pre_counts[pre_counts >= params.min_pre_imgs_per_burst].index.tolist()
                df_rtc_product = df_rtc_product[
                    df_rtc_product.jpl_burst_id.isin(burst_ids_with_min_pre_images)
                ].reset_index(drop=True)

                # finalize products; the id only advances when a product is actually emitted
                if not df_rtc_product.empty:
                    products.append(df_rtc_product)
                    product_id += 1
    if products:
        df_prods = pd.concat(products, axis=0).reset_index(drop=True)
        dist_s1_input_schema.validate(df_prods)
    else:
        df_prods = gpd.GeoDataFrame()

    df_prods = reorder_columns(df_prods, dist_s1_input_schema)
    df_prods = df_prods.sort_values(by=['product_id', 'acq_dt'], ascending=True).reset_index(drop=True)

    return df_prods
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
import geopandas as gpd
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from dist_s1_enumerator.asf import get_rtc_s1_ts_metadata_from_mgrs_tiles
|
|
7
|
+
from dist_s1_enumerator.dist_enum import enumerate_dist_s1_products
|
|
8
|
+
from dist_s1_enumerator.tabular_models import reorder_columns, rtc_s1_schema
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def update_dist_s1_workflow_dict(data_dict: dict) -> dict:
    """Reduce one product record to the fields of a DIST-S1 workflow input.

    Parameters
    ----------
    data_dict : dict
        Record that must contain the keys 'mgrs_tile_id', 'acq_date_for_mgrs_pass' and 'track_number';
        any other keys are ignored.

    Returns
    -------
    dict
        New dictionary with the keys 'mgrs_tile_id', 'post_acq_date' (taken from 'acq_date_for_mgrs_pass')
        and 'track_number', in that order. The input dictionary is left untouched.

    Raises
    ------
    KeyError
        If one of the required keys is missing from `data_dict`.
    """
    # (output key, key read from the record) -- only the acquisition date is renamed.
    output_to_source_key = (
        ('mgrs_tile_id', 'mgrs_tile_id'),
        ('post_acq_date', 'acq_date_for_mgrs_pass'),
        ('track_number', 'track_number'),
    )
    return {output_key: data_dict[source_key] for output_key, source_key in output_to_source_key}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def enumerate_dist_s1_workflow_inputs(
    mgrs_tile_ids: list[str] | str,
    track_numbers: list[int] | int | None = None,
    start_acq_dt: datetime | pd.Timestamp | str | None = None,
    stop_acq_dt: datetime | pd.Timestamp | str | None = None,
    lookback_strategy: str = 'multi_window',
    max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
    min_pre_imgs_per_burst: int = 1,
    delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
    delta_window_days: int = 365,
    df_ts: gpd.GeoDataFrame | None = None,
) -> list[dict]:
    """Enumerate the inputs for a DIST-S1 workflow.

    This function enumerates DIST-S1 workflow inputs from MGRS tiles and track numbers.
    Unless `df_ts` is supplied, it uses the ASF DAAC API to get the necessary RTC-S1 metadata; it then
    enumerates DIST-S1 products to create formatted DIST-S1 workflow inputs.

    Parameters
    ----------
    mgrs_tile_ids : list[str] | str
        MGRS tile(s) for DIST-S1 products. Can be a single string or list of strings.
    track_numbers : list[int] | int | None
        Track number(s) for RTC-S1 passes. Can be a single integer or list of integers.
        Only used when the time series is fetched (i.e. when `df_ts` is None).
    start_acq_dt : datetime | pd.Timestamp | str | None, optional
        Earliest post-image acquisition datetime to keep (inclusive). A string is parsed as a UTC
        `pd.Timestamp` and should be in ISO format. If None, no start filtering is applied.
    stop_acq_dt : datetime | pd.Timestamp | str | None, optional
        Latest post-image acquisition datetime to keep (inclusive). A string is parsed as a UTC
        `pd.Timestamp` and should be in ISO format. If None, no stop filtering is applied.
    lookback_strategy : str, optional
        Lookback strategy to use, by default 'multi_window'. Options are
        'immediate_lookback' or 'multi_window'.
    max_pre_imgs_per_burst : int | list[int] | tuple[int, ...], optional
        Maximum number of pre-images per burst to include, by default (5, 5, 5).
        If lookback strategy is 'multi_window':
            - this is interpreted as the maximum number of pre-images on each anniversary date.
            - tuple/list of integers are provided, each int represents the maximum number of pre-images on each
              anniversary date, most recent last.
            - if a single integer is provided, this is interpreted as the maximum number of pre-images on 3
              anniversary dates.
        If the lookback strategy is 'immediate_lookback':
            - Expects a single integer, tuples/lists will throw an error.
            - This means the maximum pre-images prior to the post-date.
    min_pre_imgs_per_burst : int, optional
        Minimum number of pre-images per burst to include, by default 1. This is for *all* the pre-images.
    delta_lookback_days : int | list[int] | tuple[int, ...], optional
        When to set the most recent pre-image date. Default is 365.
        If lookback strategy is 'multi_window', one pre-image window ends at each
        `post_date - lookback_days` anniversary date.
        If lookback strategy is 'immediate_lookback', this must be set to 0.
    delta_window_days : int, optional
        The acceptable window of time to search for pre-image RTC-S1 data. Default is 365 days.
        This amounts to roughly `post_date - lookback_days - delta_window_days` to `post_date - lookback_days`.
    df_ts : gpd.GeoDataFrame | None, optional
        RTC-S1 time series data. If None, it is fetched for the given MGRS tiles and track numbers;
        otherwise it is validated against `rtc_s1_schema` and its columns are reordered to that schema.

    Returns
    -------
    list[dict]
        List of dictionaries containing formatted DIST-S1 workflow inputs, ordered by ascending
        post-image acquisition time. Each dictionary contains:
        - mgrs_tile_id: MGRS tile identifier
        - post_acq_date: Post-image acquisition date
        - track_number: Track number for the RTC-S1 pass
    """
    if isinstance(mgrs_tile_ids, str):
        mgrs_tile_ids = [mgrs_tile_ids]
    if isinstance(track_numbers, int):
        track_numbers = [track_numbers]
    # NOTE(review): only strings are coerced to UTC; datetime / Timestamp bounds are compared as given, so they
    # presumably need to be timezone-compatible with the `acq_dt` column -- confirm against the schema.
    if isinstance(start_acq_dt, str):
        start_acq_dt = pd.Timestamp(start_acq_dt, tz='UTC')
    if isinstance(stop_acq_dt, str):
        stop_acq_dt = pd.Timestamp(stop_acq_dt, tz='UTC')

    if df_ts is None:
        # Note we have to get full time-series to enumerate products! not just start/stop times.
        df_ts = get_rtc_s1_ts_metadata_from_mgrs_tiles(
            mgrs_tile_ids,
            track_numbers,
        )
    else:
        rtc_s1_schema.validate(df_ts)
        df_ts = reorder_columns(df_ts, rtc_s1_schema)

    df_products = enumerate_dist_s1_products(
        df_ts,
        mgrs_tile_ids,
        lookback_strategy=lookback_strategy,
        max_pre_imgs_per_burst=max_pre_imgs_per_burst,
        min_pre_imgs_per_burst=min_pre_imgs_per_burst,
        delta_lookback_days=delta_lookback_days,
        delta_window_days=delta_window_days,
    )

    # One representative post-image row per product describes the workflow input.
    df_post = df_products[df_products['input_category'] == 'post'].reset_index(drop=True)
    df_s1_workflow_inputs = df_post.groupby(['product_id']).first().reset_index(drop=True)
    df_s1_workflow_inputs = df_s1_workflow_inputs.sort_values(by='acq_dt', ascending=True).reset_index(drop=True)

    # The start/stop bounds restrict which products are reported, not which data is used to build them.
    if start_acq_dt is not None:
        start_ind = df_s1_workflow_inputs.acq_dt >= start_acq_dt
        df_s1_workflow_inputs = df_s1_workflow_inputs[start_ind].reset_index(drop=True)
    if stop_acq_dt is not None:
        stop_ind = df_s1_workflow_inputs.acq_dt <= stop_acq_dt
        df_s1_workflow_inputs = df_s1_workflow_inputs[stop_ind].reset_index(drop=True)

    workflow_input_records = df_s1_workflow_inputs.to_dict('records')
    return [update_dist_s1_workflow_dict(record) for record in workflow_input_records]
|