dist-s1-enumerator 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ from datetime import datetime, timedelta
2
+
3
+ import geopandas as gpd
4
+ import pandas as pd
5
+ from pandera.pandas import check_input
6
+ from tqdm.auto import tqdm
7
+
8
+ from dist_s1_enumerator.asf import get_rtc_s1_metadata_from_acq_group
9
+ from dist_s1_enumerator.param_models import LookbackStrategyParams
10
+ from dist_s1_enumerator.tabular_models import dist_s1_input_schema, reorder_columns, rtc_s1_schema
11
+
12
+
13
def enumerate_one_dist_s1_product(
    mgrs_tile_id: str,
    track_number: int | list[int],
    post_date: datetime | pd.Timestamp | str,
    lookback_strategy: str = 'multi_window',
    post_date_buffer_days: int = 1,
    max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
    delta_window_days: int = 365,
    delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
    min_pre_imgs_per_burst: int = 1,
    tqdm_enabled: bool = True,
) -> gpd.GeoDataFrame:
    """Enumerate a single product using unique DIST-S1 identifiers.

    The product identifiers are:

    1. MGRS Tile
    2. Track Number
    3. Post-image date (with a buffer)

    Hits the ASF DAAC API to get the necessary pre-/post-image data. Not
    recommended for enumerating large numbers of products over multiple MGRS
    tiles and/or track numbers.

    Parameters
    ----------
    mgrs_tile_id : str
        MGRS tile for DIST-S1 product. Must be a single string, not a list.
    track_number : int | list[int]
        Track number(s) for the RTC-S1 pass. A single integer is wrapped into a one-element list.
    post_date : datetime | pd.Timestamp | str
        Approximate date of post-image acquisition, if string should be in the form of 'YYYY-MM-DD'.
    lookback_strategy : str, optional
        Lookback strategy to use, by default 'multi_window'. Options are
        'immediate_lookback' or 'multi_window'.
    post_date_buffer_days : int, optional
        Number of days around the specified post date to search for post-image
        RTC-S1 data, by default 1. Must be less than 6.
    max_pre_imgs_per_burst : int | list[int] | tuple[int, ...], optional
        Number of pre-images per burst to include, by default (5, 5, 5).
        If lookback strategy is 'multi_window':
            - this is interpreted as the maximum number of pre-images on each anniversary date.
            - tuple/list of integers are provided, each int represents the maximum number of pre-images on each
              anniversary date, most recent last.
            - if a single integer is provided, this is interpreted as the maximum number of pre-images on 3
              anniversary dates.
        If the lookback strategy is 'immediate_lookback':
            - Expects a single integer, tuples/lists will throw an error.
            - This means the maximum pre-images prior to the post-date.
    delta_window_days : int, optional
        The acceptable window of time to search for pre-image RTC-S1 data. Default is 365 days.
        This amounts to roughly `post_date - lookback_days - delta_window_days` to `post_date - lookback_days`.
        If lookback strategy is 'multi_window', this means the maximum window of time to search for pre-images on each
        anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
    delta_lookback_days : int | list[int] | tuple[int, ...], optional
        When to set the most recent pre-image date. Default is 365 days.
        If lookback strategy is 'multi_window', this means the maximum number of days to search for pre-images on each
        anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
        If lookback strategy is 'immediate_lookback', this must be set to 0.
    min_pre_imgs_per_burst : int, optional
        Minimum number of pre-images per burst to include, by default 1. This is for *all* the pre-images.
    tqdm_enabled : bool, optional
        Whether to show the progress bar over the 'multi_window' lookback windows, by default True.

    Returns
    -------
    gpd.GeoDataFrame
        DataFrame containing enumerated DIST-S1 products and the requisite OPERA RTC-S1 inputs and metadata.
        This is used within some of the DIST-S1 workflows to enumerate the requisite pre- and post-image inputs.
        The metadata includes polarization, url, burst_id, etc. An empty GeoDataFrame is returned when the
        pre-image search yields no rows at all.

    Raises
    ------
    ValueError
        If `post_date_buffer_days` is 6 or more, if `lookback_strategy` is not supported, if no post-images are
        found, or if no pre-/post-images remain after the per-burst filtering.
    TypeError
        If `track_number` is neither an int nor a list, or if `mgrs_tile_id` is a list.
    """
    params = LookbackStrategyParams(
        lookback_strategy=lookback_strategy,
        max_pre_imgs_per_burst=max_pre_imgs_per_burst,
        delta_lookback_days=delta_lookback_days,
        min_pre_imgs_per_burst=min_pre_imgs_per_burst,
        delta_window_days=delta_window_days,
    )

    if isinstance(post_date, str):
        post_date = pd.Timestamp(post_date)

    if post_date_buffer_days >= 6:
        raise ValueError('post_date_buffer_days must be less than 6 (S1 pass length) - please check available data')

    if isinstance(track_number, int):
        track_numbers = [track_number]
    elif isinstance(track_number, list):
        track_numbers = track_number
    else:
        raise TypeError('track_number must be a single integer or a list of integers.')

    if isinstance(mgrs_tile_id, list):
        raise TypeError('mgrs_tile_id must be a single string; we are enumerating inputs for a single DIST-S1 product.')

    if isinstance(post_date, pd.Timestamp):
        post_date = post_date.to_pydatetime()

    # Reject an unknown strategy before spending a network round trip on the post-image search.
    if lookback_strategy not in ('immediate_lookback', 'multi_window'):
        raise ValueError(
            f'Unsupported lookback_strategy: {lookback_strategy}. Expected "multi_window" or "immediate_lookback".'
        )

    print(f'Searching for post-images for track {track_number} in MGRS tile {mgrs_tile_id}')
    post_buffer = timedelta(days=post_date_buffer_days)
    df_rtc_post = get_rtc_s1_metadata_from_acq_group(
        [mgrs_tile_id],
        track_numbers=track_numbers,
        # Bounds are passed in chronological order (start before stop), as in the immediate_lookback search below.
        start_acq_dt=post_date - post_buffer,
        stop_acq_dt=post_date + post_buffer,
        # Should take less than 5 minutes for S1 to pass over MGRS tile
        max_variation_seconds=300,
        n_images_per_burst=1,
    )
    if df_rtc_post.empty:
        raise ValueError(f'No RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id}.')

    merge_keys = ['jpl_burst_id', 'polarizations']
    # Add 5 minutes buffer to ensure we don't include post-images in pre-image set.
    post_date_min = df_rtc_post.acq_dt.min() - pd.Timedelta(seconds=300)
    # Only pre-images whose (burst, polarizations) pair also occurs in the post-image set are kept.
    df_unique_keys = df_rtc_post[merge_keys].drop_duplicates()

    if lookback_strategy == 'immediate_lookback':
        print('Searching for pre-images for immediate_lookback products')
        print(
            f'Lookback days {params.delta_lookback_days} and window days {params.delta_window_days} '
            f'with max pre-images per burst {params.max_pre_imgs_per_burst}'
        )
        earliest_lookback = params.delta_window_days + params.delta_lookback_days
        latest_lookback = params.delta_lookback_days
        df_rtc_pre = get_rtc_s1_metadata_from_acq_group(
            [mgrs_tile_id],
            track_numbers=track_numbers,
            start_acq_dt=post_date_min - timedelta(days=earliest_lookback),
            stop_acq_dt=post_date_min - timedelta(days=latest_lookback),
            n_images_per_burst=max_pre_imgs_per_burst,
        )
        df_rtc_pre = pd.merge(df_rtc_pre, df_unique_keys, on=merge_keys, how='inner')

    else:
        # lookback_strategy == 'multi_window'
        df_rtc_pre_list = []
        zipped_data = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
        print('Searching for pre-images for multi_window baseline')
        print(
            f'Lookback days {params.delta_lookback_days} and window days {params.delta_window_days} '
            f'with max pre-images per burst {params.max_pre_imgs_per_burst}'
        )
        for delta_lookback_day, max_pre_img_per_burst in tqdm(
            zipped_data,
            desc='Windows',
            dynamic_ncols=True,
            disable=(not tqdm_enabled),
        ):
            earliest_lookback = params.delta_window_days + delta_lookback_day
            latest_lookback = delta_lookback_day
            df_rtc_pre_window = get_rtc_s1_metadata_from_acq_group(
                [mgrs_tile_id],
                track_numbers=track_numbers,
                # Chronological bounds: the window opens at the earliest lookback and closes at the latest.
                start_acq_dt=post_date_min - timedelta(days=earliest_lookback),
                stop_acq_dt=post_date_min - timedelta(days=latest_lookback),
                n_images_per_burst=max_pre_img_per_burst,
                polarizations=None,
            )
            df_rtc_pre_window = pd.merge(df_rtc_pre_window, df_unique_keys, on=merge_keys, how='inner')

            if not df_rtc_pre_window.empty:
                df_rtc_pre_list.append(df_rtc_pre_window)

        df_rtc_pre = pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()

    if not df_rtc_pre.empty:
        # Drop bursts that do not reach the minimum number of pre-images (counted over all windows).
        pre_counts = df_rtc_pre.groupby('jpl_burst_id').size()
        burst_ids_with_min_pre_images = pre_counts[pre_counts >= params.min_pre_imgs_per_burst].index.tolist()
        df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(burst_ids_with_min_pre_images)].reset_index(drop=True)

        # Keep only bursts present in both the pre- and post-image sets.
        post_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
        pre_burst_ids = df_rtc_pre.jpl_burst_id.unique().tolist()

        final_burst_ids = list(set(post_burst_ids) & set(pre_burst_ids))
        df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)
        df_rtc_post = df_rtc_post[df_rtc_post.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)

        if df_rtc_pre.empty:
            raise ValueError(
                f'Not enough RTC-S1 pre-images found for track {track_number} in MGRS tile {mgrs_tile_id} '
                'with available pre-images.'
            )
        if df_rtc_post.empty:
            raise ValueError(
                f'Not enough RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id} '
                'with available pre-images.'
            )

        df_rtc_pre['input_category'] = 'pre'
        df_rtc_post['input_category'] = 'post'

        df_rtc_product = pd.concat([df_rtc_pre, df_rtc_post], axis=0).reset_index(drop=True)

        # Validation
        dist_s1_input_schema.validate(df_rtc_product)
    else:
        df_rtc_product = gpd.GeoDataFrame()
    df_rtc_product = reorder_columns(df_rtc_product, dist_s1_input_schema)

    return df_rtc_product
227
+
228
+
229
def _select_pre_images_in_window(
    df_track: gpd.GeoDataFrame,
    df_post_keys: pd.DataFrame,
    window_start: pd.Timestamp,
    window_stop: pd.Timestamp,
    max_imgs_per_burst: int,
) -> gpd.GeoDataFrame:
    """Return the most recent pre-images per burst acquired in `[window_start, window_stop)`."""
    in_window = (df_track.acq_dt < window_stop) & (df_track.acq_dt >= window_start)
    df_candidates = df_track[in_window].reset_index(drop=True)
    # Keep only (burst, polarizations) pairs that are also present in the post-image set.
    df_pre = pd.merge(df_candidates, df_post_keys, on=['jpl_burst_id', 'polarizations'], how='inner')
    df_pre['input_category'] = 'pre'
    # It is unclear how merging when multiple MGRS tiles are provided will impact order so the rows are
    # sorted explicitly; `tail` then selects the most recent pre-images of each burst.
    df_pre = df_pre.sort_values(by='acq_dt', ascending=True).reset_index(drop=True)
    return df_pre.groupby('jpl_burst_id').tail(max_imgs_per_burst).reset_index(drop=True)


@check_input(rtc_s1_schema, 0)
def enumerate_dist_s1_products(
    df_rtc_ts: gpd.GeoDataFrame,
    mgrs_tile_ids: list[str],
    lookback_strategy: str = 'multi_window',
    max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
    min_pre_imgs_per_burst: int = 1,
    tqdm_enabled: bool = True,
    delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
    delta_window_days: int = 365,
) -> gpd.GeoDataFrame:
    """
    Enumerate DIST-S1 products from a stack of RTC-S1 metadata and a list of MGRS tiles.

    This function avoids repeated calls to the ASF DAAC API by working from a local stack of RTC-S1 metadata.

    This enumeration finds all the available post-image dates from a given stack of RTC-S1 inputs: every pass
    of every acquisition group in every requested tile is treated as a candidate post-image set.

    Parameters
    ----------
    df_rtc_ts : gpd.GeoDataFrame
        RTC-S1 data.
    mgrs_tile_ids : list[str]
        List of MGRS tiles to enumerate.
    lookback_strategy : str, optional
        Lookback strategy to use, by default 'multi_window'. Options are
        'immediate_lookback' or 'multi_window'.
    max_pre_imgs_per_burst : int | list[int] | tuple[int, ...], optional
        Number of pre-images per burst to include, by default (5, 5, 5).
        If lookback strategy is 'multi_window':
            - this is interpreted as the maximum number of pre-images on each anniversary date.
            - tuple/list of integers are provided, each int represents the maximum number of pre-images on each
              anniversary date, most recent last.
            - if a single integer is provided, this is interpreted as the maximum number of pre-images on 3
              anniversary dates.
        If the lookback strategy is 'immediate_lookback':
            - Expects a single integer, tuples/lists will throw an error.
            - This means the maximum pre-images prior to the post-date.
    min_pre_imgs_per_burst : int, optional
        Minimum number of pre-images per burst to include, by default 1. This is for *all* the pre-images.
    tqdm_enabled : bool, optional
        Whether to enable tqdm progress bars, by default True.
    delta_lookback_days : int | list[int] | tuple[int, ...], optional
        When to set the most recent pre-image date. Default is 365.
        If lookback strategy is 'multi_window', this means the maximum number of days to search for pre-images on each
        anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
        If lookback strategy is 'immediate_lookback', this must be set to 0.
    delta_window_days : int, optional
        The acceptable window of time to search for pre-image RTC-S1 data. Default is 365 days.
        This amounts to roughly `post_date - lookback_days - delta_window_days` to `post_date - lookback_days`.
        If lookback strategy is 'multi_window', this means the maximum window of time to search for pre-images on each
        anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....

    Returns
    -------
    gpd.GeoDataFrame
        DataFrame containing enumerated OPERA RTC-S1 input metadata including polarization, url, burst_id, etc.
        Rows are tagged with `input_category` ('pre' or 'post') and a `product_id`. An empty GeoDataFrame is
        returned when no product could be formed.

    Raises
    ------
    ValueError
        If `lookback_strategy` is neither 'multi_window' nor 'immediate_lookback'.
    """
    params = LookbackStrategyParams(
        lookback_strategy=lookback_strategy,
        max_pre_imgs_per_burst=max_pre_imgs_per_burst,
        delta_lookback_days=delta_lookback_days,
        min_pre_imgs_per_burst=min_pre_imgs_per_burst,
        delta_window_days=delta_window_days,
    )

    # Resolve the strategy once into a list of (lookback days, max pre-images per burst) windows.
    if lookback_strategy == 'immediate_lookback':
        # A single window; the caller-supplied maximum is used as-is.
        lookback_windows = [(params.delta_lookback_days, max_pre_imgs_per_burst)]
    elif lookback_strategy == 'multi_window':
        lookback_windows = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
    else:
        raise ValueError(
            f'Unsupported lookback_strategy: {lookback_strategy}. '
            'Expected "multi_window" or "immediate_lookback".'
        )
    delta_window_timedelta = pd.Timedelta(params.delta_window_days, unit='D')

    products = []
    product_id = 0
    for mgrs_tile_id in tqdm(mgrs_tile_ids, desc='Enumerate by MGRS tiles', disable=(not tqdm_enabled)):
        df_rtc_ts_tile = df_rtc_ts[df_rtc_ts.mgrs_tile_id == mgrs_tile_id].reset_index(drop=True)
        acq_group_ids_in_tile = df_rtc_ts_tile.acq_group_id_within_mgrs_tile.unique().tolist()
        # Groups are analogs to tracks (except grouped around the equator to ensure a single pass is grouped properly)
        for group_id in acq_group_ids_in_tile:
            df_rtc_ts_tile_track = df_rtc_ts_tile[
                df_rtc_ts_tile.acq_group_id_within_mgrs_tile == group_id
            ].reset_index(drop=True)
            # Latest pass is now the first to appear in the list of pass_ids
            pass_ids_unique = sorted(df_rtc_ts_tile_track.pass_id.unique().tolist(), reverse=True)
            # Now traverse over all the passes
            for pass_id in pass_ids_unique:
                # post
                df_rtc_post = df_rtc_ts_tile_track[df_rtc_ts_tile_track.pass_id == pass_id].reset_index(drop=True)
                df_rtc_post['input_category'] = 'post'
                post_date = df_rtc_post.acq_dt.min()
                df_post_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()

                # pre: gather the pre-images of every lookback window
                df_rtc_pre_list = []
                for delta_lookback_day, max_pre_img_per_burst_param in lookback_windows:
                    window_stop = post_date - pd.Timedelta(delta_lookback_day, unit='D')
                    window_start = window_stop - delta_window_timedelta
                    df_rtc_pre = _select_pre_images_in_window(
                        df_rtc_ts_tile_track,
                        df_post_keys,
                        window_start,
                        window_stop,
                        max_pre_img_per_burst_param,
                    )
                    if not df_rtc_pre.empty:
                        df_rtc_pre_list.append(df_rtc_pre)

                # Without any pre-image no burst can reach the minimum, so this pass yields no product.
                if not df_rtc_pre_list:
                    continue

                # product and provenance
                df_rtc_product = pd.concat([*df_rtc_pre_list, df_rtc_post]).reset_index(drop=True)
                df_rtc_product['product_id'] = product_id

                # Remove bursts that don't have minimum number of pre images
                pre_counts = df_rtc_product[df_rtc_product.input_category == 'pre'].groupby('jpl_burst_id').size()
                burst_ids_with_min_pre_images = pre_counts[pre_counts >= params.min_pre_imgs_per_burst].index.tolist()
                df_rtc_product = df_rtc_product[
                    df_rtc_product.jpl_burst_id.isin(burst_ids_with_min_pre_images)
                ].reset_index(drop=True)

                # finalize products; product ids are only consumed by products that are kept
                if not df_rtc_product.empty:
                    products.append(df_rtc_product)
                    product_id += 1
    if products:
        df_prods = pd.concat(products, axis=0).reset_index(drop=True)
        dist_s1_input_schema.validate(df_prods)
    else:
        df_prods = gpd.GeoDataFrame()

    df_prods = reorder_columns(df_prods, dist_s1_input_schema)
    # Sorting an empty frame that lacks the sort columns raises KeyError, so only sort when there are rows.
    if not df_prods.empty:
        df_prods = df_prods.sort_values(by=['product_id', 'acq_dt'], ascending=True).reset_index(drop=True)

    return df_prods
@@ -0,0 +1,138 @@
1
+ from datetime import datetime
2
+
3
+ import geopandas as gpd
4
+ import pandas as pd
5
+
6
+ from dist_s1_enumerator.asf import get_rtc_s1_ts_metadata_from_mgrs_tiles
7
+ from dist_s1_enumerator.dist_enum import enumerate_dist_s1_products
8
+ from dist_s1_enumerator.tabular_models import reorder_columns, rtc_s1_schema
9
+
10
+
11
def update_dist_s1_workflow_dict(data_dict: dict) -> dict:
    """Reduce one product record to the fields of a DIST-S1 workflow input.

    Parameters
    ----------
    data_dict : dict
        Record that must provide the keys 'mgrs_tile_id', 'acq_date_for_mgrs_pass' and 'track_number'.
        Any other keys are ignored.

    Returns
    -------
    dict
        New dictionary with the keys 'mgrs_tile_id', 'post_acq_date' (taken from 'acq_date_for_mgrs_pass')
        and 'track_number'.

    Raises
    ------
    KeyError
        If one of the required keys is missing from `data_dict`.
    """
    # (output key, source key) pairs, in the order they appear in the result.
    key_pairs = (
        ('mgrs_tile_id', 'mgrs_tile_id'),
        ('post_acq_date', 'acq_date_for_mgrs_pass'),
        ('track_number', 'track_number'),
    )
    return {output_key: data_dict[source_key] for output_key, source_key in key_pairs}
26
+
27
+
28
def _as_utc_timestamp(value: datetime | pd.Timestamp | str | None) -> pd.Timestamp | None:
    """Convert an optional acquisition bound to a timezone-aware timestamp (naive values are taken as UTC)."""
    if value is None:
        return None
    timestamp = pd.Timestamp(value, tz='UTC') if isinstance(value, str) else pd.Timestamp(value)
    if timestamp.tzinfo is None:
        # String bounds are interpreted as UTC; naive datetimes are treated the same way so that both kinds
        # compare against `acq_dt`. NOTE(review): assumes `acq_dt` is timezone-aware - confirm against rtc_s1_schema.
        timestamp = timestamp.tz_localize('UTC')
    return timestamp


def enumerate_dist_s1_workflow_inputs(
    mgrs_tile_ids: list[str] | str,
    track_numbers: list[int] | int | None = None,
    start_acq_dt: datetime | pd.Timestamp | str | None = None,
    stop_acq_dt: datetime | pd.Timestamp | str | None = None,
    lookback_strategy: str = 'multi_window',
    max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
    min_pre_imgs_per_burst: int = 1,
    delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
    delta_window_days: int = 365,
    df_ts: gpd.GeoDataFrame | None = None,
) -> list[dict]:
    """Enumerate the inputs for a DIST-S1 workflow.

    This function enumerates DIST-S1 workflow inputs from MGRS tiles and track numbers.
    It uses the ASF DAAC API to get the necessary RTC-S1 metadata (unless `df_ts` is supplied) and then
    enumerates DIST-S1 products to create formatted DIST-S1 workflow inputs.

    Parameters
    ----------
    mgrs_tile_ids : list[str] | str
        MGRS tile(s) for DIST-S1 products. Can be a single string or list of strings.
    track_numbers : list[int] | int | None
        Track number(s) for RTC-S1 passes. Can be a single integer or list of integers.
        Only used to query the time series when `df_ts` is None.
    start_acq_dt : datetime | pd.Timestamp | str | None, optional
        Start acquisition datetime; products whose post-image was acquired earlier are dropped. If string,
        should be in ISO format. Strings and naive datetimes are interpreted as UTC. If None, no start
        filtering is applied.
    stop_acq_dt : datetime | pd.Timestamp | str | None, optional
        Stop acquisition datetime; products whose post-image was acquired later are dropped. If string,
        should be in ISO format. Strings and naive datetimes are interpreted as UTC. If None, no stop
        filtering is applied.
    lookback_strategy : str, optional
        Lookback strategy to use, by default 'multi_window'. Options are
        'immediate_lookback' or 'multi_window'.
    max_pre_imgs_per_burst : int | list[int] | tuple[int, ...], optional
        Maximum number of pre-images per burst to include, by default (5, 5, 5).
        If lookback strategy is 'multi_window':
            - this is interpreted as the maximum number of pre-images on each anniversary date.
            - tuple/list of integers are provided, each int represents the maximum number of pre-images on each
              anniversary date, most recent last.
            - if a single integer is provided, this is interpreted as the maximum number of pre-images on 3
              anniversary dates.
        If the lookback strategy is 'immediate_lookback':
            - Expects a single integer, tuples/lists will throw an error.
            - This means the maximum pre-images prior to the post-date.
    min_pre_imgs_per_burst : int, optional
        Minimum number of pre-images per burst to include, by default 1. This is for *all* the pre-images.
    delta_lookback_days : int | list[int] | tuple[int, ...], optional
        When to set the most recent pre-image date. Default is 365.
        If lookback strategy is 'multi_window', this means the maximum number of days to search for pre-images on each
        anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
        If lookback strategy is 'immediate_lookback', this must be set to 0.
    delta_window_days : int, optional
        The acceptable window of time to search for pre-image RTC-S1 data. Default is 365 days.
        This amounts to roughly `post_date - lookback_days - delta_window_days` to `post_date - lookback_days`.
        If lookback strategy is 'multi_window', this means the maximum window of time to search for pre-images on each
        anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
    df_ts : gpd.GeoDataFrame | None, optional
        RTC-S1 time series data. If None, will be enumerated from MGRS tiles and track numbers.
        If provided, it is validated against the RTC-S1 schema.

    Returns
    -------
    list[dict]
        List of dictionaries containing formatted DIST-S1 workflow inputs, ordered by post-image acquisition
        time. Each dictionary contains:
        - mgrs_tile_id: MGRS tile identifier
        - post_acq_date: Post-image acquisition date
        - track_number: Track number for the RTC-S1 pass
        The list is empty when no product could be enumerated.
    """
    if isinstance(mgrs_tile_ids, str):
        mgrs_tile_ids = [mgrs_tile_ids]
    if isinstance(track_numbers, int):
        track_numbers = [track_numbers]
    start_acq_dt = _as_utc_timestamp(start_acq_dt)
    stop_acq_dt = _as_utc_timestamp(stop_acq_dt)

    if df_ts is None:
        # Note we have to get full time-series to enumerate products! not just start/stop times.
        df_ts = get_rtc_s1_ts_metadata_from_mgrs_tiles(
            mgrs_tile_ids,
            track_numbers,
        )
    else:
        rtc_s1_schema.validate(df_ts)
        df_ts = reorder_columns(df_ts, rtc_s1_schema)

    df_products = enumerate_dist_s1_products(
        df_ts,
        mgrs_tile_ids,
        lookback_strategy=lookback_strategy,
        max_pre_imgs_per_burst=max_pre_imgs_per_burst,
        min_pre_imgs_per_burst=min_pre_imgs_per_burst,
        delta_lookback_days=delta_lookback_days,
        delta_window_days=delta_window_days,
    )
    # Nothing enumerated: there are no post-images to report (and possibly no columns to index).
    if df_products.empty:
        return []

    # One representative post-image row per product carries the tile, track and pass date.
    df_post = df_products[df_products['input_category'] == 'post'].reset_index(drop=True)
    df_s1_workflow_inputs = df_post.groupby(['product_id']).first().reset_index(drop=True)
    df_s1_workflow_inputs = df_s1_workflow_inputs.sort_values(by='acq_dt', ascending=True).reset_index(drop=True)

    if start_acq_dt is not None:
        start_ind = df_s1_workflow_inputs.acq_dt >= start_acq_dt
        df_s1_workflow_inputs = df_s1_workflow_inputs[start_ind].reset_index(drop=True)
    if stop_acq_dt is not None:
        stop_ind = df_s1_workflow_inputs.acq_dt <= stop_acq_dt
        df_s1_workflow_inputs = df_s1_workflow_inputs[stop_ind].reset_index(drop=True)

    workflow_records = df_s1_workflow_inputs.to_dict('records')
    return [update_dist_s1_workflow_dict(record) for record in workflow_records]
@@ -0,0 +1,2 @@
1
class NoMGRSCoverage(Exception):
    """Error signalling that there is no MGRS coverage.

    A plain ``Exception`` subclass: it adds no attributes or behaviour of its own.
    """