dist-s1-enumerator 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
+ import warnings
+ from importlib.metadata import PackageNotFoundError, version
+
+ import asf_search
+
+ from dist_s1_enumerator.asf import (
+     agg_rtc_metadata_by_burst_id,
+     get_rtc_s1_metadata_from_acq_group,
+     get_rtc_s1_ts_metadata_from_mgrs_tiles,
+ )
+ from dist_s1_enumerator.dist_enum import enumerate_dist_s1_products, enumerate_one_dist_s1_product
+ from dist_s1_enumerator.dist_enum_inputs import enumerate_dist_s1_workflow_inputs
+ from dist_s1_enumerator.mgrs_burst_data import (
+     get_burst_ids_in_mgrs_tiles,
+     get_burst_table,
+     get_burst_table_from_mgrs_tiles,
+     get_lut_by_mgrs_tile_ids,
+     get_mgrs_burst_lut,
+     get_mgrs_burst_lut_path,
+     get_mgrs_table,
+     get_mgrs_tiles_overlapping_geometry,
+ )
+ from dist_s1_enumerator.rtc_s1_io import localize_rtc_s1_ts
+
+
+ try:
+     __version__ = version(__name__)
+ except PackageNotFoundError:
+     __version__ = None
+     warnings.warn(
+         'package is not installed!\n'
+         'Install in editable/develop mode via (from the top of this repo):\n'
+         '    python -m pip install -e .\n',
+         RuntimeWarning,
+     )
+ # Increase the CMR timeout to 2 minutes
+ asf_search.constants.INTERNAL.CMR_TIMEOUT = 120
+
+ __all__ = [
+     'agg_rtc_metadata_by_burst_id',
+     'enumerate_dist_s1_products',
+     'enumerate_dist_s1_workflow_inputs',
+     'enumerate_one_dist_s1_product',
+     'get_burst_ids_in_mgrs_tiles',
+     'get_burst_table_from_mgrs_tiles',
+     'get_burst_table',
+     'get_lut_by_mgrs_tile_ids',
+     'get_mgrs_burst_lut',
+     'get_mgrs_burst_lut_path',
+     'get_mgrs_table',
+     'get_mgrs_tiles_overlapping_geometry',
+     'get_rtc_s1_metadata_from_acq_group',
+     'get_rtc_s1_ts_metadata_from_mgrs_tiles',
+     'localize_rtc_s1_ts',
+ ]
@@ -0,0 +1,328 @@
+ from datetime import datetime
+ from warnings import warn
+
+ import asf_search as asf
+ import geopandas as gpd
+ import pandas as pd
+ from pandera.pandas import check_input
+ from rasterio.crs import CRS
+ from shapely.geometry import shape
+
+ from dist_s1_enumerator.mgrs_burst_data import get_burst_ids_in_mgrs_tiles, get_lut_by_mgrs_tile_ids
+ from dist_s1_enumerator.tabular_models import reorder_columns, rtc_s1_resp_schema, rtc_s1_schema
+
+
+ def convert_asf_url_to_cumulus(url: str) -> str:
+     asf_base = 'https://datapool.asf.alaska.edu/RTC/OPERA-S1/'
+     cumulus_base = 'https://cumulus.asf.earthdatacloud.nasa.gov/OPERA/OPERA_L2_RTC-S1/'
+
+     if not (url.startswith(cumulus_base) or url.startswith(asf_base)):
+         warn(f'URL {url} is not a valid ASF datapool or cumulus earthdatacloud URL.')
+         return url
+
+     if not url.startswith(asf_base):
+         return url
+
+     filename = url.split('/')[-1]
+     granule_pol_parts = filename.rsplit('_', 1)
+     if len(granule_pol_parts) != 2:
+         raise ValueError(f'Could not extract granule name from filename: {filename}')
+
+     granule_name = granule_pol_parts[0]
+     new_url = f'{cumulus_base}{granule_name}/{filename}'
+     return new_url
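+ # Illustrative behavior (hypothetical granule name, assumed only for the example):
+ # convert_asf_url_to_cumulus('https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_EXAMPLE_VV.tif')
+ # would return
+ # 'https://cumulus.asf.earthdatacloud.nasa.gov/OPERA/OPERA_L2_RTC-S1/OPERA_L2_RTC-S1_EXAMPLE/OPERA_L2_RTC-S1_EXAMPLE_VV.tif'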
+
+
+ def format_polarization(pol: list | str) -> str:
+     if isinstance(pol, list):
+         if ('VV' in pol) and len(pol) == 2:
+             return 'VV+VH'
+         elif ('HH' in pol) and len(pol) == 2:
+             return 'HH+HV'
+         else:
+             return '+'.join(pol)
+     elif isinstance(pol, str):
+         return pol
+     else:
+         raise TypeError(f'Invalid polarization: {pol}.')
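+ # For example: format_polarization(['VV', 'VH']) -> 'VV+VH'; format_polarization('HH+HV') -> 'HH+HV'.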
+
+
+ def extract_pass_id(acq_dt: pd.Timestamp) -> int:
+     reference_date = pd.Timestamp('2014-01-01', tz='UTC')
+     return int((acq_dt - reference_date).total_seconds() / 86400 / 6)
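+ # For example, pd.Timestamp('2014-01-13', tz='UTC') is 12 days past the reference date, so the
+ # function returns 12 // 6 = 2 (i.e., two full 6-day Sentinel-1 repeat cycles since 2014-01-01).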
+
+
+ def append_pass_data(df_rtc: gpd.GeoDataFrame, mgrs_tile_ids: list[str]) -> gpd.GeoDataFrame:
+     """Format the RTC-S1 metadata for easier lookups."""
+     # Extract the LUT acquisition info.
+     # Burst IDs will have multiple rows if they lie in multiple MGRS tiles and those tiles are specified.
+     rtc_columns = df_rtc.columns.tolist()
+     if not all([col in rtc_columns for col in ['jpl_burst_id', 'pass_id', 'acq_dt', 'track_number']]):
+         raise ValueError('Cannot append pass data without jpl_burst_id, pass_id, acq_dt, and track_number columns.')
+     df_lut = get_lut_by_mgrs_tile_ids(mgrs_tile_ids)
+     df_rtc = pd.merge(
+         df_rtc,
+         df_lut[['jpl_burst_id', 'mgrs_tile_id', 'acq_group_id_within_mgrs_tile']],
+         on='jpl_burst_id',
+         how='inner',
+     )
+     # Creates a date string 'YYYY-MM-DD' for the earliest acquisition date of a pass over the MGRS tile
+     df_rtc['acq_date_for_mgrs_pass'] = (
+         df_rtc.groupby(['mgrs_tile_id', 'acq_group_id_within_mgrs_tile', 'pass_id'])['acq_dt']
+         .transform('min')
+         .dt.floor('D')
+         .dt.strftime('%Y-%m-%d')
+     )
+
+     # Creates a track_token that joins the unique track numbers with '_' within a pass of the MGRS tile
+     def get_track_token(track_numbers: pd.Series) -> str:
+         unique_track_numbers = track_numbers.unique().tolist()
+         return '_'.join(map(str, sorted(unique_track_numbers)))
+
+     df_rtc['track_token'] = df_rtc.groupby(['mgrs_tile_id', 'acq_group_id_within_mgrs_tile'])['track_number'].transform(
+         get_track_token
+     )
+
+     df_rtc = df_rtc.sort_values(by=['jpl_burst_id', 'acq_dt']).reset_index(drop=True)
+
+     return df_rtc
+
+
+ def get_rtc_s1_ts_metadata_by_burst_ids(
+     burst_ids: str | list[str],
+     start_acq_dt: str | datetime | pd.Timestamp | None = None,
+     stop_acq_dt: str | datetime | pd.Timestamp | None = None,
+     polarizations: str | None = None,
+     include_single_polarization: bool = False,
+ ) -> gpd.GeoDataFrame:
+     """Wrap and format the ASF search API for RTC-S1 metadata searches. All searches go through this function.
+
+     Requires the search results to be dual-polarized data of the same type (if no polarization is specified,
+     all available dual-polarized results are returned).
+
+     If the dual-polarized data is mixed (that is, there are both HH+HV and VV+VH products), an error is raised.
+     """
+     if isinstance(burst_ids, str):
+         burst_ids = [burst_ids]
+
+     if (polarizations is not None) and (polarizations not in ['HH+HV', 'VV+VH']):
+         raise ValueError(f'Invalid polarization: {polarizations}. Must be one of: HH+HV, VV+VH, None.')
+
+     # Convert all date inputs to datetime objects using pandas for flexibility
+     start_acq_dt_obj = None
+     stop_acq_dt_obj = None
+
+     if start_acq_dt is not None:
+         start_acq_dt_obj = pd.to_datetime(start_acq_dt, utc=True).to_pydatetime()
+
+     if stop_acq_dt is not None:
+         stop_acq_dt_obj = pd.to_datetime(stop_acq_dt, utc=True).to_pydatetime()
+
+     # Make sure JPL syntax is transformed to ASF syntax
+     burst_ids = [burst_id.upper().replace('-', '_') for burst_id in burst_ids]
+     resp = asf.geo_search(
+         operaBurstID=burst_ids,
+         processingLevel='RTC',
+         start=start_acq_dt_obj,
+         end=stop_acq_dt_obj,
+     )
+     if not resp:
+         warn('No results - please check burst id and availability.', category=UserWarning)
+         return gpd.GeoDataFrame(columns=rtc_s1_resp_schema.columns.keys())
+
+     properties = [r.properties for r in resp]
+     geometry = [shape(r.geojson()['geometry']) for r in resp]
+     properties_f = [
+         {
+             'opera_id': p['sceneName'],
+             'acq_dt': pd.to_datetime(p['startTime']),
+             'track_number': p['pathNumber'],
+             'polarizations': p['polarization'],
+             'all_urls': [p['url']] + p['additionalUrls'],
+         }
+         for p in properties
+     ]
+
+     df_rtc = gpd.GeoDataFrame(properties_f, geometry=geometry, crs=CRS.from_epsg(4326))
+     # Extract the burst_id from the opera_id
+     df_rtc['jpl_burst_id'] = df_rtc['opera_id'].map(lambda bid: bid.split('_')[3])
+
+     # pass_id is the integer number of 6-day periods since 2014-01-01
+     df_rtc['pass_id'] = df_rtc.acq_dt.map(extract_pass_id)
+
+     # Remove duplicates from the time series
+     df_rtc['dedup_id'] = df_rtc.opera_id.map(lambda id_: '_'.join(id_.split('_')[:5]))
+     df_rtc = df_rtc.drop_duplicates(subset=['dedup_id']).reset_index(drop=True)
+     df_rtc = df_rtc.drop(columns=['dedup_id'])
+
+     # polarizations - ensure dual polarization
+     # ASF metadata can be ['HH', 'HV'] or 'HH+HV' - reformat to the latter
+     df_rtc['polarizations'] = df_rtc['polarizations'].map(format_polarization)
+     if polarizations is not None:
+         ind_pol = df_rtc['polarizations'] == polarizations
+     elif not include_single_polarization:
+         ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH'])
+     else:
+         ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH', 'HH', 'HV', 'VV', 'VH'])
+     if not ind_pol.any():
+         warn(f'No valid dual polarization images found for {burst_ids}.')
+     # First get all the dual-polarization images
+     df_rtc = df_rtc[ind_pol].reset_index(drop=True)
+
+     def get_url_by_polarization(prod_urls: list[str], polarization_token: str) -> str:
+         if polarization_token == 'copol':
+             polarizations_allowed = ['VV', 'HH']
+         elif polarization_token == 'crosspol':
+             polarizations_allowed = ['HV', 'VH']
+         else:
+             raise ValueError(f'Invalid polarization token: {polarization_token}. Must be one of: copol, crosspol.')
+         possible_urls = [url for pol in polarizations_allowed for url in prod_urls if f'_{pol}.tif' == url[-7:]]
+         if len(possible_urls) == 0:
+             raise ValueError(f'No {polarizations_allowed} urls found')
+         if len(possible_urls) > 1:
+             raise ValueError(f'Multiple {polarization_token} urls found: {", ".join(possible_urls)}')
+         return possible_urls[0]
+
+     url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'copol'))
+     url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'crosspol'))
+
+     df_rtc['url_copol'] = url_copol
+     df_rtc['url_crosspol'] = url_crosspol
+     df_rtc['url_copol'] = df_rtc['url_copol'].map(convert_asf_url_to_cumulus)
+     df_rtc['url_crosspol'] = df_rtc['url_crosspol'].map(convert_asf_url_to_cumulus)
+     df_rtc = df_rtc.drop(columns=['all_urls'])
+
+     # Ensure the data is sorted by jpl_burst_id and acq_dt
+     df_rtc = df_rtc.sort_values(by=['jpl_burst_id', 'acq_dt'], ascending=True).reset_index(drop=True)
+
+     rtc_s1_resp_schema.validate(df_rtc)
+     df_rtc = reorder_columns(df_rtc, rtc_s1_resp_schema)
+
+     return df_rtc
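+ # Illustrative usage (hypothetical burst ID and dates; requires network access to the ASF search API):
+ # df = get_rtc_s1_ts_metadata_by_burst_ids(
+ #     'T064-135518-IW1', start_acq_dt='2024-01-01', stop_acq_dt='2024-06-01', polarizations='VV+VH'
+ # )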
+
+
+ def get_rtc_s1_metadata_from_acq_group(
+     mgrs_tile_ids: list[str],
+     track_numbers: list[int],
+     n_images_per_burst: int = 1,
+     start_acq_dt: datetime | str | None = None,
+     stop_acq_dt: datetime | str | None = None,
+     max_variation_seconds: float | None = None,
+     polarizations: str | None = None,
+ ) -> gpd.GeoDataFrame:
+     """
+     Acquire a pre-image or post-image set from MGRS tiles for a given S1 pass.
+
+     Obtains the most recent burst image set within a date range.
+
+     For acquiring a post-image set, provide the keyword argument max_variation_seconds to ensure that all
+     selected acquisitions fall within that many seconds of the most recent burst acquisition time. If this is
+     not provided, you will get the latest product for each burst within the allowable date range, which could
+     yield imagery collected on different dates for the burst_ids provided.
+
+     For acquiring a pre-image set, use n_images_per_burst > 1. We get the latest n_images_per_burst images for
+     each burst; the bursts supplied can have different numbers of images, and/or the image time series can be
+     composed of images from different dates.
+
+     Note that equator edge cases are handled in the LUT of the MGRS tiles/burst_ids, so only one valid track
+     number per pass needs to be provided.
+
+     Parameters
+     ----------
+     mgrs_tile_ids : list[str]
+     track_numbers : list[int]
+     start_acq_dt : datetime | str, optional
+     stop_acq_dt : datetime | str, optional
+     max_variation_seconds : float, optional
+     n_images_per_burst : int, optional
+     polarizations : str, optional
+
+     Returns
+     -------
+     gpd.GeoDataFrame
+     """
+     if len(track_numbers) > 2:
+         raise ValueError('Cannot handle more than 2 track numbers.')
+     if (len(track_numbers) == 2) and (abs(track_numbers[0] - track_numbers[1]) > 1):
+         raise ValueError('Two track numbers that are not consecutive were provided.')
+     burst_ids = get_burst_ids_in_mgrs_tiles(mgrs_tile_ids, track_numbers=track_numbers)
+     if not burst_ids:
+         mgrs_tiles_str = ','.join(mgrs_tile_ids)
+         track_numbers_str = ','.join(map(str, track_numbers))
+         raise ValueError(
+             f'No burst ids found for the provided MGRS tiles {mgrs_tiles_str} and track numbers {track_numbers_str}.'
+         )
+
+     if (n_images_per_burst == 1) and (max_variation_seconds is None):
+         warn(
+             'No maximum variation in acquisition datetimes was provided although n_images_per_burst is 1. '
+             'This could yield imagery collected on different dates for the burst_ids provided.',
+             category=UserWarning,
+         )
+     df_rtc = get_rtc_s1_ts_metadata_by_burst_ids(
+         burst_ids,
+         start_acq_dt=start_acq_dt,
+         stop_acq_dt=stop_acq_dt,
+         polarizations=polarizations,
+     )
+     # Assumes that each group is ordered by date (earliest first and most recent last)
+     columns = df_rtc.columns
+     df_rtc = df_rtc.groupby('jpl_burst_id').tail(n_images_per_burst).reset_index(drop=False)
+     df_rtc = df_rtc[columns]
+     if max_variation_seconds is not None:
+         if (n_images_per_burst is None) or (n_images_per_burst > 1):
+             raise ValueError('Cannot apply maximum variation in acquisition datetimes when n_images_per_burst > 1 or None.')
+         max_dt = df_rtc['acq_dt'].max()
+         ind = df_rtc['acq_dt'] > max_dt - pd.Timedelta(seconds=max_variation_seconds)
+         df_rtc = df_rtc[ind].reset_index(drop=True)
+
+     if not df_rtc.empty:
+         df_rtc = append_pass_data(df_rtc, mgrs_tile_ids)
+         rtc_s1_schema.validate(df_rtc)
+         df_rtc = reorder_columns(df_rtc, rtc_s1_schema)
+
+     return df_rtc
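+ # Illustrative usage (hypothetical tile and track values): fetch a post-image set in which all selected
+ # acquisitions fall within 5 minutes of the most recent burst acquisition:
+ # df_post = get_rtc_s1_metadata_from_acq_group(
+ #     ['11SLT'], track_numbers=[71], n_images_per_burst=1, max_variation_seconds=300
+ # )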
+
+
+ def get_rtc_s1_ts_metadata_from_mgrs_tiles(
+     mgrs_tile_ids: list[str],
+     track_numbers: list[int] | None = None,
+     start_acq_dt: str | datetime | None = None,
+     stop_acq_dt: str | datetime | None = None,
+     polarizations: str | None = None,
+ ) -> gpd.GeoDataFrame:
+     """Get the RTC-S1 time series for the given MGRS tiles and track numbers."""
+     if isinstance(start_acq_dt, str):
+         start_acq_dt = datetime.strptime(start_acq_dt, '%Y-%m-%d')
+     if isinstance(stop_acq_dt, str):
+         stop_acq_dt = datetime.strptime(stop_acq_dt, '%Y-%m-%d')
+
+     burst_ids = get_burst_ids_in_mgrs_tiles(mgrs_tile_ids, track_numbers=track_numbers)
+     df_rtc_ts = get_rtc_s1_ts_metadata_by_burst_ids(
+         burst_ids, start_acq_dt=start_acq_dt, stop_acq_dt=stop_acq_dt, polarizations=polarizations
+     )
+     if df_rtc_ts.empty:
+         mgrs_tiles_str = ','.join(mgrs_tile_ids)
+         msg = f'No RTC-S1 metadata found for MGRS tiles {mgrs_tiles_str}.'
+         if track_numbers is not None:
+             track_number_token = '_'.join(map(str, track_numbers))
+             msg += f' Track numbers provided: {track_number_token}.'
+         warn(msg)
+         return gpd.GeoDataFrame(columns=rtc_s1_schema.columns.keys())
+
+     df_rtc_ts = append_pass_data(df_rtc_ts, mgrs_tile_ids)
+     rtc_s1_schema.validate(df_rtc_ts)
+     df_rtc_ts = reorder_columns(df_rtc_ts, rtc_s1_schema)
+
+     return df_rtc_ts
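+ # Illustrative usage (hypothetical tile and track values; requires network access):
+ # df_ts = get_rtc_s1_ts_metadata_from_mgrs_tiles(['11SLT'], track_numbers=[71], start_acq_dt='2023-01-01')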
+
+
+ @check_input(rtc_s1_schema, 0)
+ def agg_rtc_metadata_by_burst_id(df_rtc_ts: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
+     df_agg = (
+         df_rtc_ts.groupby('jpl_burst_id')
+         .agg(count=('jpl_burst_id', 'size'), earliest_acq_date=('acq_dt', 'min'), latest_acq_date=('acq_dt', 'max'))
+         .reset_index()
+     )
+
+     return df_agg
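+ # The aggregated frame has one row per jpl_burst_id with the columns count, earliest_acq_date, and latest_acq_date.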
@@ -0,0 +1,50 @@
+ # CONSTANTS FOR REFERENCE
+ MAX_BURSTS_IN_MGRS_TILE = 450
+ MAX_MGRS_TILES_INTERSECTING_BURST = 8
+
+
+ # Tiles that are in DIST-HLS but not in DIST-S1
+ # due to coverage
+ BLACKLISTED_MGRS_TILE_IDS = [
+     '02RQN',
+     '02RRN',
+     '05LNJ',
+     '15DWD',
+     '15DXD',
+     '17NQE',
+     '17PPR',
+     '23EMN',
+     '23EMP',
+     '23ENN',
+     '24KTU',
+     '30NWK',
+     '36JZT',
+     '37GCH',
+     '37GCJ',
+     '37GDH',
+     '37GDJ',
+     '37KES',
+     '47NRJ',
+     '49NHJ',
+     '49PEK',
+     '49PFK',
+     '49PGK',
+     '49PGL',
+     '49PHK',
+     '49PHL',
+     '50KPF',
+     '50KQF',
+     '50MNB',
+     '50PKQ',
+     '50PKR',
+     '50PLQ',
+     '50PLR',
+     '50PRQ',
+     '52MCU',
+     '52MCV',
+     '53QPC',
+     '54RXR',
+     '55KEB',
+     '55KFA',
+     '57XVF',
+ ]
Binary file