dist-s1-enumerator 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dist_s1_enumerator/asf.py +41 -18
- dist_s1_enumerator/dist_enum.py +33 -13
- {dist_s1_enumerator-1.0.3.dist-info → dist_s1_enumerator-1.0.5.dist-info}/METADATA +1 -1
- {dist_s1_enumerator-1.0.3.dist-info → dist_s1_enumerator-1.0.5.dist-info}/RECORD +7 -7
- {dist_s1_enumerator-1.0.3.dist-info → dist_s1_enumerator-1.0.5.dist-info}/WHEEL +0 -0
- {dist_s1_enumerator-1.0.3.dist-info → dist_s1_enumerator-1.0.5.dist-info}/licenses/LICENSE +0 -0
- {dist_s1_enumerator-1.0.3.dist-info → dist_s1_enumerator-1.0.5.dist-info}/top_level.txt +0 -0
dist_s1_enumerator/asf.py
CHANGED
|
@@ -12,6 +12,27 @@ from dist_s1_enumerator.mgrs_burst_data import get_burst_ids_in_mgrs_tiles, get_
|
|
|
12
12
|
from dist_s1_enumerator.tabular_models import reorder_columns, rtc_s1_resp_schema, rtc_s1_schema
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
def convert_asf_url_to_cumulus(url: str) -> str:
|
|
16
|
+
asf_base = 'https://datapool.asf.alaska.edu/RTC/OPERA-S1/'
|
|
17
|
+
cumulus_base = 'https://cumulus.asf.earthdatacloud.nasa.gov/OPERA/OPERA_L2_RTC-S1/'
|
|
18
|
+
|
|
19
|
+
if not (url.startswith(cumulus_base) or url.startswith(asf_base)):
|
|
20
|
+
warn(f'URL {url} is not a valid ASF datapool or cumulus earthdatacloud URL.')
|
|
21
|
+
return url
|
|
22
|
+
|
|
23
|
+
if not url.startswith(asf_base):
|
|
24
|
+
return url
|
|
25
|
+
|
|
26
|
+
filename = url.split('/')[-1]
|
|
27
|
+
granule_pol_parts = filename.rsplit('_', 1)
|
|
28
|
+
if len(granule_pol_parts) != 2:
|
|
29
|
+
raise ValueError(f'Could not extract granule name from filename: {filename}')
|
|
30
|
+
|
|
31
|
+
granule_name = granule_pol_parts[0]
|
|
32
|
+
new_url = f'{cumulus_base}{granule_name}/{filename}'
|
|
33
|
+
return new_url
|
|
34
|
+
|
|
35
|
+
|
|
15
36
|
def format_polarization(pol: list | str) -> str:
|
|
16
37
|
if isinstance(pol, list):
|
|
17
38
|
if ('VV' in pol) and len(pol) == 2:
|
|
@@ -72,6 +93,7 @@ def get_rtc_s1_ts_metadata_by_burst_ids(
|
|
|
72
93
|
start_acq_dt: str | datetime | None | pd.Timestamp = None,
|
|
73
94
|
stop_acq_dt: str | datetime | None | pd.Timestamp = None,
|
|
74
95
|
polarizations: str | None = None,
|
|
96
|
+
include_single_polarization: bool = False,
|
|
75
97
|
) -> gpd.GeoDataFrame:
|
|
76
98
|
"""Wrap/format the ASF search API for RTC-S1 metadata search. All searches go through this function.
|
|
77
99
|
|
|
@@ -138,37 +160,36 @@ def get_rtc_s1_ts_metadata_by_burst_ids(
|
|
|
138
160
|
df_rtc['polarizations'] = df_rtc['polarizations'].map(format_polarization)
|
|
139
161
|
if polarizations is not None:
|
|
140
162
|
ind_pol = df_rtc['polarizations'] == polarizations
|
|
141
|
-
|
|
163
|
+
elif not include_single_polarization:
|
|
142
164
|
ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH'])
|
|
165
|
+
else:
|
|
166
|
+
ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH', 'HH', 'HV', 'VV', 'VH'])
|
|
143
167
|
if not ind_pol.any():
|
|
144
|
-
|
|
168
|
+
warn(f'No valid dual polarization images found for {burst_ids}.')
|
|
145
169
|
# First get all the dual-polarizations images
|
|
146
170
|
df_rtc = df_rtc[ind_pol].reset_index(drop=True)
|
|
147
|
-
# Then check all the dual-polarizations are the same (either HH+HV or VV+VH)
|
|
148
|
-
# TODO: if there are mixtures, can DIST-S1 still be generated assuming they look the same?
|
|
149
|
-
polarizations_unique = df_rtc['polarizations'].unique().tolist()
|
|
150
|
-
if len(polarizations_unique) > 1:
|
|
151
|
-
raise ValueError(
|
|
152
|
-
f'Mixed dual polarizations found for {burst_ids}. That is, some images are HH+HV and others are VV+HV.'
|
|
153
|
-
)
|
|
154
|
-
else:
|
|
155
|
-
# Either HH+HV or VV+VH
|
|
156
|
-
copol, crosspol = polarizations_unique[0].split('+')
|
|
157
171
|
|
|
158
172
|
def get_url_by_polarization(prod_urls: list[str], polarization_token: str) -> list[str]:
|
|
159
|
-
|
|
173
|
+
if polarization_token == 'copol':
|
|
174
|
+
polarizations_allowed = ['VV', 'HH']
|
|
175
|
+
elif polarization_token == 'crosspol':
|
|
176
|
+
polarizations_allowed = ['HV', 'VH']
|
|
177
|
+
else:
|
|
178
|
+
raise ValueError(f'Invalid polarization token: {polarization_token}. Must be one of: copol, crosspol.')
|
|
179
|
+
possible_urls = [url for pol in polarizations_allowed for url in prod_urls if f'_{pol}.tif' == url[-7:]]
|
|
160
180
|
if len(possible_urls) == 0:
|
|
161
|
-
raise ValueError(f'No {
|
|
181
|
+
raise ValueError(f'No {polarizations_allowed} urls found')
|
|
162
182
|
if len(possible_urls) > 1:
|
|
163
|
-
|
|
164
|
-
raise ValueError(f'Multiple {polarization_token} urls found')
|
|
183
|
+
raise ValueError(f'Multiple {polarization_token} urls found: {", ".join(possible_urls)}')
|
|
165
184
|
return possible_urls[0]
|
|
166
185
|
|
|
167
|
-
url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, copol))
|
|
168
|
-
url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, crosspol))
|
|
186
|
+
url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'copol'))
|
|
187
|
+
url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'crosspol'))
|
|
169
188
|
|
|
170
189
|
df_rtc['url_copol'] = url_copol
|
|
171
190
|
df_rtc['url_crosspol'] = url_crosspol
|
|
191
|
+
df_rtc['url_copol'] = df_rtc['url_copol'].map(convert_asf_url_to_cumulus)
|
|
192
|
+
df_rtc['url_crosspol'] = df_rtc['url_crosspol'].map(convert_asf_url_to_cumulus)
|
|
172
193
|
df_rtc = df_rtc.drop(columns=['all_urls'])
|
|
173
194
|
|
|
174
195
|
# Ensure the data is sorted by jpl_burst_id and acq_dt
|
|
@@ -187,6 +208,7 @@ def get_rtc_s1_metadata_from_acq_group(
|
|
|
187
208
|
start_acq_dt: datetime | str | None = None,
|
|
188
209
|
stop_acq_dt: datetime | str | None = None,
|
|
189
210
|
max_variation_seconds: float | None = None,
|
|
211
|
+
polarizations: str | None = None,
|
|
190
212
|
) -> gpd.GeoDataFrame:
|
|
191
213
|
"""
|
|
192
214
|
Meant for acquiring a pre-image or post-image set from MGRS tiles for a given S1 pass.
|
|
@@ -241,6 +263,7 @@ def get_rtc_s1_metadata_from_acq_group(
|
|
|
241
263
|
burst_ids,
|
|
242
264
|
start_acq_dt=start_acq_dt,
|
|
243
265
|
stop_acq_dt=stop_acq_dt,
|
|
266
|
+
polarizations=polarizations,
|
|
244
267
|
)
|
|
245
268
|
# Assumes that each group is ordered by date (earliest first and most recent last)
|
|
246
269
|
columns = df_rtc.columns
|
dist_s1_enumerator/dist_enum.py
CHANGED
|
@@ -117,7 +117,6 @@ def enumerate_one_dist_s1_product(
|
|
|
117
117
|
max_variation_seconds=300,
|
|
118
118
|
n_images_per_burst=1,
|
|
119
119
|
)
|
|
120
|
-
|
|
121
120
|
if df_rtc_post.empty:
|
|
122
121
|
raise ValueError(f'No RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id}.')
|
|
123
122
|
|
|
@@ -137,6 +136,11 @@ def enumerate_one_dist_s1_product(
|
|
|
137
136
|
stop_acq_dt=stop_acq_dt,
|
|
138
137
|
n_images_per_burst=max_pre_imgs_per_burst,
|
|
139
138
|
)
|
|
139
|
+
df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
|
|
140
|
+
|
|
141
|
+
df_rtc_pre = pd.merge(df_rtc_pre, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner')
|
|
142
|
+
|
|
143
|
+
df_rtc_pre['input_category'] = 'pre'
|
|
140
144
|
|
|
141
145
|
elif lookback_strategy == 'multi_window':
|
|
142
146
|
df_rtc_pre_list = []
|
|
@@ -155,16 +159,22 @@ def enumerate_one_dist_s1_product(
|
|
|
155
159
|
latest_lookback = delta_lookback_day
|
|
156
160
|
start_acq_dt = post_date_min - timedelta(days=latest_lookback)
|
|
157
161
|
stop_acq_dt = post_date_min - timedelta(days=earliest_lookback)
|
|
158
|
-
|
|
162
|
+
df_rtc_pre_window = get_rtc_s1_metadata_from_acq_group(
|
|
159
163
|
[mgrs_tile_id],
|
|
160
164
|
track_numbers=track_numbers,
|
|
161
165
|
start_acq_dt=start_acq_dt,
|
|
162
166
|
stop_acq_dt=stop_acq_dt,
|
|
163
167
|
n_images_per_burst=max_pre_img_per_burst,
|
|
168
|
+
polarizations=None,
|
|
164
169
|
)
|
|
170
|
+
df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
|
|
165
171
|
|
|
166
|
-
|
|
167
|
-
|
|
172
|
+
df_rtc_pre_window = pd.merge(
|
|
173
|
+
df_rtc_pre_window, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner'
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
if not df_rtc_pre_window.empty:
|
|
177
|
+
df_rtc_pre_list.append(df_rtc_pre_window)
|
|
168
178
|
|
|
169
179
|
df_rtc_pre = pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()
|
|
170
180
|
|
|
@@ -179,7 +189,7 @@ def enumerate_one_dist_s1_product(
|
|
|
179
189
|
df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(burst_ids_with_min_pre_images)].reset_index(drop=True)
|
|
180
190
|
|
|
181
191
|
post_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
|
|
182
|
-
pre_burst_ids =
|
|
192
|
+
pre_burst_ids = df_rtc_pre.jpl_burst_id.unique().tolist()
|
|
183
193
|
|
|
184
194
|
final_burst_ids = list(set(post_burst_ids) & set(pre_burst_ids))
|
|
185
195
|
df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)
|
|
@@ -308,10 +318,15 @@ def enumerate_dist_s1_products(
|
|
|
308
318
|
ind_time = (df_rtc_ts_tile_track.acq_dt < window_stop) & (
|
|
309
319
|
df_rtc_ts_tile_track.acq_dt >= window_start
|
|
310
320
|
)
|
|
321
|
+
df_rtc_ts_tile_track_filtered = df_rtc_ts_tile_track[ind_time].reset_index(drop=True)
|
|
311
322
|
# Select images that are present in the post-image
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
323
|
+
df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
|
|
324
|
+
df_rtc_pre = pd.merge(
|
|
325
|
+
df_rtc_ts_tile_track_filtered,
|
|
326
|
+
df_unique_keys,
|
|
327
|
+
on=['jpl_burst_id', 'polarizations'],
|
|
328
|
+
how='inner',
|
|
329
|
+
)
|
|
315
330
|
df_rtc_pre['input_category'] = 'pre'
|
|
316
331
|
|
|
317
332
|
# It is unclear how merging when multiple MGRS tiles are provided will impact order so this
|
|
@@ -343,10 +358,15 @@ def enumerate_dist_s1_products(
|
|
|
343
358
|
ind_time = (df_rtc_ts_tile_track.acq_dt < window_stop) & (
|
|
344
359
|
df_rtc_ts_tile_track.acq_dt >= window_start
|
|
345
360
|
)
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
df_rtc_pre =
|
|
361
|
+
df_rtc_ts_tile_track_filtered = df_rtc_ts_tile_track[ind_time].reset_index(drop=True)
|
|
362
|
+
|
|
363
|
+
df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
|
|
364
|
+
df_rtc_pre = pd.merge(
|
|
365
|
+
df_rtc_ts_tile_track_filtered,
|
|
366
|
+
df_unique_keys,
|
|
367
|
+
on=['jpl_burst_id', 'polarizations'],
|
|
368
|
+
how='inner',
|
|
369
|
+
)
|
|
350
370
|
df_rtc_pre['input_category'] = 'pre'
|
|
351
371
|
|
|
352
372
|
# It is unclear how merging when multiple MGRS tiles are provided will impact order so this
|
|
@@ -361,7 +381,7 @@ def enumerate_dist_s1_products(
|
|
|
361
381
|
continue
|
|
362
382
|
|
|
363
383
|
if not df_rtc_pre.empty:
|
|
364
|
-
df_rtc_pre_list.append(df_rtc_pre)
|
|
384
|
+
df_rtc_pre_list.append(df_rtc_pre)
|
|
365
385
|
|
|
366
386
|
# Concatenate all df_rtc_pre into a single DataFrame
|
|
367
387
|
df_rtc_pre_final = (
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dist-s1-enumerator
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: Enumeration and ops library for the OPERA DIST-S1 project
|
|
5
5
|
Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
dist_s1_enumerator/__init__.py,sha256=L89uOLGobmF-ZsBA767RiGpKCDKVx6KOK6wJkjGQ69M,1766
|
|
2
|
-
dist_s1_enumerator/asf.py,sha256=
|
|
3
|
-
dist_s1_enumerator/dist_enum.py,sha256=
|
|
2
|
+
dist_s1_enumerator/asf.py,sha256=m0LHIBM6OSeoNi2Htin5oeeyGjsWecFgyKeqUXNcbDo,13850
|
|
3
|
+
dist_s1_enumerator/dist_enum.py,sha256=VJxoCZenrwwmLvOZZ-Roq-pc6jfGrlX9AnlD_oiWuJg,21188
|
|
4
4
|
dist_s1_enumerator/dist_enum_inputs.py,sha256=KxGZNQYEsN2KNPcrHnh8Zi5e84dBdbtyeVV-aA8XI5o,6732
|
|
5
5
|
dist_s1_enumerator/exceptions.py,sha256=JhT8fIEmW3O2OvUQADkEJkL8ZrUN5pkKNzCCSt33goQ,82
|
|
6
6
|
dist_s1_enumerator/mgrs_burst_data.py,sha256=jifDFf1NUb-_4i9vYpi3rCUzM_qJCLbXkS-fu42iwA8,7538
|
|
@@ -11,8 +11,8 @@ dist_s1_enumerator/tabular_models.py,sha256=OjsTg6fN9Mq-LHVKuz9klFD3DsG0WkfPmfDf
|
|
|
11
11
|
dist_s1_enumerator/data/jpl_burst_geo.parquet,sha256=maST6onCUlYVaQozf-zl47VMTQ7nflLros8kLQG8ZDE,24736554
|
|
12
12
|
dist_s1_enumerator/data/mgrs.parquet,sha256=P2jY4l2dztz_wdzZATBwgooa5mIZSC8TgJbHUjR5m0c,601482
|
|
13
13
|
dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet,sha256=RjrgwRKn2Ac2q4_8mk9DpkX5FXPYPBReiNbqT0iFp5A,3364657
|
|
14
|
-
dist_s1_enumerator-1.0.3.dist-info/licenses/LICENSE,sha256=qsoT0jnoSQSgSzA-sywESwmVxC3XcugfW-3vctvz2aM,11346
|
|
15
|
-
dist_s1_enumerator-1.0.
|
|
16
|
-
dist_s1_enumerator-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
-
dist_s1_enumerator-1.0.3.dist-info/top_level.txt,sha256=5-RGu6oxsKKyhybZZSuUImALhcQT8ZOAnVv2MmrESDE,19
|
|
18
|
-
dist_s1_enumerator-1.0.3.dist-info/RECORD,,
|
|
14
|
+
dist_s1_enumerator-1.0.5.dist-info/licenses/LICENSE,sha256=qsoT0jnoSQSgSzA-sywESwmVxC3XcugfW-3vctvz2aM,11346
|
|
15
|
+
dist_s1_enumerator-1.0.5.dist-info/METADATA,sha256=DQRz-rCezmqUrzstZbm86W8fySTjf6trDa5v0RWutmI,8794
|
|
16
|
+
dist_s1_enumerator-1.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
dist_s1_enumerator-1.0.5.dist-info/top_level.txt,sha256=5-RGu6oxsKKyhybZZSuUImALhcQT8ZOAnVv2MmrESDE,19
|
|
18
|
+
dist_s1_enumerator-1.0.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|