dist-s1-enumerator 1.0.3__tar.gz → 1.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/CHANGELOG.md +17 -0
  2. {dist_s1_enumerator-1.0.3/src/dist_s1_enumerator.egg-info → dist_s1_enumerator-1.0.5}/PKG-INFO +1 -1
  3. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/asf.py +41 -18
  4. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/dist_enum.py +33 -13
  5. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5/src/dist_s1_enumerator.egg-info}/PKG-INFO +1 -1
  6. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_asf.py +22 -1
  7. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_dist_enum.py +111 -0
  8. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/dependabot.yml +0 -0
  9. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/bump-tag-for-release.yml +0 -0
  10. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/changelog-updated.yml +0 -0
  11. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/github-release.yml +0 -0
  12. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/labeled-pr.yml +0 -0
  13. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/nb_tests.yml +0 -0
  14. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/pypi-release.yml +0 -0
  15. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/static_analysis.yml +0 -0
  16. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.github/workflows/tests.yml +0 -0
  17. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/.gitignore +0 -0
  18. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/LICENSE +0 -0
  19. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/README.md +0 -0
  20. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/environment.yml +0 -0
  21. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/notebooks/A__Staging_Inputs_for_One_MGRS_Tile.ipynb +0 -0
  22. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/notebooks/B__Enumerate_MGRS_tile.ipynb +0 -0
  23. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/pyproject.toml +0 -0
  24. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/setup.cfg +0 -0
  25. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/__init__.py +0 -0
  26. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/data/jpl_burst_geo.parquet +0 -0
  27. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/data/mgrs.parquet +0 -0
  28. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet +0 -0
  29. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/dist_enum_inputs.py +0 -0
  30. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/exceptions.py +0 -0
  31. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/mgrs_burst_data.py +0 -0
  32. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/param_models.py +0 -0
  33. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/py.typed +0 -0
  34. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/rtc_s1_io.py +0 -0
  35. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator/tabular_models.py +0 -0
  36. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator.egg-info/SOURCES.txt +0 -0
  37. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator.egg-info/dependency_links.txt +0 -0
  38. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator.egg-info/not-zip-safe +0 -0
  39. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator.egg-info/requires.txt +0 -0
  40. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/src/dist_s1_enumerator.egg-info/top_level.txt +0 -0
  41. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/conftest.py +0 -0
  42. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/mock_ts_data_for_enum.ipynb +0 -0
  43. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/rtc_s1_ts_metadata/chile_19HBD.parquet +0 -0
  44. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/rtc_s1_ts_metadata/mgrs01UBT.parquet +0 -0
  45. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/rtc_s1_ts_metadata/mgrs11SLT_11SLU_11SMT.parquet +0 -0
  46. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/rtc_s1_ts_metadata/mgrs15RXN__track63.parquet +0 -0
  47. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/rtc_s1_ts_metadata/mgrs22WFD.parquet +0 -0
  48. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/data/ts_data_for_workflow_inputs.ipynb +0 -0
  49. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_dist_enum_inputs.py +0 -0
  50. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_mgrs_burst_data.py +0 -0
  51. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_notebooks.py +0 -0
  52. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_package.py +0 -0
  53. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_param_model.py +0 -0
  54. {dist_s1_enumerator-1.0.3 → dist_s1_enumerator-1.0.5}/tests/test_rtc_s1_io.py +0 -0
@@ -6,6 +6,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
7
7
  and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8
8
 
9
+ ## [1.0.5] - 2025-09-29
10
+
11
+ ### Fixed
12
+ * CMR metadata does not have correctly migrated urls from ASF datapool to ASF cumulus - see https://github.com/opera-adt/dist-s1/issues/158.
13
+
14
+
15
+ ## [1.0.4] - 2025-09-29
16
+
17
+ ### Added
18
+ * Update time-series enumeration for multiple polarizations within an MGRS tile.
19
+ - We now ensure that for each MGRS tile, a single fixed spatial burst creates a baseline (set of pre-images) for a given RTC-S1 burst product. That is, if the recent data was VV+VH in a burst, then the baseline for that burst is VV+VH. Multiple dual polarization (i.e. both VV+VH and HH+HV) data can be used within a single MGRS tile.
20
+ * We now ensure that single polarization data is excluded from baselines and not used in the creation of the post-image set.
21
+
22
+ ### Fixed
23
+ * Bug in enumerating 1 product - we did not ensure spatial bursts were consistent between pre-/post-image sets.
24
+
25
+
9
26
  ## [1.0.3] - 2025-09-09
10
27
 
11
28
  ### Fixed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.3
3
+ Version: 1.0.5
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0
@@ -12,6 +12,27 @@ from dist_s1_enumerator.mgrs_burst_data import get_burst_ids_in_mgrs_tiles, get_
12
12
  from dist_s1_enumerator.tabular_models import reorder_columns, rtc_s1_resp_schema, rtc_s1_schema
13
13
 
14
14
 
15
+ def convert_asf_url_to_cumulus(url: str) -> str:
16
+ asf_base = 'https://datapool.asf.alaska.edu/RTC/OPERA-S1/'
17
+ cumulus_base = 'https://cumulus.asf.earthdatacloud.nasa.gov/OPERA/OPERA_L2_RTC-S1/'
18
+
19
+ if not (url.startswith(cumulus_base) or url.startswith(asf_base)):
20
+ warn(f'URL {url} is not a valid ASF datapool or cumulus earthdatacloud URL.')
21
+ return url
22
+
23
+ if not url.startswith(asf_base):
24
+ return url
25
+
26
+ filename = url.split('/')[-1]
27
+ granule_pol_parts = filename.rsplit('_', 1)
28
+ if len(granule_pol_parts) != 2:
29
+ raise ValueError(f'Could not extract granule name from filename: {filename}')
30
+
31
+ granule_name = granule_pol_parts[0]
32
+ new_url = f'{cumulus_base}{granule_name}/{filename}'
33
+ return new_url
34
+
35
+
15
36
  def format_polarization(pol: list | str) -> str:
16
37
  if isinstance(pol, list):
17
38
  if ('VV' in pol) and len(pol) == 2:
@@ -72,6 +93,7 @@ def get_rtc_s1_ts_metadata_by_burst_ids(
72
93
  start_acq_dt: str | datetime | None | pd.Timestamp = None,
73
94
  stop_acq_dt: str | datetime | None | pd.Timestamp = None,
74
95
  polarizations: str | None = None,
96
+ include_single_polarization: bool = False,
75
97
  ) -> gpd.GeoDataFrame:
76
98
  """Wrap/format the ASF search API for RTC-S1 metadata search. All searches go through this function.
77
99
 
@@ -138,37 +160,36 @@ def get_rtc_s1_ts_metadata_by_burst_ids(
138
160
  df_rtc['polarizations'] = df_rtc['polarizations'].map(format_polarization)
139
161
  if polarizations is not None:
140
162
  ind_pol = df_rtc['polarizations'] == polarizations
141
- else:
163
+ elif not include_single_polarization:
142
164
  ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH'])
165
+ else:
166
+ ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH', 'HH', 'HV', 'VV', 'VH'])
143
167
  if not ind_pol.any():
144
- raise ValueError(f'No valid dual polarization images found for {burst_ids}.')
168
+ warn(f'No valid dual polarization images found for {burst_ids}.')
145
169
  # First get all the dual-polarizations images
146
170
  df_rtc = df_rtc[ind_pol].reset_index(drop=True)
147
- # Then check all the dual-polarizations are the same (either HH+HV or VV+VH)
148
- # TODO: if there are mixtures, can DIST-S1 still be generated assuming they look the same?
149
- polarizations_unique = df_rtc['polarizations'].unique().tolist()
150
- if len(polarizations_unique) > 1:
151
- raise ValueError(
152
- f'Mixed dual polarizations found for {burst_ids}. That is, some images are HH+HV and others are VV+HV.'
153
- )
154
- else:
155
- # Either HH+HV or VV+VH
156
- copol, crosspol = polarizations_unique[0].split('+')
157
171
 
158
172
  def get_url_by_polarization(prod_urls: list[str], polarization_token: str) -> list[str]:
159
- possible_urls = [url for url in prod_urls if f'_{polarization_token}.tif' == url[-7:]]
173
+ if polarization_token == 'copol':
174
+ polarizations_allowed = ['VV', 'HH']
175
+ elif polarization_token == 'crosspol':
176
+ polarizations_allowed = ['HV', 'VH']
177
+ else:
178
+ raise ValueError(f'Invalid polarization token: {polarization_token}. Must be one of: copol, crosspol.')
179
+ possible_urls = [url for pol in polarizations_allowed for url in prod_urls if f'_{pol}.tif' == url[-7:]]
160
180
  if len(possible_urls) == 0:
161
- raise ValueError(f'No {polarization_token} urls found')
181
+ raise ValueError(f'No {polarizations_allowed} urls found')
162
182
  if len(possible_urls) > 1:
163
- breakpoint()
164
- raise ValueError(f'Multiple {polarization_token} urls found')
183
+ raise ValueError(f'Multiple {polarization_token} urls found: {", ".join(possible_urls)}')
165
184
  return possible_urls[0]
166
185
 
167
- url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, copol))
168
- url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, crosspol))
186
+ url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'copol'))
187
+ url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'crosspol'))
169
188
 
170
189
  df_rtc['url_copol'] = url_copol
171
190
  df_rtc['url_crosspol'] = url_crosspol
191
+ df_rtc['url_copol'] = df_rtc['url_copol'].map(convert_asf_url_to_cumulus)
192
+ df_rtc['url_crosspol'] = df_rtc['url_crosspol'].map(convert_asf_url_to_cumulus)
172
193
  df_rtc = df_rtc.drop(columns=['all_urls'])
173
194
 
174
195
  # Ensure the data is sorted by jpl_burst_id and acq_dt
@@ -187,6 +208,7 @@ def get_rtc_s1_metadata_from_acq_group(
187
208
  start_acq_dt: datetime | str | None = None,
188
209
  stop_acq_dt: datetime | str | None = None,
189
210
  max_variation_seconds: float | None = None,
211
+ polarizations: str | None = None,
190
212
  ) -> gpd.GeoDataFrame:
191
213
  """
192
214
  Meant for acquiring a pre-image or post-image set from MGRS tiles for a given S1 pass.
@@ -241,6 +263,7 @@ def get_rtc_s1_metadata_from_acq_group(
241
263
  burst_ids,
242
264
  start_acq_dt=start_acq_dt,
243
265
  stop_acq_dt=stop_acq_dt,
266
+ polarizations=polarizations,
244
267
  )
245
268
  # Assumes that each group is ordered by date (earliest first and most recent last)
246
269
  columns = df_rtc.columns
@@ -117,7 +117,6 @@ def enumerate_one_dist_s1_product(
117
117
  max_variation_seconds=300,
118
118
  n_images_per_burst=1,
119
119
  )
120
-
121
120
  if df_rtc_post.empty:
122
121
  raise ValueError(f'No RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id}.')
123
122
 
@@ -137,6 +136,11 @@ def enumerate_one_dist_s1_product(
137
136
  stop_acq_dt=stop_acq_dt,
138
137
  n_images_per_burst=max_pre_imgs_per_burst,
139
138
  )
139
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
140
+
141
+ df_rtc_pre = pd.merge(df_rtc_pre, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner')
142
+
143
+ df_rtc_pre['input_category'] = 'pre'
140
144
 
141
145
  elif lookback_strategy == 'multi_window':
142
146
  df_rtc_pre_list = []
@@ -155,16 +159,22 @@ def enumerate_one_dist_s1_product(
155
159
  latest_lookback = delta_lookback_day
156
160
  start_acq_dt = post_date_min - timedelta(days=latest_lookback)
157
161
  stop_acq_dt = post_date_min - timedelta(days=earliest_lookback)
158
- df_rtc_pre = get_rtc_s1_metadata_from_acq_group(
162
+ df_rtc_pre_window = get_rtc_s1_metadata_from_acq_group(
159
163
  [mgrs_tile_id],
160
164
  track_numbers=track_numbers,
161
165
  start_acq_dt=start_acq_dt,
162
166
  stop_acq_dt=stop_acq_dt,
163
167
  n_images_per_burst=max_pre_img_per_burst,
168
+ polarizations=None,
164
169
  )
170
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
165
171
 
166
- if not df_rtc_pre.empty:
167
- df_rtc_pre_list.append(df_rtc_pre)
172
+ df_rtc_pre_window = pd.merge(
173
+ df_rtc_pre_window, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner'
174
+ )
175
+
176
+ if not df_rtc_pre_window.empty:
177
+ df_rtc_pre_list.append(df_rtc_pre_window)
168
178
 
169
179
  df_rtc_pre = pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()
170
180
 
@@ -179,7 +189,7 @@ def enumerate_one_dist_s1_product(
179
189
  df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(burst_ids_with_min_pre_images)].reset_index(drop=True)
180
190
 
181
191
  post_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
182
- pre_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
192
+ pre_burst_ids = df_rtc_pre.jpl_burst_id.unique().tolist()
183
193
 
184
194
  final_burst_ids = list(set(post_burst_ids) & set(pre_burst_ids))
185
195
  df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)
@@ -308,10 +318,15 @@ def enumerate_dist_s1_products(
308
318
  ind_time = (df_rtc_ts_tile_track.acq_dt < window_stop) & (
309
319
  df_rtc_ts_tile_track.acq_dt >= window_start
310
320
  )
321
+ df_rtc_ts_tile_track_filtered = df_rtc_ts_tile_track[ind_time].reset_index(drop=True)
311
322
  # Select images that are present in the post-image
312
- ind_burst = df_rtc_ts_tile_track.jpl_burst_id.isin(df_rtc_post.jpl_burst_id)
313
- ind = ind_time & ind_burst
314
- df_rtc_pre = df_rtc_ts_tile_track[ind].reset_index(drop=True)
323
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
324
+ df_rtc_pre = pd.merge(
325
+ df_rtc_ts_tile_track_filtered,
326
+ df_unique_keys,
327
+ on=['jpl_burst_id', 'polarizations'],
328
+ how='inner',
329
+ )
315
330
  df_rtc_pre['input_category'] = 'pre'
316
331
 
317
332
  # It is unclear how merging when multiple MGRS tiles are provided will impact order so this
@@ -343,10 +358,15 @@ def enumerate_dist_s1_products(
343
358
  ind_time = (df_rtc_ts_tile_track.acq_dt < window_stop) & (
344
359
  df_rtc_ts_tile_track.acq_dt >= window_start
345
360
  )
346
- # Select images that are present in the post-image
347
- ind_burst = df_rtc_ts_tile_track.jpl_burst_id.isin(df_rtc_post.jpl_burst_id)
348
- ind = ind_time & ind_burst
349
- df_rtc_pre = df_rtc_ts_tile_track[ind].reset_index(drop=True)
361
+ df_rtc_ts_tile_track_filtered = df_rtc_ts_tile_track[ind_time].reset_index(drop=True)
362
+
363
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
364
+ df_rtc_pre = pd.merge(
365
+ df_rtc_ts_tile_track_filtered,
366
+ df_unique_keys,
367
+ on=['jpl_burst_id', 'polarizations'],
368
+ how='inner',
369
+ )
350
370
  df_rtc_pre['input_category'] = 'pre'
351
371
 
352
372
  # It is unclear how merging when multiple MGRS tiles are provided will impact order so this
@@ -361,7 +381,7 @@ def enumerate_dist_s1_products(
361
381
  continue
362
382
 
363
383
  if not df_rtc_pre.empty:
364
- df_rtc_pre_list.append(df_rtc_pre) # Store each df_rtc_pre
384
+ df_rtc_pre_list.append(df_rtc_pre)
365
385
 
366
386
  # Concatenate all df_rtc_pre into a single DataFrame
367
387
  df_rtc_pre_final = (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.3
3
+ Version: 1.0.5
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  import pytest
2
2
 
3
- from dist_s1_enumerator.asf import append_pass_data, get_rtc_s1_ts_metadata_by_burst_ids
3
+ from dist_s1_enumerator.asf import append_pass_data, convert_asf_url_to_cumulus, get_rtc_s1_ts_metadata_by_burst_ids
4
4
 
5
5
 
6
6
  @pytest.mark.integration
@@ -35,3 +35,24 @@ def test_appending_mgrs_tiles() -> None:
35
35
 
36
36
  df_rtc_formatted_no_rows = append_pass_data(df_rtc_resp, ['22NFF'])
37
37
  assert df_rtc_formatted_no_rows.empty
38
+
39
+
40
+ @pytest.mark.parametrize('pol_token', ['VV', 'VH', 'HH', 'HV'])
41
+ def test_convert_asf_url_to_cumulus_from_datapool(pol_token: str) -> None:
42
+ """Test converting ASF datapool URL to cumulus earthdatacloud URL."""
43
+ asf_url = f'https://datapool.asf.alaska.edu/RTC/OPERA-S1/OPERA_L2_RTC-S1_T001-000189-IW2_20211028T180924Z_20250703T015334Z_S1A_30_v1.0_{pol_token}.tif'
44
+ expected_cumulus_url = f'https://cumulus.asf.earthdatacloud.nasa.gov/OPERA/OPERA_L2_RTC-S1/OPERA_L2_RTC-S1_T001-000189-IW2_20211028T180924Z_20250703T015334Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T001-000189-IW2_20211028T180924Z_20250703T015334Z_S1A_30_v1.0_{pol_token}.tif'
45
+
46
+ result = convert_asf_url_to_cumulus(asf_url)
47
+
48
+ assert result == expected_cumulus_url
49
+
50
+
51
+ @pytest.mark.parametrize('pol_token', ['VV', 'VH', 'HH', 'HV'])
52
+ def test_convert_asf_url_to_cumulus_already_cumulus(pol_token: str) -> None:
53
+ """Test that cumulus URLs are returned unchanged."""
54
+ cumulus_url = f'https://cumulus.asf.earthdatacloud.nasa.gov/OPERA/OPERA_L2_RTC-S1/OPERA_L2_RTC-S1_T001-000189-IW2_20211028T180924Z_20250703T015334Z_S1A_30_v1.0/OPERA_L2_RTC-S1_T001-000189-IW2_20211028T180924Z_20250703T015334Z_S1A_30_v1.0_{pol_token}.tif'
55
+
56
+ result = convert_asf_url_to_cumulus(cumulus_url)
57
+
58
+ assert result == cumulus_url
@@ -247,3 +247,114 @@ def test_burst_ids_consistent_between_pre_and_post(mgrs_tile_ids: list[str], tra
247
247
  df_pre = df_product[df_product['input_category'] == 'pre'].reset_index(drop=True)
248
248
  df_post = df_product[df_product['input_category'] == 'post'].reset_index(drop=True)
249
249
  assert sorted(df_pre['jpl_burst_id'].unique().tolist()) == sorted(df_post['jpl_burst_id'].unique().tolist())
250
+
251
+
252
+ @pytest.mark.integration
253
+ def test_dist_enum_one_with_multi_window_with_multiple_polarizations_and_asf_daac() -> None:
254
+ """Test enumeration of 1 product with multi_window strategy with multiple dual polarization data.
255
+
256
+ Context: MGRS Tile 20TLP: https://search.asf.alaska.edu/#/?polygon=
257
+ POLYGON((-65.5041%2044.226,-65.4632%2043.2383,-64.1113%2043.2594,-64.1298%2044.2478,-65.5041%2044.226))
258
+ &start=2025-09-18T07:00:00Z&end=2025-09-20T06:59:59Z&resultsLoaded=true&zoom=8.078
259
+ &center=-63.112,42.844&dataset=OPERA-S1&productTypes=RTC
260
+ &granule=OPERA_L2_RTC-S1_T171-365960-IW2_20250919T102314Z_20250919T135744Z_S1C_30_v1.0
261
+ """
262
+ df_product = enumerate_one_dist_s1_product(
263
+ '20TLP',
264
+ track_number=171,
265
+ post_date='2025-09-19',
266
+ lookback_strategy='multi_window',
267
+ # Need to look back further for valid VV+VH data
268
+ delta_lookback_days=(1460, 1095, 730, 365),
269
+ max_pre_imgs_per_burst=(3, 3, 3, 4),
270
+ )
271
+
272
+ assert sorted(df_product.polarizations.unique().tolist()) == ['HH+HV', 'VV+VH']
273
+
274
+ df_sample_vvvh_burst = df_product[df_product.jpl_burst_id == 'T171-365965-IW3'].reset_index(drop=True)
275
+ dates_for_sample_vvvh_burst = sorted(df_sample_vvvh_burst['acq_date_for_mgrs_pass'].unique().tolist())
276
+ # Note the last date is the post date
277
+ expected_dates = ['2020-09-21', '2021-05-19', '2021-05-31', '2025-09-19']
278
+ assert dates_for_sample_vvvh_burst == expected_dates
279
+
280
+ # Check baseline data
281
+ # The post image is VV+VH
282
+ # Ref: https://search.asf.alaska.edu/#/?dataset=OPERA-S1&productTypes=RTC&operaBurstID=T171_365965_IW3&zoom=3.000
283
+ # &center=-74.108,31.979
284
+ # &resultsLoaded=true&granule=OPERA_L2_RTC-S1_T171-365965-IW3_20250919T102329Z_20250919T145901Z_S1C_30_v1.0
285
+ opera_ids = df_sample_vvvh_burst.opera_id.unique().tolist()
286
+ opera_ids_trunc = ['_'.join(op_id.split('_')[:5]) for op_id in opera_ids]
287
+ # another VV+VH image
288
+ assert 'OPERA_L2_RTC-S1_T171-365965-IW3_20200921T102347Z' in opera_ids_trunc
289
+ # a HH+HV image in the time series - there is only one image from 2024 so should be in if it weren't 2024
290
+ assert 'OPERA_L2_RTC-S1_T171-365965-IW3_20240427T102443Z' not in opera_ids_trunc
291
+
292
+
293
+ @pytest.mark.integration
294
+ def test_dist_enum_one_with_multi_window_with_asf_daac() -> None:
295
+ df_product = enumerate_one_dist_s1_product(
296
+ '11SLT',
297
+ track_number=144,
298
+ post_date='2025-06-19',
299
+ lookback_strategy='multi_window',
300
+ delta_lookback_days=(1095, 730, 365),
301
+ max_pre_imgs_per_burst=(3, 3, 4),
302
+ )
303
+ burst_ids_expected = [
304
+ 'T144-308024-IW1',
305
+ 'T144-308025-IW1',
306
+ 'T144-308026-IW1',
307
+ 'T144-308027-IW1',
308
+ 'T144-308028-IW1',
309
+ 'T144-308029-IW1',
310
+ 'T144-308030-IW1',
311
+ 'T144-308031-IW1',
312
+ ]
313
+ assert sorted(df_product['jpl_burst_id'].unique().tolist()) == sorted(burst_ids_expected)
314
+
315
+ post_ind = df_product.input_category == 'post'
316
+ df_product_post = df_product[post_ind].reset_index(drop=True)
317
+
318
+ pre_ind = df_product.input_category == 'pre'
319
+ df_product_pre = df_product[pre_ind].reset_index(drop=True)
320
+
321
+ pre_dates_expected = [
322
+ '2024-06-12',
323
+ '2024-05-31',
324
+ '2024-05-19',
325
+ '2024-05-07',
326
+ '2023-06-18',
327
+ '2023-06-06',
328
+ '2023-05-25',
329
+ '2022-06-11',
330
+ '2022-05-30',
331
+ '2022-05-18',
332
+ ]
333
+ assert sorted(df_product_pre['acq_date_for_mgrs_pass'].unique().tolist()) == sorted(pre_dates_expected)
334
+
335
+ assert df_product_post['acq_date_for_mgrs_pass'].unique().tolist() == ['2025-06-19']
336
+
337
+
338
+ @pytest.mark.integration
339
+ def test_dist_enum_one_with_multi_window_with_asf_daac_single_polarization() -> None:
340
+ """
341
+ Test enumeration of 1 product with multi_window strategy with single polarization data in post-image set.
342
+
343
+ The dataframe should be empty!
344
+
345
+ https://search.asf.alaska.edu/#/?maxResults=250&zoom=4.562&center=144.313,-7.683
346
+ &polygon=POLYGON((-242.1832%205.9478,-231.2276%205.9478,-231.2276%2018.4899,-242.1832%2018.4899,-242.1832%205.9478))
347
+ &dataset=OPERA-S1&productTypes=RTC&start=2024-10-18T08:00:00Z
348
+ &end=2024-10-31T07:59:59Z&resultsLoaded=true
349
+ &granule=OPERA_L2_RTC-S1_T069-146165-IW2_20241029T100013Z_20241029T204425Z_S1A_30_v1.0
350
+ &flightDirs=Ascending
351
+ """
352
+ with pytest.raises(ValueError, match='No RTC-S1 post-images found for track 69 in MGRS tile 51QUU.'):
353
+ _ = enumerate_one_dist_s1_product(
354
+ '51QUU',
355
+ track_number=69,
356
+ post_date='2024-10-29',
357
+ lookback_strategy='multi_window',
358
+ delta_lookback_days=(730, 365),
359
+ max_pre_imgs_per_burst=(3, 4),
360
+ )