dist-s1-enumerator 1.0.2__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/CHANGELOG.md +19 -3
  2. {dist_s1_enumerator-1.0.2/src/dist_s1_enumerator.egg-info → dist_s1_enumerator-1.0.4}/PKG-INFO +1 -1
  3. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/asf.py +18 -18
  4. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/dist_enum.py +37 -18
  5. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/dist_enum_inputs.py +1 -1
  6. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4/src/dist_s1_enumerator.egg-info}/PKG-INFO +1 -1
  7. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_dist_enum.py +111 -0
  8. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/dependabot.yml +0 -0
  9. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/bump-tag-for-release.yml +0 -0
  10. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/changelog-updated.yml +0 -0
  11. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/github-release.yml +0 -0
  12. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/labeled-pr.yml +0 -0
  13. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/nb_tests.yml +0 -0
  14. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/pypi-release.yml +0 -0
  15. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/static_analysis.yml +0 -0
  16. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.github/workflows/tests.yml +0 -0
  17. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/.gitignore +0 -0
  18. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/LICENSE +0 -0
  19. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/README.md +0 -0
  20. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/environment.yml +0 -0
  21. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/notebooks/A__Staging_Inputs_for_One_MGRS_Tile.ipynb +0 -0
  22. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/notebooks/B__Enumerate_MGRS_tile.ipynb +0 -0
  23. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/pyproject.toml +0 -0
  24. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/setup.cfg +0 -0
  25. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/__init__.py +0 -0
  26. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/data/jpl_burst_geo.parquet +0 -0
  27. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/data/mgrs.parquet +0 -0
  28. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet +0 -0
  29. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/exceptions.py +0 -0
  30. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/mgrs_burst_data.py +0 -0
  31. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/param_models.py +0 -0
  32. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/py.typed +0 -0
  33. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/rtc_s1_io.py +0 -0
  34. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator/tabular_models.py +0 -0
  35. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator.egg-info/SOURCES.txt +0 -0
  36. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator.egg-info/dependency_links.txt +0 -0
  37. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator.egg-info/not-zip-safe +0 -0
  38. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator.egg-info/requires.txt +0 -0
  39. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/src/dist_s1_enumerator.egg-info/top_level.txt +0 -0
  40. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/conftest.py +0 -0
  41. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/mock_ts_data_for_enum.ipynb +0 -0
  42. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/rtc_s1_ts_metadata/chile_19HBD.parquet +0 -0
  43. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/rtc_s1_ts_metadata/mgrs01UBT.parquet +0 -0
  44. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/rtc_s1_ts_metadata/mgrs11SLT_11SLU_11SMT.parquet +0 -0
  45. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/rtc_s1_ts_metadata/mgrs15RXN__track63.parquet +0 -0
  46. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/rtc_s1_ts_metadata/mgrs22WFD.parquet +0 -0
  47. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/data/ts_data_for_workflow_inputs.ipynb +0 -0
  48. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_asf.py +0 -0
  49. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_dist_enum_inputs.py +0 -0
  50. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_mgrs_burst_data.py +0 -0
  51. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_notebooks.py +0 -0
  52. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_package.py +0 -0
  53. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_param_model.py +0 -0
  54. {dist_s1_enumerator-1.0.2 → dist_s1_enumerator-1.0.4}/tests/test_rtc_s1_io.py +0 -0
@@ -6,15 +6,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
7
7
  and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8
8
 
9
+ ## [1.0.4] - 2025-09-29
10
+
11
+ ### Added
12
+ * Update time-series enumeration for multiple polarizations within an MGRS tile.
13
+ - We now ensure that for each MGRS tile, a single fixed spatial burst creates a baseline (set of pre-images) for a given RTC-S1 burst product. That is, if the recent data was VV+VH in a burst, then the baseline for that burst is VV+VH. Multiple dual polarization (i.e. both VV+VH and HH+HV) data can be used within a single MGRS tile.
14
+ * We now ensure that single polarization data is excluded from baselines and not used in the creation of the post-image set.
15
+
16
+ ### Fixed
17
+ * Bug in enumerating 1 product.
18
+
19
+
20
+ ## [1.0.3] - 2025-09-09
21
+
22
+ ### Fixed
23
+ * Defaults for `lookback_delta_days` from 0 to 365 in enumeration of multiple products. Leading to submission of jobs that had no baseline (see Issue: https://github.com/opera-adt/dist-s1-enumerator/issues/44)
24
+ * Renamed variables for easier tracking in `enumerate_dist_s1_products`.
25
+
9
26
  ## [1.0.2] - 2025-09-09
10
27
 
11
- ## Changed
28
+ ### Changed
12
29
  * `backoff` library is removed and we now use `tenacity`
13
30
 
14
- ## Added
31
+ ### Added
15
32
  * Uses sessions and adapters to handle multiple concurrent requests more reliably.
16
33
 
17
-
18
34
  ## [1.0.1] - 2025-08-07
19
35
 
20
36
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0
@@ -72,6 +72,7 @@ def get_rtc_s1_ts_metadata_by_burst_ids(
72
72
  start_acq_dt: str | datetime | None | pd.Timestamp = None,
73
73
  stop_acq_dt: str | datetime | None | pd.Timestamp = None,
74
74
  polarizations: str | None = None,
75
+ include_single_polarization: bool = False,
75
76
  ) -> gpd.GeoDataFrame:
76
77
  """Wrap/format the ASF search API for RTC-S1 metadata search. All searches go through this function.
77
78
 
@@ -138,34 +139,31 @@ def get_rtc_s1_ts_metadata_by_burst_ids(
138
139
  df_rtc['polarizations'] = df_rtc['polarizations'].map(format_polarization)
139
140
  if polarizations is not None:
140
141
  ind_pol = df_rtc['polarizations'] == polarizations
141
- else:
142
+ elif not include_single_polarization:
142
143
  ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH'])
144
+ else:
145
+ ind_pol = df_rtc['polarizations'].isin(['HH+HV', 'VV+VH', 'HH', 'HV', 'VV', 'VH'])
143
146
  if not ind_pol.any():
144
- raise ValueError(f'No valid dual polarization images found for {burst_ids}.')
147
+ warn(f'No valid dual polarization images found for {burst_ids}.')
145
148
  # First get all the dual-polarizations images
146
149
  df_rtc = df_rtc[ind_pol].reset_index(drop=True)
147
- # Then check all the dual-polarizations are the same (either HH+HV or VV+VH)
148
- # TODO: if there are mixtures, can DIST-S1 still be generated assuming they look the same?
149
- polarizations_unique = df_rtc['polarizations'].unique().tolist()
150
- if len(polarizations_unique) > 1:
151
- raise ValueError(
152
- f'Mixed dual polarizations found for {burst_ids}. That is, some images are HH+HV and others are VV+HV.'
153
- )
154
- else:
155
- # Either HH+HV or VV+VH
156
- copol, crosspol = polarizations_unique[0].split('+')
157
150
 
158
151
  def get_url_by_polarization(prod_urls: list[str], polarization_token: str) -> list[str]:
159
- possible_urls = [url for url in prod_urls if f'_{polarization_token}.tif' == url[-7:]]
152
+ if polarization_token == 'copol':
153
+ polarizations_allowed = ['VV', 'HH']
154
+ elif polarization_token == 'crosspol':
155
+ polarizations_allowed = ['HV', 'VH']
156
+ else:
157
+ raise ValueError(f'Invalid polarization token: {polarization_token}. Must be one of: copol, crosspol.')
158
+ possible_urls = [url for pol in polarizations_allowed for url in prod_urls if f'_{pol}.tif' == url[-7:]]
160
159
  if len(possible_urls) == 0:
161
- raise ValueError(f'No {polarization_token} urls found')
160
+ raise ValueError(f'No {polarizations_allowed} urls found')
162
161
  if len(possible_urls) > 1:
163
- breakpoint()
164
- raise ValueError(f'Multiple {polarization_token} urls found')
162
+ raise ValueError(f'Multiple {polarization_token} urls found: {", ".join(possible_urls)}')
165
163
  return possible_urls[0]
166
164
 
167
- url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, copol))
168
- url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, crosspol))
165
+ url_copol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'copol'))
166
+ url_crosspol = df_rtc.all_urls.map(lambda urls_for_prod: get_url_by_polarization(urls_for_prod, 'crosspol'))
169
167
 
170
168
  df_rtc['url_copol'] = url_copol
171
169
  df_rtc['url_crosspol'] = url_crosspol
@@ -187,6 +185,7 @@ def get_rtc_s1_metadata_from_acq_group(
187
185
  start_acq_dt: datetime | str | None = None,
188
186
  stop_acq_dt: datetime | str | None = None,
189
187
  max_variation_seconds: float | None = None,
188
+ polarizations: str | None = None,
190
189
  ) -> gpd.GeoDataFrame:
191
190
  """
192
191
  Meant for acquiring a pre-image or post-image set from MGRS tiles for a given S1 pass.
@@ -241,6 +240,7 @@ def get_rtc_s1_metadata_from_acq_group(
241
240
  burst_ids,
242
241
  start_acq_dt=start_acq_dt,
243
242
  stop_acq_dt=stop_acq_dt,
243
+ polarizations=polarizations,
244
244
  )
245
245
  # Assumes that each group is ordered by date (earliest first and most recent last)
246
246
  columns = df_rtc.columns
@@ -117,7 +117,6 @@ def enumerate_one_dist_s1_product(
117
117
  max_variation_seconds=300,
118
118
  n_images_per_burst=1,
119
119
  )
120
-
121
120
  if df_rtc_post.empty:
122
121
  raise ValueError(f'No RTC-S1 post-images found for track {track_number} in MGRS tile {mgrs_tile_id}.')
123
122
 
@@ -137,6 +136,11 @@ def enumerate_one_dist_s1_product(
137
136
  stop_acq_dt=stop_acq_dt,
138
137
  n_images_per_burst=max_pre_imgs_per_burst,
139
138
  )
139
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
140
+
141
+ df_rtc_pre = pd.merge(df_rtc_pre, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner')
142
+
143
+ df_rtc_pre['input_category'] = 'pre'
140
144
 
141
145
  elif lookback_strategy == 'multi_window':
142
146
  df_rtc_pre_list = []
@@ -155,16 +159,22 @@ def enumerate_one_dist_s1_product(
155
159
  latest_lookback = delta_lookback_day
156
160
  start_acq_dt = post_date_min - timedelta(days=latest_lookback)
157
161
  stop_acq_dt = post_date_min - timedelta(days=earliest_lookback)
158
- df_rtc_pre = get_rtc_s1_metadata_from_acq_group(
162
+ df_rtc_pre_window = get_rtc_s1_metadata_from_acq_group(
159
163
  [mgrs_tile_id],
160
164
  track_numbers=track_numbers,
161
165
  start_acq_dt=start_acq_dt,
162
166
  stop_acq_dt=stop_acq_dt,
163
167
  n_images_per_burst=max_pre_img_per_burst,
168
+ polarizations=None,
164
169
  )
170
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
165
171
 
166
- if not df_rtc_pre.empty:
167
- df_rtc_pre_list.append(df_rtc_pre)
172
+ df_rtc_pre_window = pd.merge(
173
+ df_rtc_pre_window, df_unique_keys, on=['jpl_burst_id', 'polarizations'], how='inner'
174
+ )
175
+
176
+ if not df_rtc_pre_window.empty:
177
+ df_rtc_pre_list.append(df_rtc_pre_window)
168
178
 
169
179
  df_rtc_pre = pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()
170
180
 
@@ -179,7 +189,7 @@ def enumerate_one_dist_s1_product(
179
189
  df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(burst_ids_with_min_pre_images)].reset_index(drop=True)
180
190
 
181
191
  post_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
182
- pre_burst_ids = df_rtc_post.jpl_burst_id.unique().tolist()
192
+ pre_burst_ids = df_rtc_pre.jpl_burst_id.unique().tolist()
183
193
 
184
194
  final_burst_ids = list(set(post_burst_ids) & set(pre_burst_ids))
185
195
  df_rtc_pre = df_rtc_pre[df_rtc_pre.jpl_burst_id.isin(final_burst_ids)].reset_index(drop=True)
@@ -218,7 +228,7 @@ def enumerate_dist_s1_products(
218
228
  max_pre_imgs_per_burst: int = (5, 5, 5),
219
229
  min_pre_imgs_per_burst: int = 1,
220
230
  tqdm_enabled: bool = True,
221
- delta_lookback_days: int = 0,
231
+ delta_lookback_days: int = 365,
222
232
  delta_window_days: int = 365,
223
233
  ) -> gpd.GeoDataFrame:
224
234
  """
@@ -254,7 +264,7 @@ def enumerate_dist_s1_products(
254
264
  tqdm_enabled : bool, optional
255
265
  Whether to enable tqdm progress bars, by default True.
256
266
  delta_lookback_days : int, optional
257
- When to set the most recent pre-image date. Default is 0.
267
+ When to set the most recent pre-image date. Default is 365.
258
268
  If lookback strategy is 'multi_window', this means the maximum number of days to search for pre-images on each
259
269
  anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
260
270
  If lookback strategy is 'immediate_lookback', this must be set to 0.
@@ -308,10 +318,15 @@ def enumerate_dist_s1_products(
308
318
  ind_time = (df_rtc_ts_tile_track.acq_dt < window_stop) & (
309
319
  df_rtc_ts_tile_track.acq_dt >= window_start
310
320
  )
321
+ df_rtc_ts_tile_track_filtered = df_rtc_ts_tile_track[ind_time].reset_index(drop=True)
311
322
  # Select images that are present in the post-image
312
- ind_burst = df_rtc_ts_tile_track.jpl_burst_id.isin(df_rtc_post.jpl_burst_id)
313
- ind = ind_time & ind_burst
314
- df_rtc_pre = df_rtc_ts_tile_track[ind].reset_index(drop=True)
323
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
324
+ df_rtc_pre = pd.merge(
325
+ df_rtc_ts_tile_track_filtered,
326
+ df_unique_keys,
327
+ on=['jpl_burst_id', 'polarizations'],
328
+ how='inner',
329
+ )
315
330
  df_rtc_pre['input_category'] = 'pre'
316
331
 
317
332
  # It is unclear how merging when multiple MGRS tiles are provided will impact order so this
@@ -332,7 +347,7 @@ def enumerate_dist_s1_products(
332
347
  # Loop over the different lookback days
333
348
  df_rtc_pre_list = []
334
349
  zipped_data = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
335
- for delta_lookback_day, max_pre_img_per_burst in zipped_data:
350
+ for delta_lookback_day, max_pre_img_per_burst_param in zipped_data:
336
351
  delta_lookback_timedelta = pd.Timedelta(delta_lookback_day, unit='D')
337
352
  delta_window_timedelta = pd.Timedelta(params.delta_window_days, unit='D')
338
353
  window_start = post_date - delta_lookback_timedelta - delta_window_timedelta
@@ -343,10 +358,15 @@ def enumerate_dist_s1_products(
343
358
  ind_time = (df_rtc_ts_tile_track.acq_dt < window_stop) & (
344
359
  df_rtc_ts_tile_track.acq_dt >= window_start
345
360
  )
346
- # Select images that are present in the post-image
347
- ind_burst = df_rtc_ts_tile_track.jpl_burst_id.isin(df_rtc_post.jpl_burst_id)
348
- ind = ind_time & ind_burst
349
- df_rtc_pre = df_rtc_ts_tile_track[ind].reset_index(drop=True)
361
+ df_rtc_ts_tile_track_filtered = df_rtc_ts_tile_track[ind_time].reset_index(drop=True)
362
+
363
+ df_unique_keys = df_rtc_post[['jpl_burst_id', 'polarizations']].drop_duplicates()
364
+ df_rtc_pre = pd.merge(
365
+ df_rtc_ts_tile_track_filtered,
366
+ df_unique_keys,
367
+ on=['jpl_burst_id', 'polarizations'],
368
+ how='inner',
369
+ )
350
370
  df_rtc_pre['input_category'] = 'pre'
351
371
 
352
372
  # It is unclear how merging when multiple MGRS tiles are provided will impact order so this
@@ -354,20 +374,19 @@ def enumerate_dist_s1_products(
354
374
  df_rtc_pre = df_rtc_pre.sort_values(by='acq_dt', ascending=True).reset_index(drop=True)
355
375
  # Assume the data is sorted by acquisition date
356
376
  df_rtc_pre = (
357
- df_rtc_pre.groupby('jpl_burst_id').tail(max_pre_img_per_burst).reset_index(drop=True)
377
+ df_rtc_pre.groupby('jpl_burst_id').tail(max_pre_img_per_burst_param).reset_index(drop=True)
358
378
  )
359
379
 
360
380
  if df_rtc_pre.empty:
361
381
  continue
362
382
 
363
383
  if not df_rtc_pre.empty:
364
- df_rtc_pre_list.append(df_rtc_pre) # Store each df_rtc_pre
384
+ df_rtc_pre_list.append(df_rtc_pre)
365
385
 
366
386
  # Concatenate all df_rtc_pre into a single DataFrame
367
387
  df_rtc_pre_final = (
368
388
  pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()
369
389
  )
370
- # product and provenance
371
390
  df_rtc_product = pd.concat([df_rtc_pre_final, df_rtc_post]).reset_index(drop=True)
372
391
  df_rtc_product['product_id'] = product_id
373
392
 
@@ -33,7 +33,7 @@ def enumerate_dist_s1_workflow_inputs(
33
33
  lookback_strategy: str = 'multi_window',
34
34
  max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
35
35
  min_pre_imgs_per_burst: int = 1,
36
- delta_lookback_days: int | list[int] | tuple[int, ...] = 0,
36
+ delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
37
37
  delta_window_days: int = 365,
38
38
  df_ts: gpd.GeoDataFrame | None = None,
39
39
  ) -> list[dict]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0
@@ -247,3 +247,114 @@ def test_burst_ids_consistent_between_pre_and_post(mgrs_tile_ids: list[str], tra
247
247
  df_pre = df_product[df_product['input_category'] == 'pre'].reset_index(drop=True)
248
248
  df_post = df_product[df_product['input_category'] == 'post'].reset_index(drop=True)
249
249
  assert sorted(df_pre['jpl_burst_id'].unique().tolist()) == sorted(df_post['jpl_burst_id'].unique().tolist())
250
+
251
+
252
+ @pytest.mark.integration
253
+ def test_dist_enum_one_with_multi_window_with_multiple_polarizations_and_asf_daac() -> None:
254
+ """Test enumeration of 1 product with multi_window strategy with multiple dual polarization data.
255
+
256
+ Context: MGRS Tile 20TLP: https://search.asf.alaska.edu/#/?polygon=
257
+ POLYGON((-65.5041%2044.226,-65.4632%2043.2383,-64.1113%2043.2594,-64.1298%2044.2478,-65.5041%2044.226))
258
+ &start=2025-09-18T07:00:00Z&end=2025-09-20T06:59:59Z&resultsLoaded=true&zoom=8.078
259
+ &center=-63.112,42.844&dataset=OPERA-S1&productTypes=RTC
260
+ &granule=OPERA_L2_RTC-S1_T171-365960-IW2_20250919T102314Z_20250919T135744Z_S1C_30_v1.0
261
+ """
262
+ df_product = enumerate_one_dist_s1_product(
263
+ '20TLP',
264
+ track_number=171,
265
+ post_date='2025-09-19',
266
+ lookback_strategy='multi_window',
267
+ # Need to look back further for valid VV+VH data
268
+ delta_lookback_days=(1460, 1095, 730, 365),
269
+ max_pre_imgs_per_burst=(3, 3, 3, 4),
270
+ )
271
+
272
+ assert sorted(df_product.polarizations.unique().tolist()) == ['HH+HV', 'VV+VH']
273
+
274
+ df_sample_vvvh_burst = df_product[df_product.jpl_burst_id == 'T171-365965-IW3'].reset_index(drop=True)
275
+ dates_for_sample_vvvh_burst = sorted(df_sample_vvvh_burst['acq_date_for_mgrs_pass'].unique().tolist())
276
+ # Note the last date is the post date
277
+ expected_dates = ['2020-09-21', '2021-05-19', '2021-05-31', '2025-09-19']
278
+ assert dates_for_sample_vvvh_burst == expected_dates
279
+
280
+ # Check baseline data
281
+ # The post image is VV+VH
282
+ # Ref: https://search.asf.alaska.edu/#/?dataset=OPERA-S1&productTypes=RTC&operaBurstID=T171_365965_IW3&zoom=3.000
283
+ # &center=-74.108,31.979
284
+ # &resultsLoaded=true&granule=OPERA_L2_RTC-S1_T171-365965-IW3_20250919T102329Z_20250919T145901Z_S1C_30_v1.0
285
+ opera_ids = df_sample_vvvh_burst.opera_id.unique().tolist()
286
+ opera_ids_trunc = ['_'.join(op_id.split('_')[:5]) for op_id in opera_ids]
287
+ # another VV+VH image
288
+ assert 'OPERA_L2_RTC-S1_T171-365965-IW3_20200921T102347Z' in opera_ids_trunc
289
+ # a HH+HV image in the time series - there is only one image from 2024 so should be in if it weren't 2024
290
+ assert 'OPERA_L2_RTC-S1_T171-365965-IW3_20240427T102443Z' not in opera_ids_trunc
291
+
292
+
293
+ @pytest.mark.integration
294
+ def test_dist_enum_one_with_multi_window_with_asf_daac() -> None:
295
+ df_product = enumerate_one_dist_s1_product(
296
+ '11SLT',
297
+ track_number=144,
298
+ post_date='2025-06-19',
299
+ lookback_strategy='multi_window',
300
+ delta_lookback_days=(1095, 730, 365),
301
+ max_pre_imgs_per_burst=(3, 3, 4),
302
+ )
303
+ burst_ids_expected = [
304
+ 'T144-308024-IW1',
305
+ 'T144-308025-IW1',
306
+ 'T144-308026-IW1',
307
+ 'T144-308027-IW1',
308
+ 'T144-308028-IW1',
309
+ 'T144-308029-IW1',
310
+ 'T144-308030-IW1',
311
+ 'T144-308031-IW1',
312
+ ]
313
+ assert sorted(df_product['jpl_burst_id'].unique().tolist()) == sorted(burst_ids_expected)
314
+
315
+ post_ind = df_product.input_category == 'post'
316
+ df_product_post = df_product[post_ind].reset_index(drop=True)
317
+
318
+ pre_ind = df_product.input_category == 'pre'
319
+ df_product_pre = df_product[pre_ind].reset_index(drop=True)
320
+
321
+ pre_dates_expected = [
322
+ '2024-06-12',
323
+ '2024-05-31',
324
+ '2024-05-19',
325
+ '2024-05-07',
326
+ '2023-06-18',
327
+ '2023-06-06',
328
+ '2023-05-25',
329
+ '2022-06-11',
330
+ '2022-05-30',
331
+ '2022-05-18',
332
+ ]
333
+ assert sorted(df_product_pre['acq_date_for_mgrs_pass'].unique().tolist()) == sorted(pre_dates_expected)
334
+
335
+ assert df_product_post['acq_date_for_mgrs_pass'].unique().tolist() == ['2025-06-19']
336
+
337
+
338
+ @pytest.mark.integration
339
+ def test_dist_enum_one_with_multi_window_with_asf_daac_single_polarization() -> None:
340
+ """
341
+ Test enumeration of 1 product with multi_window strategy with single polarization data in post-image set.
342
+
343
+ The dataframe should be empty!
344
+
345
+ https://search.asf.alaska.edu/#/?maxResults=250&zoom=4.562&center=144.313,-7.683
346
+ &polygon=POLYGON((-242.1832%205.9478,-231.2276%205.9478,-231.2276%2018.4899,-242.1832%2018.4899,-242.1832%205.9478))
347
+ &dataset=OPERA-S1&productTypes=RTC&start=2024-10-18T08:00:00Z
348
+ &end=2024-10-31T07:59:59Z&resultsLoaded=true
349
+ &granule=OPERA_L2_RTC-S1_T069-146165-IW2_20241029T100013Z_20241029T204425Z_S1A_30_v1.0
350
+ &flightDirs=Ascending
351
+ """
352
+ with pytest.raises(ValueError, match='No RTC-S1 post-images found for track 69 in MGRS tile 51QUU.'):
353
+ _ = enumerate_one_dist_s1_product(
354
+ '51QUU',
355
+ track_number=69,
356
+ post_date='2024-10-29',
357
+ lookback_strategy='multi_window',
358
+ delta_lookback_days=(730, 365),
359
+ max_pre_imgs_per_burst=(3, 4),
360
+ )