dist-s1-enumerator 1.0.1__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/bump-tag-for-release.yml +1 -1
  2. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/changelog-updated.yml +1 -1
  3. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/github-release.yml +1 -1
  4. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/labeled-pr.yml +1 -1
  5. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/pypi-release.yml +1 -1
  6. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/static_analysis.yml +2 -2
  7. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/CHANGELOG.md +14 -0
  8. {dist_s1_enumerator-1.0.1/src/dist_s1_enumerator.egg-info → dist_s1_enumerator-1.0.3}/PKG-INFO +1 -1
  9. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/environment.yml +1 -1
  10. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/dist_enum.py +4 -5
  11. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/dist_enum_inputs.py +1 -1
  12. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/rtc_s1_io.py +57 -19
  13. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3/src/dist_s1_enumerator.egg-info}/PKG-INFO +1 -1
  14. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/dependabot.yml +0 -0
  15. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/nb_tests.yml +0 -0
  16. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.github/workflows/tests.yml +0 -0
  17. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/.gitignore +0 -0
  18. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/LICENSE +0 -0
  19. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/README.md +0 -0
  20. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/notebooks/A__Staging_Inputs_for_One_MGRS_Tile.ipynb +0 -0
  21. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/notebooks/B__Enumerate_MGRS_tile.ipynb +0 -0
  22. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/pyproject.toml +0 -0
  23. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/setup.cfg +0 -0
  24. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/__init__.py +0 -0
  25. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/asf.py +0 -0
  26. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/data/jpl_burst_geo.parquet +0 -0
  27. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/data/mgrs.parquet +0 -0
  28. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet +0 -0
  29. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/exceptions.py +0 -0
  30. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/mgrs_burst_data.py +0 -0
  31. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/param_models.py +0 -0
  32. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/py.typed +0 -0
  33. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator/tabular_models.py +0 -0
  34. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator.egg-info/SOURCES.txt +0 -0
  35. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator.egg-info/dependency_links.txt +0 -0
  36. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator.egg-info/not-zip-safe +0 -0
  37. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator.egg-info/requires.txt +0 -0
  38. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/src/dist_s1_enumerator.egg-info/top_level.txt +0 -0
  39. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/conftest.py +0 -0
  40. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/mock_ts_data_for_enum.ipynb +0 -0
  41. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/rtc_s1_ts_metadata/chile_19HBD.parquet +0 -0
  42. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/rtc_s1_ts_metadata/mgrs01UBT.parquet +0 -0
  43. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/rtc_s1_ts_metadata/mgrs11SLT_11SLU_11SMT.parquet +0 -0
  44. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/rtc_s1_ts_metadata/mgrs15RXN__track63.parquet +0 -0
  45. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/rtc_s1_ts_metadata/mgrs22WFD.parquet +0 -0
  46. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/data/ts_data_for_workflow_inputs.ipynb +0 -0
  47. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_asf.py +0 -0
  48. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_dist_enum.py +0 -0
  49. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_dist_enum_inputs.py +0 -0
  50. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_mgrs_burst_data.py +0 -0
  51. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_notebooks.py +0 -0
  52. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_package.py +0 -0
  53. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_param_model.py +0 -0
  54. {dist_s1_enumerator-1.0.1 → dist_s1_enumerator-1.0.3}/tests/test_rtc_s1_io.py +0 -0
@@ -7,7 +7,7 @@ on:
7
7
 
8
8
  jobs:
9
9
  call-bump-version-workflow:
10
- uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.18.1
10
+ uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.20.0
11
11
  with:
12
12
  user: opera-pst-dev
13
13
  email: opera-pst-dev@jpl.nasa.gov
@@ -13,6 +13,6 @@ on:
13
13
 
14
14
  jobs:
15
15
  call-changelog-check-workflow:
16
- uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.18.1
16
+ uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.20.0
17
17
  secrets:
18
18
  USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -7,7 +7,7 @@ on:
7
7
 
8
8
  jobs:
9
9
  call-release-workflow:
10
- uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.18.1
10
+ uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.20.0
11
11
  with:
12
12
  release_prefix: dist-s1-enumerator
13
13
  develop_branch: dev
@@ -12,4 +12,4 @@ on:
12
12
 
13
13
  jobs:
14
14
  call-labeled-pr-check-workflow:
15
- uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.18.1
15
+ uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.20.0
@@ -13,7 +13,7 @@ jobs:
13
13
  steps:
14
14
  - uses: actions/checkout@v4
15
15
 
16
- - uses: actions/setup-python@v5
16
+ - uses: actions/setup-python@v6
17
17
  with:
18
18
  python-version: 3.11
19
19
 
@@ -5,7 +5,7 @@ on: push
5
5
  jobs:
6
6
 
7
7
  call-secrets-analysis-workflow:
8
- uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.18.1
8
+ uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.20.0
9
9
 
10
10
  call-ruff-workflow:
11
- uses: ASFHyP3/actions/.github/workflows/reusable-ruff.yml@v0.18.1
11
+ uses: ASFHyP3/actions/.github/workflows/reusable-ruff.yml@v0.20.0
@@ -6,6 +6,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
7
7
  and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8
8
 
9
+ ## [1.0.3] - 2025-09-09
10
+
11
+ ### Fixed
12
+ * Changed the default for `delta_lookback_days` from 0 to 365 in enumeration of multiple products; the previous default led to submission of jobs that had no baseline (see Issue: https://github.com/opera-adt/dist-s1-enumerator/issues/44).
13
+ * Renamed variables for easier tracking in `enumerate_dist_s1_products`.
14
+
15
+ ## [1.0.2] - 2025-09-09
16
+
17
+ ### Changed
18
+ * `backoff` library is removed and we now use `tenacity`
19
+
20
+ ### Added
21
+ * Uses sessions and adapters to handle multiple concurrent requests more reliably.
22
+
9
23
  ## [1.0.1] - 2025-08-07
10
24
 
11
25
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.1
3
+ Version: 1.0.3
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0
@@ -3,7 +3,7 @@ dependencies:
3
3
  - python>=3.12
4
4
  - pip
5
5
  - asf_search
6
- - backoff
6
+ - tenacity
7
7
  - contextily
8
8
  - flake8
9
9
  - flake8-blind-except
@@ -218,7 +218,7 @@ def enumerate_dist_s1_products(
218
218
  max_pre_imgs_per_burst: int = (5, 5, 5),
219
219
  min_pre_imgs_per_burst: int = 1,
220
220
  tqdm_enabled: bool = True,
221
- delta_lookback_days: int = 0,
221
+ delta_lookback_days: int = 365,
222
222
  delta_window_days: int = 365,
223
223
  ) -> gpd.GeoDataFrame:
224
224
  """
@@ -254,7 +254,7 @@ def enumerate_dist_s1_products(
254
254
  tqdm_enabled : bool, optional
255
255
  Whether to enable tqdm progress bars, by default True.
256
256
  delta_lookback_days : int, optional
257
- When to set the most recent pre-image date. Default is 0.
257
+ When to set the most recent pre-image date. Default is 365.
258
258
  If lookback strategy is 'multi_window', this means the maximum number of days to search for pre-images on each
259
259
  anniversary date where `post_date - n * lookback_days` are the anniversary dates for n = 1,....
260
260
  If lookback strategy is 'immediate_lookback', this must be set to 0.
@@ -332,7 +332,7 @@ def enumerate_dist_s1_products(
332
332
  # Loop over the different lookback days
333
333
  df_rtc_pre_list = []
334
334
  zipped_data = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
335
- for delta_lookback_day, max_pre_img_per_burst in zipped_data:
335
+ for delta_lookback_day, max_pre_img_per_burst_param in zipped_data:
336
336
  delta_lookback_timedelta = pd.Timedelta(delta_lookback_day, unit='D')
337
337
  delta_window_timedelta = pd.Timedelta(params.delta_window_days, unit='D')
338
338
  window_start = post_date - delta_lookback_timedelta - delta_window_timedelta
@@ -354,7 +354,7 @@ def enumerate_dist_s1_products(
354
354
  df_rtc_pre = df_rtc_pre.sort_values(by='acq_dt', ascending=True).reset_index(drop=True)
355
355
  # Assume the data is sorted by acquisition date
356
356
  df_rtc_pre = (
357
- df_rtc_pre.groupby('jpl_burst_id').tail(max_pre_img_per_burst).reset_index(drop=True)
357
+ df_rtc_pre.groupby('jpl_burst_id').tail(max_pre_img_per_burst_param).reset_index(drop=True)
358
358
  )
359
359
 
360
360
  if df_rtc_pre.empty:
@@ -367,7 +367,6 @@ def enumerate_dist_s1_products(
367
367
  df_rtc_pre_final = (
368
368
  pd.concat(df_rtc_pre_list, ignore_index=True) if df_rtc_pre_list else pd.DataFrame()
369
369
  )
370
- # product and provenance
371
370
  df_rtc_product = pd.concat([df_rtc_pre_final, df_rtc_post]).reset_index(drop=True)
372
371
  df_rtc_product['product_id'] = product_id
373
372
 
@@ -33,7 +33,7 @@ def enumerate_dist_s1_workflow_inputs(
33
33
  lookback_strategy: str = 'multi_window',
34
34
  max_pre_imgs_per_burst: int | list[int] | tuple[int, ...] = (5, 5, 5),
35
35
  min_pre_imgs_per_burst: int = 1,
36
- delta_lookback_days: int | list[int] | tuple[int, ...] = 0,
36
+ delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
37
37
  delta_window_days: int = 365,
38
38
  df_ts: gpd.GeoDataFrame | None = None,
39
39
  ) -> list[dict]:
@@ -1,12 +1,12 @@
1
1
  import concurrent.futures
2
2
  from pathlib import Path
3
3
 
4
- import backoff
5
4
  import geopandas as gpd
6
5
  import requests
7
6
  from pandera.pandas import check_input
8
7
  from rasterio.errors import RasterioIOError
9
- from requests.exceptions import HTTPError
8
+ from requests.exceptions import HTTPError, RequestException, Timeout
9
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
10
10
  from tqdm.auto import tqdm
11
11
 
12
12
  from dist_s1_enumerator.tabular_models import rtc_s1_schema
@@ -54,22 +54,56 @@ def append_local_paths(df_rtc_ts: gpd.GeoDataFrame, data_dir: Path | str) -> lis
54
54
  return df_out
55
55
 
56
56
 
57
- @backoff.on_exception(
58
- backoff.expo,
59
- [ConnectionError, HTTPError, RasterioIOError],
60
- max_tries=30,
61
- max_time=60,
62
- jitter=backoff.full_jitter,
57
+ def create_download_session(max_workers: int = 5) -> requests.Session:
58
+ """Create a requests session with appropriate settings for downloads.
59
+
60
+ Args:
61
+ max_workers: Number of concurrent download threads (used to size connection pool)
62
+ """
63
+ session = requests.Session()
64
+ session.headers.update({'User-Agent': 'dist-s1-enumerator/1.0'})
65
+
66
+ # Size connection pool based on concurrent workers
67
+ pool_maxsize = max(max_workers * 2, 10)
68
+ pool_maxsize = min(pool_maxsize, 50)
69
+
70
+ adapter = requests.adapters.HTTPAdapter(
71
+ pool_connections=10,
72
+ pool_maxsize=pool_maxsize,
73
+ max_retries=0, # handle retries with tenacity
74
+ )
75
+ session.mount('http://', adapter)
76
+ session.mount('https://', adapter)
77
+ return session
78
+
79
+
80
+ @retry(
81
+ retry=retry_if_exception_type((ConnectionError, HTTPError, RasterioIOError, Timeout, RequestException)),
82
+ stop=stop_after_attempt(5),
83
+ wait=wait_exponential(multiplier=1, min=1, max=10),
84
+ reraise=True,
63
85
  )
64
- def localize_one_rtc(url: str, out_path: Path) -> Path:
86
+ def localize_one_rtc(url: str, out_path: Path, session: requests.Session | None = None) -> Path:
87
+ """Download a single RTC file with retry logic."""
65
88
  if out_path.exists():
66
89
  return out_path
67
90
 
68
- with requests.get(url, stream=True) as r:
69
- r.raise_for_status()
70
- with out_path.open('wb') as f:
71
- for chunk in r.iter_content(chunk_size=16384):
72
- f.write(chunk)
91
+ if session is None:
92
+ session = create_download_session()
93
+
94
+ try:
95
+ with session.get(url, stream=True, timeout=30) as r:
96
+ r.raise_for_status()
97
+ out_path.parent.mkdir(parents=True, exist_ok=True)
98
+ with out_path.open('wb') as f:
99
+ for chunk in r.iter_content(chunk_size=16384):
100
+ if chunk: # filter out keep-alive chunks
101
+ f.write(chunk)
102
+ except Exception:
103
+ # Clean up partial file on failure
104
+ if out_path.exists():
105
+ out_path.unlink()
106
+ raise
73
107
  return out_path
74
108
 
75
109
 
@@ -79,26 +113,30 @@ def localize_rtc_s1_ts(
79
113
  data_dir: Path | str,
80
114
  max_workers: int = 5,
81
115
  tqdm_enabled: bool = True,
82
- ) -> list[Path]:
116
+ ) -> gpd.GeoDataFrame:
83
117
  df_out = append_local_paths(df_rtc_ts, data_dir)
84
118
  urls = df_out['url_copol'].tolist() + df_out['url_crosspol'].tolist()
85
119
  out_paths = df_out['loc_path_copol'].tolist() + df_out['loc_path_crosspol'].tolist()
86
120
 
87
- def localize_one_rtc_p(data: tuple) -> Path:
88
- return localize_one_rtc(*data)
121
+ # Create shared session for connection pooling, sized for concurrent workers
122
+ session = create_download_session(max_workers)
123
+
124
+ def localize_one_rtc_with_session(data: tuple) -> Path:
125
+ url, out_path = data
126
+ return localize_one_rtc(url, out_path, session)
89
127
 
90
128
  disable_tqdm = not tqdm_enabled
91
129
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
92
130
  _ = list(
93
131
  tqdm(
94
- executor.map(localize_one_rtc_p, zip(urls, out_paths)),
132
+ executor.map(localize_one_rtc_with_session, zip(urls, out_paths)),
95
133
  total=len(urls),
96
134
  disable=disable_tqdm,
97
135
  desc='Downloading RTC-S1 burst data',
98
136
  dynamic_ncols=True,
99
137
  )
100
138
  )
101
- # For serliaziation
139
+ # For serialization
102
140
  df_out['loc_path_copol'] = df_out['loc_path_copol'].astype(str)
103
141
  df_out['loc_path_crosspol'] = df_out['loc_path_crosspol'].astype(str)
104
142
  return df_out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.1
3
+ Version: 1.0.3
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0