dist-s1-enumerator 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/bump-tag-for-release.yml +1 -1
  2. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/changelog-updated.yml +1 -1
  3. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/github-release.yml +1 -1
  4. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/labeled-pr.yml +1 -1
  5. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/pypi-release.yml +1 -1
  6. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/static_analysis.yml +2 -2
  7. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/CHANGELOG.md +15 -0
  8. {dist_s1_enumerator-1.0.0/src/dist_s1_enumerator.egg-info → dist_s1_enumerator-1.0.2}/PKG-INFO +1 -1
  9. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/environment.yml +1 -1
  10. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/dist_enum.py +15 -4
  11. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/rtc_s1_io.py +57 -19
  12. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2/src/dist_s1_enumerator.egg-info}/PKG-INFO +1 -1
  13. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/dependabot.yml +0 -0
  14. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/nb_tests.yml +0 -0
  15. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.github/workflows/tests.yml +0 -0
  16. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/.gitignore +0 -0
  17. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/LICENSE +0 -0
  18. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/README.md +0 -0
  19. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/notebooks/A__Staging_Inputs_for_One_MGRS_Tile.ipynb +0 -0
  20. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/notebooks/B__Enumerate_MGRS_tile.ipynb +0 -0
  21. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/pyproject.toml +0 -0
  22. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/setup.cfg +0 -0
  23. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/__init__.py +0 -0
  24. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/asf.py +0 -0
  25. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/data/jpl_burst_geo.parquet +0 -0
  26. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/data/mgrs.parquet +0 -0
  27. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet +0 -0
  28. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/dist_enum_inputs.py +0 -0
  29. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/exceptions.py +0 -0
  30. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/mgrs_burst_data.py +0 -0
  31. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/param_models.py +0 -0
  32. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/py.typed +0 -0
  33. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator/tabular_models.py +0 -0
  34. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator.egg-info/SOURCES.txt +0 -0
  35. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator.egg-info/dependency_links.txt +0 -0
  36. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator.egg-info/not-zip-safe +0 -0
  37. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator.egg-info/requires.txt +0 -0
  38. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/src/dist_s1_enumerator.egg-info/top_level.txt +0 -0
  39. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/conftest.py +0 -0
  40. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/mock_ts_data_for_enum.ipynb +0 -0
  41. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/rtc_s1_ts_metadata/chile_19HBD.parquet +0 -0
  42. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/rtc_s1_ts_metadata/mgrs01UBT.parquet +0 -0
  43. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/rtc_s1_ts_metadata/mgrs11SLT_11SLU_11SMT.parquet +0 -0
  44. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/rtc_s1_ts_metadata/mgrs15RXN__track63.parquet +0 -0
  45. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/rtc_s1_ts_metadata/mgrs22WFD.parquet +0 -0
  46. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/data/ts_data_for_workflow_inputs.ipynb +0 -0
  47. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_asf.py +0 -0
  48. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_dist_enum.py +0 -0
  49. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_dist_enum_inputs.py +0 -0
  50. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_mgrs_burst_data.py +0 -0
  51. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_notebooks.py +0 -0
  52. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_package.py +0 -0
  53. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_param_model.py +0 -0
  54. {dist_s1_enumerator-1.0.0 → dist_s1_enumerator-1.0.2}/tests/test_rtc_s1_io.py +0 -0
@@ -7,7 +7,7 @@ on:
7
7
 
8
8
  jobs:
9
9
  call-bump-version-workflow:
10
- uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.18.1
10
+ uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.20.0
11
11
  with:
12
12
  user: opera-pst-dev
13
13
  email: opera-pst-dev@jpl.nasa.gov
@@ -13,6 +13,6 @@ on:
13
13
 
14
14
  jobs:
15
15
  call-changelog-check-workflow:
16
- uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.18.1
16
+ uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.20.0
17
17
  secrets:
18
18
  USER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -7,7 +7,7 @@ on:
7
7
 
8
8
  jobs:
9
9
  call-release-workflow:
10
- uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.18.1
10
+ uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.20.0
11
11
  with:
12
12
  release_prefix: dist-s1-enumerator
13
13
  develop_branch: dev
@@ -12,4 +12,4 @@ on:
12
12
 
13
13
  jobs:
14
14
  call-labeled-pr-check-workflow:
15
- uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.18.1
15
+ uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.20.0
@@ -13,7 +13,7 @@ jobs:
13
13
  steps:
14
14
  - uses: actions/checkout@v4
15
15
 
16
- - uses: actions/setup-python@v5
16
+ - uses: actions/setup-python@v6
17
17
  with:
18
18
  python-version: 3.11
19
19
 
@@ -5,7 +5,7 @@ on: push
5
5
  jobs:
6
6
 
7
7
  call-secrets-analysis-workflow:
8
- uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.18.1
8
+ uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.20.0
9
9
 
10
10
  call-ruff-workflow:
11
- uses: ASFHyP3/actions/.github/workflows/reusable-ruff.yml@v0.18.1
11
+ uses: ASFHyP3/actions/.github/workflows/reusable-ruff.yml@v0.20.0
@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
7
7
  and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
8
8
 
9
+ ## [1.0.2] - 2025-09-09
10
+
11
+ ### Changed
12
+ * `backoff` library is removed and we now use `tenacity`
13
+
14
+ ### Added
15
+ * Uses sessions and adapters to handle multiple concurrent requests more reliably.
16
+
17
+
18
+ ## [1.0.1] - 2025-08-07
19
+
20
+ ### Added
21
+ * Improved tqdm outputs for enumeration of 1 dist-s1 product with multiwindow
22
+ * Added back more print statements
23
+
9
24
  ## [1.0.0] - 2025-08-05
10
25
 
11
26
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0
@@ -3,7 +3,7 @@ dependencies:
3
3
  - python>=3.12
4
4
  - pip
5
5
  - asf_search
6
- - backoff
6
+ - tenacity
7
7
  - contextily
8
8
  - flake8
9
9
  - flake8-blind-except
@@ -20,6 +20,7 @@ def enumerate_one_dist_s1_product(
20
20
  delta_window_days: int = 365,
21
21
  delta_lookback_days: int | list[int] | tuple[int, ...] = 365,
22
22
  min_pre_imgs_per_burst: int = 1,
23
+ tqdm_enabled: bool = True,
23
24
  ) -> gpd.GeoDataFrame:
24
25
  """Enumerate a single product using unique DIST-S1 identifiers.
25
26
 
@@ -106,6 +107,7 @@ def enumerate_one_dist_s1_product(
106
107
  if isinstance(post_date, pd.Timestamp):
107
108
  post_date = post_date.to_pydatetime()
108
109
 
110
+ print(f'Searching for post-images for track {track_number} in MGRS tile {mgrs_tile_id}')
109
111
  df_rtc_post = get_rtc_s1_metadata_from_acq_group(
110
112
  [mgrs_tile_id],
111
113
  track_numbers=track_numbers,
@@ -121,6 +123,8 @@ def enumerate_one_dist_s1_product(
121
123
 
122
124
  if lookback_strategy == 'immediate_lookback':
123
125
  # Add 5 minutes buffer to ensure we don't include post-images in pre-image set.
126
+ print('Searching for pre-images for immediate_lookback products')
127
+ print(f'Lookback days {params.delta_lookback_days} and window days {params.delta_window_days}')
124
128
  post_date_min = df_rtc_post.acq_dt.min() - pd.Timedelta(seconds=300)
125
129
  earliest_lookback = params.delta_window_days + params.delta_lookback_days
126
130
  latest_lookback = params.delta_lookback_days
@@ -136,7 +140,15 @@ def enumerate_one_dist_s1_product(
136
140
 
137
141
  elif lookback_strategy == 'multi_window':
138
142
  df_rtc_pre_list = []
139
- for delta_lookback_day, max_pre_img_per_burst in zip(params.delta_lookback_days, params.max_pre_imgs_per_burst):
143
+ zipped_data = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
144
+ print('Searching for pre-images for multi_window baseline')
145
+ print(f'Lookback days {params.delta_lookback_days} and window days {params.delta_window_days}')
146
+ for delta_lookback_day, max_pre_img_per_burst in tqdm(
147
+ zipped_data,
148
+ desc='Windows',
149
+ dynamic_ncols=True,
150
+ disable=(not tqdm_enabled),
151
+ ):
140
152
  # Add 5 minutes buffer to ensure we don't include post-images in pre-image set.
141
153
  post_date_min = df_rtc_post.acq_dt.min() - pd.Timedelta(seconds=300)
142
154
  earliest_lookback = params.delta_window_days + delta_lookback_day
@@ -319,9 +331,8 @@ def enumerate_dist_s1_products(
319
331
  post_date = df_rtc_post.acq_dt.min()
320
332
  # Loop over the different lookback days
321
333
  df_rtc_pre_list = []
322
- for delta_lookback_day, max_pre_img_per_burst in zip(
323
- params.delta_lookback_days, params.max_pre_imgs_per_burst
324
- ):
334
+ zipped_data = list(zip(params.delta_lookback_days, params.max_pre_imgs_per_burst))
335
+ for delta_lookback_day, max_pre_img_per_burst in zipped_data:
325
336
  delta_lookback_timedelta = pd.Timedelta(delta_lookback_day, unit='D')
326
337
  delta_window_timedelta = pd.Timedelta(params.delta_window_days, unit='D')
327
338
  window_start = post_date - delta_lookback_timedelta - delta_window_timedelta
@@ -1,12 +1,12 @@
1
1
  import concurrent.futures
2
2
  from pathlib import Path
3
3
 
4
- import backoff
5
4
  import geopandas as gpd
6
5
  import requests
7
6
  from pandera.pandas import check_input
8
7
  from rasterio.errors import RasterioIOError
9
- from requests.exceptions import HTTPError
8
+ from requests.exceptions import HTTPError, RequestException, Timeout
9
+ from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
10
10
  from tqdm.auto import tqdm
11
11
 
12
12
  from dist_s1_enumerator.tabular_models import rtc_s1_schema
@@ -54,22 +54,56 @@ def append_local_paths(df_rtc_ts: gpd.GeoDataFrame, data_dir: Path | str) -> lis
54
54
  return df_out
55
55
 
56
56
 
57
- @backoff.on_exception(
58
- backoff.expo,
59
- [ConnectionError, HTTPError, RasterioIOError],
60
- max_tries=30,
61
- max_time=60,
62
- jitter=backoff.full_jitter,
57
+ def create_download_session(max_workers: int = 5) -> requests.Session:
58
+ """Create a requests session with appropriate settings for downloads.
59
+
60
+ Args:
61
+ max_workers: Number of concurrent download threads (used to size connection pool)
62
+ """
63
+ session = requests.Session()
64
+ session.headers.update({'User-Agent': 'dist-s1-enumerator/1.0'})
65
+
66
+ # Size connection pool based on concurrent workers
67
+ pool_maxsize = max(max_workers * 2, 10)
68
+ pool_maxsize = min(pool_maxsize, 50)
69
+
70
+ adapter = requests.adapters.HTTPAdapter(
71
+ pool_connections=10,
72
+ pool_maxsize=pool_maxsize,
73
+ max_retries=0, # handle retries with tenacity
74
+ )
75
+ session.mount('http://', adapter)
76
+ session.mount('https://', adapter)
77
+ return session
78
+
79
+
80
+ @retry(
81
+ retry=retry_if_exception_type((ConnectionError, HTTPError, RasterioIOError, Timeout, RequestException)),
82
+ stop=stop_after_attempt(5),
83
+ wait=wait_exponential(multiplier=1, min=1, max=10),
84
+ reraise=True,
63
85
  )
64
- def localize_one_rtc(url: str, out_path: Path) -> Path:
86
+ def localize_one_rtc(url: str, out_path: Path, session: requests.Session | None = None) -> Path:
87
+ """Download a single RTC file with retry logic."""
65
88
  if out_path.exists():
66
89
  return out_path
67
90
 
68
- with requests.get(url, stream=True) as r:
69
- r.raise_for_status()
70
- with out_path.open('wb') as f:
71
- for chunk in r.iter_content(chunk_size=16384):
72
- f.write(chunk)
91
+ if session is None:
92
+ session = create_download_session()
93
+
94
+ try:
95
+ with session.get(url, stream=True, timeout=30) as r:
96
+ r.raise_for_status()
97
+ out_path.parent.mkdir(parents=True, exist_ok=True)
98
+ with out_path.open('wb') as f:
99
+ for chunk in r.iter_content(chunk_size=16384):
100
+ if chunk: # filter out keep-alive chunks
101
+ f.write(chunk)
102
+ except Exception:
103
+ # Clean up partial file on failure
104
+ if out_path.exists():
105
+ out_path.unlink()
106
+ raise
73
107
  return out_path
74
108
 
75
109
 
@@ -79,26 +113,30 @@ def localize_rtc_s1_ts(
79
113
  data_dir: Path | str,
80
114
  max_workers: int = 5,
81
115
  tqdm_enabled: bool = True,
82
- ) -> list[Path]:
116
+ ) -> gpd.GeoDataFrame:
83
117
  df_out = append_local_paths(df_rtc_ts, data_dir)
84
118
  urls = df_out['url_copol'].tolist() + df_out['url_crosspol'].tolist()
85
119
  out_paths = df_out['loc_path_copol'].tolist() + df_out['loc_path_crosspol'].tolist()
86
120
 
87
- def localize_one_rtc_p(data: tuple) -> Path:
88
- return localize_one_rtc(*data)
121
+ # Create shared session for connection pooling, sized for concurrent workers
122
+ session = create_download_session(max_workers)
123
+
124
+ def localize_one_rtc_with_session(data: tuple) -> Path:
125
+ url, out_path = data
126
+ return localize_one_rtc(url, out_path, session)
89
127
 
90
128
  disable_tqdm = not tqdm_enabled
91
129
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
92
130
  _ = list(
93
131
  tqdm(
94
- executor.map(localize_one_rtc_p, zip(urls, out_paths)),
132
+ executor.map(localize_one_rtc_with_session, zip(urls, out_paths)),
95
133
  total=len(urls),
96
134
  disable=disable_tqdm,
97
135
  desc='Downloading RTC-S1 burst data',
98
136
  dynamic_ncols=True,
99
137
  )
100
138
  )
101
- # For serliaziation
139
+ # For serialization
102
140
  df_out['loc_path_copol'] = df_out['loc_path_copol'].astype(str)
103
141
  df_out['loc_path_crosspol'] = df_out['loc_path_crosspol'].astype(str)
104
142
  return df_out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dist-s1-enumerator
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: Enumeration and ops library for the OPERA DIST-S1 project
5
5
  Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
6
6
  License: Apache-2.0