dist-s1-enumerator 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dist_s1_enumerator/rtc_s1_io.py +57 -19
- {dist_s1_enumerator-1.0.1.dist-info → dist_s1_enumerator-1.0.2.dist-info}/METADATA +1 -1
- {dist_s1_enumerator-1.0.1.dist-info → dist_s1_enumerator-1.0.2.dist-info}/RECORD +6 -6
- {dist_s1_enumerator-1.0.1.dist-info → dist_s1_enumerator-1.0.2.dist-info}/WHEEL +0 -0
- {dist_s1_enumerator-1.0.1.dist-info → dist_s1_enumerator-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {dist_s1_enumerator-1.0.1.dist-info → dist_s1_enumerator-1.0.2.dist-info}/top_level.txt +0 -0
dist_s1_enumerator/rtc_s1_io.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import concurrent.futures
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
import backoff
|
|
5
4
|
import geopandas as gpd
|
|
6
5
|
import requests
|
|
7
6
|
from pandera.pandas import check_input
|
|
8
7
|
from rasterio.errors import RasterioIOError
|
|
9
|
-
from requests.exceptions import HTTPError
|
|
8
|
+
from requests.exceptions import HTTPError, RequestException, Timeout
|
|
9
|
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
|
|
10
10
|
from tqdm.auto import tqdm
|
|
11
11
|
|
|
12
12
|
from dist_s1_enumerator.tabular_models import rtc_s1_schema
|
|
@@ -54,22 +54,56 @@ def append_local_paths(df_rtc_ts: gpd.GeoDataFrame, data_dir: Path | str) -> lis
|
|
|
54
54
|
return df_out
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
57
|
+
def create_download_session(max_workers: int = 5) -> requests.Session:
    """Build a requests session configured for file downloads.

    Args:
        max_workers: Number of concurrent download threads expected to share the
            session. The adapter's ``pool_maxsize`` is set to twice this value,
            clamped to the range [10, 50].

    Returns:
        A session carrying a custom ``User-Agent`` header, with one adapter mounted
        for both ``http://`` and ``https://`` URLs.
    """
    # Two pooled connections per worker, never fewer than 10 and never more than 50.
    clamped_pool_size = min(max(max_workers * 2, 10), 50)

    download_adapter = requests.adapters.HTTPAdapter(
        pool_connections=10,
        pool_maxsize=clamped_pool_size,
        max_retries=0,  # adapter-level retries are off; retries are handled with tenacity instead
    )

    download_session = requests.Session()
    download_session.headers.update({'User-Agent': 'dist-s1-enumerator/1.0'})
    # The same adapter instance serves both schemes.
    for scheme_prefix in ('http://', 'https://'):
        download_session.mount(scheme_prefix, download_adapter)
    return download_session
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@retry(
    retry=retry_if_exception_type((ConnectionError, HTTPError, RasterioIOError, Timeout, RequestException)),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=1, max=10),
    reraise=True,
)
def localize_one_rtc(url: str, out_path: Path, session: requests.Session | None = None) -> Path:
    """Download a single RTC file to ``out_path``, retrying on network errors.

    The retry decorator re-invokes this function for up to 5 attempts with an
    exponential wait (between 1 and 10 seconds) when one of the listed exception
    types is raised, and re-raises the last exception once attempts are exhausted.

    Args:
        url: Location of the remote file.
        out_path: Destination path. If it already exists the download is skipped.
        session: Optional session to reuse. When omitted, a session is created for
            this attempt and closed before the function returns or raises.

    Returns:
        ``out_path``, which exists on successful return.

    Raises:
        requests.exceptions.RequestException: If the request fails or the server
            returns an error status (via ``raise_for_status``).
        OSError: If the file cannot be written or moved into place.
    """
    import uuid  # local import: only needed to build a unique temporary file name

    if out_path.exists():
        return out_path

    # Track ownership so a caller-supplied (shared) session is never closed here.
    owns_session = session is None
    if owns_session:
        session = create_download_session()

    # Stream into a uniquely named sibling file and rename it only once complete, so
    # that an interrupted download can never be mistaken for a finished file by the
    # ``out_path.exists()`` short-circuit above.
    tmp_path = out_path.with_name(f'{out_path.name}.{uuid.uuid4().hex}.part')
    try:
        with session.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            out_path.parent.mkdir(parents=True, exist_ok=True)
            with tmp_path.open('wb') as f:
                for chunk in r.iter_content(chunk_size=16384):
                    if chunk:  # filter out keep-alive chunks
                        f.write(chunk)
        # Same-directory rename; replaces any file that appeared at out_path meanwhile.
        tmp_path.replace(out_path)
    except BaseException:
        # Also covers KeyboardInterrupt/SystemExit so no partial file is left behind.
        tmp_path.unlink(missing_ok=True)
        raise
    finally:
        if owns_session:
            session.close()
    return out_path
|
|
74
108
|
|
|
75
109
|
|
|
@@ -79,26 +113,30 @@ def localize_rtc_s1_ts(
|
|
|
79
113
|
data_dir: Path | str,
|
|
80
114
|
max_workers: int = 5,
|
|
81
115
|
tqdm_enabled: bool = True,
|
|
82
|
-
) ->
|
|
116
|
+
) -> gpd.GeoDataFrame:
|
|
83
117
|
df_out = append_local_paths(df_rtc_ts, data_dir)
|
|
84
118
|
urls = df_out['url_copol'].tolist() + df_out['url_crosspol'].tolist()
|
|
85
119
|
out_paths = df_out['loc_path_copol'].tolist() + df_out['loc_path_crosspol'].tolist()
|
|
86
120
|
|
|
87
|
-
|
|
88
|
-
|
|
121
|
+
# Create shared session for connection pooling, sized for concurrent workers
|
|
122
|
+
session = create_download_session(max_workers)
|
|
123
|
+
|
|
124
|
+
def localize_one_rtc_with_session(data: tuple) -> Path:
|
|
125
|
+
url, out_path = data
|
|
126
|
+
return localize_one_rtc(url, out_path, session)
|
|
89
127
|
|
|
90
128
|
disable_tqdm = not tqdm_enabled
|
|
91
129
|
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
92
130
|
_ = list(
|
|
93
131
|
tqdm(
|
|
94
|
-
executor.map(
|
|
132
|
+
executor.map(localize_one_rtc_with_session, zip(urls, out_paths)),
|
|
95
133
|
total=len(urls),
|
|
96
134
|
disable=disable_tqdm,
|
|
97
135
|
desc='Downloading RTC-S1 burst data',
|
|
98
136
|
dynamic_ncols=True,
|
|
99
137
|
)
|
|
100
138
|
)
|
|
101
|
-
# For
|
|
139
|
+
# For serialization
|
|
102
140
|
df_out['loc_path_copol'] = df_out['loc_path_copol'].astype(str)
|
|
103
141
|
df_out['loc_path_crosspol'] = df_out['loc_path_crosspol'].astype(str)
|
|
104
142
|
return df_out
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dist-s1-enumerator
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Enumeration and ops library for the OPERA DIST-S1 project
|
|
5
5
|
Author-email: "Richard West, Charlie Marshak, Talib Oliver-Cabrera, and Jungkyo Jung" <charlie.z.marshak@jpl.nasa.gov>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -6,13 +6,13 @@ dist_s1_enumerator/exceptions.py,sha256=JhT8fIEmW3O2OvUQADkEJkL8ZrUN5pkKNzCCSt33
|
|
|
6
6
|
dist_s1_enumerator/mgrs_burst_data.py,sha256=jifDFf1NUb-_4i9vYpi3rCUzM_qJCLbXkS-fu42iwA8,7538
|
|
7
7
|
dist_s1_enumerator/param_models.py,sha256=DI2MgSxiPo7HiRKtXX8bxZnQtuoYAmtAcdYYrnhMIho,4614
|
|
8
8
|
dist_s1_enumerator/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
dist_s1_enumerator/rtc_s1_io.py,sha256=
|
|
9
|
+
dist_s1_enumerator/rtc_s1_io.py,sha256=TPlgMdyjRYnGWCt7J1nQ1AY90lAPQoMy2BN0oFMw4gg,5267
|
|
10
10
|
dist_s1_enumerator/tabular_models.py,sha256=OjsTg6fN9Mq-LHVKuz9klFD3DsG0WkfPmfDfdZYUJOw,3189
|
|
11
11
|
dist_s1_enumerator/data/jpl_burst_geo.parquet,sha256=maST6onCUlYVaQozf-zl47VMTQ7nflLros8kLQG8ZDE,24736554
|
|
12
12
|
dist_s1_enumerator/data/mgrs.parquet,sha256=P2jY4l2dztz_wdzZATBwgooa5mIZSC8TgJbHUjR5m0c,601482
|
|
13
13
|
dist_s1_enumerator/data/mgrs_burst_lookup_table.parquet,sha256=RjrgwRKn2Ac2q4_8mk9DpkX5FXPYPBReiNbqT0iFp5A,3364657
|
|
14
|
-
dist_s1_enumerator-1.0.
|
|
15
|
-
dist_s1_enumerator-1.0.
|
|
16
|
-
dist_s1_enumerator-1.0.
|
|
17
|
-
dist_s1_enumerator-1.0.
|
|
18
|
-
dist_s1_enumerator-1.0.
|
|
14
|
+
dist_s1_enumerator-1.0.2.dist-info/licenses/LICENSE,sha256=qsoT0jnoSQSgSzA-sywESwmVxC3XcugfW-3vctvz2aM,11346
|
|
15
|
+
dist_s1_enumerator-1.0.2.dist-info/METADATA,sha256=cYY6m8NRWNyePHJaKZ0xX45gn1IDUBj4HSp5XGAhvQQ,8794
|
|
16
|
+
dist_s1_enumerator-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
17
|
+
dist_s1_enumerator-1.0.2.dist-info/top_level.txt,sha256=5-RGu6oxsKKyhybZZSuUImALhcQT8ZOAnVv2MmrESDE,19
|
|
18
|
+
dist_s1_enumerator-1.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|