cloudnet-api-client 0.12.6__tar.gz → 0.12.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/dataportal.env +1 -1
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/CHANGELOG.md +5 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/PKG-INFO +1 -1
- cloudnet_api_client-0.12.7/cloudnet_api_client/dl.py +166 -0
- cloudnet_api_client-0.12.7/cloudnet_api_client/version.py +1 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/test_client.py +2 -2
- cloudnet_api_client-0.12.6/cloudnet_api_client/dl.py +0 -108
- cloudnet_api_client-0.12.6/cloudnet_api_client/version.py +0 -1
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/db.env +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/docker-compose.yml +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/initdb.d/init-dbs.sh +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/ss.env +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/workflows/publish.yml +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/workflows/test.yml +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.gitignore +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.pre-commit-config.yaml +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/LICENSE +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/README.md +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/__init__.py +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/client.py +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/containers.py +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/py.typed +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/utils.py +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/pyproject.toml +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20140205_hyytiala_classification.nc +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250801_Magurele_CHM170137_000.nc +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250803_JOYCE_WST_01m.dat +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250808_Granada_CHM170119_0045_000.nc +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250808_hyytiala_iwc-Z-T-method.nc +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250814_bucharest_classification.nc +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250821_limassol_parsivel_41582c49.nc +0 -0
- {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250822_leipzig-lim_ecmwf-open.nc +0 -0
|
@@ -24,6 +24,6 @@ DATACITE_API_TIMEOUT_MS=2000
|
|
|
24
24
|
DATACITE_DOI_SERVER=http://handle.datacite.test
|
|
25
25
|
DATACITE_DOI_PREFIX=XXX
|
|
26
26
|
LABELLING_URL=http://localhost:5803
|
|
27
|
-
|
|
27
|
+
INSTRUMENTDB_URL=http://localhost:5805
|
|
28
28
|
DVAS_URL=https://dvas.test
|
|
29
29
|
DC_URL=https://dc.test
|
|
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## 0.12.7 – 2025-12-18
|
|
9
|
+
|
|
10
|
+
- Adjust progress bars
|
|
11
|
+
- Write partial file
|
|
12
|
+
|
|
8
13
|
## 0.12.6 – 2025-09-17
|
|
9
14
|
|
|
10
15
|
- Allow other iterables besides `list` as argument
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import aiohttp
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from cloudnet_api_client import utils
|
|
11
|
+
from cloudnet_api_client.containers import Metadata, ProductMetadata
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BarConfig:
    """Progress-bar state shared by all downloads of one batch.

    Attributes:
        disable: Value forwarded to ``tqdm(disable=...)``.
        position_queue: Queue pre-filled with the integers
            ``1..max_workers``; row 0 is used by the overall bar.
        total_amount: Overall byte-count bar, created at position 0.
        lock: An ``asyncio.Lock`` created together with the bars.
    """

    def __init__(
        self, disable: bool | None, max_workers: int, total_bytes: int
    ) -> None:
        self.disable = disable
        self.position_queue = self._init_position_queue(max_workers)
        overall_bar_options = {
            "total": total_bytes,
            "desc": "Progress",
            "unit": "iB",
            "unit_scale": True,
            "unit_divisor": 1024,
            "disable": self.disable,
            "position": 0,
            "leave": False,
            "colour": "green",
        }
        self.total_amount = tqdm(**overall_bar_options)
        self.lock = asyncio.Lock()

    def _init_position_queue(self, max_workers: int) -> asyncio.Queue:
        """Return a queue holding the positions 1 through ``max_workers``."""
        slots: asyncio.Queue = asyncio.Queue()
        for offset in range(max_workers):
            slots.put_nowait(offset + 1)
        return slots
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class DlParams:
    """Bundle of everything a single file download needs."""

    # Address that is requested through ``session``.
    url: str
    # Final location of the downloaded file.
    destination: Path
    # HTTP session shared by all downloads of the batch.
    session: aiohttp.ClientSession
    # Semaphore held while a transfer is in progress.
    semaphore: asyncio.Semaphore
    # Shared progress-bar state (overall bar and free bar positions).
    bar_config: BarConfig
    # Progress-bar disable flag as given to ``download_files``.
    disable: bool | None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def download_files(
    base_url: str,
    metadata: Iterable[Metadata],
    output_path: Path,
    concurrency_limit: int,
    disable_progress: bool | None,
    validate_checksum: bool = False,
) -> list[Path]:
    """Download the files described by ``metadata`` into ``output_path``.

    Files that already exist locally and pass the existence check are not
    downloaded again. The overall progress bar and the plain-text status
    message only account for files that actually need to be fetched.

    Args:
        base_url: Prefix joined with the part of each ``download_url`` that
            follows ``/api/``.
        metadata: Records providing ``download_url`` and ``size`` (and
            ``checksum`` when ``validate_checksum`` is set).
        output_path: Directory that receives the files.
        concurrency_limit: Size of the download semaphore and number of
            per-file progress-bar positions.
        disable_progress: Forwarded to ``tqdm(disable=...)``. When exactly
            ``True``, a one-line text status is printed instead.
        validate_checksum: Compare checksums instead of size and name when
            deciding whether an existing file can be reused.

    Returns:
        The local path of every record, in input order, including files
        that were already present.

    Raises:
        Exception: The first error raised by any download. Remaining
            downloads are cancelled before it propagates.
    """
    file_exists = _checksum_matches if validate_checksum else _size_and_name_matches
    full_paths: list[Path] = []
    pending: list[tuple[Metadata, str, Path]] = []
    for meta in metadata:
        download_url = f"{base_url}{meta.download_url.split('/api/')[-1]}"
        destination = output_path / meta.download_url.split("/")[-1]
        full_paths.append(destination)
        if destination.exists() and file_exists(meta, destination):
            logging.debug(f"Already downloaded: {destination}")
            continue
        pending.append((meta, download_url, destination))

    semaphore = asyncio.Semaphore(concurrency_limit)
    # Count only bytes that will really be transferred; otherwise the
    # overall bar can never reach its total when some files are skipped.
    total_bytes = sum(meta.size for meta, _, _ in pending)
    bar_config = BarConfig(disable_progress, concurrency_limit, total_bytes)
    try:
        async with aiohttp.ClientSession() as session:
            tasks = [
                asyncio.create_task(
                    _download_file_with_retries(
                        DlParams(
                            url=download_url,
                            destination=destination,
                            session=session,
                            semaphore=semaphore,
                            bar_config=bar_config,
                            disable=disable_progress,
                        )
                    )
                )
                for _, download_url, destination in pending
            ]
            if disable_progress is True:
                print(f"Downloading {len(tasks)} files...", end="", flush=True)
            try:
                await asyncio.gather(*tasks)
            except BaseException:
                # Do not leave siblings running against a session that is
                # about to be closed; stop them and wait until they finish.
                for task in tasks:
                    task.cancel()
                await asyncio.gather(*tasks, return_exceptions=True)
                raise
    finally:
        # Close the overall bar on failure too. With ``leave=False`` this
        # also removes it from the terminal.
        bar_config.total_amount.close()
    if disable_progress is True:
        print(" done.", flush=True)
    return full_paths
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def _download_file_with_retries(
|
|
94
|
+
params: DlParams,
|
|
95
|
+
max_retries: int = 3,
|
|
96
|
+
) -> None:
|
|
97
|
+
"""Attempt to download a file, retrying up to max_retries times if needed."""
|
|
98
|
+
position = await params.bar_config.position_queue.get()
|
|
99
|
+
try:
|
|
100
|
+
for attempt in range(1, max_retries + 1):
|
|
101
|
+
try:
|
|
102
|
+
await _download_file(params, position)
|
|
103
|
+
return
|
|
104
|
+
except aiohttp.ClientError as e:
|
|
105
|
+
logging.warning(f"Attempt {attempt} failed for {params.url}: {e}")
|
|
106
|
+
if attempt == max_retries:
|
|
107
|
+
logging.error(
|
|
108
|
+
f"Giving up on {params.url} after {max_retries} attempts."
|
|
109
|
+
)
|
|
110
|
+
raise e
|
|
111
|
+
else:
|
|
112
|
+
# Exponential backoff before retrying
|
|
113
|
+
await asyncio.sleep(2**attempt)
|
|
114
|
+
finally:
|
|
115
|
+
params.bar_config.position_queue.put_nowait(position)
|
|
116
|
+
raise RuntimeError("Unreachable code reached.")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
async def _download_file(
    params: DlParams,
    position: int,
) -> None:
    """Stream ``params.url`` into ``params.destination`` via a ``.part`` file.

    The body is written to a sibling file with ``.part`` appended to the
    name and moved over the destination only after the transfer completed,
    so the destination never holds a truncated download.

    Args:
        params: Download parameters for the file.
        position: Row used for this file's progress bar.

    Raises:
        aiohttp.ClientError: From the request, ``raise_for_status`` or the
            body stream.
        OSError: If the partial file cannot be written or moved.
    """
    tmp_path = params.destination.with_suffix(f"{params.destination.suffix}.part")
    overall_bar = params.bar_config.total_amount
    async with params.semaphore, params.session.get(params.url) as response:
        response.raise_for_status()
        bar = tqdm(
            desc=params.destination.name,
            total=response.content_length,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
            disable=params.bar_config.disable,
            position=position,
            leave=False,
            colour="cyan",
        )
        received = 0
        try:
            tmp_path.parent.mkdir(parents=True, exist_ok=True)
            with tmp_path.open("wb") as f:
                async for chunk in response.content.iter_chunked(8192):
                    f.write(chunk)
                    received += len(chunk)
                    bar.update(len(chunk))
                    overall_bar.update(len(chunk))
            tmp_path.replace(params.destination)
        except BaseException:
            # BaseException rather than Exception: task cancellation raises
            # asyncio.CancelledError, which must clean up the partial file too.
            # Take this attempt's bytes back out of the overall bar so that
            # a retry does not count them twice.
            overall_bar.update(-received)
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; the original error is more useful.
                pass
            raise
        finally:
            # With ``leave=False`` closing also removes the bar from screen.
            bar.close()
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _checksum_matches(meta: Metadata, destination: Path) -> bool:
    """Return whether the file's checksum equals ``meta.checksum``.

    ``ProductMetadata`` records are hashed with ``utils.sha256sum``, all
    other records with ``utils.md5sum``.
    """
    if isinstance(meta, ProductMetadata):
        digest = utils.sha256sum(destination)
    else:
        digest = utils.md5sum(destination)
    return digest == meta.checksum
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _size_and_name_matches(meta: Metadata, destination: Path) -> bool:
|
|
163
|
+
return (
|
|
164
|
+
destination.stat().st_size == meta.size
|
|
165
|
+
and destination.name == meta.download_url.split("/")[-1]
|
|
166
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of the cloudnet-api-client package.
__version__ = "0.12.7"
|
|
@@ -382,7 +382,7 @@ class TestRawFiles:
|
|
|
382
382
|
|
|
383
383
|
def test_filter_by_filename_suffix(self, client: APIClient):
    """Filtering raw files by the suffix ``000.nc`` yields three records."""
    matching_files = client.raw_files(filename_suffix="000.nc")
    assert len(matching_files) == 3
|
|
386
386
|
|
|
387
387
|
def test_filter_by_instrument_id(self, client: APIClient):
|
|
388
388
|
meta = client.raw_files(instrument_id="weather-station")
|
|
@@ -487,7 +487,7 @@ class TestFilterCombinations:
|
|
|
487
487
|
assert len(meta) == 0
|
|
488
488
|
|
|
489
489
|
def test_partial_filename_matches(self, client: APIClient):
    """Combining a name prefix and suffix filter yields two records."""
    name_filters = {"filename_prefix": "202508", "filename_suffix": ".nc"}
    matching_files = client.raw_files(**name_filters)
    assert len(matching_files) == 2
|
|
492
492
|
|
|
493
493
|
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import logging
|
|
3
|
-
from collections.abc import Iterable
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
import aiohttp
|
|
7
|
-
from tqdm import tqdm
|
|
8
|
-
from tqdm.asyncio import tqdm_asyncio
|
|
9
|
-
|
|
10
|
-
from cloudnet_api_client import utils
|
|
11
|
-
from cloudnet_api_client.containers import Metadata, ProductMetadata
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
async def download_files(
    base_url: str,
    metadata: Iterable[Metadata],
    output_path: Path,
    concurrency_limit: int,
    disable_progress: bool | None,
    validate_checksum: bool = False,
) -> list[Path]:
    """Download the files described by ``metadata`` into ``output_path``.

    Files that already exist locally and pass the existence check are not
    downloaded again.

    Args:
        base_url: Prefix joined with the part of each ``download_url`` that
            follows ``/api/``.
        metadata: Records providing ``download_url`` and ``size`` (and
            ``checksum`` when ``validate_checksum`` is set).
        output_path: Directory that receives the files.
        concurrency_limit: Size of the download semaphore.
        disable_progress: Forwarded to the tqdm progress bars.
        validate_checksum: Compare checksums instead of size and name when
            deciding whether an existing file can be reused.

    Returns:
        The local path of every record, in input order, including files
        that were already present.

    Raises:
        Exception: The first error raised by any download. Remaining
            downloads are cancelled before it propagates.
    """
    file_exists = _checksum_matches if validate_checksum else _size_and_name_matches
    semaphore = asyncio.Semaphore(concurrency_limit)
    full_paths = []
    async with aiohttp.ClientSession() as session:
        tasks = []
        for meta in metadata:
            download_url = f"{base_url}{meta.download_url.split('/api/')[-1]}"
            destination = output_path / meta.download_url.split("/")[-1]
            full_paths.append(destination)
            if destination.exists() and file_exists(meta, destination):
                logging.debug(f"Already downloaded: {destination}")
                continue
            task = asyncio.create_task(
                _download_file_with_retries(
                    session, download_url, destination, semaphore, disable_progress
                )
            )
            tasks.append(task)
        try:
            await tqdm_asyncio.gather(
                *tasks, desc="Completed files", disable=disable_progress
            )
        except BaseException:
            # Do not leave siblings running against a session that is about
            # to be closed; stop them and wait until they have finished.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
            raise
    return full_paths
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
async def _download_file_with_retries(
|
|
47
|
-
session: aiohttp.ClientSession,
|
|
48
|
-
url: str,
|
|
49
|
-
destination: Path,
|
|
50
|
-
semaphore: asyncio.Semaphore,
|
|
51
|
-
disable_progress: bool | None,
|
|
52
|
-
max_retries: int = 3,
|
|
53
|
-
) -> None:
|
|
54
|
-
"""Attempt to download a file, retrying up to max_retries times if needed."""
|
|
55
|
-
for attempt in range(1, max_retries + 1):
|
|
56
|
-
try:
|
|
57
|
-
await _download_file(session, url, destination, semaphore, disable_progress)
|
|
58
|
-
return
|
|
59
|
-
except aiohttp.ClientError as e:
|
|
60
|
-
logging.warning(f"Attempt {attempt} failed for {url}: {e}")
|
|
61
|
-
if attempt == max_retries:
|
|
62
|
-
logging.error(f"Giving up on {url} after {max_retries} attempts.")
|
|
63
|
-
raise e
|
|
64
|
-
else:
|
|
65
|
-
# Exponential backoff before retrying
|
|
66
|
-
await asyncio.sleep(2**attempt)
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
async def _download_file(
    session: aiohttp.ClientSession,
    url: str,
    destination: Path,
    semaphore: asyncio.Semaphore,
    disable_progress: bool | None,
) -> None:
    """Stream ``url`` into ``destination`` via a ``.part`` file.

    The body is written to a sibling file with ``.part`` appended to the
    name and moved over the destination only after the transfer completed,
    so a failed download never leaves a truncated file at ``destination``.

    Args:
        session: HTTP session used for the request.
        url: Address of the file.
        destination: Local path the file is written to.
        semaphore: Semaphore held while the transfer is in progress.
        disable_progress: Forwarded to the tqdm progress bar.

    Raises:
        aiohttp.ClientError: From the request, ``raise_for_status`` or the
            body stream.
        OSError: If the partial file cannot be written or moved.
    """
    tmp_path = destination.with_suffix(f"{destination.suffix}.part")
    async with semaphore, session.get(url) as response:
        response.raise_for_status()
        try:
            with (
                tmp_path.open("wb") as file_out,
                tqdm(
                    desc=destination.name,
                    total=response.content_length,
                    unit="iB",
                    unit_scale=True,
                    unit_divisor=1024,
                    disable=disable_progress,
                ) as bar,
            ):
                while chunk := await response.content.read(8192):
                    file_out.write(chunk)
                    bar.update(len(chunk))
            tmp_path.replace(destination)
        except BaseException:
            # BaseException so that task cancellation cleans up as well.
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                # Best-effort cleanup; the original error is more useful.
                pass
            raise
    logging.debug(f"Downloaded: {destination}")
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def _checksum_matches(meta: Metadata, destination: Path) -> bool:
    """Return whether the file's checksum equals ``meta.checksum``.

    ``ProductMetadata`` records are hashed with ``utils.sha256sum``, all
    other records with ``utils.md5sum``.
    """
    if isinstance(meta, ProductMetadata):
        digest = utils.sha256sum(destination)
    else:
        digest = utils.md5sum(destination)
    return digest == meta.checksum
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def _size_and_name_matches(meta: Metadata, destination: Path) -> bool:
|
|
105
|
-
return (
|
|
106
|
-
destination.stat().st_size == meta.size
|
|
107
|
-
and destination.name == meta.download_url.split("/")[-1]
|
|
108
|
-
)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
# Version string of the cloudnet-api-client package.
__version__ = "0.12.6"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250803_JOYCE_WST_01m.dat
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|