cloudnet-api-client 0.12.6__tar.gz → 0.12.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/dataportal.env +1 -1
  2. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/CHANGELOG.md +5 -0
  3. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/PKG-INFO +1 -1
  4. cloudnet_api_client-0.12.7/cloudnet_api_client/dl.py +166 -0
  5. cloudnet_api_client-0.12.7/cloudnet_api_client/version.py +1 -0
  6. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/test_client.py +2 -2
  7. cloudnet_api_client-0.12.6/cloudnet_api_client/dl.py +0 -108
  8. cloudnet_api_client-0.12.6/cloudnet_api_client/version.py +0 -1
  9. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/db.env +0 -0
  10. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/docker-compose.yml +0 -0
  11. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/initdb.d/init-dbs.sh +0 -0
  12. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/ss.env +0 -0
  13. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/workflows/publish.yml +0 -0
  14. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.github/workflows/test.yml +0 -0
  15. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.gitignore +0 -0
  16. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/.pre-commit-config.yaml +0 -0
  17. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/LICENSE +0 -0
  18. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/README.md +0 -0
  19. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/__init__.py +0 -0
  20. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/client.py +0 -0
  21. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/containers.py +0 -0
  22. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/py.typed +0 -0
  23. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/cloudnet_api_client/utils.py +0 -0
  24. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/pyproject.toml +0 -0
  25. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20140205_hyytiala_classification.nc +0 -0
  26. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250801_Magurele_CHM170137_000.nc +0 -0
  27. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250803_JOYCE_WST_01m.dat +0 -0
  28. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250808_Granada_CHM170119_0045_000.nc +0 -0
  29. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250808_hyytiala_iwc-Z-T-method.nc +0 -0
  30. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250814_bucharest_classification.nc +0 -0
  31. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250821_limassol_parsivel_41582c49.nc +0 -0
  32. {cloudnet_api_client-0.12.6 → cloudnet_api_client-0.12.7}/tests/data/20250822_leipzig-lim_ecmwf-open.nc +0 -0
@@ -24,6 +24,6 @@ DATACITE_API_TIMEOUT_MS=2000
24
24
  DATACITE_DOI_SERVER=http://handle.datacite.test
25
25
  DATACITE_DOI_PREFIX=XXX
26
26
  LABELLING_URL=http://localhost:5803
27
- HANDLE_API_URL=http://localhost:5804
27
+ INSTRUMENTDB_URL=http://localhost:5805
28
28
  DVAS_URL=https://dvas.test
29
29
  DC_URL=https://dc.test
@@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## 0.12.7 – 2025-12-18
9
+
10
+ - Adjust progress bars
11
+ - Write downloads to a temporary `.part` file and rename it once complete
12
+
8
13
  ## 0.12.6 – 2025-09-17
9
14
 
10
15
  - Allow other iterables besides `list` as argument
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cloudnet-api-client
3
- Version: 0.12.6
3
+ Version: 0.12.7
4
4
  Summary: Cloudnet API client
5
5
  Author-email: Simo Tukiainen <simo.tukiainen@fmi.fi>
6
6
  License-File: LICENSE
@@ -0,0 +1,166 @@
1
+ import asyncio
2
+ import logging
3
+ from collections.abc import Iterable
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+
7
+ import aiohttp
8
+ from tqdm import tqdm
9
+
10
+ from cloudnet_api_client import utils
11
+ from cloudnet_api_client.containers import Metadata, ProductMetadata
12
+
13
+
14
+ class BarConfig:
15
+ def __init__(
16
+ self, disable: bool | None, max_workers: int, total_bytes: int
17
+ ) -> None:
18
+ self.disable = disable
19
+ self.position_queue = self._init_position_queue(max_workers)
20
+ self.total_amount = tqdm(
21
+ total=total_bytes,
22
+ desc="Progress",
23
+ unit="iB",
24
+ unit_scale=True,
25
+ unit_divisor=1024,
26
+ disable=self.disable,
27
+ position=0,
28
+ leave=False,
29
+ colour="green",
30
+ )
31
+ self.lock = asyncio.Lock()
32
+
33
+ def _init_position_queue(self, max_workers: int) -> asyncio.Queue:
34
+ queue: asyncio.Queue = asyncio.Queue()
35
+ for i in range(1, max_workers + 1):
36
+ queue.put_nowait(i)
37
+ return queue
38
+
39
+
40
@dataclass
class DlParams:
    """Everything one download task needs, bundled into a single argument."""

    # Fully resolved URL that is requested with ``session.get``.
    url: str
    # Final path of the downloaded file.
    destination: Path
    # HTTP session shared by all download tasks of the batch.
    session: aiohttp.ClientSession
    # Held for the duration of each HTTP request to cap concurrency.
    semaphore: asyncio.Semaphore
    # Shared progress-bar state (aggregate bar and free bar positions).
    bar_config: BarConfig
    # Progress-disable flag as given by the caller. NOTE(review): the
    # download helpers in this module read ``bar_config.disable`` instead,
    # so this field looks redundant - confirm before removing.
    disable: bool | None
48
+
49
+
50
async def download_files(
    base_url: str,
    metadata: Iterable[Metadata],
    output_path: Path,
    concurrency_limit: int,
    disable_progress: bool | None,
    validate_checksum: bool = False,
) -> list[Path]:
    """Download the files described by ``metadata`` into ``output_path``.

    A file that already exists at its destination is skipped when it matches
    the metadata: by checksum if ``validate_checksum`` is true, otherwise by
    size and name.

    Args:
        base_url: Prefix prepended to the part of each ``download_url`` that
            follows ``/api/``.
        metadata: Metadata entries of the files to fetch.
        output_path: Directory that receives the files.
        concurrency_limit: Size of the request semaphore and number of
            per-file progress bars.
        disable_progress: Passed to tqdm as ``disable``. When exactly
            ``True``, a plain "Downloading N files... done." line is printed
            instead.
        validate_checksum: Compare checksums rather than size and name when
            deciding whether an existing file can be reused.

    Returns:
        Destination paths for every metadata entry, skipped files included,
        in input order.

    Raises:
        Whatever the first failing download raises, after all remaining
        downloads have been cancelled.
    """
    file_exists = _checksum_matches if validate_checksum else _size_and_name_matches
    full_paths: list[Path] = []
    # (url, destination, size) of the files that really need downloading.
    pending: list[tuple[str, Path, int]] = []
    for meta in metadata:
        destination = output_path / meta.download_url.split("/")[-1]
        full_paths.append(destination)
        if destination.exists() and file_exists(meta, destination):
            logging.debug(f"Already downloaded: {destination}")
            continue
        download_url = f"{base_url}{meta.download_url.split('/api/')[-1]}"
        pending.append((download_url, destination, meta.size))

    # Only count bytes that will be transferred; counting skipped files
    # would leave the aggregate bar permanently short of 100 %.
    total_bytes = sum(size for _, _, size in pending)
    semaphore = asyncio.Semaphore(concurrency_limit)
    bar_config = BarConfig(disable_progress, concurrency_limit, total_bytes)
    try:
        async with aiohttp.ClientSession() as session:
            tasks = [
                asyncio.create_task(
                    _download_file_with_retries(
                        DlParams(
                            url=download_url,
                            destination=destination,
                            session=session,
                            semaphore=semaphore,
                            bar_config=bar_config,
                            disable=disable_progress,
                        )
                    )
                )
                for download_url, destination, _ in pending
            ]
            if disable_progress is True:
                print(f"Downloading {len(tasks)} files...", end="", flush=True)
            try:
                await asyncio.gather(*tasks)
            finally:
                # If one download failed (or we were cancelled), stop the
                # others before the session closes underneath them, and
                # collect their outcomes so no exception goes unretrieved.
                for task in tasks:
                    task.cancel()  # no-op for tasks that already finished
                await asyncio.gather(*tasks, return_exceptions=True)
    finally:
        # Always release the aggregate bar, even when a download failed.
        bar_config.total_amount.close()
        bar_config.total_amount.clear()
    if disable_progress is True:
        print(" done.", flush=True)
    return full_paths
91
+
92
+
93
async def _download_file_with_retries(
    params: DlParams,
    max_retries: int = 3,
) -> None:
    """Attempt to download a file, retrying up to max_retries times if needed.

    A progress-bar position is taken from ``params.bar_config.position_queue``
    for the whole call (including back-off sleeps) and always returned.

    Args:
        params: Download parameters for a single file.
        max_retries: Maximum number of attempts; must be at least 1.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        aiohttp.ClientError: If the final attempt fails with a client error.
        asyncio.TimeoutError: If the final attempt times out.
    """
    if max_retries < 1:
        # Without this guard the loop below would never run and the call
        # would silently do nothing useful.
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")
    position = await params.bar_config.position_queue.get()
    try:
        for attempt in range(1, max_retries + 1):
            try:
                await _download_file(params, position)
                return
            # aiohttp's total timeout surfaces as asyncio.TimeoutError, which
            # is not an aiohttp.ClientError, so it is listed explicitly.
            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
                logging.warning(f"Attempt {attempt} failed for {params.url}: {e}")
                if attempt == max_retries:
                    logging.error(
                        f"Giving up on {params.url} after {max_retries} attempts."
                    )
                    raise
                # Exponential backoff before retrying
                await asyncio.sleep(2**attempt)
    finally:
        # Hand the bar position back so a waiting download can use it.
        params.bar_config.position_queue.put_nowait(position)
117
+
118
+
119
async def _download_file(
    params: DlParams,
    position: int,
) -> None:
    """Stream one file to ``<destination>.part`` and rename it when complete.

    The request runs while holding ``params.semaphore``. Every chunk advances
    both the per-file bar and the shared aggregate bar. If anything goes
    wrong, including cancellation, the partial file is removed and the bytes
    of this attempt are subtracted from the aggregate bar again, so a retry
    does not count them twice.

    Args:
        params: Download parameters for a single file.
        position: tqdm position used for the per-file bar.

    Raises:
        aiohttp.ClientResponseError: Raised by ``raise_for_status`` for an
            error response.
        Any other exception raised while reading the body or writing the
        file is re-raised after clean-up.
    """
    tmp_path = params.destination.with_suffix(f"{params.destination.suffix}.part")
    total_bar = params.bar_config.total_amount
    async with params.semaphore, params.session.get(params.url) as response:
        response.raise_for_status()
        bar = tqdm(
            desc=params.destination.name,
            total=response.content_length,
            unit="iB",
            unit_scale=True,
            unit_divisor=1024,
            disable=params.bar_config.disable,
            position=position,
            leave=False,
            colour="cyan",
        )
        written = 0  # bytes this attempt has added to the aggregate bar
        try:
            tmp_path.parent.mkdir(parents=True, exist_ok=True)
            with tmp_path.open("wb") as f:
                async for chunk in response.content.iter_chunked(8192):
                    f.write(chunk)
                    bar.update(len(chunk))
                    total_bar.update(len(chunk))
                    written += len(chunk)
            # Rename only after the whole body is on disk, so the final
            # path never holds a truncated file.
            tmp_path.replace(params.destination)
        except BaseException:
            # BaseException so that cancellation and Ctrl-C clean up as well.
            total_bar.update(-written)
            try:
                tmp_path.unlink(missing_ok=True)
            except OSError:
                # Best effort: the original error matters more than clean-up.
                pass
            raise
        finally:
            bar.close()
            bar.clear()
155
+
156
+
157
def _checksum_matches(meta: Metadata, destination: Path) -> bool:
    """Return whether the file's checksum equals ``meta.checksum``.

    ``ProductMetadata`` entries are hashed with ``utils.sha256sum``; every
    other metadata type is hashed with ``utils.md5sum``.
    """
    if isinstance(meta, ProductMetadata):
        digest = utils.sha256sum(destination)
    else:
        digest = utils.md5sum(destination)
    return digest == meta.checksum
160
+
161
+
162
def _size_and_name_matches(meta: Metadata, destination: Path) -> bool:
    """Return whether ``destination`` has the size and file name of ``meta``.

    The expected name is the last ``/``-separated component of
    ``meta.download_url``. The file must exist; ``stat`` raises otherwise.
    """
    if destination.stat().st_size != meta.size:
        return False
    expected_name = meta.download_url.split("/")[-1]
    return destination.name == expected_name
@@ -0,0 +1 @@
1
+ __version__ = "0.12.7"
@@ -382,7 +382,7 @@ class TestRawFiles:
382
382
 
383
383
  def test_filter_by_filename_suffix(self, client: APIClient):
384
384
  meta = client.raw_files(filename_suffix="000.nc")
385
- assert len(meta) == 2
385
+ assert len(meta) == 3
386
386
 
387
387
  def test_filter_by_instrument_id(self, client: APIClient):
388
388
  meta = client.raw_files(instrument_id="weather-station")
@@ -487,7 +487,7 @@ class TestFilterCombinations:
487
487
  assert len(meta) == 0
488
488
 
489
489
  def test_partial_filename_matches(self, client: APIClient):
490
- meta = client.raw_files(filename_prefix="2025", filename_suffix=".nc")
490
+ meta = client.raw_files(filename_prefix="202508", filename_suffix=".nc")
491
491
  assert len(meta) == 2
492
492
 
493
493
 
@@ -1,108 +0,0 @@
1
- import asyncio
2
- import logging
3
- from collections.abc import Iterable
4
- from pathlib import Path
5
-
6
- import aiohttp
7
- from tqdm import tqdm
8
- from tqdm.asyncio import tqdm_asyncio
9
-
10
- from cloudnet_api_client import utils
11
- from cloudnet_api_client.containers import Metadata, ProductMetadata
12
-
13
-
14
- async def download_files(
15
- base_url: str,
16
- metadata: Iterable[Metadata],
17
- output_path: Path,
18
- concurrency_limit: int,
19
- disable_progress: bool | None,
20
- validate_checksum: bool = False,
21
- ) -> list[Path]:
22
- file_exists = _checksum_matches if validate_checksum else _size_and_name_matches
23
- semaphore = asyncio.Semaphore(concurrency_limit)
24
- full_paths = []
25
- async with aiohttp.ClientSession() as session:
26
- tasks = []
27
- for meta in metadata:
28
- download_url = f"{base_url}{meta.download_url.split('/api/')[-1]}"
29
- destination = output_path / meta.download_url.split("/")[-1]
30
- full_paths.append(destination)
31
- if destination.exists() and file_exists(meta, destination):
32
- logging.debug(f"Already downloaded: {destination}")
33
- continue
34
- task = asyncio.create_task(
35
- _download_file_with_retries(
36
- session, download_url, destination, semaphore, disable_progress
37
- )
38
- )
39
- tasks.append(task)
40
- await tqdm_asyncio.gather(
41
- *tasks, desc="Completed files", disable=disable_progress
42
- )
43
- return full_paths
44
-
45
-
46
- async def _download_file_with_retries(
47
- session: aiohttp.ClientSession,
48
- url: str,
49
- destination: Path,
50
- semaphore: asyncio.Semaphore,
51
- disable_progress: bool | None,
52
- max_retries: int = 3,
53
- ) -> None:
54
- """Attempt to download a file, retrying up to max_retries times if needed."""
55
- for attempt in range(1, max_retries + 1):
56
- try:
57
- await _download_file(session, url, destination, semaphore, disable_progress)
58
- return
59
- except aiohttp.ClientError as e:
60
- logging.warning(f"Attempt {attempt} failed for {url}: {e}")
61
- if attempt == max_retries:
62
- logging.error(f"Giving up on {url} after {max_retries} attempts.")
63
- raise e
64
- else:
65
- # Exponential backoff before retrying
66
- await asyncio.sleep(2**attempt)
67
-
68
-
69
- async def _download_file(
70
- session: aiohttp.ClientSession,
71
- url: str,
72
- destination: Path,
73
- semaphore: asyncio.Semaphore,
74
- disable_progress: bool | None,
75
- ) -> None:
76
- async with semaphore:
77
- async with session.get(url) as response:
78
- response.raise_for_status()
79
- with (
80
- destination.open("wb") as file_out,
81
- tqdm(
82
- desc=destination.name,
83
- total=response.content_length,
84
- unit="iB",
85
- unit_scale=True,
86
- unit_divisor=1024,
87
- disable=disable_progress,
88
- ) as bar,
89
- ):
90
- while True:
91
- chunk = await response.content.read(8192)
92
- if not chunk:
93
- break
94
- file_out.write(chunk)
95
- bar.update(len(chunk))
96
- logging.debug(f"Downloaded: {destination}")
97
-
98
-
99
- def _checksum_matches(meta: Metadata, destination: Path) -> bool:
100
- fun = utils.sha256sum if isinstance(meta, ProductMetadata) else utils.md5sum
101
- return fun(destination) == meta.checksum
102
-
103
-
104
- def _size_and_name_matches(meta: Metadata, destination: Path) -> bool:
105
- return (
106
- destination.stat().st_size == meta.size
107
- and destination.name == meta.download_url.split("/")[-1]
108
- )
@@ -1 +0,0 @@
1
- __version__ = "0.12.6"