cloudnet-api-client 0.2.1__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/CHANGELOG.md +10 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/PKG-INFO +22 -19
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/README.md +21 -18
- cloudnet_api_client-0.4.0/cloudnet_api_client/__init__.py +1 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/cloudnet_api_client/client.py +52 -9
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/cloudnet_api_client/containers.py +2 -1
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/cloudnet_api_client/dl.py +5 -32
- cloudnet_api_client-0.4.0/cloudnet_api_client/version.py +1 -0
- cloudnet_api_client-0.2.1/cloudnet_api_client/__init__.py +0 -3
- cloudnet_api_client-0.2.1/cloudnet_api_client/version.py +0 -1
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/.github/workflows/publish.yml +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/.github/workflows/test.yml +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/.gitignore +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/.pre-commit-config.yaml +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/LICENSE +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/cloudnet_api_client/py.typed +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/cloudnet_api_client/utils.py +0 -0
- {cloudnet_api_client-0.2.1 → cloudnet_api_client-0.4.0}/pyproject.toml +0 -0
|
@@ -5,6 +5,16 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## 0.4.0 – 2025-04-03
|
|
9
|
+
|
|
10
|
+
- Move download function to APIClient class
|
|
11
|
+
- Add status parameter
|
|
12
|
+
|
|
13
|
+
## 0.3.0 – 2025-04-02
|
|
14
|
+
|
|
15
|
+
- Move `filename_prefix` and `filename_suffix` parameters
|
|
16
|
+
- Fix query logic
|
|
17
|
+
|
|
8
18
|
## 0.2.1 – 2025-04-02
|
|
9
19
|
|
|
10
20
|
- Return full paths of downloaded files
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cloudnet-api-client
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Cloudnet API client
|
|
5
5
|
Author-email: Simo Tukiainen <simo.tukiainen@fmi.fi>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -49,10 +49,10 @@ sites = client.sites(type="cloudnet")
|
|
|
49
49
|
products = client.products()
|
|
50
50
|
|
|
51
51
|
metadata = client.metadata("hyytiala", "2021-01-01", product=["mwr", "radar"])
|
|
52
|
-
|
|
52
|
+
client.download(metadata, "data/")
|
|
53
53
|
|
|
54
54
|
raw_metadata = client.raw_metadata("granada", date="2024-01", instrument_id="parsivel")
|
|
55
|
-
|
|
55
|
+
client.download(raw_metadata, "data_raw/")
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
## Documentation
|
|
@@ -63,22 +63,27 @@ Fetch product and raw file metadata from the Cloudnet data portal.
|
|
|
63
63
|
|
|
64
64
|
Parameters:
|
|
65
65
|
|
|
66
|
-
| name
|
|
67
|
-
|
|
|
68
|
-
| site_id
|
|
69
|
-
| date
|
|
70
|
-
| date_from
|
|
71
|
-
| date_to
|
|
72
|
-
| updated_at
|
|
73
|
-
| updated_at_from
|
|
74
|
-
| updated_at_to
|
|
75
|
-
| instrument_id
|
|
76
|
-
| instrument_pid
|
|
77
|
-
| product\*
|
|
78
|
-
| show_legacy\*
|
|
66
|
+
| name | type | default | example |
|
|
67
|
+
| ------------------- | --------------------------- | ------- | ---------------------------------------------------- |
|
|
68
|
+
| site_id | `str` | | "hyytiala" |
|
|
69
|
+
| date | `str` or `date` | `None` | "2024-01-01" |
|
|
70
|
+
| date_from | `str` or `date` | `None` | "2025-01-01" |
|
|
71
|
+
| date_to | `str` or `date` | `None` | "2025-01-01" |
|
|
72
|
+
| updated_at | `str`, `date` or `datetime` | `None` | "2025-01-01T12:00:00" |
|
|
73
|
+
| updated_at_from | `str`, `date` or `datetime` | `None` | "2025-01-01T12:00:00" |
|
|
74
|
+
| updated_at_to | `str`, `date` or `datetime` | `None` | "2025-01-01T12:00:00" |
|
|
75
|
+
| instrument_id | `str` or `list[str]` | `None` | "rpg-fmcw-94" |
|
|
76
|
+
| instrument_pid | `str` or `list[str]` | `None` | "https://hdl.handle.net/21.12132/3.191564170f8a4686" |
|
|
77
|
+
| product\* | `str` or `list[str]` | `None` | "classification" |
|
|
78
|
+
| show_legacy\* | `bool` | `False` | |
|
|
79
|
+
| filename_prefix\*\* | `str` or `list[str]` | `None` | "stare" |
|
|
80
|
+
| filename_suffix\*\* | `str` or `list[str]` | `None` | ".lv1" |
|
|
81
|
+
| status\*\* | `str` or `list[str]` | `None` | "created", "uploaded", "processed" or "invalid" |
|
|
79
82
|
|
|
80
83
|
\* = only in `metadata()`
|
|
81
84
|
|
|
85
|
+
\*\* = only in `raw_metadata()`
|
|
86
|
+
|
|
82
87
|
**Date Handling**
|
|
83
88
|
|
|
84
89
|
The `date`, `date_from` and `date_to` parameters support:
|
|
@@ -111,8 +116,6 @@ Parameters:
|
|
|
111
116
|
| metadata | `list[RawMetadata]` or `list[ProductMetadata]` | |
|
|
112
117
|
| include_pattern | `str` | `None` |
|
|
113
118
|
| exclude_pattern | `str` | `None` |
|
|
114
|
-
| filename_prefix | `str` | `None` |
|
|
115
|
-
| filename_suffix | `str` | `None` |
|
|
116
119
|
| include_tag_subset | `set[str]` | `None` |
|
|
117
120
|
| exclude_tag_subset | `set[str]` | `None` |
|
|
118
121
|
|
|
@@ -140,7 +143,7 @@ Parameters:
|
|
|
140
143
|
|
|
141
144
|
Fetch cloudnet instruments.
|
|
142
145
|
|
|
143
|
-
### `
|
|
146
|
+
### `APIClient().download(list[Metadata])` → `list[Path]`
|
|
144
147
|
|
|
145
148
|
Download files from the fetched metadata.
|
|
146
149
|
|
|
@@ -22,10 +22,10 @@ sites = client.sites(type="cloudnet")
|
|
|
22
22
|
products = client.products()
|
|
23
23
|
|
|
24
24
|
metadata = client.metadata("hyytiala", "2021-01-01", product=["mwr", "radar"])
|
|
25
|
-
|
|
25
|
+
client.download(metadata, "data/")
|
|
26
26
|
|
|
27
27
|
raw_metadata = client.raw_metadata("granada", date="2024-01", instrument_id="parsivel")
|
|
28
|
-
|
|
28
|
+
client.download(raw_metadata, "data_raw/")
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
## Documentation
|
|
@@ -36,22 +36,27 @@ Fetch product and raw file metadata from the Cloudnet data portal.
|
|
|
36
36
|
|
|
37
37
|
Parameters:
|
|
38
38
|
|
|
39
|
-
| name
|
|
40
|
-
|
|
|
41
|
-
| site_id
|
|
42
|
-
| date
|
|
43
|
-
| date_from
|
|
44
|
-
| date_to
|
|
45
|
-
| updated_at
|
|
46
|
-
| updated_at_from
|
|
47
|
-
| updated_at_to
|
|
48
|
-
| instrument_id
|
|
49
|
-
| instrument_pid
|
|
50
|
-
| product\*
|
|
51
|
-
| show_legacy\*
|
|
39
|
+
| name | type | default | example |
|
|
40
|
+
| ------------------- | --------------------------- | ------- | ---------------------------------------------------- |
|
|
41
|
+
| site_id | `str` | | "hyytiala" |
|
|
42
|
+
| date | `str` or `date` | `None` | "2024-01-01" |
|
|
43
|
+
| date_from | `str` or `date` | `None` | "2025-01-01" |
|
|
44
|
+
| date_to | `str` or `date` | `None` | "2025-01-01" |
|
|
45
|
+
| updated_at | `str`, `date` or `datetime` | `None` | "2025-01-01T12:00:00" |
|
|
46
|
+
| updated_at_from | `str`, `date` or `datetime` | `None` | "2025-01-01T12:00:00" |
|
|
47
|
+
| updated_at_to | `str`, `date` or `datetime` | `None` | "2025-01-01T12:00:00" |
|
|
48
|
+
| instrument_id | `str` or `list[str]` | `None` | "rpg-fmcw-94" |
|
|
49
|
+
| instrument_pid | `str` or `list[str]` | `None` | "https://hdl.handle.net/21.12132/3.191564170f8a4686" |
|
|
50
|
+
| product\* | `str` or `list[str]` | `None` | "classification" |
|
|
51
|
+
| show_legacy\* | `bool` | `False` | |
|
|
52
|
+
| filename_prefix\*\* | `str` or `list[str]` | `None` | "stare" |
|
|
53
|
+
| filename_suffix\*\* | `str` or `list[str]` | `None` | ".lv1" |
|
|
54
|
+
| status\*\* | `str` or `list[str]` | `None` | "created", "uploaded", "processed" or "invalid" |
|
|
52
55
|
|
|
53
56
|
\* = only in `metadata()`
|
|
54
57
|
|
|
58
|
+
\*\* = only in `raw_metadata()`
|
|
59
|
+
|
|
55
60
|
**Date Handling**
|
|
56
61
|
|
|
57
62
|
The `date`, `date_from` and `date_to` parameters support:
|
|
@@ -84,8 +89,6 @@ Parameters:
|
|
|
84
89
|
| metadata | `list[RawMetadata]` or `list[ProductMetadata]` | |
|
|
85
90
|
| include_pattern | `str` | `None` |
|
|
86
91
|
| exclude_pattern | `str` | `None` |
|
|
87
|
-
| filename_prefix | `str` | `None` |
|
|
88
|
-
| filename_suffix | `str` | `None` |
|
|
89
92
|
| include_tag_subset | `set[str]` | `None` |
|
|
90
93
|
| exclude_tag_subset | `set[str]` | `None` |
|
|
91
94
|
|
|
@@ -113,7 +116,7 @@ Parameters:
|
|
|
113
116
|
|
|
114
117
|
Fetch cloudnet instruments.
|
|
115
118
|
|
|
116
|
-
### `
|
|
119
|
+
### `APIClient().download(list[Metadata])` → `list[Path]`
|
|
117
120
|
|
|
118
121
|
Download files from the fetched metadata.
|
|
119
122
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .client import APIClient as APIClient
|
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import calendar
|
|
2
3
|
import datetime
|
|
4
|
+
import os
|
|
3
5
|
import re
|
|
4
6
|
from dataclasses import fields, is_dataclass
|
|
7
|
+
from os import PathLike
|
|
8
|
+
from pathlib import Path
|
|
5
9
|
from typing import TypeVar, cast
|
|
6
10
|
from urllib.parse import urljoin
|
|
7
11
|
|
|
@@ -12,6 +16,7 @@ from urllib3.util.retry import Retry
|
|
|
12
16
|
from cloudnet_api_client.containers import (
|
|
13
17
|
PRODUCT_TYPE,
|
|
14
18
|
SITE_TYPE,
|
|
19
|
+
STATUS,
|
|
15
20
|
Instrument,
|
|
16
21
|
Metadata,
|
|
17
22
|
Product,
|
|
@@ -19,11 +24,13 @@ from cloudnet_api_client.containers import (
|
|
|
19
24
|
RawMetadata,
|
|
20
25
|
Site,
|
|
21
26
|
)
|
|
27
|
+
from cloudnet_api_client.dl import download_files
|
|
22
28
|
|
|
23
29
|
T = TypeVar("T")
|
|
24
30
|
DateParam = str | datetime.date | None
|
|
25
31
|
DateTimeParam = str | datetime.datetime | datetime.date | None
|
|
26
32
|
QueryParam = str | list[str] | None
|
|
33
|
+
MetadataList = list[ProductMetadata] | list[RawMetadata]
|
|
27
34
|
|
|
28
35
|
|
|
29
36
|
class APIClient:
|
|
@@ -92,11 +99,18 @@ class APIClient:
|
|
|
92
99
|
_add_date_params(
|
|
93
100
|
params, date, date_from, date_to, updated_at, updated_at_from, updated_at_to
|
|
94
101
|
)
|
|
95
|
-
|
|
96
|
-
|
|
102
|
+
no_instrument = not instrument_id or instrument_pid
|
|
103
|
+
if no_instrument and (not product and model_id):
|
|
104
|
+
files_res = []
|
|
105
|
+
else:
|
|
106
|
+
files_res = self._get_response("files", params)
|
|
97
107
|
|
|
98
108
|
# Add model files if requested
|
|
99
|
-
if
|
|
109
|
+
if (
|
|
110
|
+
(not product and no_instrument)
|
|
111
|
+
or (product and "model" in product)
|
|
112
|
+
or (model_id and (not product or "model" in product))
|
|
113
|
+
):
|
|
100
114
|
for key in ("showLegacy", "product", "instrument", "instrumentPid"):
|
|
101
115
|
del params[key]
|
|
102
116
|
params["model"] = model_id
|
|
@@ -115,11 +129,17 @@ class APIClient:
|
|
|
115
129
|
updated_at_to: DateTimeParam = None,
|
|
116
130
|
instrument_id: QueryParam = None,
|
|
117
131
|
instrument_pid: QueryParam = None,
|
|
132
|
+
filename_prefix: QueryParam = None,
|
|
133
|
+
filename_suffix: QueryParam = None,
|
|
134
|
+
status: STATUS | list[STATUS] | None = None,
|
|
118
135
|
) -> list[RawMetadata]:
|
|
119
136
|
params = {
|
|
120
137
|
"site": site_id,
|
|
121
138
|
"instrument": instrument_id,
|
|
122
139
|
"instrumentPid": instrument_pid,
|
|
140
|
+
"filenamePrefix": filename_prefix,
|
|
141
|
+
"filenameSuffix": filename_suffix,
|
|
142
|
+
"status": status,
|
|
123
143
|
}
|
|
124
144
|
_add_date_params(
|
|
125
145
|
params, date, date_from, date_to, updated_at, updated_at_from, updated_at_to
|
|
@@ -127,13 +147,40 @@ class APIClient:
|
|
|
127
147
|
res = self._get_response("raw-files", params)
|
|
128
148
|
return _build_raw_meta_objects(res)
|
|
129
149
|
|
|
150
|
+
def download(
|
|
151
|
+
self,
|
|
152
|
+
metadata: MetadataList,
|
|
153
|
+
output_directory: str | PathLike,
|
|
154
|
+
concurrency_limit: int = 5,
|
|
155
|
+
progress: bool | None = None,
|
|
156
|
+
) -> list[Path]:
|
|
157
|
+
return asyncio.run(
|
|
158
|
+
self.adownload(metadata, output_directory, concurrency_limit, progress)
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
async def adownload(
|
|
162
|
+
self,
|
|
163
|
+
metadata: MetadataList,
|
|
164
|
+
output_directory: str | PathLike,
|
|
165
|
+
concurrency_limit: int = 5,
|
|
166
|
+
progress: bool | None = None,
|
|
167
|
+
) -> list[Path]:
|
|
168
|
+
disable_progress = not progress if progress is not None else None
|
|
169
|
+
output_directory = Path(output_directory).resolve()
|
|
170
|
+
os.makedirs(output_directory, exist_ok=True)
|
|
171
|
+
return await download_files(
|
|
172
|
+
self.base_url,
|
|
173
|
+
metadata,
|
|
174
|
+
output_directory,
|
|
175
|
+
concurrency_limit,
|
|
176
|
+
disable_progress,
|
|
177
|
+
)
|
|
178
|
+
|
|
130
179
|
@staticmethod
|
|
131
180
|
def filter(
|
|
132
181
|
metadata: list[Metadata],
|
|
133
182
|
include_pattern: str | None = None,
|
|
134
183
|
exclude_pattern: str | None = None,
|
|
135
|
-
filename_prefix: str | None = None,
|
|
136
|
-
filename_suffix: str | None = None,
|
|
137
184
|
include_tag_subset: set[str] | None = None,
|
|
138
185
|
exclude_tag_subset: set[str] | None = None,
|
|
139
186
|
) -> list[Metadata]:
|
|
@@ -145,10 +192,6 @@ class APIClient:
|
|
|
145
192
|
metadata = [
|
|
146
193
|
m for m in metadata if not re.search(exclude_pattern, m.filename, re.I)
|
|
147
194
|
]
|
|
148
|
-
if filename_prefix:
|
|
149
|
-
metadata = [m for m in metadata if m.filename.startswith(filename_prefix)]
|
|
150
|
-
if filename_suffix:
|
|
151
|
-
metadata = [m for m in metadata if m.filename.endswith(filename_suffix)]
|
|
152
195
|
if include_tag_subset:
|
|
153
196
|
metadata = [
|
|
154
197
|
m
|
|
@@ -4,6 +4,7 @@ from typing import Literal
|
|
|
4
4
|
|
|
5
5
|
SITE_TYPE = Literal["cloudnet", "model", "hidden", "campaign"]
|
|
6
6
|
PRODUCT_TYPE = Literal["instrument", "geophysical", "evaluation", "model"]
|
|
7
|
+
STATUS = Literal["created", "uploaded", "processed", "invalid"]
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
@dataclass(frozen=True, slots=True)
|
|
@@ -58,7 +59,7 @@ class Metadata:
|
|
|
58
59
|
|
|
59
60
|
@dataclass(frozen=True, slots=True)
|
|
60
61
|
class RawMetadata(Metadata):
|
|
61
|
-
status:
|
|
62
|
+
status: STATUS
|
|
62
63
|
instrument: Instrument
|
|
63
64
|
tags: list[str] | None
|
|
64
65
|
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import logging
|
|
3
|
-
import os
|
|
4
|
-
from os import PathLike
|
|
5
3
|
from pathlib import Path
|
|
6
4
|
|
|
7
5
|
import aiohttp
|
|
@@ -11,36 +9,10 @@ from tqdm.asyncio import tqdm_asyncio
|
|
|
11
9
|
from cloudnet_api_client import utils
|
|
12
10
|
from cloudnet_api_client.containers import ProductMetadata, RawMetadata
|
|
13
11
|
|
|
14
|
-
MetadataList = list[ProductMetadata] | list[RawMetadata]
|
|
15
12
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
metadata:
|
|
19
|
-
output_directory: str | PathLike,
|
|
20
|
-
concurrency_limit: int = 5,
|
|
21
|
-
progress: bool | None = None,
|
|
22
|
-
) -> list[Path]:
|
|
23
|
-
return asyncio.run(
|
|
24
|
-
adownload(metadata, output_directory, concurrency_limit, progress)
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def adownload(
|
|
29
|
-
metadata: MetadataList,
|
|
30
|
-
output_directory: str | PathLike,
|
|
31
|
-
concurrency_limit: int = 5,
|
|
32
|
-
progress: bool | None = None,
|
|
33
|
-
) -> list[Path]:
|
|
34
|
-
disable_progress = not progress if progress is not None else None
|
|
35
|
-
output_directory = Path(output_directory).resolve()
|
|
36
|
-
os.makedirs(output_directory, exist_ok=True)
|
|
37
|
-
return await _download_files(
|
|
38
|
-
metadata, output_directory, concurrency_limit, disable_progress
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
async def _download_files(
|
|
43
|
-
metadata: MetadataList,
|
|
13
|
+
async def download_files(
|
|
14
|
+
base_url: str,
|
|
15
|
+
metadata: list[ProductMetadata] | list[RawMetadata],
|
|
44
16
|
output_path: Path,
|
|
45
17
|
concurrency_limit: int,
|
|
46
18
|
disable_progress: bool | None,
|
|
@@ -50,6 +22,7 @@ async def _download_files(
|
|
|
50
22
|
async with aiohttp.ClientSession() as session:
|
|
51
23
|
tasks = []
|
|
52
24
|
for meta in metadata:
|
|
25
|
+
download_url = f"{base_url}{meta.download_url.split('/api/')[-1]}"
|
|
53
26
|
destination = output_path / meta.download_url.split("/")[-1]
|
|
54
27
|
full_paths.append(destination)
|
|
55
28
|
if destination.exists() and _file_checksum_matches(meta, destination):
|
|
@@ -57,7 +30,7 @@ async def _download_files(
|
|
|
57
30
|
continue
|
|
58
31
|
task = asyncio.create_task(
|
|
59
32
|
_download_file_with_retries(
|
|
60
|
-
session,
|
|
33
|
+
session, download_url, destination, semaphore, disable_progress
|
|
61
34
|
)
|
|
62
35
|
)
|
|
63
36
|
tasks.append(task)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.4.0"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.2.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|