datacosmos 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacosmos might be problematic. Click here for more details.
- datacosmos/auth/__init__.py +1 -0
- datacosmos/auth/local_token_fetcher.py +156 -0
- datacosmos/auth/token.py +82 -0
- datacosmos/config/auth/__init__.py +1 -0
- datacosmos/config/auth/factory.py +157 -0
- datacosmos/config/config.py +56 -177
- datacosmos/config/constants.py +26 -0
- datacosmos/config/loaders/yaml_source.py +62 -0
- datacosmos/config/models/local_user_account_authentication_config.py +13 -11
- datacosmos/config/models/m2m_authentication_config.py +9 -11
- datacosmos/datacosmos_client.py +153 -55
- datacosmos/stac/stac_client.py +2 -1
- datacosmos/stac/storage/dataclasses/upload_path.py +26 -47
- datacosmos/stac/storage/storage_client.py +2 -0
- datacosmos/stac/storage/uploader.py +42 -13
- {datacosmos-0.0.11.dist-info → datacosmos-0.0.13.dist-info}/METADATA +2 -1
- {datacosmos-0.0.11.dist-info → datacosmos-0.0.13.dist-info}/RECORD +20 -13
- {datacosmos-0.0.11.dist-info → datacosmos-0.0.13.dist-info}/WHEEL +0 -0
- {datacosmos-0.0.11.dist-info → datacosmos-0.0.13.dist-info}/licenses/LICENSE.md +0 -0
- {datacosmos-0.0.11.dist-info → datacosmos-0.0.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Config constants."""
|
|
2
|
+
|
|
3
|
+
# ---- Authentication defaults ----
DEFAULT_AUTH_TYPE = "m2m"

# M2M
DEFAULT_AUTH_TOKEN_URL = "https://login.open-cosmos.com/oauth/token"
DEFAULT_AUTH_AUDIENCE = "https://beeapp.open-cosmos.com"

# Local (interactive)
DEFAULT_LOCAL_AUTHORIZATION_ENDPOINT = "https://login.open-cosmos.com/authorize"
# The interactive flow reuses the M2M token endpoint.
DEFAULT_LOCAL_TOKEN_ENDPOINT = DEFAULT_AUTH_TOKEN_URL
DEFAULT_LOCAL_REDIRECT_PORT = 8765
DEFAULT_LOCAL_SCOPES = "openid profile email offline_access"
DEFAULT_LOCAL_CACHE_FILE = "~/.datacosmos/token_cache.json"

# ---- Service URLs ----
# Both services share the same host and differ only in their API path.
DEFAULT_STAC = {
    "protocol": "https",
    "host": "app.open-cosmos.com",
    "port": 443,
    "path": "/api/data/v0/stac",
}
DEFAULT_STORAGE = {
    "protocol": "https",
    "host": "app.open-cosmos.com",
    "port": 443,
    "path": "/api/data/v0/storage",
}

# ---- Config file path ----
DEFAULT_CONFIG_YAML = "config/config.yaml"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""YAML settings source for pydantic-settings.
|
|
2
|
+
|
|
3
|
+
This module provides a tiny helper to inject a YAML file as a configuration
|
|
4
|
+
source for `pydantic-settings` (v2.x). It returns a callable compatible with
|
|
5
|
+
`BaseSettings.settings_customise_sources`, placed wherever you want in the
|
|
6
|
+
precedence chain.
|
|
7
|
+
|
|
8
|
+
- If the file is missing, the source returns an empty dict.
|
|
9
|
+
- Empty-ish values (`None`, empty string, empty list) are dropped so they don't
|
|
10
|
+
overwrite values coming from later sources (e.g., environment variables).
|
|
11
|
+
- The returned callable accepts `*args, **kwargs` to be version-agnostic across
|
|
12
|
+
pydantic-settings minor releases (some pass positional args, others keywords).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from typing import Any, Callable, Dict
|
|
16
|
+
|
|
17
|
+
# A callable that returns a mapping of settings values when invoked by pydantic-settings.
SettingsSourceCallable = Callable[..., Dict[str, Any]]


def yaml_settings_source(file_path: str) -> SettingsSourceCallable:
    """Create a pydantic-settings-compatible source that reads from a YAML file.

    Parameters
    ----------
    file_path : str
        Absolute or relative path to the YAML file to load.

    Returns
    -------
    SettingsSourceCallable
        A callable that, when invoked by pydantic-settings, returns a dict
        of settings loaded from the YAML file. If the file does not exist,
        an empty dict is returned. Keys with empty/None values are omitted
        so later sources (e.g., env vars) can provide effective overrides.

    Notes
    -----
    The returned callable accepts arbitrary `*args` and `**kwargs` to stay
    compatible with different pydantic-settings 2.x calling conventions.
    The file is read each time the callable is invoked, not when this
    factory is called.
    """

    def _source(*_args: Any, **_kwargs: Any) -> Dict[str, Any]:
        """Load and sanitize YAML content for use as a settings source.

        Returns
        -------
        dict
            A dictionary of settings. If the YAML file is missing, `{}`.
            Values that are `None`, empty strings, or empty lists are dropped.

        Raises
        ------
        ValueError
            If the YAML document's top level is not a mapping.
        """
        import os

        if not os.path.exists(file_path):
            return {}

        # Imported only once a file is actually going to be parsed, so the
        # "no YAML file" path does not depend on PyYAML being importable.
        import yaml

        # Read as UTF-8 explicitly instead of relying on the platform's
        # default text encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}

        # A list or scalar at the top level cannot be mapped onto settings
        # fields; fail with a clear message rather than an AttributeError.
        if not isinstance(data, dict):
            raise ValueError(
                f"YAML settings file {file_path!r} must contain a mapping at "
                f"the top level, got {type(data).__name__}"
            )

        return {k: v for k, v in data.items() if v not in (None, "", [])}

    return _source
|
|
@@ -4,23 +4,25 @@ When this is chosen, the user will be prompted to log in using their OPS credent
|
|
|
4
4
|
This will be used for running scripts locally.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import Literal
|
|
7
|
+
from typing import Literal, Optional
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel
|
|
9
|
+
from pydantic import BaseModel, ConfigDict
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class LocalUserAccountAuthenticationConfig(BaseModel):
    """Configuration for local user account authentication.

    When this is chosen, the user will be prompted to log in using their OPS credentials.
    This will be used for running scripts locally. Required fields are enforced by `normalize_authentication` after merge.
    """

    # Reject keys that are not declared below instead of silently ignoring them.
    model_config = ConfigDict(extra="forbid")

    # Discriminator value for this authentication flavour.
    type: Literal["local"] = "local"
    # Every remaining field defaults to None at this layer, so a partially
    # filled config can be constructed before defaults are merged in.
    client_id: Optional[str] = None
    authorization_endpoint: Optional[str] = None
    token_endpoint: Optional[str] = None
    redirect_port: Optional[int] = None
    scopes: Optional[str] = None
    audience: Optional[str] = None
    # NOTE(review): presumably the path of the local token cache file — confirm.
    cache_file: Optional[str] = None
|
|
@@ -4,24 +4,22 @@ Used when running scripts in the cluster that require automated authentication
|
|
|
4
4
|
without user interaction.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import Literal
|
|
7
|
+
from typing import Literal, Optional
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel,
|
|
9
|
+
from pydantic import BaseModel, ConfigDict
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
class M2MAuthenticationConfig(BaseModel):
    """Configuration for machine-to-machine authentication.

    This is used when running scripts in the cluster that require authentication
    with client credentials. Required fields are enforced by `normalize_authentication` after merge.
    """

    # Reject keys that are not declared below instead of silently ignoring them.
    model_config = ConfigDict(extra="forbid")

    # Discriminator value for this authentication flavour.
    type: Literal["m2m"] = "m2m"
    # Every remaining field defaults to None at this layer, so a partially
    # filled config can be constructed before defaults are merged in.
    client_id: Optional[str] = None
    client_secret: Optional[str] = None
    token_url: Optional[str] = None
    audience: Optional[str] = None
|
datacosmos/datacosmos_client.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Client to interact with the Datacosmos API with authentication and request handling."""
|
|
2
2
|
|
|
3
3
|
from datetime import datetime, timedelta, timezone
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
from typing import Any, Optional
|
|
5
6
|
|
|
6
7
|
import requests
|
|
@@ -23,86 +24,183 @@ class DatacosmosClient:
|
|
|
23
24
|
"""Initialize the DatacosmosClient.
|
|
24
25
|
|
|
25
26
|
Args:
|
|
26
|
-
config
|
|
27
|
-
http_session
|
|
27
|
+
config: SDK configuration (if omitted, Config() loads YAML + env).
|
|
28
|
+
http_session: Pre-authenticated session (OAuth2Session or requests.Session
|
|
29
|
+
with 'Authorization: Bearer ...').
|
|
28
30
|
"""
|
|
31
|
+
self.config = self._coerce_config(config)
|
|
32
|
+
self.token: Optional[str] = None
|
|
33
|
+
self.token_expiry: Optional[datetime] = None
|
|
34
|
+
|
|
29
35
|
if http_session is not None:
|
|
30
|
-
self.
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
36
|
+
self._init_with_injected_session(http_session)
|
|
37
|
+
return
|
|
38
|
+
|
|
39
|
+
self._owns_session = True
|
|
40
|
+
self._http_client = self._authenticate_and_initialize_client()
|
|
41
|
+
|
|
42
|
+
# --------------------------- init helpers ---------------------------
|
|
43
|
+
|
|
44
|
+
def _coerce_config(self, cfg: Optional[Config | Any]) -> Config:
    """Turn whatever the caller passed as configuration into a Config.

    An existing ``Config`` is returned unchanged, ``None`` produces a default
    ``Config()``, a ``dict`` is expanded into keyword arguments, and any other
    object is handed to ``Config.model_validate``.

    Raises:
        DatacosmosException: If ``Config.model_validate`` rejects the object.
    """
    if isinstance(cfg, Config):
        return cfg
    if cfg is None:
        return Config()
    if isinstance(cfg, dict):
        return Config(**cfg)

    try:
        validated = Config.model_validate(cfg)  # pydantic v2
    except Exception as e:
        raise DatacosmosException(
            "Invalid config provided to DatacosmosClient"
        ) from e
    return validated
|
|
58
|
+
|
|
59
|
+
def _init_with_injected_session(
    self, http_session: requests.Session | OAuth2Session
) -> None:
    """Use a session supplied by the caller and record its token details.

    The session is stored as the HTTP client and marked as not owned by the
    SDK. The access token and its expiry are then read from the session.

    Raises:
        DatacosmosException: If no access token can be read from the session.
    """
    self._owns_session = False
    self._http_client = http_session

    session_token = self._extract_token_data(http_session)
    access_token = session_token.get("access_token")
    self.token = access_token
    if not access_token:
        raise DatacosmosException(
            "Failed to extract access token from injected session"
        )

    expires_at = session_token.get("expires_at")
    expires_in = session_token.get("expires_in")
    self.token_expiry = self._compute_expiry(expires_at, expires_in)
|
|
77
|
+
|
|
78
|
+
def _extract_token_data(
    self, http_session: requests.Session | OAuth2Session
) -> dict:
    """Pull token information out of a caller-provided session.

    For an ``OAuth2Session`` the session's ``token`` mapping is returned (or an
    empty dict when it is missing or falsy). For a ``requests.Session`` the
    ``Authorization`` header must start with ``"Bearer "``; the remainder is
    returned under ``"access_token"``.

    Raises:
        DatacosmosException: For a ``requests.Session`` without a Bearer
            header, or for any other session type.
    """
    # OAuth2Session is tested first, as in the original branch order.
    if isinstance(http_session, OAuth2Session):
        return getattr(http_session, "token", {}) or {}

    if not isinstance(http_session, requests.Session):
        raise DatacosmosException(f"Unsupported session type: {type(http_session)}")

    header_value = http_session.headers.get("Authorization", "")
    if header_value.startswith("Bearer "):
        return {"access_token": header_value.split(" ", 1)[1]}

    raise DatacosmosException(
        "Injected requests.Session must include a 'Bearer' token in its headers"
    )
|
|
94
|
+
|
|
95
|
+
def _compute_expiry(
|
|
96
|
+
self,
|
|
97
|
+
expires_at: Optional[datetime | int | float],
|
|
98
|
+
expires_in: Optional[int | float],
|
|
99
|
+
) -> Optional[datetime]:
|
|
100
|
+
"""Normalize expiry inputs to an absolute UTC datetime (or None)."""
|
|
101
|
+
if isinstance(expires_at, datetime):
|
|
102
|
+
return expires_at
|
|
103
|
+
if isinstance(expires_at, (int, float)):
|
|
104
|
+
return datetime.fromtimestamp(expires_at, tz=timezone.utc)
|
|
105
|
+
if expires_in is not None:
|
|
45
106
|
try:
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
except Exception:
|
|
51
|
-
raise DatacosmosException(
|
|
52
|
-
"Failed to extract token from injected session"
|
|
53
|
-
)
|
|
107
|
+
return datetime.now(timezone.utc) + timedelta(seconds=int(expires_in))
|
|
108
|
+
except (TypeError, ValueError):
|
|
109
|
+
return None
|
|
110
|
+
return None
|
|
54
111
|
|
|
55
|
-
|
|
56
|
-
else:
|
|
57
|
-
if config:
|
|
58
|
-
self.config = config
|
|
59
|
-
else:
|
|
60
|
-
try:
|
|
61
|
-
self.config = Config.from_yaml()
|
|
62
|
-
except ValueError:
|
|
63
|
-
self.config = Config.from_env()
|
|
64
|
-
|
|
65
|
-
self._owns_session = True
|
|
66
|
-
self.token = None
|
|
67
|
-
self.token_expiry = None
|
|
68
|
-
self._http_client = self._authenticate_and_initialize_client()
|
|
112
|
+
# --------------------------- auth/session ---------------------------
|
|
69
113
|
|
|
70
114
|
def _authenticate_and_initialize_client(self) -> requests.Session:
    """Build an authenticated HTTP session for the configured auth type.

    The flow is chosen from ``self.config.authentication.type``; when that
    attribute is absent, ``"m2m"`` is assumed.

    Raises:
        DatacosmosException: If the type is neither ``"m2m"`` nor ``"local"``.
    """
    flow = getattr(self.config.authentication, "type", "m2m")

    if flow == "local":
        return self.__build_local_session()
    if flow == "m2m":
        return self.__build_m2m_session()

    raise DatacosmosException(f"Unsupported authentication type: {flow}")
|
|
126
|
+
|
|
127
|
+
def _refresh_token_if_needed(self):
|
|
128
|
+
"""Refresh the token if it has expired (only if SDK created it)."""
|
|
129
|
+
if not getattr(self, "_owns_session", False):
|
|
130
|
+
return
|
|
131
|
+
now = datetime.now(timezone.utc)
|
|
132
|
+
# Treat missing token or missing expiry as 'needs refresh'
|
|
133
|
+
if (
|
|
134
|
+
(not self.token)
|
|
135
|
+
or (self.token_expiry is None)
|
|
136
|
+
or (self.token_expiry <= now)
|
|
137
|
+
):
|
|
138
|
+
self._http_client = self._authenticate_and_initialize_client()
|
|
139
|
+
|
|
140
|
+
def __build_m2m_session(self) -> requests.Session:
    """Client Credentials (M2M) flow using requests-oauthlib.

    Fetches a token with the client id/secret, token URL and audience from
    ``self.config.authentication``, stores the access token and its expiry on
    the client, and returns a fresh ``requests.Session`` carrying the token as
    a Bearer ``Authorization`` header.

    Raises:
        DatacosmosException: If the token request fails with a
            ``requests`` error.
    """
    auth = self.config.authentication
    try:
        client = BackendApplicationClient(client_id=auth.client_id)

        # The OAuth2 session is only needed for the token exchange; close it
        # afterwards so it is not left open on every re-authentication.
        with OAuth2Session(client=client) as oauth_session:
            token_response = oauth_session.fetch_token(
                token_url=auth.token_url,
                client_id=auth.client_id,
                client_secret=auth.client_secret,
                audience=auth.audience,
            )

        self.token = token_response["access_token"]

        # Share the expiry normalization with the injected-session path.
        # When the response carries no usable expiry, assume one hour.
        expiry = self._compute_expiry(
            token_response.get("expires_at"),
            token_response.get("expires_in"),
        )
        if expiry is None:
            expiry = datetime.now(timezone.utc) + timedelta(seconds=3600)
        self.token_expiry = expiry

        http_client = requests.Session()
        http_client.headers.update({"Authorization": f"Bearer {self.token}"})
        return http_client

    except (HTTPError, ConnectionError, Timeout) as e:
        raise DatacosmosException(f"Authentication failed: {e}") from e
    except RequestException as e:
        raise DatacosmosException(
            f"Unexpected request failure during authentication: {e}"
        ) from e
|
|
99
173
|
|
|
100
|
-
def
|
|
101
|
-
"""
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
174
|
+
def __build_local_session(self) -> requests.Session:
    """Log in interactively through LocalTokenFetcher and build a session.

    The fetcher is configured from ``self.config.authentication``. The token
    it returns is stored on the client with its expiry, and a new
    ``requests.Session`` carrying the Bearer header is returned.

    Raises:
        DatacosmosException: If creating the fetcher or obtaining the token
            fails for any reason.
    """
    auth = self.config.authentication
    try:
        from datacosmos.auth.local_token_fetcher import LocalTokenFetcher

        fetcher_kwargs = {
            "client_id": auth.client_id,
            "authorization_endpoint": auth.authorization_endpoint,
            "token_endpoint": auth.token_endpoint,
            "redirect_port": int(auth.redirect_port),
            "audience": auth.audience,
            "scopes": auth.scopes,
            "token_file": Path(auth.cache_file).expanduser(),
        }
        fetcher = LocalTokenFetcher(**fetcher_kwargs)
        tok = fetcher.get_token()
    except Exception as e:
        raise DatacosmosException(f"Local authentication failed: {e}") from e

    self.token = tok.access_token
    self.token_expiry = datetime.fromtimestamp(tok.expires_at, tz=timezone.utc)

    session = requests.Session()
    session.headers.update({"Authorization": f"Bearer {self.token}"})

    # The fetcher is kept on the client for possible reuse when refreshing.
    self._local_token_fetcher = fetcher
    return session
|
|
202
|
+
|
|
203
|
+
# --------------------------- request API ---------------------------
|
|
106
204
|
|
|
107
205
|
def request(
|
|
108
206
|
self, method: str, url: str, *args: Any, **kwargs: Any
|
datacosmos/stac/stac_client.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Unified interface for STAC API, combining Item & Collection operations."""
|
|
2
2
|
|
|
3
|
+
from datacosmos.datacosmos_client import DatacosmosClient
|
|
3
4
|
from datacosmos.stac.collection.collection_client import CollectionClient
|
|
4
5
|
from datacosmos.stac.item.item_client import ItemClient
|
|
5
6
|
from datacosmos.stac.storage.storage_client import StorageClient
|
|
@@ -8,7 +9,7 @@ from datacosmos.stac.storage.storage_client import StorageClient
|
|
|
8
9
|
class STACClient(ItemClient, CollectionClient, StorageClient):
|
|
9
10
|
"""Unified interface for STAC API, combining Item & Collection operations."""
|
|
10
11
|
|
|
11
|
-
def __init__(self, client):
|
|
12
|
+
def __init__(self, client: DatacosmosClient):
|
|
12
13
|
"""Initialize the STACClient with a DatacosmosClient."""
|
|
13
14
|
ItemClient.__init__(self, client)
|
|
14
15
|
CollectionClient.__init__(self, client)
|
|
@@ -1,63 +1,42 @@
|
|
|
1
|
-
"""Dataclass for
|
|
1
|
+
"""Dataclass for generating the upload key of an asset."""
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
|
-
from datetime import datetime
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
|
|
7
|
-
import structlog
|
|
8
|
-
|
|
9
|
-
from datacosmos.stac.enums.processing_level import ProcessingLevel
|
|
10
6
|
from datacosmos.stac.item.models.datacosmos_item import DatacosmosItem
|
|
11
7
|
|
|
12
|
-
logger = structlog.get_logger()
|
|
13
|
-
|
|
14
8
|
|
|
15
9
|
@dataclass
class UploadPath:
    """Storage key in the form: project/<project-id>/<item-id>/<asset-name>."""

    project_id: str
    item_id: str
    # May itself contain "/" separators (see ``from_path``).
    asset_name: str

    def __str__(self) -> str:
        """Path in the form: project/<project-id>/<item-id>/<asset-name>."""
        return f"project/{self.project_id}/{self.item_id}/{self.asset_name}".rstrip("/")

    @classmethod
    def from_item_path(
        cls,
        item: "DatacosmosItem",
        project_id: str,
        asset_name: str,
    ) -> "UploadPath":
        """Create an UploadPath for the given item/asset.

        Only ``item.id`` is read from ``item``.
        """
        return cls(project_id=project_id, item_id=item.id, asset_name=asset_name)

    @classmethod
    def from_path(cls, path: str) -> "UploadPath":
        """Reverse-parse a storage key back into its components.

        Args:
            path: Key of the form ``project/<project-id>/<item-id>/<asset-name>``.
                Any segments after the item id are joined with "/" to form the
                asset name.

        Raises:
            ValueError: If the key has fewer than four segments or does not
                start with ``project``.
        """
        # Storage keys always use "/" as separator, whatever OS the SDK runs
        # on, so parse with POSIX rules rather than the platform's Path.
        from pathlib import PurePosixPath

        parts = PurePosixPath(path).parts
        if len(parts) < 4 or parts[0] != "project":
            raise ValueError(f"Invalid path: {path}")

        # The length check above guarantees at least one asset segment.
        project_id, item_id, *rest = parts[1:]
        return cls(project_id=project_id, item_id=item_id, asset_name="/".join(rest))
|
|
@@ -16,6 +16,7 @@ class StorageClient:
|
|
|
16
16
|
def upload_item(
|
|
17
17
|
self,
|
|
18
18
|
item: DatacosmosItem,
|
|
19
|
+
project_id: str,
|
|
19
20
|
assets_path: str | None = None,
|
|
20
21
|
included_assets: list[str] | bool = True,
|
|
21
22
|
max_workers: int = 4,
|
|
@@ -24,6 +25,7 @@ class StorageClient:
|
|
|
24
25
|
"""Proxy to Uploader.upload_item, without needing to pass client each call."""
|
|
25
26
|
return self.uploader.upload_item(
|
|
26
27
|
item=item,
|
|
28
|
+
project_id=project_id,
|
|
27
29
|
assets_path=assets_path,
|
|
28
30
|
included_assets=included_assets,
|
|
29
31
|
max_workers=max_workers,
|
|
@@ -13,22 +13,37 @@ from datacosmos.stac.storage.storage_base import StorageBase
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class Uploader(StorageBase):
|
|
16
|
-
"""
|
|
16
|
+
"""Upload a STAC item and its assets to Datacosmos storage, then register the item in the STAC API."""
|
|
17
17
|
|
|
18
18
|
def __init__(self, client: DatacosmosClient):
    """Initialize the uploader.

    Args:
        client (DatacosmosClient): Pre-configured DatacosmosClient.
    """
    super().__init__(client)
    # Used by upload_item to register the item (via add_item) after the
    # asset uploads have run.
    self.item_client = ItemClient(client)
|
|
22
26
|
|
|
23
27
|
def upload_item(
|
|
24
28
|
self,
|
|
25
|
-
item: DatacosmosItem,
|
|
29
|
+
item: DatacosmosItem | str,
|
|
30
|
+
project_id: str,
|
|
26
31
|
assets_path: str | None = None,
|
|
27
32
|
included_assets: list[str] | bool = True,
|
|
28
33
|
max_workers: int = 4,
|
|
29
34
|
time_out: float = 60 * 60 * 1,
|
|
30
35
|
) -> DatacosmosItem:
|
|
31
|
-
"""Upload a STAC item and its assets to Datacosmos.
|
|
36
|
+
"""Upload a STAC item (and optionally its assets) to Datacosmos.
|
|
37
|
+
|
|
38
|
+
`item` can be either:
|
|
39
|
+
• a DatacosmosItem instance, or
|
|
40
|
+
• the path to an item JSON file on disk.
|
|
41
|
+
|
|
42
|
+
If `included_assets` is:
|
|
43
|
+
• True → upload every asset in the item
|
|
44
|
+
• list → upload only the asset keys in that list
|
|
45
|
+
• False → upload nothing; just register the item
|
|
46
|
+
"""
|
|
32
47
|
if not assets_path and not isinstance(item, str):
|
|
33
48
|
raise ValueError(
|
|
34
49
|
"assets_path must be provided if item is not the path to an item file."
|
|
@@ -37,8 +52,7 @@ class Uploader(StorageBase):
|
|
|
37
52
|
if isinstance(item, str):
|
|
38
53
|
item_filename = item
|
|
39
54
|
item = self._load_item(item_filename)
|
|
40
|
-
|
|
41
|
-
assets_path = str(Path(item_filename).parent)
|
|
55
|
+
assets_path = assets_path or str(Path(item_filename).parent)
|
|
42
56
|
|
|
43
57
|
assets_path = assets_path or str(Path.cwd())
|
|
44
58
|
|
|
@@ -50,18 +64,18 @@ class Uploader(StorageBase):
|
|
|
50
64
|
else []
|
|
51
65
|
)
|
|
52
66
|
|
|
53
|
-
jobs = [
|
|
54
|
-
|
|
67
|
+
jobs = [
|
|
68
|
+
(item, asset_key, assets_path, project_id) for asset_key in upload_assets
|
|
69
|
+
]
|
|
55
70
|
self._run_in_threads(self._upload_asset, jobs, max_workers, time_out)
|
|
56
71
|
|
|
57
72
|
self.item_client.add_item(item)
|
|
58
|
-
|
|
59
73
|
return item
|
|
60
74
|
|
|
61
75
|
def upload_from_file(
|
|
62
76
|
self, src: str, dst: str, mime_type: str | None = None
|
|
63
77
|
) -> None:
|
|
64
|
-
"""
|
|
78
|
+
"""Upload a single file to the specified destination path in storage."""
|
|
65
79
|
url = self.base_url.with_suffix(dst)
|
|
66
80
|
mime = mime_type or self._guess_mime(src)
|
|
67
81
|
headers = {"Content-Type": mime}
|
|
@@ -71,25 +85,40 @@ class Uploader(StorageBase):
|
|
|
71
85
|
|
|
72
86
|
@staticmethod
def _load_item(item_json_file_path: str) -> DatacosmosItem:
    """Read a JSON file from disk and validate it as a DatacosmosItem.

    The file is read as raw bytes and decoded as UTF-8 before validation.
    """
    with open(item_json_file_path, "rb") as item_file:
        raw_bytes = item_file.read()
    json_text = raw_bytes.decode("utf-8")
    item_adapter = TypeAdapter(DatacosmosItem)
    return item_adapter.validate_json(json_text)
|
|
77
92
|
|
|
78
93
|
def _upload_asset(
    self, item: DatacosmosItem, asset_key: str, assets_path: str, project_id: str
) -> None:
    """Upload a single asset file and update its href inside the item object.

    Runs in parallel via _run_in_threads().

    Args:
        item: Item whose ``assets[asset_key]`` entry is uploaded; the asset's
            ``href`` is mutated in place.
        asset_key: Key of the asset inside ``item.assets``.
        assets_path: Local directory against which the asset href is resolved.
        project_id: Project segment of the destination storage key.
    """
    asset = item.assets[asset_key]

    # Build storage key: project/<project_id>/<item_id>/<asset_name>
    # Only the file name of the href is kept; any directory part is dropped.
    upload_path = UploadPath.from_item_path(
        item,
        project_id,
        Path(asset.href).name,
    )

    local_src = Path(assets_path) / asset.href
    if local_src.exists():
        src = str(local_src)
        asset.href = f"file:///{upload_path}"
    else:
        # fallback: try matching just the filename inside assets_path
        # NOTE(review): unlike the branch above, href is not rewritten to the
        # storage key here, so _update_asset_href below sees the original
        # href — confirm this is intended.
        src = str(Path(assets_path) / Path(asset.href).name)

    # NOTE(review): the href is rewritten before the upload is attempted, so
    # the item is already modified if upload_from_file fails — confirm.
    self._update_asset_href(asset)  # turn href into public URL
    self.upload_from_file(src, str(upload_path), mime_type=asset.type)
|
|
91
119
|
|
|
92
120
|
def _update_asset_href(self, asset: Asset) -> None:
|
|
121
|
+
"""Convert the storage key to a public HTTPS URL."""
|
|
93
122
|
try:
|
|
94
123
|
url = self.client.config.datacosmos_public_cloud_storage.as_domain_url()
|
|
95
124
|
new_href = url.with_base(asset.href) # type: ignore
|