datacosmos 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datacosmos has been flagged as potentially problematic; consult the package registry's advisory page for details.

@@ -0,0 +1,26 @@
1
"""Config constants."""

# ---- Authentication defaults ----
DEFAULT_AUTH_TYPE = "m2m"

# M2M (client-credentials) flow
DEFAULT_AUTH_TOKEN_URL = "https://login.open-cosmos.com/oauth/token"
DEFAULT_AUTH_AUDIENCE = "https://beeapp.open-cosmos.com"

# Local (interactive) flow
DEFAULT_LOCAL_AUTHORIZATION_ENDPOINT = "https://login.open-cosmos.com/authorize"
DEFAULT_LOCAL_TOKEN_ENDPOINT = DEFAULT_AUTH_TOKEN_URL
DEFAULT_LOCAL_REDIRECT_PORT = 8765
DEFAULT_LOCAL_SCOPES = "openid profile email offline_access"
DEFAULT_LOCAL_CACHE_FILE = "~/.datacosmos/token_cache.json"

# ---- Service URLs ----
DEFAULT_STAC = {
    "protocol": "https",
    "host": "app.open-cosmos.com",
    "port": 443,
    "path": "/api/data/v0/stac",
}
DEFAULT_STORAGE = {
    "protocol": "https",
    "host": "app.open-cosmos.com",
    "port": 443,
    "path": "/api/data/v0/storage",
}

# ---- Config file path ----
DEFAULT_CONFIG_YAML = "config/config.yaml"
@@ -0,0 +1,62 @@
1
"""YAML settings source for pydantic-settings.

This module provides a tiny helper to inject a YAML file as a configuration
source for `pydantic-settings` (v2.x). It returns a callable compatible with
`BaseSettings.settings_customise_sources`, placed wherever you want in the
precedence chain.

- If the file is missing, the source returns an empty dict (without requiring
  PyYAML to be importable).
- Empty-ish values (`None`, empty string, empty list) are dropped so they don't
  overwrite values coming from later sources (e.g., environment variables).
- The returned callable accepts `*args, **kwargs` to be version-agnostic across
  pydantic-settings minor releases (some pass positional args, others keywords).
"""

import os
from typing import Any, Callable, Dict

# A callable that returns a mapping of settings values when invoked by pydantic-settings.
SettingsSourceCallable = Callable[..., Dict[str, Any]]


def yaml_settings_source(file_path: str) -> SettingsSourceCallable:
    """Create a pydantic-settings-compatible source that reads from a YAML file.

    Parameters
    ----------
    file_path : str
        Absolute or relative path to the YAML file to load.

    Returns
    -------
    SettingsSourceCallable
        A callable that, when invoked by pydantic-settings, returns a dict
        of settings loaded from the YAML file. If the file does not exist,
        an empty dict is returned. Keys with empty/None values are omitted
        so later sources (e.g., env vars) can provide effective overrides.

    Notes
    -----
    The returned callable accepts arbitrary `*args` and `**kwargs` to stay
    compatible with different pydantic-settings 2.x calling conventions.
    """

    def _source(*_args: Any, **_kwargs: Any) -> Dict[str, Any]:
        """Load and sanitize YAML content for use as a settings source.

        Returns
        -------
        dict
            A dictionary of settings. If the YAML file is missing, `{}`.
            Values that are `None`, empty strings, or empty lists are dropped.
        """
        if not os.path.exists(file_path):
            return {}

        # Deferred import: PyYAML is only needed when a file actually exists,
        # and this avoids re-importing on every evaluation of the source.
        import yaml

        with open(file_path, "r") as f:
            data = yaml.safe_load(f) or {}
        # Drop empty-ish values so later sources can still override them.
        return {k: v for k, v in data.items() if v not in (None, "", [])}

    return _source
@@ -4,23 +4,25 @@ When this is chosen, the user will be prompted to log in using their OPS credent
4
4
  This will be used for running scripts locally.
5
5
  """
6
6
 
7
- from typing import Literal
7
+ from typing import Literal, Optional
8
8
 
9
- from pydantic import BaseModel
9
+ from pydantic import BaseModel, ConfigDict
10
10
 
11
11
 
12
12
class LocalUserAccountAuthenticationConfig(BaseModel):
    """Configuration for local user account authentication.

    When this is chosen, the user will be prompted to log in using their OPS credentials.
    This will be used for running scripts locally. Required fields are enforced by `normalize_authentication` after merge.
    """

    # Reject unknown keys so typos in config files fail loudly.
    model_config = ConfigDict(extra="forbid")

    # Everything defaults to None; presence is validated after the config merge.
    type: Literal["local"] = "local"
    client_id: str | None = None
    authorization_endpoint: str | None = None
    token_endpoint: str | None = None
    redirect_port: int | None = None
    scopes: str | None = None
    audience: str | None = None
    cache_file: str | None = None
@@ -4,24 +4,22 @@ Used when running scripts in the cluster that require automated authentication
4
4
  without user interaction.
5
5
  """
6
6
 
7
- from typing import Literal
7
+ from typing import Literal, Optional
8
8
 
9
- from pydantic import BaseModel, Field
9
+ from pydantic import BaseModel, ConfigDict
10
10
 
11
11
 
12
12
class M2MAuthenticationConfig(BaseModel):
    """Configuration for machine-to-machine authentication.

    This is used when running scripts in the cluster that require authentication
    with client credentials. Required fields are enforced by `normalize_authentication` after merge.
    """

    # Reject unknown keys so typos in config files fail loudly.
    model_config = ConfigDict(extra="forbid")

    # Everything defaults to None; presence is validated after the config merge.
    type: Literal["m2m"] = "m2m"
    client_id: str | None = None
    client_secret: str | None = None
    token_url: str | None = None
    audience: str | None = None
@@ -1,6 +1,7 @@
1
1
  """Client to interact with the Datacosmos API with authentication and request handling."""
2
2
 
3
3
  from datetime import datetime, timedelta, timezone
4
+ from pathlib import Path
4
5
  from typing import Any, Optional
5
6
 
6
7
  import requests
@@ -23,86 +24,183 @@ class DatacosmosClient:
23
24
  """Initialize the DatacosmosClient.
24
25
 
25
26
  Args:
26
- config (Optional[Config]): Configuration object (only needed when SDK creates its own session).
27
- http_session (Optional[requests.Session]): Pre-authenticated session.
27
+ config: SDK configuration (if omitted, Config() loads YAML + env).
28
+ http_session: Pre-authenticated session (OAuth2Session or requests.Session
29
+ with 'Authorization: Bearer ...').
28
30
  """
31
+ self.config = self._coerce_config(config)
32
+ self.token: Optional[str] = None
33
+ self.token_expiry: Optional[datetime] = None
34
+
29
35
  if http_session is not None:
30
- self._http_client = http_session
31
- self._owns_session = False
32
- if isinstance(http_session, OAuth2Session):
33
- token_data = http_session.token
34
- elif isinstance(http_session, requests.Session):
35
- auth_header = http_session.headers.get("Authorization", "")
36
- if not auth_header.startswith("Bearer "):
37
- raise DatacosmosException(
38
- "Injected requests.Session must include a 'Bearer' token in its headers"
39
- )
40
- token_data = {"access_token": auth_header.split(" ", 1)[1]}
41
- else:
36
+ self._init_with_injected_session(http_session)
37
+ return
38
+
39
+ self._owns_session = True
40
+ self._http_client = self._authenticate_and_initialize_client()
41
+
42
+ # --------------------------- init helpers ---------------------------
43
+
44
+ def _coerce_config(self, cfg: Optional[Config | Any]) -> Config:
45
+ """Normalize various config inputs into a Config instance."""
46
+ if cfg is None:
47
+ return Config()
48
+ if isinstance(cfg, Config):
49
+ return cfg
50
+ if isinstance(cfg, dict):
51
+ return Config(**cfg)
52
+ try:
53
+ return Config.model_validate(cfg) # pydantic v2
54
+ except Exception as e:
55
+ raise DatacosmosException(
56
+ "Invalid config provided to DatacosmosClient"
57
+ ) from e
58
+
59
+ def _init_with_injected_session(
60
+ self, http_session: requests.Session | OAuth2Session
61
+ ) -> None:
62
+ """Adopt a caller-provided session and extract token/expiry."""
63
+ self._http_client = http_session
64
+ self._owns_session = False
65
+
66
+ token_data = self._extract_token_data(http_session)
67
+ self.token = token_data.get("access_token")
68
+ if not self.token:
69
+ raise DatacosmosException(
70
+ "Failed to extract access token from injected session"
71
+ )
72
+
73
+ self.token_expiry = self._compute_expiry(
74
+ token_data.get("expires_at"),
75
+ token_data.get("expires_in"),
76
+ )
77
+
78
+ def _extract_token_data(
79
+ self, http_session: requests.Session | OAuth2Session
80
+ ) -> dict:
81
+ """Return {'access_token', 'expires_at'?, 'expires_in'?} from the session."""
82
+ if isinstance(http_session, OAuth2Session):
83
+ return getattr(http_session, "token", {}) or {}
84
+
85
+ if isinstance(http_session, requests.Session):
86
+ auth_header = http_session.headers.get("Authorization", "")
87
+ if not auth_header.startswith("Bearer "):
42
88
  raise DatacosmosException(
43
- f"Unsupported session type: {type(http_session)}"
89
+ "Injected requests.Session must include a 'Bearer' token in its headers"
44
90
  )
91
+ return {"access_token": auth_header.split(" ", 1)[1]}
92
+
93
+ raise DatacosmosException(f"Unsupported session type: {type(http_session)}")
94
+
95
+ def _compute_expiry(
96
+ self,
97
+ expires_at: Optional[datetime | int | float],
98
+ expires_in: Optional[int | float],
99
+ ) -> Optional[datetime]:
100
+ """Normalize expiry inputs to an absolute UTC datetime (or None)."""
101
+ if isinstance(expires_at, datetime):
102
+ return expires_at
103
+ if isinstance(expires_at, (int, float)):
104
+ return datetime.fromtimestamp(expires_at, tz=timezone.utc)
105
+ if expires_in is not None:
45
106
  try:
46
- self.token = token_data.get("access_token")
47
- self.token_expiry = token_data.get("expires_at") or token_data.get(
48
- "expires_in"
49
- )
50
- except Exception:
51
- raise DatacosmosException(
52
- "Failed to extract token from injected session"
53
- )
107
+ return datetime.now(timezone.utc) + timedelta(seconds=int(expires_in))
108
+ except (TypeError, ValueError):
109
+ return None
110
+ return None
54
111
 
55
- self.config = config
56
- else:
57
- if config:
58
- self.config = config
59
- else:
60
- try:
61
- self.config = Config.from_yaml()
62
- except ValueError:
63
- self.config = Config.from_env()
64
-
65
- self._owns_session = True
66
- self.token = None
67
- self.token_expiry = None
68
- self._http_client = self._authenticate_and_initialize_client()
112
+ # --------------------------- auth/session ---------------------------
69
113
 
70
114
  def _authenticate_and_initialize_client(self) -> requests.Session:
71
115
  """Authenticate and initialize the HTTP client with a valid token."""
116
+ auth = self.config.authentication
117
+ auth_type = getattr(auth, "type", "m2m")
118
+
119
+ if auth_type == "m2m":
120
+ return self.__build_m2m_session()
121
+
122
+ if auth_type == "local":
123
+ return self.__build_local_session()
124
+
125
+ raise DatacosmosException(f"Unsupported authentication type: {auth_type}")
126
+
127
+ def _refresh_token_if_needed(self):
128
+ """Refresh the token if it has expired (only if SDK created it)."""
129
+ if not getattr(self, "_owns_session", False):
130
+ return
131
+ now = datetime.now(timezone.utc)
132
+ # Treat missing token or missing expiry as 'needs refresh'
133
+ if (
134
+ (not self.token)
135
+ or (self.token_expiry is None)
136
+ or (self.token_expiry <= now)
137
+ ):
138
+ self._http_client = self._authenticate_and_initialize_client()
139
+
140
+ def __build_m2m_session(self) -> requests.Session:
141
+ """Client Credentials (M2M) flow using requests-oauthlib."""
142
+ auth = self.config.authentication
72
143
  try:
73
- client = BackendApplicationClient(
74
- client_id=self.config.authentication.client_id
75
- )
144
+ client = BackendApplicationClient(client_id=auth.client_id)
76
145
  oauth_session = OAuth2Session(client=client)
77
146
 
78
147
  token_response = oauth_session.fetch_token(
79
- token_url=self.config.authentication.token_url,
80
- client_id=self.config.authentication.client_id,
81
- client_secret=self.config.authentication.client_secret,
82
- audience=self.config.authentication.audience,
148
+ token_url=auth.token_url,
149
+ client_id=auth.client_id,
150
+ client_secret=auth.client_secret,
151
+ audience=auth.audience,
83
152
  )
84
153
 
85
154
  self.token = token_response["access_token"]
86
- self.token_expiry = datetime.now(timezone.utc) + timedelta(
87
- seconds=token_response.get("expires_in", 3600)
88
- )
155
+ expires_at = token_response.get("expires_at")
156
+ if isinstance(expires_at, (int, float)):
157
+ self.token_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc)
158
+ else:
159
+ self.token_expiry = datetime.now(timezone.utc) + timedelta(
160
+ seconds=int(token_response.get("expires_in", 3600))
161
+ )
89
162
 
90
163
  http_client = requests.Session()
91
164
  http_client.headers.update({"Authorization": f"Bearer {self.token}"})
92
165
  return http_client
166
+
93
167
  except (HTTPError, ConnectionError, Timeout) as e:
94
- raise DatacosmosException(f"Authentication failed: {str(e)}") from e
168
+ raise DatacosmosException(f"Authentication failed: {e}") from e
95
169
  except RequestException as e:
96
170
  raise DatacosmosException(
97
- f"Unexpected request failure during authentication: {str(e)}"
171
+ f"Unexpected request failure during authentication: {e}"
98
172
  ) from e
99
173
 
100
- def _refresh_token_if_needed(self):
101
- """Refresh the token if it has expired (only if SDK created it)."""
102
- if self._owns_session and (
103
- not self.token or self.token_expiry <= datetime.now(timezone.utc)
104
- ):
105
- self._http_client = self._authenticate_and_initialize_client()
174
+ def __build_local_session(self) -> requests.Session:
175
+ """Interactive local login via LocalTokenFetcher (cached + refresh)."""
176
+ auth = self.config.authentication
177
+ try:
178
+ from datacosmos.auth.local_token_fetcher import LocalTokenFetcher
179
+
180
+ fetcher = LocalTokenFetcher(
181
+ client_id=auth.client_id,
182
+ authorization_endpoint=auth.authorization_endpoint,
183
+ token_endpoint=auth.token_endpoint,
184
+ redirect_port=int(auth.redirect_port),
185
+ audience=auth.audience,
186
+ scopes=auth.scopes,
187
+ token_file=Path(auth.cache_file).expanduser(),
188
+ )
189
+ tok = fetcher.get_token()
190
+ except Exception as e:
191
+ raise DatacosmosException(f"Local authentication failed: {e}") from e
192
+
193
+ self.token = tok.access_token
194
+ self.token_expiry = datetime.fromtimestamp(tok.expires_at, tz=timezone.utc)
195
+
196
+ http_client = requests.Session()
197
+ http_client.headers.update({"Authorization": f"Bearer {self.token}"})
198
+
199
+ # keep for potential reuse in refresh path (optional)
200
+ self._local_token_fetcher = fetcher
201
+ return http_client
202
+
203
+ # --------------------------- request API ---------------------------
106
204
 
107
205
  def request(
108
206
  self, method: str, url: str, *args: Any, **kwargs: Any
@@ -1,5 +1,6 @@
1
1
  """Unified interface for STAC API, combining Item & Collection operations."""
2
2
 
3
+ from datacosmos.datacosmos_client import DatacosmosClient
3
4
  from datacosmos.stac.collection.collection_client import CollectionClient
4
5
  from datacosmos.stac.item.item_client import ItemClient
5
6
  from datacosmos.stac.storage.storage_client import StorageClient
@@ -8,7 +9,7 @@ from datacosmos.stac.storage.storage_client import StorageClient
8
9
  class STACClient(ItemClient, CollectionClient, StorageClient):
9
10
  """Unified interface for STAC API, combining Item & Collection operations."""
10
11
 
11
- def __init__(self, client):
12
+ def __init__(self, client: DatacosmosClient):
12
13
  """Initialize the STACClient with a DatacosmosClient."""
13
14
  ItemClient.__init__(self, client)
14
15
  CollectionClient.__init__(self, client)
@@ -1,63 +1,42 @@
1
- """Dataclass for retrieving the upload path of a file."""
1
+ """Dataclass for generating the upload key of an asset."""
2
2
 
3
3
  from dataclasses import dataclass
4
- from datetime import datetime
5
4
  from pathlib import Path
6
5
 
7
- import structlog
8
-
9
- from datacosmos.stac.enums.processing_level import ProcessingLevel
10
6
  from datacosmos.stac.item.models.datacosmos_item import DatacosmosItem
11
7
 
12
- logger = structlog.get_logger()
13
-
14
8
 
15
9
  @dataclass
16
10
  class UploadPath:
17
- """Dataclass for retrieving the upload path of a file."""
11
+ """Storage key in the form: project/<project-id>/<item-id>/<asset-name>."""
18
12
 
19
- mission: str
20
- level: ProcessingLevel
21
- day: int
22
- month: int
23
- year: int
24
- id: str
25
- path: str
13
+ project_id: str
14
+ item_id: str
15
+ asset_name: str
26
16
 
27
- def __str__(self):
28
- """Return a human-readable string representation of the Path."""
29
- path = f"full/{self.mission.lower()}/{self.level.value.lower()}/{self.year:02}/{self.month:02}/{self.day:02}/{self.id}/{self.path}"
30
- return path.removesuffix("/")
17
+ def __str__(self) -> str:
18
+ """Path in the form: project/<project-id>/<item-id>/<asset-name>."""
19
+ return f"project/{self.project_id}/{self.item_id}/{self.asset_name}".rstrip("/")
31
20
 
32
21
  @classmethod
33
22
  def from_item_path(
34
- cls, item: DatacosmosItem, mission: str, item_path: str
35
- ) -> "Path":
36
- """Create a Path instance from a DatacosmosItem and a path."""
37
- dt = datetime.strptime(item.properties["datetime"], "%Y-%m-%dT%H:%M:%SZ")
38
- path = UploadPath(
39
- mission=mission,
40
- level=ProcessingLevel(item.properties["processing:level"]),
41
- day=dt.day,
42
- month=dt.month,
43
- year=dt.year,
44
- id=item.id,
45
- path=item_path,
46
- )
47
- return cls(**path.__dict__)
23
+ cls,
24
+ item: DatacosmosItem,
25
+ project_id: str,
26
+ asset_name: str,
27
+ ) -> "UploadPath":
28
+ """Create an UploadPath for the given item/asset."""
29
+ return cls(project_id=project_id, item_id=item.id, asset_name=asset_name)
48
30
 
49
31
  @classmethod
50
- def from_path(cls, path: str) -> "Path":
51
- """Create a Path instance from a string path."""
52
- parts = path.split("/")
53
- if len(parts) < 7:
54
- raise ValueError(f"Invalid path {path}")
55
- return cls(
56
- mission=parts[0],
57
- level=ProcessingLevel(parts[1]),
58
- day=int(parts[4]),
59
- month=int(parts[3]),
60
- year=int(parts[2]),
61
- id=parts[5],
62
- path="/".join(parts[6:]),
63
- )
32
+ def from_path(cls, path: str) -> "UploadPath":
33
+ """Reverse-parse a storage key back into its components."""
34
+ parts = Path(path).parts
35
+ if len(parts) < 4 or parts[0] != "project":
36
+ raise ValueError(f"Invalid path: {path}")
37
+
38
+ project_id, item_id, *rest = parts[1:]
39
+ asset_name = "/".join(rest)
40
+ if not asset_name:
41
+ raise ValueError(f"Asset name is missing in path: {path}")
42
+ return cls(project_id=project_id, item_id=item_id, asset_name=asset_name)
@@ -16,6 +16,7 @@ class StorageClient:
16
16
  def upload_item(
17
17
  self,
18
18
  item: DatacosmosItem,
19
+ project_id: str,
19
20
  assets_path: str | None = None,
20
21
  included_assets: list[str] | bool = True,
21
22
  max_workers: int = 4,
@@ -24,6 +25,7 @@ class StorageClient:
24
25
  """Proxy to Uploader.upload_item, without needing to pass client each call."""
25
26
  return self.uploader.upload_item(
26
27
  item=item,
28
+ project_id=project_id,
27
29
  assets_path=assets_path,
28
30
  included_assets=included_assets,
29
31
  max_workers=max_workers,
@@ -13,22 +13,37 @@ from datacosmos.stac.storage.storage_base import StorageBase
13
13
 
14
14
 
15
15
  class Uploader(StorageBase):
16
- """Handles uploading files to Datacosmos storage and registering STAC items."""
16
+ """Upload a STAC item and its assets to Datacosmos storage, then register the item in the STAC API."""
17
17
 
18
18
  def __init__(self, client: DatacosmosClient):
19
- """Handles uploading files to Datacosmos storage and registering STAC items."""
19
+ """Initialize the uploader.
20
+
21
+ Args:
22
+ client (DatacosmosClient): Pre-configured DatacosmosClient.
23
+ """
20
24
  super().__init__(client)
21
25
  self.item_client = ItemClient(client)
22
26
 
23
27
  def upload_item(
24
28
  self,
25
- item: DatacosmosItem,
29
+ item: DatacosmosItem | str,
30
+ project_id: str,
26
31
  assets_path: str | None = None,
27
32
  included_assets: list[str] | bool = True,
28
33
  max_workers: int = 4,
29
34
  time_out: float = 60 * 60 * 1,
30
35
  ) -> DatacosmosItem:
31
- """Upload a STAC item and its assets to Datacosmos."""
36
+ """Upload a STAC item (and optionally its assets) to Datacosmos.
37
+
38
+ `item` can be either:
39
+ • a DatacosmosItem instance, or
40
+ • the path to an item JSON file on disk.
41
+
42
+ If `included_assets` is:
43
+ • True → upload every asset in the item
44
+ • list → upload only the asset keys in that list
45
+ • False → upload nothing; just register the item
46
+ """
32
47
  if not assets_path and not isinstance(item, str):
33
48
  raise ValueError(
34
49
  "assets_path must be provided if item is not the path to an item file."
@@ -37,8 +52,7 @@ class Uploader(StorageBase):
37
52
  if isinstance(item, str):
38
53
  item_filename = item
39
54
  item = self._load_item(item_filename)
40
- if not assets_path:
41
- assets_path = str(Path(item_filename).parent)
55
+ assets_path = assets_path or str(Path(item_filename).parent)
42
56
 
43
57
  assets_path = assets_path or str(Path.cwd())
44
58
 
@@ -50,18 +64,18 @@ class Uploader(StorageBase):
50
64
  else []
51
65
  )
52
66
 
53
- jobs = [(item, asset_key, assets_path) for asset_key in upload_assets]
54
-
67
+ jobs = [
68
+ (item, asset_key, assets_path, project_id) for asset_key in upload_assets
69
+ ]
55
70
  self._run_in_threads(self._upload_asset, jobs, max_workers, time_out)
56
71
 
57
72
  self.item_client.add_item(item)
58
-
59
73
  return item
60
74
 
61
75
  def upload_from_file(
62
76
  self, src: str, dst: str, mime_type: str | None = None
63
77
  ) -> None:
64
- """Uploads a single file to the specified destination path."""
78
+ """Upload a single file to the specified destination path in storage."""
65
79
  url = self.base_url.with_suffix(dst)
66
80
  mime = mime_type or self._guess_mime(src)
67
81
  headers = {"Content-Type": mime}
@@ -71,25 +85,40 @@ class Uploader(StorageBase):
71
85
 
72
86
  @staticmethod
73
87
  def _load_item(item_json_file_path: str) -> DatacosmosItem:
88
+ """Load a DatacosmosItem from a JSON file on disk."""
74
89
  with open(item_json_file_path, "rb") as file:
75
90
  data = file.read().decode("utf-8")
76
91
  return TypeAdapter(DatacosmosItem).validate_json(data)
77
92
 
78
93
  def _upload_asset(
79
- self, item: DatacosmosItem, asset_key: str, assets_path: str
94
+ self, item: DatacosmosItem, asset_key: str, assets_path: str, project_id: str
80
95
  ) -> None:
96
+ """Upload a single asset file and update its href inside the item object.
97
+
98
+ Runs in parallel via _run_in_threads().
99
+ """
81
100
  asset = item.assets[asset_key]
82
- upload_path = UploadPath.from_item_path(item, "", Path(asset.href).name)
101
+
102
+ # Build storage key: project/<project_id>/<item_id>/<asset_name>
103
+ upload_path = UploadPath.from_item_path(
104
+ item,
105
+ project_id,
106
+ Path(asset.href).name,
107
+ )
108
+
83
109
  local_src = Path(assets_path) / asset.href
84
110
  if local_src.exists():
85
111
  src = str(local_src)
86
112
  asset.href = f"file:///{upload_path}"
87
113
  else:
114
+ # fallback: try matching just the filename inside assets_path
88
115
  src = str(Path(assets_path) / Path(asset.href).name)
89
- self._update_asset_href(asset)
116
+
117
+ self._update_asset_href(asset) # turn href into public URL
90
118
  self.upload_from_file(src, str(upload_path), mime_type=asset.type)
91
119
 
92
120
  def _update_asset_href(self, asset: Asset) -> None:
121
+ """Convert the storage key to a public HTTPS URL."""
93
122
  try:
94
123
  url = self.client.config.datacosmos_public_cloud_storage.as_domain_url()
95
124
  new_href = url.with_base(asset.href) # type: ignore