datacosmos 0.0.19__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datacosmos might be problematic. Click here for more details.
- datacosmos/stac/storage/storage_base.py +54 -17
- datacosmos/stac/storage/uploader.py +57 -30
- {datacosmos-0.0.19.dist-info → datacosmos-0.0.20.dist-info}/METADATA +1 -1
- {datacosmos-0.0.19.dist-info → datacosmos-0.0.20.dist-info}/RECORD +7 -7
- {datacosmos-0.0.19.dist-info → datacosmos-0.0.20.dist-info}/WHEEL +0 -0
- {datacosmos-0.0.19.dist-info → datacosmos-0.0.20.dist-info}/licenses/LICENSE.md +0 -0
- {datacosmos-0.0.19.dist-info → datacosmos-0.0.20.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
"""Base class providing common storage helpers (threading, MIME guess, futures)."""
|
|
2
2
|
|
|
3
3
|
import mimetypes
|
|
4
|
-
from concurrent.futures import ThreadPoolExecutor, wait
|
|
4
|
+
from concurrent.futures import Future, ThreadPoolExecutor, wait
|
|
5
|
+
from typing import Any, Callable, Dict, Iterable, List, Tuple
|
|
5
6
|
|
|
6
7
|
from datacosmos.datacosmos_client import DatacosmosClient
|
|
8
|
+
from datacosmos.exceptions.datacosmos_error import DatacosmosError
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class StorageBase:
|
|
@@ -18,23 +20,58 @@ class StorageBase:
|
|
|
18
20
|
mime, _ = mimetypes.guess_type(src)
|
|
19
21
|
return mime or "application/octet-stream"
|
|
20
22
|
|
|
21
|
-
def
|
|
23
|
+
def run_in_threads(
|
|
24
|
+
self,
|
|
25
|
+
fn: Callable[..., Any],
|
|
26
|
+
jobs: Iterable[Tuple[Any, ...]],
|
|
27
|
+
max_workers: int,
|
|
28
|
+
timeout: float,
|
|
29
|
+
) -> Tuple[List[Any], List[Dict[str, Any]]]:
|
|
22
30
|
"""Run the callable `fn(*args)` over the iterable of jobs in parallel threads.
|
|
23
31
|
|
|
24
|
-
|
|
32
|
+
Collects successes and failures without aborting the batch on individual errors.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
fn: The function to execute.
|
|
36
|
+
jobs: An iterable of tuples, where each tuple is unpacked as fn(*args).
|
|
37
|
+
max_workers: Maximum number of threads to use.
|
|
38
|
+
timeout: Timeout for the entire batch.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
A tuple containing (successes: List[Any], failures: List[Dict[str, Any]]).
|
|
42
|
+
Failures include the exception and job arguments.
|
|
43
|
+
|
|
44
|
+
Raises:
|
|
45
|
+
DatacosmosError: If the entire batch times out.
|
|
25
46
|
"""
|
|
26
|
-
futures = []
|
|
27
|
-
|
|
28
|
-
|
|
47
|
+
futures: List[Future] = []
|
|
48
|
+
|
|
49
|
+
executor = ThreadPoolExecutor(max_workers=max_workers)
|
|
50
|
+
|
|
51
|
+
try:
|
|
52
|
+
for args in jobs:
|
|
29
53
|
futures.append(executor.submit(fn, *args))
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
54
|
+
|
|
55
|
+
# Wait until all futures are done or the timeout is reached
|
|
56
|
+
done, not_done = wait(futures, timeout=timeout)
|
|
57
|
+
|
|
58
|
+
successes = []
|
|
59
|
+
failures = []
|
|
60
|
+
|
|
61
|
+
for future in done:
|
|
62
|
+
try:
|
|
63
|
+
result = future.result()
|
|
64
|
+
except Exception as e:
|
|
65
|
+
failures.append({'error': str(e), 'exception': e})
|
|
66
|
+
else:
|
|
67
|
+
successes.append(result)
|
|
68
|
+
|
|
69
|
+
if not_done:
|
|
70
|
+
# The executor's shutdown wait must be skipped to allow cancellation
|
|
71
|
+
raise DatacosmosError("Batch processing failed: operation timed out.")
|
|
72
|
+
|
|
73
|
+
return successes, failures
|
|
74
|
+
finally:
|
|
75
|
+
# Shutdown without waiting to enable timeout handling
|
|
76
|
+
# The wait call already established which jobs finished
|
|
77
|
+
executor.shutdown(wait=False)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Handles uploading files to Datacosmos storage and registering STAC items."""
|
|
2
2
|
|
|
3
3
|
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
4
5
|
|
|
5
6
|
from pydantic import TypeAdapter
|
|
6
7
|
|
|
@@ -13,7 +14,7 @@ from datacosmos.stac.storage.storage_base import StorageBase
|
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
class Uploader(StorageBase):
|
|
16
|
-
"""Upload a STAC item and its assets to Datacosmos storage
|
|
17
|
+
"""Upload a STAC item and its assets to Datacosmos storage and register the item in the STAC API."""
|
|
17
18
|
|
|
18
19
|
def __init__(self, client: DatacosmosClient):
|
|
19
20
|
"""Initialize the uploader.
|
|
@@ -32,17 +33,25 @@ class Uploader(StorageBase):
|
|
|
32
33
|
included_assets: list[str] | bool = True,
|
|
33
34
|
max_workers: int = 4,
|
|
34
35
|
time_out: float = 60 * 60 * 1,
|
|
35
|
-
) -> DatacosmosItem:
|
|
36
|
-
"""Upload a STAC item (and optionally its assets) to Datacosmos.
|
|
36
|
+
) -> tuple[DatacosmosItem, list[str], list[dict[str, Any]]]:
|
|
37
|
+
"""Upload a STAC item (and optionally its assets) to Datacosmos in parallel threads.
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
39
|
+
Args:
|
|
40
|
+
item (DatacosmosItem | str):
|
|
41
|
+
- a DatacosmosItem instance, or
|
|
42
|
+
- the path to an item JSON file on disk.
|
|
43
|
+
project_id (str): The project ID to upload assets to.
|
|
44
|
+
assets_path (str | None): Base directory where local asset files are located.
|
|
45
|
+
included_assets (list[str] | bool):
|
|
46
|
+
- True → upload every asset in the item.
|
|
47
|
+
- list[str] → upload only the asset keys in that list.
|
|
48
|
+
- False → skip asset upload; just register the item.
|
|
49
|
+
max_workers (int): Maximum number of parallel threads for asset upload.
|
|
50
|
+
time_out (float): Timeout in seconds for the entire asset batch upload.
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
tuple[DatacosmosItem, list[str], list[dict[str, Any]]]:
|
|
54
|
+
The updated DatacosmosItem, a list of asset keys that were uploaded successfully, and a list of upload failures.
|
|
46
55
|
"""
|
|
47
56
|
if not assets_path and not isinstance(item, str):
|
|
48
57
|
raise ValueError(
|
|
@@ -54,23 +63,45 @@ class Uploader(StorageBase):
|
|
|
54
63
|
item = self._load_item(item_filename)
|
|
55
64
|
assets_path = assets_path or str(Path(item_filename).parent)
|
|
56
65
|
|
|
66
|
+
if not isinstance(item, DatacosmosItem):
|
|
67
|
+
raise TypeError(f"item must be a DatacosmosItem, got {type(item).__name__}")
|
|
68
|
+
|
|
57
69
|
assets_path = assets_path or str(Path.cwd())
|
|
58
70
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
71
|
+
if included_assets is False:
|
|
72
|
+
upload_assets: list[str] = []
|
|
73
|
+
elif included_assets is True:
|
|
74
|
+
upload_assets = list(item.assets.keys())
|
|
75
|
+
elif isinstance(included_assets, list):
|
|
76
|
+
upload_assets = included_assets
|
|
77
|
+
else:
|
|
78
|
+
upload_assets = []
|
|
66
79
|
|
|
67
80
|
jobs = [
|
|
68
81
|
(item, asset_key, assets_path, project_id) for asset_key in upload_assets
|
|
69
82
|
]
|
|
70
|
-
self._run_in_threads(self._upload_asset, jobs, max_workers, time_out)
|
|
71
83
|
|
|
72
|
-
|
|
73
|
-
|
|
84
|
+
if not jobs:
|
|
85
|
+
self.item_client.add_item(item)
|
|
86
|
+
return item, [], []
|
|
87
|
+
|
|
88
|
+
successes, failures = self.run_in_threads(
|
|
89
|
+
self._upload_asset, jobs, max_workers, time_out
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Register the item if the overall process didn't time out
|
|
93
|
+
# and there was at least one successful upload.
|
|
94
|
+
if successes:
|
|
95
|
+
self.item_client.add_item(item)
|
|
96
|
+
|
|
97
|
+
return item, successes, failures
|
|
98
|
+
|
|
99
|
+
@staticmethod
|
|
100
|
+
def _load_item(item_json_file_path: str) -> DatacosmosItem:
|
|
101
|
+
"""Load a DatacosmosItem from a JSON file on disk."""
|
|
102
|
+
with open(item_json_file_path, "rb") as file:
|
|
103
|
+
data = file.read().decode("utf-8")
|
|
104
|
+
return TypeAdapter(DatacosmosItem).validate_json(data)
|
|
74
105
|
|
|
75
106
|
def upload_from_file(
|
|
76
107
|
self, src: str, dst: str, mime_type: str | None = None
|
|
@@ -83,19 +114,13 @@ class Uploader(StorageBase):
|
|
|
83
114
|
response = self.client.put(url, data=f, headers=headers)
|
|
84
115
|
response.raise_for_status()
|
|
85
116
|
|
|
86
|
-
@staticmethod
|
|
87
|
-
def _load_item(item_json_file_path: str) -> DatacosmosItem:
|
|
88
|
-
"""Load a DatacosmosItem from a JSON file on disk."""
|
|
89
|
-
with open(item_json_file_path, "rb") as file:
|
|
90
|
-
data = file.read().decode("utf-8")
|
|
91
|
-
return TypeAdapter(DatacosmosItem).validate_json(data)
|
|
92
|
-
|
|
93
117
|
def _upload_asset(
|
|
94
118
|
self, item: DatacosmosItem, asset_key: str, assets_path: str, project_id: str
|
|
95
|
-
) -> None:
|
|
119
|
+
) -> str:
|
|
96
120
|
"""Upload a single asset file and update its href inside the item object.
|
|
97
121
|
|
|
98
|
-
|
|
122
|
+
Returns:
|
|
123
|
+
str: The asset_key upon successful upload.
|
|
99
124
|
"""
|
|
100
125
|
asset = item.assets[asset_key]
|
|
101
126
|
|
|
@@ -117,6 +142,8 @@ class Uploader(StorageBase):
|
|
|
117
142
|
self._update_asset_href(asset) # turn href into public URL
|
|
118
143
|
self.upload_from_file(src, str(upload_path), mime_type=asset.type)
|
|
119
144
|
|
|
145
|
+
return asset_key
|
|
146
|
+
|
|
120
147
|
def _update_asset_href(self, asset: Asset) -> None:
|
|
121
148
|
"""Convert the storage key to a public HTTPS URL."""
|
|
122
149
|
try:
|
|
@@ -43,9 +43,9 @@ datacosmos/stac/item/models/eo_band.py,sha256=YC3Scn_wFhIo51pIVcJeuJienF7JGWoEv3
|
|
|
43
43
|
datacosmos/stac/item/models/item_update.py,sha256=_CpjQn9SsfedfuxlHSiGeptqY4M-p15t9YX__mBRueI,2088
|
|
44
44
|
datacosmos/stac/item/models/raster_band.py,sha256=CoEVs-YyPE5Fse0He9DdOs4dGZpzfCsCuVzOcdXa_UM,354
|
|
45
45
|
datacosmos/stac/storage/__init__.py,sha256=hivfSpOaoSwCAymgU0rTgvSk9LSPAn1cPLQQ9fLmFX0,151
|
|
46
|
-
datacosmos/stac/storage/storage_base.py,sha256=
|
|
46
|
+
datacosmos/stac/storage/storage_base.py,sha256=NpCKAA3qEI212WUNZ2-eG9XfSJKDMiJcu4pEmr10-JI,2843
|
|
47
47
|
datacosmos/stac/storage/storage_client.py,sha256=4boqQ3zVMrk9X2IXus-Cs429juLe0cUQ0XEzg_y3yOA,1205
|
|
48
|
-
datacosmos/stac/storage/uploader.py,sha256=
|
|
48
|
+
datacosmos/stac/storage/uploader.py,sha256=DawtNn4-uEtpUYPZS1fKv77wu-zNne9ltlGZiArzBFI,5919
|
|
49
49
|
datacosmos/stac/storage/dataclasses/__init__.py,sha256=IjcyA8Vod-z1_Gi1FMZhK58Owman0foL25Hs0YtkYYs,43
|
|
50
50
|
datacosmos/stac/storage/dataclasses/upload_path.py,sha256=gbpV67FECFNyXn-yGUSuLvGGWHtibbZq7Qu9yGod3C0,1398
|
|
51
51
|
datacosmos/utils/__init__.py,sha256=XQbAnoqJrPpnSpEzAbjh84yqYWw8cBM8mNp8ynTG-54,50
|
|
@@ -55,8 +55,8 @@ datacosmos/utils/http_response/check_api_response.py,sha256=l_yQiiekNcNbhFec_5Ue
|
|
|
55
55
|
datacosmos/utils/http_response/models/__init__.py,sha256=Wj8YT6dqw7rAz_rctllxo5Or_vv8DwopvQvBzwCTvpw,45
|
|
56
56
|
datacosmos/utils/http_response/models/datacosmos_error.py,sha256=Uqi2uM98nJPeCbM7zngV6vHSk97jEAb_nkdDEeUjiQM,740
|
|
57
57
|
datacosmos/utils/http_response/models/datacosmos_response.py,sha256=oV4n-sue7K1wwiIQeHpxdNU8vxeqF3okVPE2rydw5W0,336
|
|
58
|
-
datacosmos-0.0.
|
|
59
|
-
datacosmos-0.0.
|
|
60
|
-
datacosmos-0.0.
|
|
61
|
-
datacosmos-0.0.
|
|
62
|
-
datacosmos-0.0.
|
|
58
|
+
datacosmos-0.0.20.dist-info/licenses/LICENSE.md,sha256=vpbRI-UUbZVQfr3VG_CXt9HpRnL1b5kt8uTVbirxeyI,1486
|
|
59
|
+
datacosmos-0.0.20.dist-info/METADATA,sha256=WfpmuajQ7GSenjnr8X04rNXDg6U6NH3pmbFMtJYqFV4,1000
|
|
60
|
+
datacosmos-0.0.20.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
61
|
+
datacosmos-0.0.20.dist-info/top_level.txt,sha256=ueobs5CNeyDbPMgXPcVV0d0yNdm8CvGtDT3CaksRVtA,11
|
|
62
|
+
datacosmos-0.0.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|