hafnia 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hafnia/dataset/dataset_helpers.py +59 -1
- hafnia/dataset/dataset_names.py +1 -108
- hafnia/dataset/dataset_recipe/dataset_recipe.py +48 -4
- hafnia/dataset/format_conversions/torchvision_datasets.py +2 -2
- hafnia/dataset/hafnia_dataset.py +163 -69
- hafnia/dataset/hafnia_dataset_types.py +142 -18
- hafnia/dataset/operations/dataset_s3_storage.py +7 -2
- hafnia/dataset/operations/table_transformations.py +0 -18
- hafnia/experiment/command_builder.py +686 -0
- hafnia/platform/datasets.py +32 -132
- hafnia/platform/download.py +1 -1
- hafnia/platform/s5cmd_utils.py +122 -3
- {hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/METADATA +3 -2
- {hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/RECORD +19 -18
- hafnia_cli/dataset_cmds.py +19 -13
- hafnia_cli/runc_cmds.py +7 -2
- {hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/WHEEL +0 -0
- {hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/entry_points.txt +0 -0
- {hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/licenses/LICENSE +0 -0
hafnia/platform/datasets.py
CHANGED

```diff
@@ -1,23 +1,13 @@
-import collections
-import shutil
-from pathlib import Path
 from typing import Any, Dict, List, Optional

 import rich
-from packaging.version import Version
 from rich import print as rprint

 from hafnia import http, utils
-from hafnia.dataset.dataset_names import DATASET_FILENAMES_REQUIRED, ResourceCredentials
-from hafnia.dataset.dataset_recipe.dataset_recipe import (
-    DatasetRecipe,
-    get_dataset_path_from_recipe,
-)
-from hafnia.dataset.hafnia_dataset import HafniaDataset
 from hafnia.http import fetch, post
 from hafnia.log import user_logger
-from hafnia.platform import s5cmd_utils
 from hafnia.platform.download import get_resource_credentials
+from hafnia.platform.s5cmd_utils import ResourceCredentials
 from hafnia.utils import timed
 from hafnia_cli.config import Config

@@ -57,7 +47,6 @@ def get_or_create_dataset(dataset_name: str = "", cfg: Optional[Config] = None)
     """Create a new dataset on the Hafnia platform."""
     cfg = cfg or Config()
     dataset = get_dataset_by_name(dataset_name, cfg)
-
     if dataset is not None:
         user_logger.info(f"Dataset '{dataset_name}' already exists on the Hafnia platform.")
         return dataset
@@ -130,6 +119,31 @@ def get_upload_credentials_by_id(dataset_id: str, cfg: Optional[Config] = None)
     return ResourceCredentials.fix_naming(credentials_response)


+@timed("Get read access credentials by ID")
+def get_read_credentials_by_id(dataset_id: str, cfg: Optional[Config] = None) -> Optional[ResourceCredentials]:
+    """Get dataset read access credentials by ID from the Hafnia platform."""
+    cfg = cfg or Config()
+    endpoint_dataset = cfg.get_platform_endpoint("datasets")
+    if utils.is_hafnia_cloud_job():
+        credentials_endpoint_suffix = "temporary-credentials-hidden"  # Access to hidden datasets
+    else:
+        credentials_endpoint_suffix = "temporary-credentials"  # Access to sample dataset
+    access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"
+    resource_credentials = get_resource_credentials(access_dataset_endpoint, cfg.api_key)
+    return resource_credentials
+
+
+@timed("Get read access credentials by name")
+def get_read_credentials_by_name(dataset_name: str, cfg: Optional[Config] = None) -> Optional[ResourceCredentials]:
+    """Get dataset read access credentials by name from the Hafnia platform."""
+    cfg = cfg or Config()
+    dataset_response = get_dataset_by_name(dataset_name=dataset_name, cfg=cfg)
+    if dataset_response is None:
+        return None
+
+    return get_read_credentials_by_id(dataset_response["id"], cfg=cfg)
+
+
 @timed("Delete dataset by id")
 def delete_dataset_by_id(dataset_id: str, cfg: Optional[Config] = None) -> Dict:
     cfg = cfg or Config()
@@ -152,10 +166,14 @@ def delete_dataset_by_name(dataset_name: str, cfg: Optional[Config] = None) -> D
     return response


-def delete_dataset_completely_by_name(
+def delete_dataset_completely_by_name(
+    dataset_name: str,
+    interactive: bool = True,
+    cfg: Optional[Config] = None,
+) -> None:
     from hafnia.dataset.operations.dataset_s3_storage import delete_hafnia_dataset_files_on_platform

-    cfg = Config()
+    cfg = cfg or Config()

     is_deleted = delete_hafnia_dataset_files_on_platform(
         dataset_name=dataset_name,
@@ -180,79 +198,6 @@ def upload_dataset_details(cfg: Config, data: dict, dataset_name: str) -> dict:
     return response # type: ignore[return-value]


-def download_or_get_dataset_path(
-    dataset_name: str,
-    cfg: Optional[Config] = None,
-    path_datasets_folder: Optional[str] = None,
-    force_redownload: bool = False,
-    download_files: bool = True,
-) -> Path:
-    """Download or get the path of the dataset."""
-    recipe_explicit = DatasetRecipe.from_implicit_form(dataset_name)
-    path_dataset = get_dataset_path_from_recipe(recipe_explicit, path_datasets=path_datasets_folder)
-
-    is_dataset_valid = HafniaDataset.check_dataset_path(path_dataset, raise_error=False)
-    if is_dataset_valid and not force_redownload:
-        user_logger.info("Dataset found locally. Set 'force=True' or add `--force` flag with cli to re-download")
-        return path_dataset
-
-    cfg = cfg or Config()
-    api_key = cfg.api_key
-
-    shutil.rmtree(path_dataset, ignore_errors=True)
-
-    endpoint_dataset = cfg.get_platform_endpoint("datasets")
-    dataset_res = get_dataset_by_name(dataset_name, cfg) # Check if dataset exists
-    if dataset_res is None:
-        raise ValueError(f"Dataset '{dataset_name}' not found on the Hafnia platform.")
-
-    dataset_id = dataset_res.get("id") # type: ignore[union-attr]
-
-    if utils.is_hafnia_cloud_job():
-        credentials_endpoint_suffix = "temporary-credentials-hidden"  # Access to hidden datasets
-    else:
-        credentials_endpoint_suffix = "temporary-credentials"  # Access to sample dataset
-    access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"
-
-    download_dataset_from_access_endpoint(
-        endpoint=access_dataset_endpoint,
-        api_key=api_key,
-        path_dataset=path_dataset,
-        download_files=download_files,
-    )
-    return path_dataset
-
-
-def download_dataset_from_access_endpoint(
-    endpoint: str,
-    api_key: str,
-    path_dataset: Path,
-    version: Optional[str] = None,
-    download_files: bool = True,
-) -> None:
-    try:
-        resource_credentials = get_resource_credentials(endpoint, api_key)
-        download_annotation_dataset_from_version(
-            version=version,
-            credentials=resource_credentials,
-            path_dataset=path_dataset,
-        )
-
-    except ValueError as e:
-        user_logger.error(f"Failed to download annotations: {e}")
-        return
-
-    if not download_files:
-        return
-    dataset = HafniaDataset.from_path(path_dataset, check_for_images=False)
-    try:
-        dataset = dataset.download_files_aws(path_dataset, aws_credentials=resource_credentials, force_redownload=True)
-    except ValueError as e:
-        user_logger.error(f"Failed to download images: {e}")
-        return
-    dataset.write_annotations(path_folder=path_dataset) # Overwrite annotations as files have been re-downloaded
-
-
 TABLE_FIELDS = {
     "ID": "id",
     "Hidden\nSamples": "hidden.samples",
@@ -287,48 +232,3 @@ def extend_dataset_details(datasets: List[Dict[str, Any]]) -> List[Dict[str, Any
         dataset[f"{variant_type}.samples"] = variant["number_of_data_items"]
         dataset[f"{variant_type}.size"] = utils.size_human_readable(variant["size_bytes"])
     return datasets
-
-
-def download_annotation_dataset_from_version(
-    version: Optional[str],
-    credentials: ResourceCredentials,
-    path_dataset: Path,
-) -> list[str]:
-    path_dataset.mkdir(parents=True, exist_ok=True)
-
-    envs = credentials.aws_credentials()
-    bucket_prefix_sample_versions = f"{credentials.s3_uri()}/versions"
-    all_s3_annotation_files = s5cmd_utils.list_bucket(bucket_prefix=bucket_prefix_sample_versions, append_envs=envs)
-    s3_files = _annotation_files_from_version(version=version, all_annotation_files=all_s3_annotation_files)
-
-    local_paths = [(path_dataset / filename.split("/")[-1]).as_posix() for filename in s3_files]
-    s5cmd_utils.fast_copy_files(
-        src_paths=s3_files,
-        dst_paths=local_paths,
-        append_envs=envs,
-        description="Downloading annotation files",
-    )
-    return local_paths
-
-
-def _annotation_files_from_version(version: Optional[str], all_annotation_files: list[str]) -> list[str]:
-    version_files = collections.defaultdict(list)
-    for metadata_file in all_annotation_files:
-        version_str, filename = metadata_file.split("/")[-2:]
-        if filename not in DATASET_FILENAMES_REQUIRED:
-            continue
-        version_files[version_str].append(metadata_file)
-    available_versions = {v for v, files in version_files.items() if len(files) == len(DATASET_FILENAMES_REQUIRED)}
-
-    if len(available_versions) == 0:
-        raise ValueError("No versions were found in the dataset.")
-
-    if version is None:
-        latest_version = max(Version(ver) for ver in available_versions)
-        version = str(latest_version)
-        user_logger.info(f"No version selected. Using latest version: {version}")
-
-    if version not in available_versions:
-        raise ValueError(f"Selected version '{version}' not found in available versions: {available_versions}")
-
-    return version_files[version]
```
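With 0.5.2 the download helpers above leave hafnia/platform/datasets.py (the download path now lives in hafnia.dataset.hafnia_dataset, as the CLI diffs below show), and the module instead gains two read-credential helpers. A minimal sketch of how they might be used, assuming an already-configured Hafnia profile; the dataset name is a placeholder:

```python
from hafnia.platform.datasets import get_read_credentials_by_name

# "my-dataset" is a placeholder; any dataset visible to your profile works.
credentials = get_read_credentials_by_name("my-dataset")

if credentials is None:
    print("Dataset not found on the Hafnia platform.")
else:
    # ResourceCredentials (defined in hafnia/platform/s5cmd_utils.py, shown below)
    # bundles temporary AWS credentials with the dataset's S3 location.
    print(credentials.s3_uri())            # s3://<bucket>/<prefix>
    envs = credentials.aws_credentials()   # AWS_* env vars for s5cmd/boto3 calls
```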
hafnia/platform/download.py
CHANGED

```diff
@@ -5,9 +5,9 @@ import boto3
 from botocore.exceptions import ClientError
 from rich.progress import Progress

-from hafnia.dataset.dataset_names import ResourceCredentials
 from hafnia.http import fetch
 from hafnia.log import sys_logger, user_logger
+from hafnia.platform.s5cmd_utils import ResourceCredentials


 def get_resource_credentials(endpoint: str, api_key: str) -> ResourceCredentials:
```
hafnia/platform/s5cmd_utils.py
CHANGED

```diff
@@ -7,6 +7,10 @@ import uuid
 from pathlib import Path
 from typing import Dict, List, Optional

+import boto3
+from botocore.exceptions import UnauthorizedSSOTokenError
+from pydantic import BaseModel, field_validator
+
 from hafnia.log import sys_logger, user_logger
 from hafnia.utils import progress_bar

@@ -26,7 +30,11 @@ def find_s5cmd() -> Optional[str]:
     if result:
         return result
     python_dir = Path(sys.executable).parent
-    locations = (
+    locations = (
+        python_dir / "Scripts" / "s5cmd.exe",
+        python_dir / "bin" / "s5cmd",
+        python_dir / "s5cmd",
+    )
     for loc in locations:
         if loc.exists():
             return str(loc)
@@ -104,12 +112,17 @@ def delete_bucket_content(
     returns = execute_command(["rm", f"{bucket_prefix}/*"], append_envs=append_envs)

     if returns.returncode != 0:
-
-
+        bucket_content_is_already_deleted = "no object found" in returns.stderr.strip()
+        bucket_is_already_deleted = "NoSuchBucket" in returns.stderr.strip()
+        if bucket_content_is_already_deleted:
             user_logger.info(f"No action was taken. S3 bucket '{bucket_prefix}' is already empty.")
+        elif bucket_is_already_deleted:
+            user_logger.info(f"No action was taken. S3 bucket '{bucket_prefix}' does not exist.")
+            return
         else:
             user_logger.error("Error during s5cmd rm command:")
             user_logger.error(returns.stdout)
+            user_logger.error(returns.stderr)
             raise RuntimeError(f"Failed to delete all files in S3 bucket '{bucket_prefix}'.")

     if remove_bucket:
@@ -118,6 +131,7 @@ def delete_bucket_content(
         if returns.returncode != 0:
             user_logger.error("Error during s5cmd rb command:")
             user_logger.error(returns.stdout)
+            user_logger.error(returns.stderr)
             raise RuntimeError(f"Failed to delete S3 bucket '{bucket_prefix}'.")
         user_logger.info(f"S3 bucket '{bucket_prefix}' has been deleted.")

@@ -145,3 +159,108 @@ def fast_copy_files(
     cmds = [f"cp {src} {dst}" for src, dst in zip(src_paths, dst_paths)]
     lines = execute_commands(cmds, append_envs=append_envs, description=description)
     return lines
+
+
+ARN_PREFIX = "arn:aws:s3:::"
+
+
+class AwsCredentials(BaseModel):
+    access_key: str
+    secret_key: str
+    session_token: str
+    region: Optional[str]
+
+    def aws_credentials(self) -> Dict[str, str]:
+        """
+        Returns the AWS credentials as a dictionary.
+        """
+        environment_vars = {
+            "AWS_ACCESS_KEY_ID": self.access_key,
+            "AWS_SECRET_ACCESS_KEY": self.secret_key,
+            "AWS_SESSION_TOKEN": self.session_token,
+        }
+        if self.region:
+            environment_vars["AWS_REGION"] = self.region
+
+        return environment_vars
+
+    @staticmethod
+    def from_session(session: boto3.Session) -> "AwsCredentials":
+        """
+        Creates AwsCredentials from a Boto3 session.
+        """
+        try:
+            frozen_credentials = session.get_credentials().get_frozen_credentials()
+        except UnauthorizedSSOTokenError as e:
+            raise RuntimeError(
+                f"Failed to get AWS credentials from the session for profile '{session.profile_name}'.\n"
+                f"Ensure the profile exists in your AWS config in '~/.aws/config' and that you are logged in via AWS SSO.\n"
+                f"\tUse 'aws sso login --profile {session.profile_name}' to log in."
+            ) from e
+        return AwsCredentials(
+            access_key=frozen_credentials.access_key,
+            secret_key=frozen_credentials.secret_key,
+            session_token=frozen_credentials.token,
+            region=session.region_name,
+        )
+
+    def to_resource_credentials(self, bucket_name: str) -> "ResourceCredentials":
+        """
+        Converts AwsCredentials to ResourceCredentials by adding the S3 ARN.
+        """
+        payload = self.model_dump()
+        payload["s3_arn"] = f"{ARN_PREFIX}{bucket_name}"
+        return ResourceCredentials(**payload)
+
+
+class ResourceCredentials(AwsCredentials):
+    s3_arn: str
+
+    @staticmethod
+    def fix_naming(payload: Dict[str, str]) -> "ResourceCredentials":
+        """
+        The endpoint returns a payload with a key called 's3_path', but it
+        is actually an ARN path (starts with arn:aws:s3::). This method renames it to 's3_arn' for consistency.
+        """
+        if "s3_path" in payload and payload["s3_path"].startswith(ARN_PREFIX):
+            payload["s3_arn"] = payload.pop("s3_path")
+
+        if "region" not in payload:
+            payload["region"] = "eu-west-1"
+        return ResourceCredentials(**payload)
+
+    @field_validator("s3_arn")
+    @classmethod
+    def validate_s3_arn(cls, value: str) -> str:
+        """Validate s3_arn to ensure it starts with 'arn:aws:s3:::'"""
+        if not value.startswith("arn:aws:s3:::"):
+            raise ValueError(f"Invalid S3 ARN: {value}. It should start with 'arn:aws:s3:::'")
+        return value
+
+    def s3_path(self) -> str:
+        """
+        Extracts the S3 path from the ARN.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket/my-prefix
+        """
+        return self.s3_arn[len(ARN_PREFIX) :]
+
+    def s3_uri(self) -> str:
+        """
+        Converts the S3 ARN to a URI format.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> s3://my-bucket/my-prefix
+        """
+        return f"s3://{self.s3_path()}"
+
+    def bucket_name(self) -> str:
+        """
+        Extracts the bucket name from the S3 ARN.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket
+        """
+        return self.s3_path().split("/")[0]
+
+    def object_key(self) -> str:
+        """
+        Extracts the object key from the S3 ARN.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> my-prefix
+        """
+        return "/".join(self.s3_path().split("/")[1:])
```
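The new AwsCredentials/ResourceCredentials models centralise how temporary credentials and the dataset's S3 ARN are handled (previously ResourceCredentials lived in hafnia/dataset/dataset_names.py). A short sketch of the helpers, built from a hypothetical payload shaped like the one fix_naming expects; the key values are placeholders:

```python
from hafnia.platform.s5cmd_utils import ResourceCredentials

# Hypothetical payload mimicking the credentials endpoint response;
# fix_naming() renames 's3_path' to 's3_arn' and fills in a default region.
payload = {
    "access_key": "AKIA...",  # placeholder
    "secret_key": "...",      # placeholder
    "session_token": "...",   # placeholder
    "s3_path": "arn:aws:s3:::my-bucket/datasets/my-dataset",
}
creds = ResourceCredentials.fix_naming(payload)

assert creds.bucket_name() == "my-bucket"
assert creds.object_key() == "datasets/my-dataset"
assert creds.s3_uri() == "s3://my-bucket/datasets/my-dataset"
envs = creds.aws_credentials()  # AWS_* environment variables for s5cmd
```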
{hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/METADATA
CHANGED

```diff
@@ -1,16 +1,17 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.5.0
+Version: 0.5.2
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
 Requires-Python: >=3.10
 Requires-Dist: boto3>=1.35.91
 Requires-Dist: click>=8.1.8
+Requires-Dist: docstring-parser>=0.17.0
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
 Requires-Dist: keyring>=25.6.0
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.23.0
 Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
```
{hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/RECORD
CHANGED

```diff
@@ -6,23 +6,23 @@ hafnia/utils.py,sha256=l_awkrb3OttxqSMkPiYcpuP3c_kejkSmiqndSahc1s0,8703
 hafnia/data/__init__.py,sha256=o9QjiGbEcNa6r-qDmwwmxPXf-1UitNl5-WxFNcujqsg,111
 hafnia/data/factory.py,sha256=kHkvOtBUbwaShZBGf1kZzocDJBn_1dHHLrQxnUpJmfY,778
 hafnia/dataset/dataset_details_uploader.py,sha256=H_zz67bBwbgo4StUwBNmH89WlqydIc-tEQbrRnZDwgg,24161
-hafnia/dataset/dataset_helpers.py,sha256=
-hafnia/dataset/dataset_names.py,sha256=
-hafnia/dataset/hafnia_dataset.py,sha256=
-hafnia/dataset/hafnia_dataset_types.py,sha256=
+hafnia/dataset/dataset_helpers.py,sha256=N8W_ioDlxP2VvNJXzqXLDbcEqgPKz0WyPNOBakHoBUc,6443
+hafnia/dataset/dataset_names.py,sha256=42_UKrDwcKEW48oTbtBaeyi5qVFVaMAj8vRvDv-mcEI,3616
+hafnia/dataset/hafnia_dataset.py,sha256=OOenIMPm8K23AgxHvmc_y05KCzxIwaZa-gv3uNC50NU,38519
+hafnia/dataset/hafnia_dataset_types.py,sha256=eCLawdjIFoul67AAtQ4xaKjbVSNAFA-mvbJYofiu2Sg,26848
 hafnia/dataset/license_types.py,sha256=b1Jt5e8N89sujIs4T9y39sJEkzpAwCoLDTHDTpkiEOI,2166
-hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=
+hafnia/dataset/dataset_recipe/dataset_recipe.py,sha256=Ln49jcpOQ4qzumv-SkWSBCqNgSP1dGQloKSLs7psP90,20991
 hafnia/dataset/dataset_recipe/recipe_transforms.py,sha256=j3Oiytt3LI2rCaJid7Y44oT9MXvlZVqvZanngMebIWg,3088
 hafnia/dataset/dataset_recipe/recipe_types.py,sha256=AcrG6gpRt3Igl-CCJ60uyh-WkfI1NCnQ55M8yClSI9Q,5328
 hafnia/dataset/format_conversions/format_coco.py,sha256=7GjeF016ZBaKxu-VYiqXxuPw8HuuODV1cxc2TbDDZBw,19628
 hafnia/dataset/format_conversions/format_helpers.py,sha256=-lNgn_mrpVM_Xwb3jHH0BlBEPCMKjLqTeYf4PbssbuQ,1144
 hafnia/dataset/format_conversions/format_image_classification_folder.py,sha256=ALVsQfSvBfAkpmUDijznqqd2JBh0Qtnvzq7igxMjMb8,7170
 hafnia/dataset/format_conversions/format_yolo.py,sha256=zvCHo2L_0mPJScMbDtwvZUts9UX2ERKhhYbY31Q6tQA,9912
-hafnia/dataset/format_conversions/torchvision_datasets.py,sha256=
-hafnia/dataset/operations/dataset_s3_storage.py,sha256=
+hafnia/dataset/format_conversions/torchvision_datasets.py,sha256=sC8DgAt10PEaCHFk_Lm-dIzr_0EF-2g24kG9EINYk7c,12096
+hafnia/dataset/operations/dataset_s3_storage.py,sha256=xPC77Og47xTpI0JBFAR1pgb5u7l18byAA6p7IlpnpGE,8971
 hafnia/dataset/operations/dataset_stats.py,sha256=Ltf-V4_o_IB4UXw9WG9bsVoqeX90yGsjivK0CDggriw,11930
 hafnia/dataset/operations/dataset_transformations.py,sha256=qUNno0rAT1A452uzlR-k1WbatyY9VuMp1QJjkMg9GzE,19495
-hafnia/dataset/operations/table_transformations.py,sha256=
+hafnia/dataset/operations/table_transformations.py,sha256=mdjUE1lSQ7QyONjQapSHDg1MkYuKaflcoVUq1Y6Lkqc,13606
 hafnia/dataset/primitives/__init__.py,sha256=xFLJ3R7gpbuQnNJuFhuu836L3nicwoaY5aHkqk7Bbr8,927
 hafnia/dataset/primitives/bbox.py,sha256=QJJBebltOd9J3idisp3QdX0gCgz6P5xlIlGbth19fG0,6669
 hafnia/dataset/primitives/bitmask.py,sha256=Q7RiNYvMDlcFPkXAWXDJkCIERjnUTCrHu6VeEPX1jEA,7212
@@ -33,14 +33,15 @@ hafnia/dataset/primitives/primitive.py,sha256=Wvby0sCGgYj8ec39PLcHsmip5VKL96ZSCz
 hafnia/dataset/primitives/segmentation.py,sha256=hnXIUklkuDMxYkUaff1bgRTcXI_2b32RIbobxR3ejzk,2017
 hafnia/dataset/primitives/utils.py,sha256=3gT1as-xXEj8CamoIuBb9gQwUN9Ae9qnqtqF_uEe0zo,1993
 hafnia/experiment/__init__.py,sha256=OEFE6HqhO5zcTCLZcPcPVjIg7wMFFnvZ1uOtAVhRz7M,85
+hafnia/experiment/command_builder.py,sha256=9_Of8WsJyyaCSK4C0vJMLVaUU6qEhnVNaIUAEHc80rI,27580
 hafnia/experiment/hafnia_logger.py,sha256=BHIOLAds_3JxT0cev_ikUH0XQVIxBJTkcBSx2Q_SIk0,10894
 hafnia/platform/__init__.py,sha256=L_Q7CNpsJ0HMNPy_rLlLK5RhmuCU7IF4BchxKv6amYc,782
 hafnia/platform/builder.py,sha256=kUEuj5-qtL1uk5v2tUvOCREn5yV-G4Fr6F31haIAb5E,5808
 hafnia/platform/dataset_recipe.py,sha256=ybfSSHVPG0eFUbzg_1McezPSOtMoDZEg7l6rFYndtb4,3857
-hafnia/platform/datasets.py,sha256=
-hafnia/platform/download.py,sha256=
+hafnia/platform/datasets.py,sha256=nXHg3I14p3tJeDX2woPH9NMiOxn_54zlIOPJXvXFI_w,9448
+hafnia/platform/download.py,sha256=e73Pm0afwRPTHxBvRy0gUZSFfDuePHPnfasyhaZ-KGQ,5019
 hafnia/platform/experiment.py,sha256=SrEH0nuwwBXf1Iu4diB1BEPqL-TxW3aQkZWBbM1-tY0,1846
-hafnia/platform/s5cmd_utils.py,sha256=
+hafnia/platform/s5cmd_utils.py,sha256=hHsGPJ1S9_hFIVfCO-efvTF4qbLYreK1nl3VC5caU1w,9491
 hafnia/platform/trainer_package.py,sha256=w6JC7o-279ujcwtNTbUaQ9AnPcYRPPbD8EACa6XyUHA,2206
 hafnia/visualizations/colors.py,sha256=003eAJVnBal4abaYIIpsrT7erIOIjTUHHYVJ1Tj1CDc,5226
 hafnia/visualizations/image_visualizations.py,sha256=rB7c-KK-qq0BsSdkaFxCAHOOCTXTUQx0VMEhib7ig0k,7509
@@ -48,15 +49,15 @@ hafnia_cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hafnia_cli/__main__.py,sha256=CqD_7RfbfwB6MED3WZ8WxclrFxWcRMtZ5A1Snnst3GM,1761
 hafnia_cli/config.py,sha256=X0dJBYH-7mNAFkHgeZvDbawlQqoyCwoY4C-QhlyYCA0,7695
 hafnia_cli/consts.py,sha256=uCpYX44NCu_Zvte0QwChunxOo-qqhcaJRSYDAIsoJ8A,972
-hafnia_cli/dataset_cmds.py,sha256=
+hafnia_cli/dataset_cmds.py,sha256=JfSj7Cei1T2oYUXP1bpz63uQopgL3R_dMMYnPGGcuU8,2072
 hafnia_cli/dataset_recipe_cmds.py,sha256=OYSmpKL0Wxo1ZSxIGfH6w7pEWoI7CjUTmfIELJSZjGQ,2894
 hafnia_cli/experiment_cmds.py,sha256=_KxsMhbjlkIno1PIMXJ0Omw_PSJi8qi9hmtCUqwcj1M,7970
 hafnia_cli/keychain.py,sha256=bNyjjULVQu7kV338wUC65UvbCwmSGOmEjKWPLIQjT0k,2555
 hafnia_cli/profile_cmds.py,sha256=yTyOsPsUssLCzFIxURkxbKrFEhYIVDlUC0G2s5Uks-U,3476
-hafnia_cli/runc_cmds.py,sha256=
+hafnia_cli/runc_cmds.py,sha256=7P5TjF6KA9K4OKPG1qC_0gteXfLJbXlA858WWrosoGQ,5098
 hafnia_cli/trainer_package_cmds.py,sha256=hUBc6gCMV28fcAA0xQdXKL1z-a3aL9lMWcVqjvHO1Uo,2326
-hafnia-0.5.
-hafnia-0.5.
-hafnia-0.5.
-hafnia-0.5.
-hafnia-0.5.
+hafnia-0.5.2.dist-info/METADATA,sha256=U8Qy_hlNHR-hfgFqsvAkM8dfzS28v9nRI9ne5oXA87o,19312
+hafnia-0.5.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+hafnia-0.5.2.dist-info/entry_points.txt,sha256=j2jsj1pqajLAiSOnF7sq66A3d1SVeHPKVTVyIFzipSA,52
+hafnia-0.5.2.dist-info/licenses/LICENSE,sha256=wLZw1B7_mod_CO1H8LXqQgfqlWD6QceJR8--LJYRZGE,1078
+hafnia-0.5.2.dist-info/RECORD,,
```
hafnia_cli/dataset_cmds.py
CHANGED

```diff
@@ -3,8 +3,8 @@ from typing import Optional

 import click

+import hafnia.dataset.hafnia_dataset
 from hafnia import utils
-from hafnia_cli import consts
 from hafnia_cli.config import Config


@@ -26,6 +26,13 @@ def cmd_list_datasets(cfg: Config) -> None:

 @dataset.command("download")
 @click.argument("dataset_name")
+@click.option(
+    "--version",
+    "-v",
+    default="latest",
+    required=False,
+    help="Dataset version to download e.g. '0.0.1' or '1.0.1'. Defaults to the latest version.",
+)
 @click.option(
     "--destination",
     "-d",
@@ -35,20 +42,18 @@ def cmd_list_datasets(cfg: Config) -> None:
 )
 @click.option("--force", "-f", is_flag=True, default=False, help="Flag to enable force redownload")
 @click.pass_obj
-def cmd_dataset_download(
+def cmd_dataset_download(
+    cfg: Config, dataset_name: str, version: Optional[str], destination: Optional[click.Path], force: bool
+) -> Path:
     """Download dataset from Hafnia platform"""

-
-
-
-
-
-
-
-            force_redownload=force,
-        )
-    except Exception:
-        raise click.ClickException(consts.ERROR_GET_RESOURCE)
+    path_dataset = hafnia.dataset.hafnia_dataset.download_or_get_dataset_path(
+        dataset_name=dataset_name,
+        version=version,
+        cfg=cfg,
+        path_datasets_folder=destination,
+        force_redownload=force,
+    )
     return path_dataset


@@ -67,4 +72,5 @@ def cmd_dataset_delete(cfg: Config, dataset_name: str, interactive: bool) -> Non
     datasets.delete_dataset_completely_by_name(
         dataset_name=dataset_name,
         interactive=interactive,
+        cfg=cfg,
     )
```
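The download command is now a thin wrapper around download_or_get_dataset_path from hafnia.dataset.hafnia_dataset, with the new --version option passed straight through. A minimal sketch of the equivalent programmatic call, assuming an already-configured profile; the dataset name and destination folder are placeholders:

```python
from hafnia_cli.config import Config
from hafnia.dataset.hafnia_dataset import download_or_get_dataset_path

path_dataset = download_or_get_dataset_path(
    dataset_name="mnist",           # placeholder dataset name
    version="latest",               # or a concrete version such as "0.0.1"
    cfg=Config(),                   # active Hafnia profile
    path_datasets_folder="./data",  # placeholder destination folder
    force_redownload=False,
)
print(path_dataset)
```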
hafnia_cli/runc_cmds.py
CHANGED

```diff
@@ -38,7 +38,7 @@ def runc():
 @click.pass_obj
 def launch_local(cfg: Config, exec_cmd: str, dataset: str, image_name: str) -> None:
     """Launch a job within the image."""
-    from hafnia.
+    from hafnia.dataset.hafnia_dataset import download_or_get_dataset_path

     is_local_dataset = "/" in dataset
     if is_local_dataset:
@@ -48,7 +48,12 @@ def launch_local(cfg: Config, exec_cmd: str, dataset: str, image_name: str) -> N
         raise click.ClickException(f"Dataset path does not exist: {path_dataset}")
     else:
         click.echo(f"Using Hafnia dataset: {dataset}")
-        path_dataset = download_or_get_dataset_path(
+        path_dataset = download_or_get_dataset_path(
+            dataset_name=dataset,
+            version="latest",
+            cfg=cfg,
+            force_redownload=False,
+        )

     if image_name is None:
         # Load image name from state.json
```

{hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/WHEEL
File without changes

{hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/entry_points.txt
File without changes

{hafnia-0.5.0.dist-info → hafnia-0.5.2.dist-info}/licenses/LICENSE
File without changes