hafnia 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hafnia/dataset/dataset_helpers.py +59 -1
- hafnia/dataset/dataset_names.py +1 -108
- hafnia/dataset/dataset_recipe/dataset_recipe.py +48 -4
- hafnia/dataset/format_conversions/torchvision_datasets.py +2 -2
- hafnia/dataset/hafnia_dataset.py +163 -69
- hafnia/dataset/hafnia_dataset_types.py +142 -18
- hafnia/dataset/operations/dataset_s3_storage.py +7 -2
- hafnia/dataset/operations/table_transformations.py +0 -18
- hafnia/platform/datasets.py +32 -132
- hafnia/platform/download.py +1 -1
- hafnia/platform/s5cmd_utils.py +122 -3
- {hafnia-0.5.0.dist-info → hafnia-0.5.1.dist-info}/METADATA +2 -2
- {hafnia-0.5.0.dist-info → hafnia-0.5.1.dist-info}/RECORD +18 -18
- hafnia_cli/dataset_cmds.py +19 -13
- hafnia_cli/runc_cmds.py +7 -2
- {hafnia-0.5.0.dist-info → hafnia-0.5.1.dist-info}/WHEEL +0 -0
- {hafnia-0.5.0.dist-info → hafnia-0.5.1.dist-info}/entry_points.txt +0 -0
- {hafnia-0.5.0.dist-info → hafnia-0.5.1.dist-info}/licenses/LICENSE +0 -0
hafnia/dataset/hafnia_dataset_types.py
CHANGED

@@ -1,5 +1,6 @@
 import collections
 import json
+from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Type, Union
@@ -7,12 +8,21 @@ from typing import Any, Dict, List, Optional, Type, Union
 import cv2
 import more_itertools
 import numpy as np
+import polars as pl
 from packaging.version import Version
 from PIL import Image
 from pydantic import BaseModel, Field, field_serializer, field_validator
 
 import hafnia
-from hafnia.dataset
+from hafnia.dataset import dataset_helpers
+from hafnia.dataset.dataset_helpers import version_from_string
+from hafnia.dataset.dataset_names import (
+    FILENAME_ANNOTATIONS_JSONL,
+    FILENAME_ANNOTATIONS_PARQUET,
+    FILENAME_DATASET_INFO,
+    SampleField,
+    StorageFormat,
+)
 from hafnia.dataset.primitives import (
     PRIMITIVE_TYPES,
     Bbox,
@@ -102,7 +112,7 @@ class TaskInfo(BaseModel):
 
 class DatasetInfo(BaseModel):
     dataset_name: str = Field(description="Name of the dataset, e.g. 'coco'")
-    version:
+    version: str = Field(default="0.0.0", description="Version of the dataset")
     dataset_title: Optional[str] = Field(default=None, description="Optional, human-readable title of the dataset")
     description: Optional[str] = Field(default=None, description="Optional, description of the dataset")
     tasks: List[TaskInfo] = Field(default=None, description="List of tasks in the dataset")
@@ -144,31 +154,21 @@ class DatasetInfo(BaseModel):
     @field_validator("format_version")
     @classmethod
     def _validate_format_version(cls, format_version: str) -> str:
-
-            Version(format_version)
-        except Exception as e:
-            raise ValueError(f"Invalid format_version '{format_version}'. Must be a valid version string.") from e
+        version_casted: Version = dataset_helpers.version_from_string(format_version, raise_error=True)
 
-        if
+        if version_casted > Version(hafnia.__dataset_format_version__):
             user_logger.warning(
                 f"The loaded dataset format version '{format_version}' is newer than the format version "
                 f"'{hafnia.__dataset_format_version__}' used in your version of Hafnia. Please consider "
                 f"updating Hafnia package."
             )
-        return
+        return str(version_casted)
 
     @field_validator("version")
     @classmethod
     def _validate_version(cls, dataset_version: Optional[str]) -> Optional[str]:
-
-
-
-        try:
-            Version(dataset_version)
-        except Exception as e:
-            raise ValueError(f"Invalid dataset_version '{dataset_version}'. Must be a valid version string.") from e
-
-        return dataset_version
+        version_casted: Version = dataset_helpers.version_from_string(dataset_version, raise_error=True)
+        return str(version_casted)
 
     def check_for_duplicate_task_names(self) -> List[TaskInfo]:
         return self._validate_check_for_duplicate_tasks(self.tasks)
@@ -238,7 +238,7 @@ class DatasetInfo(BaseModel):
         meta.update(info1.meta or {})
         return DatasetInfo(
             dataset_name=info0.dataset_name + "+" + info1.dataset_name,
-            version=
+            version="0.0.0",
             tasks=list(unique_tasks),
             meta=meta,
             format_version=dataset_format_version,
@@ -477,3 +477,127 @@ class Sample(BaseModel):
         annotations = self.get_annotations()
         annotations_visualized = image_visualizations.draw_annotations(image=image, primitives=annotations)
         return annotations_visualized
+
+
+@dataclass
+class DatasetMetadataFilePaths:
+    dataset_info: str  # Use 'str' to also support s3 paths
+    annotations_jsonl: Optional[str]
+    annotations_parquet: Optional[str]
+
+    def as_list(self) -> List[str]:
+        files = [self.dataset_info]
+        if self.annotations_jsonl is not None:
+            files.append(self.annotations_jsonl)
+        if self.annotations_parquet is not None:
+            files.append(self.annotations_parquet)
+        return files
+
+    def read_samples(self) -> pl.DataFrame:
+        if self.annotations_parquet is not None:
+            if not Path(self.annotations_parquet).exists():
+                raise FileNotFoundError(f"Parquet annotations file '{self.annotations_parquet}' does not exist.")
+            user_logger.info(f"Reading dataset annotations from Parquet file: {self.annotations_parquet}")
+            return pl.read_parquet(self.annotations_parquet)
+
+        if self.annotations_jsonl is not None:
+            if not Path(self.annotations_jsonl).exists():
+                raise FileNotFoundError(f"JSONL annotations file '{self.annotations_jsonl}' does not exist.")
+            user_logger.info(f"Reading dataset annotations from JSONL file: {self.annotations_jsonl}")
+            return pl.read_ndjson(self.annotations_jsonl)
+
+        raise ValueError(
+            "No annotations file available to read samples from. Dataset is missing both JSONL and Parquet files."
+        )
+
+    @staticmethod
+    def from_path(path_dataset: Path) -> "DatasetMetadataFilePaths":
+        path_dataset = path_dataset.absolute()
+        metadata_files = DatasetMetadataFilePaths(
+            dataset_info=str(path_dataset / FILENAME_DATASET_INFO),
+            annotations_jsonl=str(path_dataset / FILENAME_ANNOTATIONS_JSONL),
+            annotations_parquet=str(path_dataset / FILENAME_ANNOTATIONS_PARQUET),
+        )
+
+        return metadata_files
+
+    @staticmethod
+    def available_versions_from_files_list(files: list[str]) -> Dict[Version, "DatasetMetadataFilePaths"]:
+        versions_and_files: Dict[Version, Dict[str, str]] = collections.defaultdict(dict)
+        for metadata_file in files:
+            version_str, filename = metadata_file.split("/")[-2:]
+            versions_and_files[version_str][filename] = metadata_file
+
+        available_versions: Dict[Version, DatasetMetadataFilePaths] = {}
+        for version_str, version_files in versions_and_files.items():
+            version_casted: Version = dataset_helpers.version_from_string(version_str, raise_error=False)
+            if version_casted is None:
+                continue
+
+            if FILENAME_DATASET_INFO not in version_files:
+                continue
+            dataset_metadata_file = DatasetMetadataFilePaths(
+                dataset_info=version_files[FILENAME_DATASET_INFO],
+                annotations_jsonl=version_files.get(FILENAME_ANNOTATIONS_JSONL, None),
+                annotations_parquet=version_files.get(FILENAME_ANNOTATIONS_PARQUET, None),
+            )
+
+            available_versions[version_casted] = dataset_metadata_file
+
+        return available_versions
+
+    def check_version(self, version: str, raise_error: bool = True) -> bool:
+        """
+        Check if the dataset metadata files match the given version.
+        If raise_error is True, raises ValueError if the version does not match.
+        """
+        valid_version = version_from_string(version, raise_error=raise_error)
+        if valid_version is None:
+            return False
+
+        path_dataset_info = Path(self.dataset_info)
+        if not path_dataset_info.exists():
+            raise FileNotFoundError(f"Dataset info file missing '{self.dataset_info}' in dataset folder.")
+
+        dataset_info = json.loads(path_dataset_info.read_text())
+        dataset_version = dataset_info.get("version", None)
+        if dataset_version != version:
+            if raise_error:
+                raise ValueError(
+                    f"Dataset version mismatch. Expected version '{version}' but found "
+                    f"version '{dataset_version}' in dataset info."
+                )
+            return False
+
+        return True
+
+    def exists(self, version: Optional[str] = None, raise_error: bool = True) -> bool:
+        """
+        Check if all metadata files exist.
+        Add version to check if it matches the version in dataset info.
+        If raise_error is True, raises FileNotFoundError if any file is missing.
+        """
+        path_dataset_info = Path(self.dataset_info)
+        if not path_dataset_info.exists():
+            if raise_error:
+                raise FileNotFoundError(f"Dataset info file missing '{self.dataset_info}' in dataset folder.")
+            return False
+
+        if version is not None and self.check_version(version, raise_error=raise_error) is False:
+            return False
+
+        has_jsonl_file = self.annotations_jsonl is not None and Path(self.annotations_jsonl).exists()
+        if has_jsonl_file:
+            return True
+
+        has_parquet_file = self.annotations_parquet is not None and Path(self.annotations_parquet).exists()
+        if has_parquet_file:
+            return True
+
+        if raise_error:
+            raise FileNotFoundError(
+                f"Missing annotation file. Expected either '{FILENAME_ANNOTATIONS_JSONL}' or "
+                f"'{FILENAME_ANNOTATIONS_PARQUET}' in dataset folder."
+            )
+
+        return False
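The added DatasetMetadataFilePaths dataclass bundles the dataset-info, JSONL, and Parquet annotation paths and is what callers use to locate and read a dataset's metadata. A minimal usage sketch; the import path is an assumption based on the file list above, and the dataset folder is a placeholder:

```python
from pathlib import Path

# Assumed import path (hafnia/dataset/hafnia_dataset_types.py per the file list above).
from hafnia.dataset.hafnia_dataset_types import DatasetMetadataFilePaths

# Placeholder dataset folder; any folder containing the dataset-info file
# plus a JSONL or Parquet annotations file works.
path_dataset = Path("~/.hafnia/datasets/my-dataset").expanduser()

metadata = DatasetMetadataFilePaths.from_path(path_dataset)
if metadata.exists(raise_error=False):
    samples = metadata.read_samples()  # polars DataFrame; Parquet is preferred over JSONL
    print(len(samples), "samples")
```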
hafnia/dataset/operations/dataset_s3_storage.py
CHANGED

@@ -8,13 +8,13 @@ import polars as pl
 from hafnia.dataset.dataset_helpers import hash_file_xxhash
 from hafnia.dataset.dataset_names import (
     DatasetVariant,
-    ResourceCredentials,
     SampleField,
 )
 from hafnia.dataset.hafnia_dataset import HafniaDataset
 from hafnia.log import user_logger
 from hafnia.platform import s5cmd_utils
 from hafnia.platform.datasets import get_upload_credentials
+from hafnia.platform.s5cmd_utils import ResourceCredentials
 from hafnia.utils import progress_bar
 from hafnia_cli.config import Config
 
@@ -39,6 +39,7 @@ def delete_hafnia_dataset_files_on_platform(
 def delete_hafnia_dataset_files_from_resource_credentials(
     resource_credentials: ResourceCredentials,
     interactive: bool = True,
+    remove_bucket: bool = True,
 ) -> bool:
     envs = resource_credentials.aws_credentials()
     bucket_name = resource_credentials.bucket_name()
@@ -58,7 +59,11 @@ def delete_hafnia_dataset_files_from_resource_credentials(
         user_logger.info("Delete operation cancelled by the user.")
         return False
     user_logger.info(f"Deleting all files in S3 bucket '{bucket_name}'...")
-    s5cmd_utils.delete_bucket_content(
+    s5cmd_utils.delete_bucket_content(
+        bucket_prefix=f"s3://{bucket_name}",
+        remove_bucket=remove_bucket,
+        append_envs=envs,
+    )
     return True
 
 
hafnia/dataset/operations/table_transformations.py
CHANGED

@@ -4,8 +4,6 @@ from typing import List, Optional, Tuple, Type
 import polars as pl
 
 from hafnia.dataset.dataset_names import (
-    FILENAME_ANNOTATIONS_JSONL,
-    FILENAME_ANNOTATIONS_PARQUET,
     PrimitiveField,
     SampleField,
 )
@@ -204,22 +202,6 @@ def split_primitive_columns_by_task_name(
     return samples_table
 
 
-def read_samples_from_path(path: Path) -> pl.DataFrame:
-    path_annotations = path / FILENAME_ANNOTATIONS_PARQUET
-    if path_annotations.exists():
-        user_logger.info(f"Reading dataset annotations from Parquet file: {path_annotations}")
-        return pl.read_parquet(path_annotations)
-
-    path_annotations_jsonl = path / FILENAME_ANNOTATIONS_JSONL
-    if path_annotations_jsonl.exists():
-        user_logger.info(f"Reading dataset annotations from JSONL file: {path_annotations_jsonl}")
-        return pl.read_ndjson(path_annotations_jsonl)
-
-    raise FileNotFoundError(
-        f"Unable to read annotations. No json file '{path_annotations.name}' or Parquet file '{{path_annotations.name}} in in '{path}'."
-    )
-
-
 def check_image_paths(table: pl.DataFrame) -> bool:
     missing_files = []
     org_paths = table[SampleField.FILE_PATH].to_list()
hafnia/platform/datasets.py
CHANGED

@@ -1,23 +1,13 @@
-import collections
-import shutil
-from pathlib import Path
 from typing import Any, Dict, List, Optional
 
 import rich
-from packaging.version import Version
 from rich import print as rprint
 
 from hafnia import http, utils
-from hafnia.dataset.dataset_names import DATASET_FILENAMES_REQUIRED, ResourceCredentials
-from hafnia.dataset.dataset_recipe.dataset_recipe import (
-    DatasetRecipe,
-    get_dataset_path_from_recipe,
-)
-from hafnia.dataset.hafnia_dataset import HafniaDataset
 from hafnia.http import fetch, post
 from hafnia.log import user_logger
-from hafnia.platform import s5cmd_utils
 from hafnia.platform.download import get_resource_credentials
+from hafnia.platform.s5cmd_utils import ResourceCredentials
 from hafnia.utils import timed
 from hafnia_cli.config import Config
 
@@ -57,7 +47,6 @@ def get_or_create_dataset(dataset_name: str = "", cfg: Optional[Config] = None)
     """Create a new dataset on the Hafnia platform."""
     cfg = cfg or Config()
     dataset = get_dataset_by_name(dataset_name, cfg)
-
     if dataset is not None:
         user_logger.info(f"Dataset '{dataset_name}' already exists on the Hafnia platform.")
         return dataset
@@ -130,6 +119,31 @@ def get_upload_credentials_by_id(dataset_id: str, cfg: Optional[Config] = None)
     return ResourceCredentials.fix_naming(credentials_response)
 
 
+@timed("Get read access credentials by ID")
+def get_read_credentials_by_id(dataset_id: str, cfg: Optional[Config] = None) -> Optional[ResourceCredentials]:
+    """Get dataset read access credentials by ID from the Hafnia platform."""
+    cfg = cfg or Config()
+    endpoint_dataset = cfg.get_platform_endpoint("datasets")
+    if utils.is_hafnia_cloud_job():
+        credentials_endpoint_suffix = "temporary-credentials-hidden"  # Access to hidden datasets
+    else:
+        credentials_endpoint_suffix = "temporary-credentials"  # Access to sample dataset
+    access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"
+    resource_credentials = get_resource_credentials(access_dataset_endpoint, cfg.api_key)
+    return resource_credentials
+
+
+@timed("Get read access credentials by name")
+def get_read_credentials_by_name(dataset_name: str, cfg: Optional[Config] = None) -> Optional[ResourceCredentials]:
+    """Get dataset read access credentials by name from the Hafnia platform."""
+    cfg = cfg or Config()
+    dataset_response = get_dataset_by_name(dataset_name=dataset_name, cfg=cfg)
+    if dataset_response is None:
+        return None
+
+    return get_read_credentials_by_id(dataset_response["id"], cfg=cfg)
+
+
 @timed("Delete dataset by id")
 def delete_dataset_by_id(dataset_id: str, cfg: Optional[Config] = None) -> Dict:
     cfg = cfg or Config()
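The two helpers added above expose read credentials directly, replacing the download path removed further down in this file. A minimal sketch of how they might be called; the dataset name is a placeholder, and aws_credentials()/s3_uri() come from the ResourceCredentials model added in s5cmd_utils.py below:

```python
from hafnia.platform.datasets import get_read_credentials_by_name

# "my-dataset" is a placeholder name; returns None if the dataset is not
# found on the platform for the configured API key.
credentials = get_read_credentials_by_name("my-dataset")
if credentials is not None:
    envs = credentials.aws_credentials()   # env vars for s5cmd/boto3 subprocesses
    print(credentials.s3_uri())            # e.g. "s3://<bucket>/<prefix>"
```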
@@ -152,10 +166,14 @@ def delete_dataset_by_name(dataset_name: str, cfg: Optional[Config] = None) -> D
     return response
 
 
-def delete_dataset_completely_by_name(
+def delete_dataset_completely_by_name(
+    dataset_name: str,
+    interactive: bool = True,
+    cfg: Optional[Config] = None,
+) -> None:
     from hafnia.dataset.operations.dataset_s3_storage import delete_hafnia_dataset_files_on_platform
 
-    cfg = Config()
+    cfg = cfg or Config()
 
     is_deleted = delete_hafnia_dataset_files_on_platform(
         dataset_name=dataset_name,
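With the expanded signature, callers can pass an explicit Config instead of relying on the default one. A hedged usage sketch; the dataset name is a placeholder and the call is destructive:

```python
from hafnia.platform.datasets import delete_dataset_completely_by_name
from hafnia_cli.config import Config

# Placeholder dataset name. With interactive=True the underlying delete helper
# asks for confirmation before deleting the dataset's files on the platform.
delete_dataset_completely_by_name("my-obsolete-dataset", interactive=True, cfg=Config())
```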
@@ -180,79 +198,6 @@ def upload_dataset_details(cfg: Config, data: dict, dataset_name: str) -> dict:
     return response  # type: ignore[return-value]
 
 
-def download_or_get_dataset_path(
-    dataset_name: str,
-    cfg: Optional[Config] = None,
-    path_datasets_folder: Optional[str] = None,
-    force_redownload: bool = False,
-    download_files: bool = True,
-) -> Path:
-    """Download or get the path of the dataset."""
-    recipe_explicit = DatasetRecipe.from_implicit_form(dataset_name)
-    path_dataset = get_dataset_path_from_recipe(recipe_explicit, path_datasets=path_datasets_folder)
-
-    is_dataset_valid = HafniaDataset.check_dataset_path(path_dataset, raise_error=False)
-    if is_dataset_valid and not force_redownload:
-        user_logger.info("Dataset found locally. Set 'force=True' or add `--force` flag with cli to re-download")
-        return path_dataset
-
-    cfg = cfg or Config()
-    api_key = cfg.api_key
-
-    shutil.rmtree(path_dataset, ignore_errors=True)
-
-    endpoint_dataset = cfg.get_platform_endpoint("datasets")
-    dataset_res = get_dataset_by_name(dataset_name, cfg)  # Check if dataset exists
-    if dataset_res is None:
-        raise ValueError(f"Dataset '{dataset_name}' not found on the Hafnia platform.")
-
-    dataset_id = dataset_res.get("id")  # type: ignore[union-attr]
-
-    if utils.is_hafnia_cloud_job():
-        credentials_endpoint_suffix = "temporary-credentials-hidden"  # Access to hidden datasets
-    else:
-        credentials_endpoint_suffix = "temporary-credentials"  # Access to sample dataset
-    access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"
-
-    download_dataset_from_access_endpoint(
-        endpoint=access_dataset_endpoint,
-        api_key=api_key,
-        path_dataset=path_dataset,
-        download_files=download_files,
-    )
-    return path_dataset
-
-
-def download_dataset_from_access_endpoint(
-    endpoint: str,
-    api_key: str,
-    path_dataset: Path,
-    version: Optional[str] = None,
-    download_files: bool = True,
-) -> None:
-    try:
-        resource_credentials = get_resource_credentials(endpoint, api_key)
-        download_annotation_dataset_from_version(
-            version=version,
-            credentials=resource_credentials,
-            path_dataset=path_dataset,
-        )
-
-    except ValueError as e:
-        user_logger.error(f"Failed to download annotations: {e}")
-        return
-
-    if not download_files:
-        return
-    dataset = HafniaDataset.from_path(path_dataset, check_for_images=False)
-    try:
-        dataset = dataset.download_files_aws(path_dataset, aws_credentials=resource_credentials, force_redownload=True)
-    except ValueError as e:
-        user_logger.error(f"Failed to download images: {e}")
-        return
-    dataset.write_annotations(path_folder=path_dataset)  # Overwrite annotations as files have been re-downloaded
-
-
 TABLE_FIELDS = {
     "ID": "id",
     "Hidden\nSamples": "hidden.samples",
@@ -287,48 +232,3 @@ def extend_dataset_details(datasets: List[Dict[str, Any]]) -> List[Dict[str, Any
             dataset[f"{variant_type}.samples"] = variant["number_of_data_items"]
             dataset[f"{variant_type}.size"] = utils.size_human_readable(variant["size_bytes"])
     return datasets
-
-
-def download_annotation_dataset_from_version(
-    version: Optional[str],
-    credentials: ResourceCredentials,
-    path_dataset: Path,
-) -> list[str]:
-    path_dataset.mkdir(parents=True, exist_ok=True)
-
-    envs = credentials.aws_credentials()
-    bucket_prefix_sample_versions = f"{credentials.s3_uri()}/versions"
-    all_s3_annotation_files = s5cmd_utils.list_bucket(bucket_prefix=bucket_prefix_sample_versions, append_envs=envs)
-    s3_files = _annotation_files_from_version(version=version, all_annotation_files=all_s3_annotation_files)
-
-    local_paths = [(path_dataset / filename.split("/")[-1]).as_posix() for filename in s3_files]
-    s5cmd_utils.fast_copy_files(
-        src_paths=s3_files,
-        dst_paths=local_paths,
-        append_envs=envs,
-        description="Downloading annotation files",
-    )
-    return local_paths
-
-
-def _annotation_files_from_version(version: Optional[str], all_annotation_files: list[str]) -> list[str]:
-    version_files = collections.defaultdict(list)
-    for metadata_file in all_annotation_files:
-        version_str, filename = metadata_file.split("/")[-2:]
-        if filename not in DATASET_FILENAMES_REQUIRED:
-            continue
-        version_files[version_str].append(metadata_file)
-    available_versions = {v for v, files in version_files.items() if len(files) == len(DATASET_FILENAMES_REQUIRED)}
-
-    if len(available_versions) == 0:
-        raise ValueError("No versions were found in the dataset.")
-
-    if version is None:
-        latest_version = max(Version(ver) for ver in available_versions)
-        version = str(latest_version)
-        user_logger.info(f"No version selected. Using latest version: {version}")
-
-    if version not in available_versions:
-        raise ValueError(f"Selected version '{version}' not found in available versions: {available_versions}")
-
-    return version_files[version]
hafnia/platform/download.py
CHANGED

@@ -5,9 +5,9 @@ import boto3
 from botocore.exceptions import ClientError
 from rich.progress import Progress
 
-from hafnia.dataset.dataset_names import ResourceCredentials
 from hafnia.http import fetch
 from hafnia.log import sys_logger, user_logger
+from hafnia.platform.s5cmd_utils import ResourceCredentials
 
 
 def get_resource_credentials(endpoint: str, api_key: str) -> ResourceCredentials:
hafnia/platform/s5cmd_utils.py
CHANGED

@@ -7,6 +7,10 @@ import uuid
 from pathlib import Path
 from typing import Dict, List, Optional
 
+import boto3
+from botocore.exceptions import UnauthorizedSSOTokenError
+from pydantic import BaseModel, field_validator
+
 from hafnia.log import sys_logger, user_logger
 from hafnia.utils import progress_bar
 
@@ -26,7 +30,11 @@ def find_s5cmd() -> Optional[str]:
     if result:
         return result
     python_dir = Path(sys.executable).parent
-    locations = (
+    locations = (
+        python_dir / "Scripts" / "s5cmd.exe",
+        python_dir / "bin" / "s5cmd",
+        python_dir / "s5cmd",
+    )
     for loc in locations:
         if loc.exists():
             return str(loc)
@@ -104,12 +112,17 @@ def delete_bucket_content(
     returns = execute_command(["rm", f"{bucket_prefix}/*"], append_envs=append_envs)
 
     if returns.returncode != 0:
-
-
+        bucket_content_is_already_deleted = "no object found" in returns.stderr.strip()
+        bucket_is_already_deleted = "NoSuchBucket" in returns.stderr.strip()
+        if bucket_content_is_already_deleted:
             user_logger.info(f"No action was taken. S3 bucket '{bucket_prefix}' is already empty.")
+        elif bucket_is_already_deleted:
+            user_logger.info(f"No action was taken. S3 bucket '{bucket_prefix}' does not exist.")
+            return
         else:
             user_logger.error("Error during s5cmd rm command:")
             user_logger.error(returns.stdout)
+            user_logger.error(returns.stderr)
             raise RuntimeError(f"Failed to delete all files in S3 bucket '{bucket_prefix}'.")
 
     if remove_bucket:
@@ -118,6 +131,7 @@ def delete_bucket_content(
         if returns.returncode != 0:
             user_logger.error("Error during s5cmd rb command:")
             user_logger.error(returns.stdout)
+            user_logger.error(returns.stderr)
             raise RuntimeError(f"Failed to delete S3 bucket '{bucket_prefix}'.")
         user_logger.info(f"S3 bucket '{bucket_prefix}' has been deleted.")
 
@@ -145,3 +159,108 @@ def fast_copy_files(
     cmds = [f"cp {src} {dst}" for src, dst in zip(src_paths, dst_paths)]
     lines = execute_commands(cmds, append_envs=append_envs, description=description)
     return lines
+
+
+ARN_PREFIX = "arn:aws:s3:::"
+
+
+class AwsCredentials(BaseModel):
+    access_key: str
+    secret_key: str
+    session_token: str
+    region: Optional[str]
+
+    def aws_credentials(self) -> Dict[str, str]:
+        """
+        Returns the AWS credentials as a dictionary.
+        """
+        environment_vars = {
+            "AWS_ACCESS_KEY_ID": self.access_key,
+            "AWS_SECRET_ACCESS_KEY": self.secret_key,
+            "AWS_SESSION_TOKEN": self.session_token,
+        }
+        if self.region:
+            environment_vars["AWS_REGION"] = self.region
+
+        return environment_vars
+
+    @staticmethod
+    def from_session(session: boto3.Session) -> "AwsCredentials":
+        """
+        Creates AwsCredentials from a Boto3 session.
+        """
+        try:
+            frozen_credentials = session.get_credentials().get_frozen_credentials()
+        except UnauthorizedSSOTokenError as e:
+            raise RuntimeError(
+                f"Failed to get AWS credentials from the session for profile '{session.profile_name}'.\n"
+                f"Ensure the profile exists in your AWS config in '~/.aws/config' and that you are logged in via AWS SSO.\n"
+                f"\tUse 'aws sso login --profile {session.profile_name}' to log in."
+            ) from e
+        return AwsCredentials(
+            access_key=frozen_credentials.access_key,
+            secret_key=frozen_credentials.secret_key,
+            session_token=frozen_credentials.token,
+            region=session.region_name,
+        )
+
+    def to_resource_credentials(self, bucket_name: str) -> "ResourceCredentials":
+        """
+        Converts AwsCredentials to ResourceCredentials by adding the S3 ARN.
+        """
+        payload = self.model_dump()
+        payload["s3_arn"] = f"{ARN_PREFIX}{bucket_name}"
+        return ResourceCredentials(**payload)
+
+
+class ResourceCredentials(AwsCredentials):
+    s3_arn: str
+
+    @staticmethod
+    def fix_naming(payload: Dict[str, str]) -> "ResourceCredentials":
+        """
+        The endpoint returns a payload with a key called 's3_path', but it
+        is actually an ARN path (starts with arn:aws:s3::). This method renames it to 's3_arn' for consistency.
+        """
+        if "s3_path" in payload and payload["s3_path"].startswith(ARN_PREFIX):
+            payload["s3_arn"] = payload.pop("s3_path")
+
+        if "region" not in payload:
+            payload["region"] = "eu-west-1"
+        return ResourceCredentials(**payload)
+
+    @field_validator("s3_arn")
+    @classmethod
+    def validate_s3_arn(cls, value: str) -> str:
+        """Validate s3_arn to ensure it starts with 'arn:aws:s3:::'"""
+        if not value.startswith("arn:aws:s3:::"):
+            raise ValueError(f"Invalid S3 ARN: {value}. It should start with 'arn:aws:s3:::'")
+        return value
+
+    def s3_path(self) -> str:
+        """
+        Extracts the S3 path from the ARN.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket/my-prefix
+        """
+        return self.s3_arn[len(ARN_PREFIX) :]
+
+    def s3_uri(self) -> str:
+        """
+        Converts the S3 ARN to a URI format.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> s3://my-bucket/my-prefix
+        """
+        return f"s3://{self.s3_path()}"
+
+    def bucket_name(self) -> str:
+        """
+        Extracts the bucket name from the S3 ARN.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket
+        """
+        return self.s3_path().split("/")[0]
+
+    def object_key(self) -> str:
+        """
+        Extracts the object key from the S3 ARN.
+        Example: arn:aws:s3:::my-bucket/my-prefix -> my-prefix
+        """
+        return "/".join(self.s3_path().split("/")[1:])
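The relocated ResourceCredentials model (previously imported from dataset_names) now carries the ARN-to-path helpers used by the download and storage code above. A small sketch of the round trip, using a made-up payload shaped like the one fix_naming() expects:

```python
from hafnia.platform.s5cmd_utils import ResourceCredentials

# Made-up payload mirroring the platform response handled by fix_naming():
# 's3_path' actually holds an ARN and is renamed to 's3_arn'; a default region
# is filled in when the endpoint omits it.
payload = {
    "access_key": "AKIA...",
    "secret_key": "secret",
    "session_token": "token",
    "s3_path": "arn:aws:s3:::my-bucket/datasets/my-dataset",
}
credentials = ResourceCredentials.fix_naming(payload)

print(credentials.s3_uri())       # s3://my-bucket/datasets/my-dataset
print(credentials.bucket_name())  # my-bucket
print(credentials.object_key())   # datasets/my-dataset
print(credentials.aws_credentials()["AWS_REGION"])  # eu-west-1 (default)
```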
{hafnia-0.5.0.dist-info → hafnia-0.5.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.5.0
+Version: 0.5.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
 Requires-Dist: keyring>=25.6.0
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.23.0
 Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86