hafnia 0.2.2.tar.gz → 0.2.3.tar.gz

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (125)
  1. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/publish_docker.yaml +1 -1
  2. {hafnia-0.2.2 → hafnia-0.2.3}/PKG-INFO +1 -1
  3. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_dataset_recipe.py +15 -11
  4. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_hafnia_dataset.py +26 -16
  5. {hafnia-0.2.2 → hafnia-0.2.3}/pyproject.toml +1 -1
  6. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/__main__.py +6 -10
  7. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/config.py +19 -5
  8. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/profile_cmds.py +2 -1
  9. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_helpers.py +39 -6
  10. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +59 -1
  11. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/recipe_types.py +4 -0
  12. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/hafnia_dataset.py +5 -17
  13. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/datasets.py +6 -1
  14. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_dataset_recipes.py +46 -3
  15. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_dataset_helpers.py +10 -5
  16. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_cli.py +38 -1
  17. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_samples.py +7 -0
  18. {hafnia-0.2.2 → hafnia-0.2.3}/uv.lock +1 -1
  19. {hafnia-0.2.2 → hafnia-0.2.3}/.devcontainer/devcontainer.json +0 -0
  20. {hafnia-0.2.2 → hafnia-0.2.3}/.devcontainer/hooks/post_create +0 -0
  21. {hafnia-0.2.2 → hafnia-0.2.3}/.github/dependabot.yaml +0 -0
  22. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/Dockerfile +0 -0
  23. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/build.yaml +0 -0
  24. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/check_release.yaml +0 -0
  25. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/ci_cd.yaml +0 -0
  26. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/lint.yaml +0 -0
  27. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/publish_pypi.yaml +0 -0
  28. {hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/tests.yaml +0 -0
  29. {hafnia-0.2.2 → hafnia-0.2.3}/.gitignore +0 -0
  30. {hafnia-0.2.2 → hafnia-0.2.3}/.pre-commit-config.yaml +0 -0
  31. {hafnia-0.2.2 → hafnia-0.2.3}/.python-version +0 -0
  32. {hafnia-0.2.2 → hafnia-0.2.3}/.vscode/extensions.json +0 -0
  33. {hafnia-0.2.2 → hafnia-0.2.3}/.vscode/launch.json +0 -0
  34. {hafnia-0.2.2 → hafnia-0.2.3}/.vscode/settings.json +0 -0
  35. {hafnia-0.2.2 → hafnia-0.2.3}/LICENSE +0 -0
  36. {hafnia-0.2.2 → hafnia-0.2.3}/README.md +0 -0
  37. {hafnia-0.2.2 → hafnia-0.2.3}/docs/cli.md +0 -0
  38. {hafnia-0.2.2 → hafnia-0.2.3}/docs/release.md +0 -0
  39. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_logger.py +0 -0
  40. {hafnia-0.2.2 → hafnia-0.2.3}/examples/example_torchvision_dataloader.py +0 -0
  41. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/__init__.py +0 -0
  42. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/consts.py +0 -0
  43. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/dataset_cmds.py +0 -0
  44. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/experiment_cmds.py +0 -0
  45. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/recipe_cmds.py +0 -0
  46. {hafnia-0.2.2 → hafnia-0.2.3}/src/cli/runc_cmds.py +0 -0
  47. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/__init__.py +0 -0
  48. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/data/__init__.py +0 -0
  49. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/data/factory.py +0 -0
  50. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_names.py +0 -0
  51. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
  52. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_upload_helper.py +0 -0
  53. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
  54. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
  55. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/operations/table_transformations.py +0 -0
  56. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/__init__.py +0 -0
  57. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/bbox.py +0 -0
  58. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  59. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/classification.py +0 -0
  60. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/point.py +0 -0
  61. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/polygon.py +0 -0
  62. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/primitive.py +0 -0
  63. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  64. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/primitives/utils.py +0 -0
  65. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/experiment/__init__.py +0 -0
  66. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/experiment/hafnia_logger.py +0 -0
  67. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/http.py +0 -0
  68. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/log.py +0 -0
  69. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/__init__.py +0 -0
  70. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/builder.py +0 -0
  71. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/download.py +0 -0
  72. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/experiment.py +0 -0
  73. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/torch_helpers.py +0 -0
  74. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/utils.py +0 -0
  75. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/visualizations/colors.py +0 -0
  76. {hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/visualizations/image_visualizations.py +0 -0
  77. {hafnia-0.2.2 → hafnia-0.2.3}/tests/__init__.py +0 -0
  78. {hafnia-0.2.2 → hafnia-0.2.3}/tests/conftest.py +0 -0
  79. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  80. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  81. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  82. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  83. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  84. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  85. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  86. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  87. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  88. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  89. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  90. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  91. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  92. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  93. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  94. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  95. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_blur_anonymization[coco-2017].png +0 -0
  96. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_blur_anonymization[tiny-dataset].png +0 -0
  97. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_draw_annotations[coco-2017].png +0 -0
  98. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_draw_annotations[tiny-dataset].png +0 -0
  99. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_mask_region[coco-2017].png +0 -0
  100. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/expected_images/test_visualizations/test_mask_region[tiny-dataset].png +0 -0
  101. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/annotations.jsonl +0 -0
  102. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/annotations.parquet +0 -0
  103. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/data/182a2c0a3ce312cf.jpg +0 -0
  104. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/data/4e95c6eb6209880a.jpg +0 -0
  105. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/data/cf86c7a23edb55ce.jpg +0 -0
  106. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/coco-2017/dataset_info.json +0 -0
  107. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/annotations.jsonl +0 -0
  108. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/annotations.parquet +0 -0
  109. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/data/222bbd5721a8a86e.png +0 -0
  110. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/data/3251d85443622e4c.png +0 -0
  111. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/data/3657ababa44af9b6.png +0 -0
  112. {hafnia-0.2.2 → hafnia-0.2.3}/tests/data/micro_test_datasets/tiny-dataset/dataset_info.json +0 -0
  113. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  114. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
  115. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/operations/test_dataset_transformations.py +0 -0
  116. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/operations/test_table_transformations.py +0 -0
  117. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_colors.py +0 -0
  118. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_hafnia_dataset.py +0 -0
  119. {hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_shape_primitives.py +0 -0
  120. {hafnia-0.2.2 → hafnia-0.2.3}/tests/helper_testing.py +0 -0
  121. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_builder.py +0 -0
  122. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_check_example_scripts.py +0 -0
  123. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_hafnia_logger.py +0 -0
  124. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_utils.py +0 -0
  125. {hafnia-0.2.2 → hafnia-0.2.3}/tests/test_visualizations.py +0 -0
{hafnia-0.2.2 → hafnia-0.2.3}/.github/workflows/publish_docker.yaml

@@ -47,7 +47,7 @@ jobs:
  echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
  fi
  - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v4.2.1
+ uses: aws-actions/configure-aws-credentials@v4.3.0
  with:
  role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
  aws-region: ${{ steps.env-vars.outputs.aws_region }}
{hafnia-0.2.2 → hafnia-0.2.3}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hafnia
- Version: 0.2.2
+ Version: 0.2.3
  Summary: Python SDK for communication with Hafnia platform.
  Author-email: Milestone Systems <hafniaplatform@milestone.dk>
  License-File: LICENSE
{hafnia-0.2.2 → hafnia-0.2.3}/examples/example_dataset_recipe.py

@@ -29,20 +29,24 @@ dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(
  dataset = dataset_recipe.build()
  # Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.

- # An important feature of a 'DatasetRecipe' is that the recipe itself - and not the dataset - can be saved as a file
- # and loaded from file. Meaning you can easily save, share, load and build the dataset later or in a different
- # environment.
- # In programming language, the recipe can be serialized to JSON and deserialized back to the original python object
- # recipe.
+ # Unlike the HafniaDataset, a DatasetRecipe does not execute operations. It only registers
+ # the operations applied to the recipe and can be used to build the dataset later.
+ # You can print the dataset recipe to the operations that were applied to it.
+ rprint(dataset_recipe)
+
+ # Or as a JSON string:
+ json_str: str = dataset_recipe.as_json_str()
+ rprint(json_str)
+
+ # This is an important feature of a 'DatasetRecipe' it only registers operations and that the recipe itself
+ # - and not the dataset - can be saved as a file and loaded from file.
+ # Meaning you can easily save, share, load and build the dataset later or in a different environment.
  # For TaaS, this is the only way to include multiple datasets during training.

- # This is how it looks like in practice:
- # 1) Save the dataset recipe to a file
- path_json = Path(".data/tmp/dataset_recipe.json")
- dataset_recipe.as_json_file(path_json)

- # 2) The recipe can be loaded from the file
- dataset_recipe_again = DatasetRecipe.from_json_file(path_json)
+ # 2) The recipe can be loaded from json string
+ dataset_recipe_again: DatasetRecipe = DatasetRecipe.from_json_str(json_str)
+ # dataset_recipe_again.build()

  # We can verify that the loaded recipe is the same as the original recipe.
  assert dataset_recipe_again == dataset_recipe
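The updated example serializes the recipe to a JSON string instead of a file. A minimal round-trip sketch, using only the 0.2.3 API surface that appears in this diff (from_name, shuffle, select_samples, as_json_str, from_json_str, build); the sample count is illustrative:

    from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

    # Build a recipe lazily; nothing is downloaded or transformed yet.
    recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=100)

    # Serialize the recipe (not the dataset) and restore it elsewhere.
    json_str = recipe.as_json_str()
    restored = DatasetRecipe.from_json_str(json_str)
    assert restored == recipe

    # Only build() executes the registered operations and returns a HafniaDataset.
    dataset = restored.build()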
{hafnia-0.2.2 → hafnia-0.2.3}/examples/example_hafnia_dataset.py

@@ -26,11 +26,10 @@ dataset = HafniaDataset.from_path(path_dataset)
  # Alternatively, you can use the 'load_dataset' function
  dataset = load_dataset("midwest-vehicle-detection")

-
  # Dataset information is stored in 'dataset.info'
  rprint(dataset.info)

- # Annotations are stored in 'dataset.table' as a Polars DataFrame
+ # Annotations are stored in 'dataset.samples' as a Polars DataFrame
  dataset.samples.head(2)

  # Print dataset information
@@ -49,14 +48,29 @@ shuffled_dataset = dataset.shuffle(seed=42) # Shuffle the dataset
  split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
  new_dataset_splits = dataset.splits_by_ratios(split_ratios)

+ # Support Chaining Operations (load, shuffle, select samples)
+ dataset = load_dataset("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)
+
+
  # Write dataset to disk
  path_tmp = Path(".data/tmp")
  path_dataset = path_tmp / "hafnia_dataset"
- dataset.write(path_dataset) # --> Check that data is human readable
+ dataset.write(path_dataset)

  # Load dataset from disk
  dataset_again = HafniaDataset.from_path(path_dataset)

+
+ # Want custom dataset transformations or statistics? Use the polars table (dataset.samples) directly
+ n_objects = dataset.samples["objects"].list.len().sum()
+ n_objects = dataset.samples[Bbox.column_name()].list.len().sum() # Use Bbox.column_name() to avoid magic variables
+ n_classifications = dataset.samples[Classification.column_name()].list.len().sum()
+
+ class_counts = dataset.samples[Classification.column_name()].explode().struct.field("class_name").value_counts()
+ class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()
+ rprint(dict(class_counts.iter_rows()))
+
+
  # Access the first sample in the training split - data is stored in a dictionary
  sample_dict = dataset_train[0]

@@ -78,25 +92,15 @@ image: np.ndarray = sample.read_image()
  # Visualize sample and annotations
  image_with_annotations = sample.draw_annotations()

-
+ # Save the image with annotations to a temporary directory
  path_tmp.mkdir(parents=True, exist_ok=True)
  Image.fromarray(image_with_annotations).save(path_tmp / "sample_with_annotations.png")


- # Do dataset transformations and statistics on the Polars DataFrame
- n_objects = dataset.samples["objects"].list.len().sum()
- n_objects = dataset.samples[Bbox.column_name()].list.len().sum() # Use Bbox.column_name() to avoid magic variables
- n_classifications = dataset.samples[Classification.column_name()].list.len().sum()
-
- class_counts = dataset.samples[Classification.column_name()].explode().struct.field("class_name").value_counts()
- class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()
- rprint(dict(class_counts.iter_rows()))
-
-
  ## Bring-your-own-data: Create a new dataset from samples
  fake_samples = []
  for i_fake_sample in range(5):
- bboxes = [Bbox(top_left_x=10, top_left_y=20, width=100, height=200, class_name="car")]
+ bboxes = [Bbox(top_left_x=0.1, top_left_y=0.20, width=0.1, height=0.2, class_name="car")]
  classifications = [Classification(class_name="vehicle", class_idx=0)]
  sample = Sample(
  file_name=f"path/to/image_{i_fake_sample:05}.jpg",
@@ -120,8 +124,14 @@ fake_dataset_info = DatasetInfo(
  )
  fake_dataset = HafniaDataset.from_samples_list(samples_list=fake_samples, info=fake_dataset_info)

+ # Coming soon! Upload your dataset to the Hafnia Platform
+ # fake_dataset.upload_to_hafnia()
+
+ # Coming soon! Create your own dataset details page in Hafnia
+ # fake_dataset.upload_dataset_details()

- ## A hafnia dataset can also be used for storing predictions per sample set 'ground_truth=False' and add 'confidence'.
+ ## Storing predictions: A hafnia dataset can also be used for storing predictions per sample
+ # set 'ground_truth=False' and add 'confidence'.
  bboxes_predictions = [
  Bbox(top_left_x=10, top_left_y=20, width=100, height=200, class_name="car", ground_truth=False, confidence=0.9)
  ]
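Read together, the example now demonstrates chained loading and direct Polars queries on dataset.samples. A condensed sketch of that workflow; the import paths for load_dataset and Bbox are assumptions based on the package layout in the file list above:

    from pathlib import Path

    from hafnia.data import load_dataset                  # assumed import path
    from hafnia.dataset.primitives.bbox import Bbox       # assumed import path

    # Chain load, shuffle and sub-sampling, then query annotations via Polars.
    dataset = load_dataset("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)
    n_boxes = dataset.samples[Bbox.column_name()].list.len().sum()
    class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()

    # write() now shards files into hashed sub-folders (see dataset_helpers.py further down).
    dataset.write(Path(".data/tmp/hafnia_dataset"))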
{hafnia-0.2.2 → hafnia-0.2.3}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "hafnia"
- version = "0.2.2"
+ version = "0.2.3"
  description = "Python SDK for communication with Hafnia platform."
  readme = "README.md"
  authors = [
{hafnia-0.2.2 → hafnia-0.2.3}/src/cli/__main__.py

@@ -20,19 +20,15 @@ def configure(cfg: Config) -> None:

  profile_name = click.prompt("Profile Name", type=str, default=consts.DEFAULT_PROFILE_NAME)
  profile_name = profile_name.strip()
- try:
- cfg.add_profile(profile_name, ConfigSchema(), set_active=True)
- except ValueError:
- raise click.ClickException(consts.ERROR_CREATE_PROFILE)
+
+ cfg.check_profile_name(profile_name)

  api_key = click.prompt("Hafnia API Key", type=str, hide_input=True)
- try:
- cfg.api_key = api_key.strip()
- except ValueError as e:
- click.echo(f"Error: {str(e)}", err=True)
- return
+
  platform_url = click.prompt("Hafnia Platform URL", type=str, default=consts.DEFAULT_API_URL)
- cfg.platform_url = platform_url.strip()
+
+ cfg_profile = ConfigSchema(api_key=api_key, platform_url=platform_url)
+ cfg.add_profile(profile_name, cfg_profile, set_active=True)
  cfg.save_config()
  profile_cmds.profile_show(cfg)

{hafnia-0.2.2 → hafnia-0.2.3}/src/cli/config.py

@@ -6,7 +6,7 @@ from typing import Dict, List, Optional
  from pydantic import BaseModel, field_validator

  import cli.consts as consts
- from hafnia.log import user_logger
+ from hafnia.log import sys_logger, user_logger

  PLATFORM_API_MAPPING = {
  "recipes": "/api/v1/recipes",
@@ -23,9 +23,17 @@ class ConfigSchema(BaseModel):
  api_key: Optional[str] = None

  @field_validator("api_key")
- def validate_api_key(cls, value: str) -> str:
- if value is not None and len(value) < 10:
+ def validate_api_key(cls, value: Optional[str]) -> Optional[str]:
+ if value is None:
+ return value
+
+ if len(value) < 10:
  raise ValueError("API key is too short.")
+
+ if not value.startswith("ApiKey "):
+ sys_logger.warning("API key is missing the 'ApiKey ' prefix. Prefix is being added automatically.")
+ value = f"ApiKey {value}"
+
  return value


@@ -51,6 +59,7 @@ class Config:
  if profile_name not in self.config_data.profiles:
  raise ValueError(f"Profile '{profile_name}' does not exist.")
  self.config_data.active_profile = profile_name
+ self.save_config()

  @property
  def config(self) -> ConfigSchema:
@@ -92,13 +101,18 @@ class Config:

  return Path.home() / ".hafnia" / "config.json"

- def add_profile(self, profile_name: str, profile: ConfigSchema, set_active: bool = False) -> None:
- profile_name = profile_name.strip()
+ def check_profile_name(self, profile_name: str) -> None:
+ if not profile_name or not isinstance(profile_name, str):
+ raise ValueError("Profile name must be a non-empty string.")
+
  if profile_name in self.config_data.profiles:
  user_logger.warning(
  f"Profile with name '{profile_name}' already exists, it will be overwritten by the new one."
  )

+ def add_profile(self, profile_name: str, profile: ConfigSchema, set_active: bool = False) -> None:
+ profile_name = profile_name.strip()
+ self.check_profile_name(profile_name)
  self.config_data.profiles[profile_name] = profile
  if set_active:
  self.config_data.active_profile = profile_name
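The net effect of the ConfigSchema change is that API keys are normalized at validation time rather than rejected later. A small illustration, assuming the cli.config module path used elsewhere in this diff and the validator logic shown in the hunk above:

    from cli.config import ConfigSchema  # module path assumed from "import cli.consts as consts"

    # A key without the "ApiKey " prefix is accepted, a warning is logged, and the prefix is added.
    schema = ConfigSchema(api_key="0123456789abcdef")
    assert schema.api_key == "ApiKey 0123456789abcdef"

    # A key shorter than 10 characters still fails validation ("API key is too short.").
    # ConfigSchema(api_key="short")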
{hafnia-0.2.2 → hafnia-0.2.3}/src/cli/profile_cmds.py

@@ -56,6 +56,7 @@ def profile_create(cfg: Config, name: str, api_url: str, api_key: str, activate:
  cfg_profile = ConfigSchema(platform_url=api_url, api_key=api_key)

  cfg.add_profile(profile_name=name, profile=cfg_profile, set_active=activate)
+ profile_show(cfg)


  @profile.command("rm")
@@ -87,7 +88,7 @@ def profile_active(cfg: Config) -> None:


  def profile_show(cfg: Config) -> None:
- masked_key = f"{cfg.api_key[:4]}...{cfg.api_key[-4:]}" if len(cfg.api_key) > 8 else "****"
+ masked_key = f"{cfg.api_key[:11]}...{cfg.api_key[-4:]}" if len(cfg.api_key) > 20 else "****"
  console = Console()

  table = Table(title=f"{consts.PROFILE_TABLE_HEADER} {cfg.active_profile}", show_header=False)
@@ -1,6 +1,7 @@
1
1
  import io
2
2
  import math
3
3
  import random
4
+ import shutil
4
5
  from pathlib import Path
5
6
  from typing import Dict, List
6
7
 
@@ -21,7 +22,7 @@ def create_split_name_list_from_ratios(split_ratios: Dict[str, float], n_items:
21
22
 
22
23
 
23
24
  def hash_file_xxhash(path: Path, chunk_size: int = 262144) -> str:
24
- hasher = xxhash.xxh3_64()
25
+ hasher = xxhash.xxh3_128()
25
26
 
26
27
  with open(path, "rb") as f:
27
28
  for chunk in iter(lambda: f.read(chunk_size), b""): # 8192, 16384, 32768, 65536
@@ -30,7 +31,7 @@ def hash_file_xxhash(path: Path, chunk_size: int = 262144) -> str:
30
31
 
31
32
 
32
33
  def hash_from_bytes(data: bytes) -> str:
33
- hasher = xxhash.xxh3_64()
34
+ hasher = xxhash.xxh3_128()
34
35
  hasher.update(data)
35
36
  return hasher.hexdigest()
36
37
 
@@ -40,14 +41,46 @@ def save_image_with_hash_name(image: np.ndarray, path_folder: Path) -> Path:
40
41
  buffer = io.BytesIO()
41
42
  pil_image.save(buffer, format="PNG")
42
43
  hash_value = hash_from_bytes(buffer.getvalue())
43
- path_image = Path(path_folder) / f"{hash_value}.png"
44
+ path_image = Path(path_folder) / relative_path_from_hash(hash=hash_value, suffix=".png")
45
+ path_image.parent.mkdir(parents=True, exist_ok=True)
44
46
  pil_image.save(path_image)
45
47
  return path_image
46
48
 
47
49
 
48
- def filename_as_hash_from_path(path_image: Path) -> str:
49
- hash = hash_file_xxhash(path_image)
50
- return f"{hash}{path_image.suffix}"
50
+ def copy_and_rename_file_to_hash_value(path_source: Path, path_dataset_root: Path) -> Path:
51
+ """
52
+ Copies a file to a dataset root directory with a hash-based name and sub-directory structure.
53
+
54
+ E.g. for an "image.png" with hash "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", the image will be copied to
55
+ 'path_dataset_root / "data" / "dfe" / "dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png"'
56
+ Notice that the hash is used for both the filename and the subfolder name.
57
+
58
+ Placing image/video files into multiple sub-folders (instead of one large folder) is seemingly
59
+ unnecessary, but it is actually a requirement when the dataset is later downloaded from S3.
60
+
61
+ The reason is that AWS has a rate limit of 3500 ops/sec per prefix (sub-folder) in S3 - meaning we can "only"
62
+ download 3500 files per second from a single folder (prefix) in S3.
63
+
64
+ For even a single user, we found that this limit was being reached when files are stored in single folder (prefix)
65
+ in S3. To support multiple users and concurrent experiments, we are required to separate files into
66
+ multiple sub-folders (prefixes) in S3 to not hit the rate limit.
67
+ """
68
+
69
+ if not path_source.exists():
70
+ raise FileNotFoundError(f"Source file {path_source} does not exist.")
71
+
72
+ hash_value = hash_file_xxhash(path_source)
73
+ path_file = path_dataset_root / relative_path_from_hash(hash=hash_value, suffix=path_source.suffix)
74
+ path_file.parent.mkdir(parents=True, exist_ok=True)
75
+ if not path_file.exists():
76
+ shutil.copy2(path_source, path_file)
77
+
78
+ return path_file
79
+
80
+
81
+ def relative_path_from_hash(hash: str, suffix: str) -> Path:
82
+ path_file = Path("data") / hash[:3] / f"{hash}{suffix}"
83
+ return path_file
51
84
 
52
85
 
53
86
  def split_sizes_from_ratios(n_items: int, split_ratios: Dict[str, float]) -> Dict[str, int]:
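The docstring above explains why files are sharded; the path construction itself is a one-liner, and since xxh3_128 produces hex digests, the first three characters give up to 16^3 = 4096 distinct prefixes over which the 3500 requests/sec-per-prefix S3 limit is spread. A quick sketch mirroring the relative_path_from_hash helper from this hunk:

    from pathlib import Path

    def relative_path_from_hash(hash: str, suffix: str) -> Path:
        # The first three hex characters of the hash become the sub-folder (the S3 prefix).
        return Path("data") / hash[:3] / f"{hash}{suffix}"

    print(relative_path_from_hash("dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4", ".png"))
    # data/dfe/dfe8f3b1c2a4f5b6c7d8e9f0a1b2c3d4.png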
{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py

@@ -216,6 +216,16 @@ class DatasetRecipe(Serializable):
  json_str = self.as_json_str(indent=indent)
  path_json.write_text(json_str, encoding="utf-8")

+ ### Helper methods ###
+ def get_dataset_names(self) -> List[str]:
+ """
+ Get all dataset names added with 'from_name'.
+ Function recursively gathers dataset names.
+ """
+ if self.creation is None:
+ return []
+ return self.creation.get_dataset_names()
+
  ### Validation and Serialization ###
  @field_validator("creation", mode="plain")
  @classmethod
@@ -282,7 +292,10 @@ class FromPath(RecipeCreation):
  return HafniaDataset.from_path

  def as_short_name(self) -> str:
- return f"'{self.path_folder}'".replace(os.sep, "|")
+ return f"'{self.path_folder}'".replace(os.sep, "-")
+
+ def get_dataset_names(self) -> List[str]:
+ return [] # Only counts 'from_name' datasets


  class FromName(RecipeCreation):
@@ -297,6 +310,9 @@ class FromName(RecipeCreation):
  def as_short_name(self) -> str:
  return self.name

+ def get_dataset_names(self) -> List[str]:
+ return [self.name]
+

  class FromMerge(RecipeCreation):
  recipe0: DatasetRecipe
@@ -310,6 +326,11 @@ class FromMerge(RecipeCreation):
  merger = FromMerger(recipes=[self.recipe0, self.recipe1])
  return merger.as_short_name()

+ def get_dataset_names(self) -> List[str]:
+ """Get the dataset names from the merged recipes."""
+ names = [*self.recipe0.creation.get_dataset_names(), *self.recipe1.creation.get_dataset_names()]
+ return names
+

  class FromMerger(RecipeCreation):
  recipes: List[DatasetRecipe]
@@ -325,3 +346,40 @@ class FromMerger(RecipeCreation):

  def as_short_name(self) -> str:
  return f"Merger({','.join(recipe.as_short_name() for recipe in self.recipes)})"
+
+ def get_dataset_names(self) -> List[str]:
+ """Get the dataset names from the merged recipes."""
+ names = []
+ for recipe in self.recipes:
+ names.extend(recipe.creation.get_dataset_names())
+ return names
+
+
+ def extract_dataset_names_from_json_dict(data: dict) -> list[str]:
+ """
+ Extract dataset names recursively from a JSON dictionary added with 'from_name'.
+
+ Even if the same functionality is achieved with `DatasetRecipe.get_dataset_names()`,
+ we want to keep this function in 'dipdatalib' to extract dataset names from json dictionaries
+ directly.
+ """
+ creation_field = data.get("creation")
+ if creation_field is None:
+ return []
+ if creation_field.get("__type__") == "FromName":
+ return [creation_field["name"]]
+ elif creation_field.get("__type__") == "FromMerge":
+ recipe_names = ["recipe0", "recipe1"]
+ dataset_name = []
+ for recipe_name in recipe_names:
+ recipe = creation_field.get(recipe_name)
+ if recipe is None:
+ continue
+ dataset_name.extend(extract_dataset_names_from_json_dict(recipe))
+ return dataset_name
+ elif creation_field.get("__type__") == "FromMerger":
+ dataset_name = []
+ for recipe in creation_field.get("recipes", []):
+ dataset_name.extend(extract_dataset_names_from_json_dict(recipe))
+ return dataset_name
+ return []
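The recursion added above is easiest to see on a small nested recipe: FromName contributes its name, FromPath contributes nothing, and FromMerge/FromMerger concatenate their children. A sketch using only constructors that appear in this diff; the dataset names are illustrative:

    from pathlib import Path

    from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

    recipe = DatasetRecipe.from_merge(
        recipe0=DatasetRecipe.from_name(name="mnist"),
        recipe1=DatasetRecipe.from_merger(
            recipes=[
                DatasetRecipe.from_name(name="coco-2017"),
                DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist")),  # contributes no name
            ]
        ),
    )

    assert set(recipe.get_dataset_names()) == {"mnist", "coco-2017"}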
{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/dataset_recipe/recipe_types.py

@@ -108,6 +108,10 @@ class RecipeCreation(Serializable):
  def get_function() -> Callable[..., "HafniaDataset"]:
  pass

+ @abstractmethod
+ def get_dataset_names(self) -> List[str]:
+ pass
+
  def build(self) -> "HafniaDataset":
  from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/dataset/hafnia_dataset.py

@@ -411,30 +411,18 @@ class HafniaDataset:

  return True

- def write(self, path_folder: Path, name_by_hash: bool = True, add_version: bool = False) -> None:
+ def write(self, path_folder: Path, add_version: bool = False) -> None:
  user_logger.info(f"Writing dataset to {path_folder}...")
  if not path_folder.exists():
  path_folder.mkdir(parents=True)
- path_folder_images = path_folder / "data"
- path_folder_images.mkdir(parents=True, exist_ok=True)

  new_relative_paths = []
  for org_path in tqdm(self.samples["file_name"].to_list(), desc="- Copy images"):
- org_path = Path(org_path)
- if not org_path.exists():
- raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
- if name_by_hash:
- filename = dataset_helpers.filename_as_hash_from_path(org_path)
- else:
- filename = Path(org_path).name
- new_path = path_folder_images / filename
- if not new_path.exists():
- shutil.copy2(org_path, new_path)
-
- if not new_path.exists():
- raise FileNotFoundError(f"File {new_path} does not exist in the dataset.")
+ new_path = dataset_helpers.copy_and_rename_file_to_hash_value(
+ path_source=Path(org_path),
+ path_dataset_root=path_folder,
+ )
  new_relative_paths.append(str(new_path.relative_to(path_folder)))
-
  table = self.samples.with_columns(pl.Series(new_relative_paths).alias("file_name"))
  table.write_ndjson(path_folder / FILENAME_ANNOTATIONS_JSONL) # Json for readability
  table.write_parquet(path_folder / FILENAME_ANNOTATIONS_PARQUET) # Parquet for speed
{hafnia-0.2.2 → hafnia-0.2.3}/src/hafnia/platform/datasets.py

@@ -62,7 +62,12 @@ def download_or_get_dataset_path(
  dataset_id = get_dataset_id(dataset_name=dataset_name, endpoint=endpoint_dataset, api_key=api_key)
  if dataset_id is None:
  sys_logger.error(f"Dataset '{dataset_name}' not found on the Hafnia platform.")
- access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/temporary-credentials"
+
+ if utils.is_hafnia_cloud_job():
+ credentials_endpoint_suffix = "temporary-credentials-hidden" # Access to hidden datasets
+ else:
+ credentials_endpoint_suffix = "temporary-credentials" # Access to sample dataset
+ access_dataset_endpoint = f"{endpoint_dataset}/{dataset_id}/{credentials_endpoint_suffix}"

  download_dataset_from_access_endpoint(
  endpoint=access_dataset_endpoint,
{hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/dataset_recipe/test_dataset_recipes.py

@@ -1,4 +1,5 @@
  import inspect
+ import json
  import tempfile
  from dataclasses import dataclass
  from pathlib import Path
@@ -9,6 +10,7 @@ import pytest
  from hafnia.dataset.dataset_recipe.dataset_recipe import (
  DatasetRecipe,
  FromMerger,
+ extract_dataset_names_from_json_dict,
  get_dataset_path_from_recipe,
  )
  from hafnia.dataset.dataset_recipe.recipe_transforms import SelectSamples, Shuffle
@@ -171,7 +173,7 @@ class IntegrationTestUseCase:
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"), check_for_images=False),
- short_name="'.data|datasets|mnist'",
+ short_name="'.data-datasets-mnist'",
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_merger(
@@ -180,14 +182,14 @@ class IntegrationTestUseCase:
  DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"), check_for_images=False),
  ]
  ),
- short_name="Merger(mnist,'.data|datasets|mnist')",
+ short_name="Merger(mnist,'.data-datasets-mnist')",
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_merge(
  recipe0=DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"), check_for_images=False),
  recipe1=DatasetRecipe.from_name(name="mnist", force_redownload=False),
  ),
- short_name="Merger('.data|datasets|mnist',mnist)",
+ short_name="Merger('.data-datasets-mnist',mnist)",
  ),
  IntegrationTestUseCase(
  recipe=DatasetRecipe.from_name(name="mnist", force_redownload=False)
@@ -258,3 +260,44 @@ def test_cases_integration_tests(recipe_use_case: IntegrationTestUseCase):

  assert isinstance(dataset, HafniaDataset), "Dataset is not an instance of HafniaDataset"
  # assert isinstance(dataset, HafniaDataset), "Dataset is not an instance of HafniaDataset"
+
+
+ def test_get_dataset_names():
+ expected_dataset_names = {"dataset0", "dataset1", "dataset2", "dataset3", "dataset4", "dataset5", "dataset6"}
+ nested_recipe = DatasetRecipe.from_merger(
+ recipes=[
+ DatasetRecipe.from_merger(
+ recipes=[
+ DatasetRecipe.from_name(name="dataset0"),
+ DatasetRecipe.from_name(name="dataset1"),
+ DatasetRecipe.from_merge(
+ recipe0=DatasetRecipe.from_name(name="dataset2"),
+ recipe1=DatasetRecipe.from_name(name="dataset3"),
+ ),
+ ]
+ ),
+ DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+ .select_samples(n_samples=30)
+ .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
+ DatasetRecipe.from_name(name="dataset4").select_samples(n_samples=20).shuffle(),
+ DatasetRecipe.from_merger(
+ recipes=[
+ DatasetRecipe.from_name(name="dataset5"),
+ DatasetRecipe.from_name(name="dataset6"),
+ ]
+ ),
+ ]
+ )
+
+ assert set(nested_recipe.get_dataset_names()) == expected_dataset_names, "Dataset names do not match expected names"
+
+ json_str = nested_recipe.as_json_str()
+ nested_recipe.as_json_file(path_json=Path("nested_recipe.json"))
+ data_dict = json.loads(json_str)
+
+ dataset_names = extract_dataset_names_from_json_dict(data_dict)
+ assert set(dataset_names) == expected_dataset_names, (
+ f"If this function fails, you should be concerned !! The '{extract_dataset_names_from_json_dict.__name__}' "
+ "function is copy/pasted to 'dipdatalib' to extract dataset names from json dictionaries directly. "
+ "If this test fails, please fix the function and copy/paste the function to dipdatalib as well."
+ )
{hafnia-0.2.2 → hafnia-0.2.3}/tests/dataset/test_dataset_helpers.py

@@ -72,8 +72,13 @@ def test_create_split_name_list_from_ratios(test_case: CreateSplitNameListFromRa

  def test_save_image_with_hash_name(tmp_path: Path):
  dummy_image = (255 * np.random.rand(100, 100, 3)).astype(np.uint8) # Create a dummy image
- path_image = dataset_helpers.save_image_with_hash_name(dummy_image, tmp_path)
- filename_from_path = dataset_helpers.filename_as_hash_from_path(path_image)
- assert filename_from_path == path_image.name
- assert path_image.exists()
- assert path_image.suffix in [".png"]
+ tmp_path0 = tmp_path / "folder0"
+ path_image0 = dataset_helpers.save_image_with_hash_name(dummy_image, tmp_path0)
+
+ tmp_path1 = tmp_path / "folder1"
+ path_image1 = dataset_helpers.copy_and_rename_file_to_hash_value(path_image0, tmp_path1)
+ assert path_image1.relative_to(tmp_path1) == path_image0.relative_to(tmp_path0)
+ assert path_image0.exists()
+ assert path_image1.exists()
+ assert path_image0.suffix in [".png"]
+ assert path_image1.suffix in [".png"]
{hafnia-0.2.2 → hafnia-0.2.3}/tests/test_cli.py

@@ -47,10 +47,47 @@ def config_with_profiles(test_config_path: Path, profile_data: dict) -> Config:


  def test_configure(cli_runner: CliRunner, empty_config: Config, api_key: str) -> None:
- inputs = f"default\ntest-api-key\n{consts.DEFAULT_API_URL}\n"
+ inputs = f"default\nApiKey some-fake-test-api-key\n{consts.DEFAULT_API_URL}\n"
  result = cli_runner.invoke(cli.main, ["configure"], input="".join(inputs))
  assert result.exit_code == 0
  assert f"{consts.PROFILE_TABLE_HEADER} default" in result.output
+ assert "ApiKey some" in result.output
+
+
+ def test_configure_api_key_autofix(cli_runner: CliRunner, empty_config: Config, api_key: str) -> None:
+ """
+ The submitted api key should always contain an "ApiKey " prefix.
+ Namely the submitted api key should be in this form "ApiKey [HASH_VALUE]"
+ Many users submit the api key without the prefix.
+ This test ensures that the CLI will automatically add the prefix if missing.
+ """
+ inputs = f"default\nfake-api-key-with-out-prefix\n{consts.DEFAULT_API_URL}\n"
+ result = cli_runner.invoke(cli.main, ["configure"], input="".join(inputs))
+ assert result.exit_code == 0
+ assert f"{consts.PROFILE_TABLE_HEADER} default" in result.output
+ assert "ApiKey fake" in result.output, (
+ "'ApiKey ' was not added automatically. API key should be automatically prefixed with 'ApiKey ' when missing"
+ )
+
+
+ def test_create_profile(cli_runner: CliRunner, empty_config: Config, api_key: str) -> None:
+ fake_api_key = "SomeFakeApiKey123"
+ args = [
+ "profile",
+ "create",
+ fake_api_key,
+ "--name",
+ "test_profile",
+ "--api-url",
+ consts.DEFAULT_API_URL,
+ "--activate",
+ ]
+
+ result = cli_runner.invoke(cli.main, args)
+ assert result.exit_code == 0
+ assert f"ApiKey {fake_api_key[:3]}" in result.output, (
+ "'ApiKey ' was not added automatically. API key should be automatically prefixed with 'ApiKey ' when missing"
+ )


  class TestProfile:
{hafnia-0.2.2 → hafnia-0.2.3}/tests/test_samples.py

@@ -1,3 +1,5 @@
+ import collections
+ from pathlib import Path
  from typing import Any, Dict

  import numpy as np
@@ -94,6 +96,11 @@ def test_check_dataset(loaded_dataset, compare_to_expected_image):

  compare_to_expected_image(image)

+ # We are arranging dataset files in multiple sub-folders to avoid S3 rate limits.
+ # This test checks that the dataset files are distributed across multiple sub-folders.
+ unique_sub_folders = collections.Counter([Path(path).parent.name for path in dataset.samples[ColumnName.FILE_NAME]])
+ assert len(unique_sub_folders) > 1, "Expected dataset files to be distributed across sub-folders"
+

  @pytest.mark.slow
  def test_dataset_draw_image_and_target(loaded_dataset, compare_to_expected_image):
{hafnia-0.2.2 → hafnia-0.2.3}/uv.lock

@@ -269,7 +269,7 @@ wheels = [

  [[package]]
  name = "hafnia"
- version = "0.2.2"
+ version = "0.2.3"
  source = { editable = "." }
  dependencies = [
  { name = "boto3" },