hafnia 0.2.4.tar.gz → 0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/build.yaml +1 -1
  2. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/ci_cd.yaml +2 -1
  3. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/lint.yaml +1 -1
  4. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/publish_docker.yaml +4 -4
  5. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/tests.yaml +14 -3
  6. {hafnia-0.2.4 → hafnia-0.3.0}/.gitignore +1 -1
  7. {hafnia-0.2.4 → hafnia-0.3.0}/.vscode/extensions.json +2 -1
  8. {hafnia-0.2.4 → hafnia-0.3.0}/.vscode/launch.json +30 -2
  9. {hafnia-0.2.4 → hafnia-0.3.0}/PKG-INFO +34 -30
  10. {hafnia-0.2.4 → hafnia-0.3.0}/README.md +31 -29
  11. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_dataset_recipe.py +27 -22
  12. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_hafnia_dataset.py +31 -10
  13. {hafnia-0.2.4 → hafnia-0.3.0}/pyproject.toml +4 -1
  14. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/__main__.py +13 -2
  15. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/config.py +2 -1
  16. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/consts.py +1 -1
  17. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/dataset_cmds.py +6 -14
  18. hafnia-0.3.0/src/cli/dataset_recipe_cmds.py +78 -0
  19. hafnia-0.3.0/src/cli/experiment_cmds.py +243 -0
  20. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/profile_cmds.py +6 -5
  21. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/runc_cmds.py +5 -5
  22. hafnia-0.3.0/src/cli/trainer_package_cmds.py +65 -0
  23. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/__init__.py +2 -0
  24. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/data/factory.py +1 -2
  25. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_helpers.py +0 -12
  26. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_names.py +8 -4
  27. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +119 -33
  28. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +32 -4
  29. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
  30. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/dataset_upload_helper.py +206 -53
  31. hafnia-0.3.0/src/hafnia/dataset/hafnia_dataset.py +848 -0
  32. hafnia-0.3.0/src/hafnia/dataset/license_types.py +63 -0
  33. hafnia-0.3.0/src/hafnia/dataset/operations/dataset_stats.py +272 -0
  34. hafnia-0.3.0/src/hafnia/dataset/operations/dataset_transformations.py +403 -0
  35. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/operations/table_transformations.py +39 -2
  36. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/__init__.py +8 -0
  37. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/classification.py +1 -1
  38. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/experiment/hafnia_logger.py +112 -0
  39. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/http.py +16 -2
  40. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/__init__.py +9 -3
  41. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/builder.py +12 -10
  42. hafnia-0.3.0/src/hafnia/platform/dataset_recipe.py +99 -0
  43. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/datasets.py +44 -6
  44. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/platform/download.py +2 -1
  45. hafnia-0.3.0/src/hafnia/platform/experiment.py +68 -0
  46. hafnia-0.3.0/src/hafnia/platform/trainer_package.py +57 -0
  47. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/utils.py +64 -13
  48. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/visualizations/image_visualizations.py +3 -3
  49. {hafnia-0.2.4 → hafnia-0.3.0}/tests/conftest.py +2 -0
  50. hafnia-0.3.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  51. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
  52. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
  53. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
  54. hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_draw_annotations[tiny-dataset].png → hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
  55. hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +3 -0
  56. hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
  57. {hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017 → hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017}/dataset_info.json +94 -2
  58. hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +3 -0
  59. hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
  60. {hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset → hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset}/dataset_info.json +3 -1
  61. hafnia-0.3.0/tests/helper_testing.py +188 -0
  62. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/integration}/test_check_example_scripts.py +1 -1
  63. hafnia-0.3.0/tests/integration/test_cli_integration.py +99 -0
  64. hafnia-0.3.0/tests/integration/test_dataset_merges.py +51 -0
  65. hafnia-0.3.0/tests/integration/test_dataset_recipes_with_platform.py +48 -0
  66. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/integration}/test_samples.py +22 -4
  67. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/dataset_recipe/test_dataset_recipes.py +20 -19
  68. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/dataset_recipe/test_recipe_transformations.py +55 -25
  69. hafnia-0.3.0/tests/unit/dataset/operations/test_dataset_stats.py +56 -0
  70. hafnia-0.3.0/tests/unit/dataset/operations/test_dataset_transformations.py +312 -0
  71. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/operations/test_table_transformations.py +3 -3
  72. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_hafnia_dataset.py +66 -7
  73. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_shape_primitives.py +1 -1
  74. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_builder.py +19 -24
  75. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_utils.py +17 -17
  76. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_visualizations.py +3 -4
  77. hafnia-0.3.0/uv.lock +3308 -0
  78. hafnia-0.2.4/src/cli/experiment_cmds.py +0 -60
  79. hafnia-0.2.4/src/cli/recipe_cmds.py +0 -45
  80. hafnia-0.2.4/src/hafnia/dataset/hafnia_dataset.py +0 -610
  81. hafnia-0.2.4/src/hafnia/dataset/operations/dataset_stats.py +0 -15
  82. hafnia-0.2.4/src/hafnia/dataset/operations/dataset_transformations.py +0 -82
  83. hafnia-0.2.4/src/hafnia/platform/experiment.py +0 -73
  84. hafnia-0.2.4/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
  85. hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/annotations.jsonl +0 -3
  86. hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/annotations.parquet +0 -0
  87. hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/annotations.jsonl +0 -3
  88. hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/annotations.parquet +0 -0
  89. hafnia-0.2.4/tests/dataset/operations/test_dataset_transformations.py +0 -0
  90. hafnia-0.2.4/tests/helper_testing.py +0 -108
  91. hafnia-0.2.4/uv.lock +0 -1861
  92. {hafnia-0.2.4 → hafnia-0.3.0}/.devcontainer/devcontainer.json +0 -0
  93. {hafnia-0.2.4 → hafnia-0.3.0}/.devcontainer/hooks/post_create +0 -0
  94. {hafnia-0.2.4 → hafnia-0.3.0}/.github/dependabot.yaml +0 -0
  95. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/Dockerfile +0 -0
  96. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/check_release.yaml +0 -0
  97. {hafnia-0.2.4 → hafnia-0.3.0}/.github/workflows/publish_pypi.yaml +0 -0
  98. {hafnia-0.2.4 → hafnia-0.3.0}/.pre-commit-config.yaml +0 -0
  99. {hafnia-0.2.4 → hafnia-0.3.0}/.python-version +0 -0
  100. {hafnia-0.2.4 → hafnia-0.3.0}/.vscode/settings.json +0 -0
  101. {hafnia-0.2.4 → hafnia-0.3.0}/LICENSE +0 -0
  102. {hafnia-0.2.4 → hafnia-0.3.0}/docs/cli.md +0 -0
  103. {hafnia-0.2.4 → hafnia-0.3.0}/docs/release.md +0 -0
  104. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_logger.py +0 -0
  105. {hafnia-0.2.4 → hafnia-0.3.0}/examples/example_torchvision_dataloader.py +0 -0
  106. {hafnia-0.2.4 → hafnia-0.3.0}/src/cli/__init__.py +0 -0
  107. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/data/__init__.py +0 -0
  108. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/bbox.py +0 -0
  109. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/bitmask.py +0 -0
  110. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/point.py +0 -0
  111. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/polygon.py +0 -0
  112. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/primitive.py +0 -0
  113. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/segmentation.py +0 -0
  114. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/dataset/primitives/utils.py +0 -0
  115. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/experiment/__init__.py +0 -0
  116. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/log.py +0 -0
  117. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/torch_helpers.py +0 -0
  118. {hafnia-0.2.4 → hafnia-0.3.0}/src/hafnia/visualizations/colors.py +0 -0
  119. {hafnia-0.2.4 → hafnia-0.3.0}/tests/__init__.py +0 -0
  120. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
  121. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
  122. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
  123. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
  124. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
  125. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
  126. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
  127. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
  128. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
  129. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
  130. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
  131. {hafnia-0.2.4 → hafnia-0.3.0}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
  132. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_blur_anonymization[coco-2017].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
  133. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_blur_anonymization[tiny-dataset].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
  134. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_draw_annotations[coco-2017].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
  135. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_mask_region[coco-2017].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
  136. /hafnia-0.2.4/tests/data/expected_images/test_visualizations/test_mask_region[tiny-dataset].png → /hafnia-0.3.0/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
  137. /hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/data/4e95c6eb6209880a.jpg → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/data/3b4/3b4165c8c4f830be4e95c6eb6209880a.jpg +0 -0
  138. /hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/data/cf86c7a23edb55ce.jpg → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/data/837/837b642d8a7b3b8dcf86c7a23edb55ce.jpg +0 -0
  139. /hafnia-0.2.4/tests/data/micro_test_datasets/coco-2017/data/182a2c0a3ce312cf.jpg → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-coco-2017/data/dc8/dc8efc98ce6304fe182a2c0a3ce312cf.jpg +0 -0
  140. /hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/data/3251d85443622e4c.png → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/3dd/3ddec2275a02e79e3251d85443622e4c.png +0 -0
  141. /hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/data/3657ababa44af9b6.png → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/4d8/4d8450b045e60e8f3657ababa44af9b6.png +0 -0
  142. /hafnia-0.2.4/tests/data/micro_test_datasets/tiny-dataset/data/222bbd5721a8a86e.png → /hafnia-0.3.0/tests/data/micro_test_datasets/micro-tiny-dataset/data/907/907f182da7bcedb8222bbd5721a8a86e.png +0 -0
  143. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/dataset_recipe/test_dataset_recipe_helpers.py +0 -0
  144. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_colors.py +0 -0
  145. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/dataset/test_dataset_helpers.py +0 -0
  146. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_cli.py +0 -0
  147. {hafnia-0.2.4/tests → hafnia-0.3.0/tests/unit}/test_hafnia_logger.py +0 -0
.github/workflows/build.yaml
@@ -18,7 +18,7 @@ jobs:
   package-version: ${{ steps.extract-version.outputs.package_version }}
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   with:
   python-version-file: ${{ inputs.python-version-file }}

.github/workflows/ci_cd.yaml
@@ -21,7 +21,7 @@ jobs:
   steps:
   - uses: actions/checkout@v5.0.0
   - name: Run Trivy vulnerability scanner
- uses: aquasecurity/trivy-action@0.32.0
+ uses: aquasecurity/trivy-action@0.33.1
   with:
   scan-type: 'fs'
   scan-ref: '.'
@@ -33,6 +33,7 @@ jobs:
   test:
   name: Run Tests
   needs: lint
+ secrets: inherit
   uses: ./.github/workflows/tests.yaml
   with:
   python-version-file: "pyproject.toml"

.github/workflows/lint.yaml
@@ -11,7 +11,7 @@ jobs:
   runs-on: ubuntu-latest
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   with:
   python-version-file: ${{ inputs.python-version-file }}
   - uses: pre-commit/action@v3.0.1

.github/workflows/publish_docker.yaml
@@ -25,7 +25,7 @@ jobs:
   runs-on: ubuntu-latest
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   id: python
   with:
   python-version-file: ${{ inputs.python-version-file }}
@@ -47,7 +47,7 @@ jobs:
   echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
   fi
   - name: Configure AWS credentials
- uses: aws-actions/configure-aws-credentials@v4.3.1
+ uses: aws-actions/configure-aws-credentials@v5.0.0
   with:
   role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
   aws-region: ${{ steps.env-vars.outputs.aws_region }}
@@ -63,7 +63,7 @@ jobs:
   uses: docker/build-push-action@v6.18.0
   env:
   ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
- ECR_REPOSITORY: mdi-runtime
+ ECR_REPOSITORY: platform_sdk_runtime
   with:
   context: .
   file: .github/workflows/Dockerfile
@@ -77,4 +77,4 @@ jobs:
   cache-from: type=gha
   cache-to: type=gha,mode=max
   build-args: |
- PYTHON_VERSION=${{ steps.python.outputs.python-version }}
+ PYTHON_VERSION=${{ steps.python.outputs.python-version }}
.github/workflows/tests.yaml
@@ -9,10 +9,13 @@ on:
   type: string
   jobs:
   test:
- runs-on: ubuntu-latest
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest, windows-latest]
   steps:
   - uses: actions/checkout@v5.0.0
- - uses: actions/setup-python@v5.6.0
+ - uses: actions/setup-python@v6.0.0
   with:
   python-version-file: ${{ inputs.python-version-file }}
   - name: Install uv
@@ -21,5 +24,13 @@ jobs:
   version: 0.6.8
   - name: Install the project
   run: uv sync --group dev
+ - name: Mount secrets config
+ shell: bash
+ env:
+ HAFNIA_CONFIG: ${{ secrets.HAFNIA_CONFIG }}
+ run: |
+ mkdir -p ~/.hafnia
+ echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
   - name: Run tests
- run: uv run pytest tests
+ run: uv run pytest tests
+

.gitignore
@@ -163,4 +163,4 @@ cython_debug/
   /pypiprivate/
   /packaging/
   /.data/
- /recipe.zip
+ /trainer.zip
.vscode/extensions.json
@@ -5,7 +5,8 @@
   "ms-python.mypy-type-checker",
   "charliermarsh.ruff",
   "tamasfe.even-better-toml",
- "streetsidesoftware.code-spell-checker"
+ "streetsidesoftware.code-spell-checker",
+ "ryanluker.vscode-coverage-gutters"
   ]
   }
.vscode/launch.json
@@ -48,17 +48,45 @@
   ],
   },
   {
- "name": "debug (hafnia dataset X)",
+ "name": "cmd: 'hafnia dataset [X]'",
   "type": "debugpy",
   "request": "launch",
   "program": "${workspaceFolder}/src/cli/__main__.py",
   "args": [
   "dataset",
+ //"ls",
   "download",
   "mnist",
- //"./.data",
   "--force"
   ]
+ },
+ {
+ "name": "cmd: 'hafnia experiment [X]'",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${workspaceFolder}/src/cli/__main__.py",
+ "args": [
+ "experiment",
+ "create",
+ // "--trainer-path",
+ // "${workspaceFolder}/../trainer-classification",
+ //"--trainer-id",
+ //"e47d701d-c5ed-4014-9480-434f04e9459b",
+ "--trainer-path",
+ "${workspaceFolder}/../trainer-classification",
+ "--dataset",
+ "mnist",
+ ]
+ },
+ {
+ "name": "cmd: 'hafnia train-recipe [X]'",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "${workspaceFolder}/src/cli/__main__.py",
+ "args": [
+ "trainer",
+ "ls"
+ ]
   }
   ]
   }
PKG-INFO
@@ -1,6 +1,6 @@
   Metadata-Version: 2.4
   Name: hafnia
- Version: 0.2.4
+ Version: 0.3.0
   Summary: Python SDK for communication with Hafnia platform.
   Author-email: Milestone Systems <hafniaplatform@milestone.dk>
   License-File: LICENSE
@@ -9,6 +9,7 @@ Requires-Dist: boto3>=1.35.91
   Requires-Dist: click>=8.1.8
   Requires-Dist: emoji>=2.14.1
   Requires-Dist: flatten-dict>=0.4.2
+ Requires-Dist: mlflow>=3.2.0
   Requires-Dist: more-itertools>=10.7.0
   Requires-Dist: opencv-python-headless>=4.11.0.86
   Requires-Dist: pathspec>=0.12.1
@@ -19,6 +20,7 @@ Requires-Dist: pycocotools>=2.0.10
   Requires-Dist: pydantic>=2.10.4
   Requires-Dist: rich>=13.9.4
   Requires-Dist: s5cmd>=0.2.0
+ Requires-Dist: sagemaker-mlflow>=0.1.0
   Requires-Dist: seedir>=0.5.0
   Requires-Dist: tqdm>=4.67.1
   Requires-Dist: xxhash>=3.5.0
@@ -26,13 +28,13 @@ Description-Content-Type: text/markdown

   # Hafnia

- The `hafnia` python package is a collection of tools to create and run model training recipes on
+ The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
   the [Hafnia Platform](https://hafnia.milestonesys.com/).

   The package includes the following interfaces:

   - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
- launch recipe scripts.
+ launch trainer packages.
   - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
   experiment tracking.
@@ -42,19 +44,19 @@ experiment tracking.
   and *hidden* datasets. Hidden datasets refers to datasets that can be used for
   training, but are not available for download or direct access.

- This is a key feature of the Hafnia platform, as a hidden dataset ensures data
+ This is a key for the Hafnia platform, as a hidden dataset ensures data
   privacy, and allow models to be trained compliantly and ethically by third parties (you).

   The `script2model` approach is a Training-aaS concept, where you package your custom training
- script as a *training recipe* and use the recipe to train models on the hidden datasets.
+ project or script as a *trainer package* and use the package to train models on the hidden datasets.

- To support local development of a training recipe, we have introduced a **sample dataset**
+ To support local development of a trainer package, we have introduced a **sample dataset**
   for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
- and anonymized subset of the full dataset and available for download.
+ and an anonymized subset of the full dataset and available for download.

   With the sample dataset, you can seamlessly switch between local development and Training-aaS.
- Locally, you can create, validate and debug your training recipe. The recipe is then
- launched with Training-aaS, where the recipe runs on the full dataset and can be scaled to run on
+ Locally, you can create, validate and debug your trainer package. The trainer package is then
+ launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
   multiple GPUs and instances if needed.

   ## Getting started: Configuration
@@ -122,19 +124,19 @@ midwest-vehicle-detection
   You can interact with data as you want, but we also provide `HafniaDataset`
   for loading/saving, managing and interacting with the dataset.

- We recommend to visit and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
- to see how to use the `HafniaDataset` class and its methods.
+ We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+ for a short introduction on the `HafniaDataset`.

   Below is a short introduction to the `HafniaDataset` class.

   ```python
   from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample

- # Load dataset
+ # Load dataset from path
   dataset = HafniaDataset.read_from_path(path_dataset)

- # Alternatively, you can use the 'load_dataset' function to download and load dataset in one go.
- # dataset = load_dataset("midwest-vehicle-detection")
+ # Or get dataset directly by name
+ dataset = HafniaDataset.from_name("midwest-vehicle-detection")

   # Print dataset information
   dataset.print_stats()
@@ -199,6 +201,8 @@ DatasetInfo(
   'duration_average': 120.0,
   ...
   }
+ "format_version": "0.0.2",
+ "updated_at": "2025-09-24T21:50:20.231263"
   )
   ```

@@ -238,7 +242,7 @@ Sample(
   height=1080,
   width=1920,
   split='train',
- is_sample=True,
+ tags=["sample"],
   collection_index=None,
   collection_id=None,
   remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -302,10 +306,10 @@ Sample(
   )
   ```

- To learn more, view and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
+ To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).

   ### Dataset Locally vs. Training-aaS
- An important feature of `load_dataset` is that it will return the full dataset
+ An important feature of `HafniaDataset.from_name` is that it will return the full dataset
   when loaded with Training-aaS on the Hafnia platform.

   This enables seamlessly switching between running/validating a training script
@@ -316,7 +320,7 @@ Available datasets with corresponding sample datasets can be found in [data libr


   ## Getting started: Experiment Tracking with HafniaLogger
- The `HafniaLogger` is an important part of the recipe script and enables you to track, log and
+ The `HafniaLogger` is an important part of the trainer and enables you to track, log and
   reproduce your experiments.

   When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -422,25 +426,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=


   ## Example: Training-aaS
- By combining logging and dataset loading, we can now construct our model training recipe.
+ By combining logging and dataset loading, we can now construct our model trainer package.

- To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
- [recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
+ To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+ [trainer-classification](https://github.com/milestone-hafnia/trainer-classification)

- The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
- the training recipe on the Hafnia platform.
+ The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+ the trainer on the Hafnia platform.


- ## Create, Build and Run `recipe.zip` locally
- In order to test recipe compatibility with Hafnia cloud use the following command to build and
+ ## Create, Build and Run `trainer.zip` locally
+ In order to test trainer package compatibility with Hafnia cloud use the following command to build and
   start the job locally.

   ```bash
- # Create 'recipe.zip' from source folder '.'
- hafnia recipe create .
-
- # Build the docker image locally from a 'recipe.zip' file
- hafnia runc build-local recipe.zip
+ # Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+ hafnia trainer create-zip ../trainer-classification
+
+ # Build the docker image locally from a 'trainer.zip' file
+ hafnia runc build-local trainer.zip

   # Execute the docker image locally with a desired dataset
   hafnia runc launch-local --dataset mnist "python scripts/train.py"
README.md
@@ -1,12 +1,12 @@
   # Hafnia

- The `hafnia` python package is a collection of tools to create and run model training recipes on
+ The `hafnia` python sdk and cli is a collection of tools to create and run model trainer packages on
   the [Hafnia Platform](https://hafnia.milestonesys.com/).

   The package includes the following interfaces:

   - `cli`: A Command Line Interface (CLI) to 1) configure/connect to Hafnia's [Training-aaS](https://hafnia.readme.io/docs/training-as-a-service) and 2) create and
- launch recipe scripts.
+ launch trainer packages.
   - `hafnia`: A python package including `HafniaDataset` to manage datasets and `HafniaLogger` to do
   experiment tracking.

@@ -16,19 +16,19 @@ experiment tracking.
   and *hidden* datasets. Hidden datasets refers to datasets that can be used for
   training, but are not available for download or direct access.

- This is a key feature of the Hafnia platform, as a hidden dataset ensures data
+ This is a key for the Hafnia platform, as a hidden dataset ensures data
   privacy, and allow models to be trained compliantly and ethically by third parties (you).

   The `script2model` approach is a Training-aaS concept, where you package your custom training
- script as a *training recipe* and use the recipe to train models on the hidden datasets.
+ project or script as a *trainer package* and use the package to train models on the hidden datasets.

- To support local development of a training recipe, we have introduced a **sample dataset**
+ To support local development of a trainer package, we have introduced a **sample dataset**
   for each dataset available in the Hafnia [data library](https://hafnia.milestonesys.com/training-aas/datasets). The sample dataset is a small
- and anonymized subset of the full dataset and available for download.
+ and an anonymized subset of the full dataset and available for download.

   With the sample dataset, you can seamlessly switch between local development and Training-aaS.
- Locally, you can create, validate and debug your training recipe. The recipe is then
- launched with Training-aaS, where the recipe runs on the full dataset and can be scaled to run on
+ Locally, you can create, validate and debug your trainer package. The trainer package is then
+ launched with Training-aaS, where the package runs on the full dataset and can be scaled to run on
   multiple GPUs and instances if needed.

   ## Getting started: Configuration
@@ -96,19 +96,19 @@ midwest-vehicle-detection
   You can interact with data as you want, but we also provide `HafniaDataset`
   for loading/saving, managing and interacting with the dataset.

- We recommend to visit and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
- to see how to use the `HafniaDataset` class and its methods.
+ We recommend the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py)
+ for a short introduction on the `HafniaDataset`.

   Below is a short introduction to the `HafniaDataset` class.

   ```python
   from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample

- # Load dataset
+ # Load dataset from path
   dataset = HafniaDataset.read_from_path(path_dataset)

- # Alternatively, you can use the 'load_dataset' function to download and load dataset in one go.
- # dataset = load_dataset("midwest-vehicle-detection")
+ # Or get dataset directly by name
+ dataset = HafniaDataset.from_name("midwest-vehicle-detection")

   # Print dataset information
   dataset.print_stats()
@@ -173,6 +173,8 @@ DatasetInfo(
   'duration_average': 120.0,
   ...
   }
+ "format_version": "0.0.2",
+ "updated_at": "2025-09-24T21:50:20.231263"
   )
   ```

@@ -212,7 +214,7 @@ Sample(
   height=1080,
   width=1920,
   split='train',
- is_sample=True,
+ tags=["sample"],
   collection_index=None,
   collection_id=None,
   remote_path='s3://mdi-production-midwest-vehicle-detection/sample/data/343403325f27e390.png',
@@ -276,10 +278,10 @@ Sample(
   )
   ```

- To learn more, view and potentially execute the example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).
+ To learn more, we recommend the `HafniaDataset` example script [examples/example_hafnia_dataset.py](examples/example_hafnia_dataset.py).

   ### Dataset Locally vs. Training-aaS
- An important feature of `load_dataset` is that it will return the full dataset
+ An important feature of `HafniaDataset.from_name` is that it will return the full dataset
   when loaded with Training-aaS on the Hafnia platform.

   This enables seamlessly switching between running/validating a training script
@@ -290,7 +292,7 @@ Available datasets with corresponding sample datasets can be found in [data libr


   ## Getting started: Experiment Tracking with HafniaLogger
- The `HafniaLogger` is an important part of the recipe script and enables you to track, log and
+ The `HafniaLogger` is an important part of the trainer and enables you to track, log and
   reproduce your experiments.

   When integrated into your training script, the `HafniaLogger` is responsible for collecting:
@@ -396,25 +398,25 @@ train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=


   ## Example: Training-aaS
- By combining logging and dataset loading, we can now construct our model training recipe.
+ By combining logging and dataset loading, we can now construct our model trainer package.

- To demonstrate this, we have provided a recipe project that serves as a template for creating and structuring training recipes
- [recipe-classification](https://github.com/milestone-hafnia/recipe-classification)
+ To demonstrate this, we have provided a trainer package project that serves as a template for creating and structuring trainers. The example repo is called
+ [trainer-classification](https://github.com/milestone-hafnia/trainer-classification)

- The project also contains additional information on how to structure your training recipe, use the `HafniaLogger`, the `load_dataset` function and different approach for launching
- the training recipe on the Hafnia platform.
+ The project also contains additional information on how to structure your trainer package, use the `HafniaLogger`, loading a dataset and different approach for launching
+ the trainer on the Hafnia platform.


- ## Create, Build and Run `recipe.zip` locally
- In order to test recipe compatibility with Hafnia cloud use the following command to build and
+ ## Create, Build and Run `trainer.zip` locally
+ In order to test trainer package compatibility with Hafnia cloud use the following command to build and
   start the job locally.

   ```bash
- # Create 'recipe.zip' from source folder '.'
- hafnia recipe create .
-
- # Build the docker image locally from a 'recipe.zip' file
- hafnia runc build-local recipe.zip
+ # Create 'trainer.zip' in the root folder of your training trainer project '../trainer/classification'
+ hafnia trainer create-zip ../trainer-classification
+
+ # Build the docker image locally from a 'trainer.zip' file
+ hafnia runc build-local trainer.zip

   # Execute the docker image locally with a desired dataset
   hafnia runc launch-local --dataset mnist "python scripts/train.py"
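
For users upgrading from 0.2.4, the README changes above boil down to a handful of renames. The sketch below is assembled only from lines visible in this diff (`HafniaDataset.from_name`, the `tags=["sample"]` field, and the `trainer`/`runc` CLI commands); treat it as an illustration of the renamed entry points rather than an exhaustive migration guide:

```python
# Renames visible in the README diff (0.2.4 -> 0.3.0), collected in one place.
from hafnia.dataset.hafnia_dataset import HafniaDataset

# 0.2.4 README: dataset = load_dataset("midwest-vehicle-detection")
# 0.3.0 README: load a dataset (sample locally, full dataset on Training-aaS) by name.
dataset = HafniaDataset.from_name("midwest-vehicle-detection")

# Sample records: the boolean field 'is_sample=True' is replaced by a tag list, tags=["sample"].
# CLI: 'hafnia recipe create .' and 'hafnia runc build-local recipe.zip' become
#      'hafnia trainer create-zip <trainer-dir>' and 'hafnia runc build-local trainer.zip'.
```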
examples/example_dataset_recipe.py
@@ -2,6 +2,7 @@ from pathlib import Path

   from rich import print as rprint

+ from hafnia import utils
   from hafnia.data.factory import load_dataset
   from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
   from hafnia.dataset.dataset_recipe.recipe_transforms import (
@@ -15,10 +16,6 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
   # A DatasetRecipe is a recipe for the dataset you want to create.
   # The recipe itself is not executed - this is just a specification of the dataset you want!

- # A DatasetRecipe is an important concept in Hafnia as it allows you to merge multiple datasets
- # and transformations in a single recipe. This is especially useful for Training as a Service (TaaS)
- # where you need to define the dataset you want as a configuration and load it in the TaaS platform.
-
   # The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
   # To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
   # This line will get the "mnist" dataset, shuffle it, and select 20 samples.
@@ -34,30 +31,38 @@ dataset = dataset_recipe.build()
   # You can print the dataset recipe to the operations that were applied to it.
   rprint(dataset_recipe)

- # Or as a JSON string:
- json_str: str = dataset_recipe.as_json_str()
- rprint(json_str)
-
- # This is an important feature of a 'DatasetRecipe' it only registers operations and that the recipe itself
- # - and not the dataset - can be saved as a file and loaded from file.
- # Meaning you can easily save, share, load and build the dataset later or in a different environment.
- # For TaaS, this is the only way to include multiple datasets during training.
-
+ # The key for recipes is that they can be saved and loaded as a JSON.
+ # This also allows the recipe to be saved, shared, loaded and used later to build a dataset
+ # in a different environment.

- # 2) The recipe can be loaded from json string
- dataset_recipe_again: DatasetRecipe = DatasetRecipe.from_json_str(json_str)
- # dataset_recipe_again.build()
+ # Example: Saving and loading a dataset recipe from file.
+ path_recipe = Path(".data/dataset_recipes/example_recipe.json")
+ json_str: str = dataset_recipe.as_json_file(path_recipe)
+ dataset_recipe_again: DatasetRecipe = DatasetRecipe.from_json_file(path_recipe)

- # We can verify that the loaded recipe is the same as the original recipe.
+ # Verify that the loaded recipe is identical to the original recipe.
   assert dataset_recipe_again == dataset_recipe

- # Additionally, you can get the python code for creating the same recipe.
+ # It is also possible to generate the recipe as python code
   dataset_recipe.as_python_code()

- # Example: DatasetRecipe from Path
- dataset_recipe = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
+ # The recipe also allows you to combine multiple datasets and transformations that can be
+ # executed in the TaaS platform. This is demonstrated below:
+ if utils.is_hafnia_configured(): # First ensure you are connected to the hafnia platform
+ # Upload the dataset recipe - this will make it available for TaaS and for users of your organization
+ dataset_recipe.as_platform_recipe(recipe_name="example-mnist-recipe")
+
+ # The recipe is now available in TaaS, for different environments and other users in your organization
+ dataset_recipe_again = DatasetRecipe.from_recipe_name(name="example-mnist-recipe")
+
+ # Launch an experiment with the dataset recipe using the CLI:
+ # hafnia experiment create --dataset-recipe example-mnist-recipe --trainer-path ../trainer-classification
+
+ # Coming soon: Dataset recipes will be included in the web platform to them to be shared, managed
+ # and used in experiments.

- # Example: DatasetRecipe by merging multiple dataset recipes
+ ### More examples dataset recipes ###
+ # Example: 'DatasetRecipe' by merging multiple dataset recipes
   dataset_recipe = DatasetRecipe.from_merger(
   recipes=[
   DatasetRecipe.from_name(name="mnist"),
@@ -166,4 +171,4 @@ rprint(explicit_recipe_from_implicit)

   # Verify that the conversion produces the same result
   assert explicit_recipe_from_implicit == explicit_recipe
- rprint("Conversion successful - recipes are equivalent!")
+ rprint("Conversion successful - recipes are equivalent!")
examples/example_hafnia_dataset.py
@@ -5,7 +5,7 @@ import numpy as np
   from PIL import Image
   from rich import print as rprint

- from hafnia.data import get_dataset_path, load_dataset
+ from hafnia.data import load_dataset
   from hafnia.dataset.dataset_names import SplitName
   from hafnia.dataset.hafnia_dataset import DatasetInfo, HafniaDataset, Sample, TaskInfo
   from hafnia.dataset.primitives.bbox import Bbox
@@ -20,11 +20,7 @@ from hafnia.dataset.primitives.polygon import Polygon
   # hafnia configure

   # Load dataset
- path_dataset = get_dataset_path("midwest-vehicle-detection")
- dataset = HafniaDataset.from_path(path_dataset)
-
- # Alternatively, you can use the 'load_dataset' function
- dataset = load_dataset("midwest-vehicle-detection")
+ dataset = HafniaDataset.from_name("mnist")

   # Dataset information is stored in 'dataset.info'
   rprint(dataset.info)
@@ -33,7 +29,13 @@ rprint(dataset.info)
   dataset.samples.head(2)

   # Print dataset information
- dataset.print_stats()
+ dataset.print_sample_and_task_counts()
+ dataset.print_class_distribution()
+ dataset.print_stats() # Print verbose dataset statistics
+
+ # Get dataset stats
+ dataset.class_counts_all() # Get class counts for all tasks
+ dataset.class_counts_for_task(primitive=Classification) # Get class counts for a specific task

   # Create a dataset split for training
   dataset_train = dataset.create_split_dataset("train")
@@ -41,13 +43,32 @@ dataset_train = dataset.create_split_dataset("train")
   # Checkout built-in transformations in 'operations/dataset_transformations' or 'HafniaDataset'
   dataset_val = dataset.create_split_dataset(SplitName.VAL) # Use 'SplitName' to avoid magic strings

-
   small_dataset = dataset.select_samples(n_samples=10, seed=42) # Selects 10 samples from the dataset
   shuffled_dataset = dataset.shuffle(seed=42) # Shuffle the dataset

+ # Create dataset splits by ratios
   split_ratios = {SplitName.TRAIN: 0.8, SplitName.VAL: 0.1, SplitName.TEST: 0.1}
   new_dataset_splits = dataset.splits_by_ratios(split_ratios)

+ # Get only samples with specific class names
+ dataset_ones = dataset.select_samples_by_class_name(name="1 - one", primitive=Classification)
+
+ # Rename class names with mapping
+ class_mapping_strict = {
+ "0 - zero": "even", # "0 - zero" will be renamed to "even". "even" appear first and get class index 0
+ "1 - one": "odd", # "1 - one" will be renamed to "odd". "odd" appear second and will get class index 1
+ "2 - two": "even",
+ "3 - three": "odd",
+ "4 - four": "even",
+ "5 - five": "odd",
+ "6 - six": "even",
+ "7 - seven": "odd",
+ "8 - eight": "even",
+ "9 - nine": "__REMOVE__", # Remove all samples with class "9 - nine"
+ }
+ dataset_mapped = dataset.class_mapper(class_mapping=class_mapping_strict)
+ dataset_mapped.print_class_distribution()
+
   # Support Chaining Operations (load, shuffle, select samples)
   dataset = load_dataset("midwest-vehicle-detection").shuffle(seed=42).select_samples(n_samples=10)

@@ -70,7 +91,6 @@ class_counts = dataset.samples[Classification.column_name()].explode().struct.fi
   class_counts = dataset.samples[Bbox.column_name()].explode().struct.field("class_name").value_counts()
   rprint(dict(class_counts.iter_rows()))

-
   # Access the first sample in the training split - data is stored in a dictionary
   sample_dict = dataset_train[0]

@@ -86,6 +106,7 @@ bitmasks: List[Bitmask] = sample.bitmasks # Use 'sample.bitmasks' to access bit
   polygons: List[Polygon] = sample.polygons # Use 'sample.polygons' to access polygons as a list of Polygon objects
   classifications: List[Classification] = sample.classifications # As a list of Classification objects

+
   # Read image using the sample object
   image: np.ndarray = sample.read_image()

@@ -107,7 +128,7 @@ for i_fake_sample in range(5):
   height=480,
   width=640,
   split="train",
- is_sample=True,
+ tags=["sample"],
   objects=bboxes,
   classifications=classifications,
   )
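
The example-script changes above add several statistics and filtering helpers to `HafniaDataset`. The block below gathers them into one hedged sketch; the calls and the `"__REMOVE__"` sentinel mirror the `+` lines in the diff, while the `Classification` import path is assumed from the package layout (`src/hafnia/dataset/primitives/classification.py`) rather than shown explicitly in this diff:

```python
from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.dataset.primitives.classification import Classification  # assumed import path

# Load the sample dataset by name, as in the updated example script.
dataset = HafniaDataset.from_name("mnist")

# New statistics helpers introduced alongside print_stats().
dataset.print_sample_and_task_counts()
dataset.print_class_distribution()
counts_all = dataset.class_counts_all()
counts_cls = dataset.class_counts_for_task(primitive=Classification)

# Keep only samples of a given class.
dataset_ones = dataset.select_samples_by_class_name(name="1 - one", primitive=Classification)

# Remap class names; "__REMOVE__" drops all samples of that class.
class_mapping = {
    "0 - zero": "even", "1 - one": "odd", "2 - two": "even", "3 - three": "odd",
    "4 - four": "even", "5 - five": "odd", "6 - six": "even", "7 - seven": "odd",
    "8 - eight": "even", "9 - nine": "__REMOVE__",
}
dataset_even_odd = dataset.class_mapper(class_mapping=class_mapping)
dataset_even_odd.print_class_distribution()
```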