hafnia 0.5.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/build.yaml +3 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/check_release.yaml +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/ci_cd.yaml +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/lint.yaml +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/publish_docker.yaml +4 -4
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/publish_pypi.yaml +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/tests.yaml +3 -5
- {hafnia-0.5.0 → hafnia-0.5.1}/PKG-INFO +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_dataset_recipe.py +15 -87
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_hafnia_dataset.py +15 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_torchvision_dataloader.py +2 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/pyproject.toml +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_helpers.py +59 -1
- hafnia-0.5.1/src/hafnia/dataset/dataset_names.py +123 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/dataset_recipe.py +48 -4
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/torchvision_datasets.py +2 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/hafnia_dataset.py +163 -69
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/hafnia_dataset_types.py +142 -18
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_s3_storage.py +7 -2
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/table_transformations.py +0 -18
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/datasets.py +32 -132
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/download.py +1 -1
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/s5cmd_utils.py +122 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/dataset_cmds.py +19 -13
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/runc_cmds.py +7 -2
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- hafnia-0.5.1/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/helper_testing.py +25 -14
- hafnia-0.5.1/tests/helper_testing_datasets.py +73 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_bring_your_own_data.py +9 -8
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_cli_integration.py +6 -3
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_dataset_merges.py +10 -7
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_dataset_recipes_with_platform.py +9 -6
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_samples.py +57 -22
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipe_helpers.py +37 -28
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_dataset_recipes.py +16 -7
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_helpers.py +44 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_hafnia_dataset.py +8 -8
- hafnia-0.5.1/tests/unit/dataset/test_hafnia_dataset_types.py +61 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/uv.lock +5 -5
- hafnia-0.5.0/src/hafnia/dataset/dataset_names.py +0 -230
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-101].png +0 -0
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_check_dataset[caltech-256].png +0 -0
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-101].png +0 -0
- hafnia-0.5.0/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[caltech-256].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.devcontainer/devcontainer.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.devcontainer/hooks/post_create +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/dependabot.yaml +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.github/workflows/Dockerfile +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.gitignore +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.pre-commit-config.yaml +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.python-version +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.trivyignore +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/extensions.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/launch.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/.vscode/settings.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/LICENSE +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/README.md +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/docs/cli.md +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/docs/release.md +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/examples/example_logger.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/data/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/data/factory.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_details_uploader.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/recipe_transforms.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/dataset_recipe/recipe_types.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_coco.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_helpers.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_image_classification_folder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/format_conversions/format_yolo.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/license_types.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_stats.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/operations/dataset_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/bbox.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/bitmask.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/classification.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/point.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/polygon.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/primitive.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/segmentation.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/dataset/primitives/utils.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/experiment/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/experiment/hafnia_logger.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/http.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/log.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/builder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/dataset_recipe.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/experiment.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/platform/trainer_package.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/torch_helpers.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/utils.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/visualizations/colors.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia/visualizations/image_visualizations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/__main__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/config.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/consts.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/dataset_recipe_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/experiment_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/keychain.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/profile_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/src/hafnia_cli/trainer_package_cmds.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/__init__.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/conftest.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/000000000632.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/000000000724.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/train/_annotations.coco.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000139.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/000000000285.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_coco_roboflow/valid/_annotations.coco.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/obj.names +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000139.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000139.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000285.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/data/000000000285.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/train/images.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/data/000000000632.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_formats/format_yolo/validation/images.txt +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/dataset_image_metadata_schema.yaml +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_dataset_transformations/test_video_storage_format_read_image.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[polygon].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_as_ints].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_bytes].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_convert_segmentation_to_rle_list[rle_compressed_str].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_from_coco_format_visualized.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_coco/test_to_coco_format_visualized.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_yolo/test_format_yolo_import_export_tiny_dataset.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_format_yolo/test_import_yolo_format_visualized.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar100].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[cifar10].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[midwest-vehicle-detection].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[mnist].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_check_dataset[tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar100].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[cifar10].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[midwest-vehicle-detection].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[mnist].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_samples/test_dataset_draw_image_and_target[tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_blur_anonymization[micro-tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_draw_annotations[micro-tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_mask_region[micro-coco-2017].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_mask_region[micro-tiny-dataset].png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/expected_images/test_visualizations/test_polygon_to_bitmask_conversion.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/annotations.jsonl +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/annotations.parquet +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/253/253925d334c002ce6662d8133535dd4c.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/b1a/b1a09f4d922f8f6904bab0c1caf172ab.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/data/f67/f675c8a1e862b5e00203ab888ac7fff4.jpg +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-coco-2017/dataset_info.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.jsonl +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/annotations.parquet +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/25c/25c3a206e7b60ab50245ee3d52d97f11.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/962/962fd865fdd45f169d5ca8c8f284d68d.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/data/ec6/ec60f2f4fb854b59c97e16b45c713de0.png +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/data/micro_test_datasets/micro-tiny-dataset/dataset_info.json +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_check_example_scripts.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/integration/test_torchvision_datasets.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/dataset_recipe/test_recipe_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_coco.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_image_classification_folder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/format_conversions/test_format_yolo.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_dataset_stats.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_dataset_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/operations/test_table_transformations.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_colors.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_details_uploader.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_dataset_names.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/dataset/test_shape_primitives.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_builder.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_cli.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_hafnia_logger.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_utils.py +0 -0
- {hafnia-0.5.0 → hafnia-0.5.1}/tests/unit/test_visualizations.py +0 -0
@@ -21,8 +21,8 @@ jobs:
     outputs:
       package-version: ${{ steps.extract-version.outputs.package_version }}
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         with:
           python-version-file: ${{ inputs.python-version-file }}
 

@@ -49,7 +49,7 @@ jobs:
         run: uv build
 
       - name: Upload package artifact
-        uses: actions/upload-artifact@
+        uses: actions/upload-artifact@v6.0.0
         with:
           name: python-package
           path: dist/

@@ -10,8 +10,8 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         with:
           python-version-file: ${{ inputs.python-version-file }}
       - uses: pre-commit/action@v3.0.1

@@ -24,14 +24,14 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         id: python
         with:
          python-version-file: ${{ inputs.python-version-file }}
 
      - name: Download package artifact
-        uses: actions/download-artifact@
+        uses: actions/download-artifact@v7.0.0
        with:
          name: python-package
          path: dist/

@@ -47,7 +47,7 @@ jobs:
            echo "aws_region=${{ secrets.STAGE_AWS_REGION }}" >> $GITHUB_OUTPUT
          fi
      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v5.1.
+        uses: aws-actions/configure-aws-credentials@v5.1.1
        with:
          role-to-assume: arn:aws:iam::${{ steps.env-vars.outputs.aws_account_id }}:role/${{ secrets.AWS_ROLE_NAME }}
          aws-region: ${{ steps.env-vars.outputs.aws_region }}

@@ -15,8 +15,8 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest]
     steps:
-      - uses: actions/checkout@
-      - uses: actions/setup-python@v6.
+      - uses: actions/checkout@v6.0.1
+      - uses: actions/setup-python@v6.1.0
         with:
           python-version-file: ${{ inputs.python-version-file }}
       - name: Install uv

@@ -32,9 +32,7 @@ jobs:
         run: |
           mkdir -p ~/.hafnia
           echo "$HAFNIA_CONFIG" | jq . > ~/.hafnia/config.json
-      - name: Check hafnia
-        run: uv run hafnia profile active
-      - name: Check hafnia by download
+      - name: Check hafnia configuration by download
        run: uv run hafnia dataset download mnist --force
      - name: Run tests
        run: uv run pytest tests
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.5.0
+Version: 0.5.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE

@@ -10,7 +10,7 @@ Requires-Dist: click>=8.1.8
 Requires-Dist: emoji>=2.14.1
 Requires-Dist: flatten-dict>=0.4.2
 Requires-Dist: keyring>=25.6.0
-Requires-Dist: mcp>=1.
+Requires-Dist: mcp>=1.23.0
 Requires-Dist: mlflow>=3.4.0
 Requires-Dist: more-itertools>=10.7.0
 Requires-Dist: opencv-python-headless>=4.11.0.86
@@ -5,13 +5,12 @@ from rich import print as rprint
 from hafnia import utils
 from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS
 from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe
-from hafnia.dataset.dataset_recipe.recipe_transforms import (
-    SelectSamples,
-    Shuffle,
-    SplitsByRatios,
-)
 from hafnia.dataset.hafnia_dataset import HafniaDataset
 
+COCO_VERSION = "1.0.0"
+MIDWEST_VERSION = "1.0.0"
+MNIST_VERSION = "1.0.0"
+
 ### Introducing DatasetRecipe ###
 # A DatasetRecipe is a recipe for the dataset you want to create.
 # The recipe itself is not executed - this is just a specification of the dataset you want!

@@ -19,10 +18,10 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
 # The 'DatasetRecipe' interface is similar to the 'HafniaDataset' interface.
 # To demonstrate, we will first create a dataset with the regular 'HafniaDataset' interface.
 # This line will get the "mnist" dataset, shuffle it, and select 20 samples.
-dataset = HafniaDataset.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset = HafniaDataset.from_name(name="mnist", version=MNIST_VERSION).shuffle().select_samples(n_samples=20)
 
 # Now the same dataset is created using the 'DatasetRecipe' interface.
-dataset_recipe = DatasetRecipe.from_name(name="mnist").shuffle().select_samples(n_samples=20)
+dataset_recipe = DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION).shuffle().select_samples(n_samples=20)
 dataset = dataset_recipe.build()
 # Note that the interface is similar, but to actually create the dataset you need to call `build()` on the recipe.
 

@@ -65,8 +64,8 @@ if utils.is_hafnia_configured():  # First ensure you are connected to the hafnia
 # Example: 'DatasetRecipe' by merging multiple dataset recipes
 dataset_recipe = DatasetRecipe.from_merger(
     recipes=[
-        DatasetRecipe.from_name(name="mnist"),
-        DatasetRecipe.from_name(name="mnist"),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
     ]
 )
 

@@ -75,14 +74,14 @@ dataset_recipe = DatasetRecipe.from_merger(
     recipes=[
         DatasetRecipe.from_merger(
             recipes=[
-                DatasetRecipe.from_name(name="mnist"),
-                DatasetRecipe.from_name(name="mnist"),
+                DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
+                DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION),
             ]
         ),
         DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
         .select_samples(n_samples=30)
         .splits_by_ratios(split_ratios={"train": 0.8, "val": 0.1, "test": 0.1}),
-        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
+        DatasetRecipe.from_name(name="mnist", version=MNIST_VERSION).select_samples(n_samples=20).shuffle(),
     ]
 )
 

@@ -99,14 +98,14 @@ print(dataset_recipe.as_json_str())  # as a JSON string
 # 1) The first step is to use the regular 'HafniaDataset' interface to investigate and understand the datasets
 
 # 1a) Explore 'coco-2017'
-coco = HafniaDataset.from_name("coco-2017")
+coco = HafniaDataset.from_name("coco-2017", version=COCO_VERSION)
 coco.print_stats()  # Print dataset statistics
 coco_class_names = coco.info.get_task_by_primitive("Bbox").class_names  # Get the class names for the bbox task
 # You will notice coco has 80 classes including 'person' and various vehicle classes such as 'car', 'bus', 'truck', etc.
 # but also many unrelated classes such as 'toaster', 'hair drier', etc.
 
 # 1b) Explore 'midwest-vehicle-detection'
-midwest = HafniaDataset.from_name("midwest-vehicle-detection")
+midwest = HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
 midwest.print_stats()  # Print dataset statistics
 midwest_class_names = midwest.info.get_task_by_primitive("Bbox").class_names
 # You will also notice midwest has similar classes, but they are named differently, e.g. 'Persons',

@@ -144,10 +143,10 @@ merged_dataset.print_stats()
 # 3) Once you have verified operations using the 'HafniaDataset' interface, you can convert
 #    the operations to a single 'DatasetRecipe'
 merged_recipe = DatasetRecipe.from_merge(
-    recipe0=DatasetRecipe.from_name("coco-2017").class_mapper(
+    recipe0=DatasetRecipe.from_name("coco-2017", version=COCO_VERSION).class_mapper(
         class_mapping=mappings_coco, method="remove_undefined", task_name="object_detection"
     ),
-    recipe1=DatasetRecipe.from_name("midwest-vehicle-detection").class_mapper(
+    recipe1=DatasetRecipe.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION).class_mapper(
         class_mapping=mapping_midwest, task_name="object_detection"
     ),
 ).select_samples_by_class_name(name=["Person", "Vehicle"], task_name="object_detection")

@@ -176,74 +175,3 @@ if utils.is_hafnia_configured():
 # 6) Monitor and manage your experiments
 # 6a) View experiments using the web platform https://staging02.mdi.milestonesys.com/training-aas/experiments
 # 6b) Or use the CLI: 'hafnia experiment ls'
-### DatasetRecipe Implicit Form ###
-# Below we demonstrate the difference between implicit and explicit forms of dataset recipes.
-# Example: Get dataset by name with implicit and explicit forms
-recipe_implicit_form = "mnist"
-recipe_explicit_form = DatasetRecipe.from_name(name="mnist")
-
-# The implicit form can now be loaded and built as a dataset
-dataset_implicit = DatasetRecipe.from_implicit_form(recipe_implicit_form).build()
-# Or directly as a dataset
-dataset_implicit = HafniaDataset.from_recipe(recipe_implicit_form)
-
-
-# Example: Get dataset from path with implicit and explicit forms:
-recipe_implicit_form = Path(".data/datasets/mnist")
-recipe_explicit_form = DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
-
-# Example: Merge datasets with implicit and explicit forms
-recipe_implicit_form = ("mnist", "mnist")
-recipe_explicit_form = DatasetRecipe.from_merger(
-    recipes=[
-        DatasetRecipe.from_name(name="mnist"),
-        DatasetRecipe.from_name(name="mnist"),
-    ]
-)
-
-# Example: Define a dataset with transformations using implicit and explicit forms
-recipe_implicit_form = ["mnist", SelectSamples(n_samples=20), Shuffle()]
-recipe_explicit_form = DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle()
-
-
-# Example: Complex nested example with implicit vs explicit forms
-# Implicit form of a complex dataset recipe
-split_ratio = {"train": 0.8, "val": 0.1, "test": 0.1}
-recipe_implicit_complex = (
-    ("mnist", "mnist"),
-    [Path(".data/datasets/mnist"), SelectSamples(n_samples=30), SplitsByRatios(split_ratios=split_ratio)],
-    ["mnist", SelectSamples(n_samples=20), Shuffle()],
-)
-
-# Explicit form of the same complex dataset recipe
-recipe_explicit_complex = DatasetRecipe.from_merger(
-    recipes=[
-        DatasetRecipe.from_merger(
-            recipes=[
-                DatasetRecipe.from_name(name="mnist"),
-                DatasetRecipe.from_name(name="mnist"),
-            ]
-        ),
-        DatasetRecipe.from_path(path_folder=Path(".data/datasets/mnist"))
-        .select_samples(n_samples=30)
-        .splits_by_ratios(split_ratios=split_ratio),
-        DatasetRecipe.from_name(name="mnist").select_samples(n_samples=20).shuffle(),
-    ]
-)
-
-# The implicit form uses the following rules:
-# str: Will get a dataset by name -> In explicit form it becomes 'DatasetRecipe.from_name'
-# Path: Will get a dataset from path -> In explicit form it becomes 'DatasetRecipe.from_path'
-# tuple: Will merge datasets specified in the tuple -> In explicit form it becomes 'DatasetRecipe.from_merger'
-# list: Will define a dataset followed by a list of transformations -> In explicit form it becomes chained method calls
-# Generally, we recommend using the explicit form over the implicit form when multiple datasets and transformations are involved.
-
-
-# To convert from implicit to explicit recipe form, you can use the `from_implicit_form` method.
-explicit_recipe_from_implicit = DatasetRecipe.from_implicit_form(recipe_implicit_complex)
-rprint("Converted explicit recipe:")
-rprint(explicit_recipe_from_implicit)
-
-# Verify that the conversion produces the same result
-assert explicit_recipe_from_implicit == recipe_explicit_complex
-rprint("Conversion successful - recipes are equivalent!")
@@ -13,6 +13,10 @@ from hafnia.dataset.primitives.bitmask import Bitmask
 from hafnia.dataset.primitives.classification import Classification
 from hafnia.dataset.primitives.polygon import Polygon
 
+MNIST_VERSION = "1.0.0"
+MIDWEST_VERSION = "1.0.0"
+COCO_VERSION = "1.0.0"
+
 # First ensure that you have the Hafnia CLI installed and configured.
 # You can install it via pip:
 # pip install hafnia

@@ -20,7 +24,11 @@ from hafnia.dataset.primitives.polygon import Polygon
 # hafnia configure
 
 # Load sample dataset
-dataset = HafniaDataset.from_name("mnist")
+dataset = HafniaDataset.from_name("mnist", version=MNIST_VERSION)
+
+# Use 'from_name' without version-argument to get available versions:
+# dataset = HafniaDataset.from_name("mnist")
+# >>> ValueError: Version must be specified. Available versions: ['1.0.0', '0.0.1']
 
 # Dataset information is stored in 'dataset.info'
 rprint(dataset.info)

@@ -76,7 +84,11 @@ dataset_mapped = dataset.class_mapper(class_mapping=class_mapping_strict)
 dataset_mapped.print_class_distribution()
 
 # Support Chaining Operations (load, shuffle, select samples)
-dataset =
+dataset = (
+    HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
+    .shuffle(seed=42)
+    .select_samples(n_samples=10)
+)
 
 
 # Write dataset to disk

@@ -88,7 +100,7 @@ dataset.write(path_dataset)
 dataset_again = HafniaDataset.from_path(path_dataset)
 
 ## Dataset importers and exporters ##
-dataset_od = HafniaDataset.from_name("coco-2017").select_samples(n_samples=5, seed=42)
+dataset_od = HafniaDataset.from_name("coco-2017", version=COCO_VERSION).select_samples(n_samples=5, seed=42)
 
 # Export/import dataset to YOLO format
 path_yolo_format = Path(".data/tmp/yolo_dataset")
@@ -12,7 +12,8 @@ from hafnia.dataset.hafnia_dataset import HafniaDataset
 if __name__ == "__main__":
     torch.manual_seed(1)
     # Load Hugging Face dataset
-
+    MIDWEST_VERSION = "1.0.0"
+    dataset = HafniaDataset.from_name("midwest-vehicle-detection", version=MIDWEST_VERSION)
 
     # Define transforms
     train_transforms = v2.Compose(
@@ -1,6 +1,6 @@
 [project]
 name = "hafnia"
-version = "0.5.0"
+version = "0.5.1"
 description = "Python SDK for communication with Hafnia platform."
 readme = "README.md"
 authors = [

@@ -28,7 +28,7 @@ dependencies = [
     "xxhash>=3.5.0",
     "mlflow>=3.4.0",
     "sagemaker-mlflow>=0.1.0",
-    "mcp>=1.
+    "mcp>=1.23.0",
 ]
 
 [dependency-groups]
@@ -3,12 +3,70 @@ import math
 import random
 import shutil
 from pathlib import Path
-from typing import Dict, List
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 import xxhash
+from packaging.version import InvalidVersion, Version
 from PIL import Image
 
+from hafnia.log import user_logger
+
+
+def is_valid_version_string(version: Optional[str], allow_none: bool = False, allow_latest: bool = False) -> bool:
+    if allow_none and version is None:
+        return True
+    if allow_latest and version == "latest":
+        return True
+    return version_from_string(version, raise_error=False) is not None
+
+
+def version_from_string(version: Optional[str], raise_error: bool = True) -> Optional[Version]:
+    if version is None:
+        if raise_error:
+            raise ValueError("Version is 'None'. A valid version string is required e.g '1.0.0'")
+        return None
+
+    try:
+        version_casted = Version(version)
+    except (InvalidVersion, TypeError) as e:
+        if raise_error:
+            raise ValueError(f"Invalid version string/type: {version}") from e
+        return None
+
+    # Check if version is semantic versioning (MAJOR.MINOR.PATCH)
+    if len(version_casted.release) < 3:
+        if raise_error:
+            raise ValueError(f"Version string '{version}' is not semantic versioning (MAJOR.MINOR.PATCH)")
+        return None
+    return version_casted
+
+
+def dataset_name_and_version_from_string(
+    string: str,
+    resolve_missing_version: bool = True,
+) -> Tuple[str, Optional[str]]:
+    if not isinstance(string, str):
+        raise TypeError(f"'{type(string)}' for '{string}' is an unsupported type. Expected 'str' e.g 'mnist:1.0.0'")
+
+    parts = string.split(":")
+    if len(parts) == 1:
+        dataset_name = parts[0]
+        if resolve_missing_version:
+            version = "latest"  # Default to 'latest' if version is missing. This will be resolved to a specific version later.
+            user_logger.info(f"Version is missing in dataset name: {string}. Defaulting to version='latest'.")
+        else:
+            raise ValueError(f"Version is missing in dataset name: {string}. Use 'name:version'")
+    elif len(parts) == 2:
+        dataset_name, version = parts
+    else:
+        raise ValueError(f"Invalid dataset name format: {string}. Use 'name' or 'name:version' ")
+
+    if not is_valid_version_string(version, allow_none=True, allow_latest=True):
+        raise ValueError(f"Invalid version string: {version}. Use semantic versioning e.g. '1.0.0' or 'latest'")
+
+    return dataset_name, version
+
 
 def create_split_name_list_from_ratios(split_ratios: Dict[str, float], n_items: int, seed: int = 42) -> List[str]:
     samples_per_split = split_sizes_from_ratios(split_ratios=split_ratios, n_items=n_items)
@@ -0,0 +1,123 @@
+from enum import Enum
+from typing import List
+
+FILENAME_RECIPE_JSON = "recipe.json"
+FILENAME_DATASET_INFO = "dataset_info.json"
+FILENAME_ANNOTATIONS_JSONL = "annotations.jsonl"
+FILENAME_ANNOTATIONS_PARQUET = "annotations.parquet"
+
+DATASET_FILENAMES_REQUIRED = [
+    FILENAME_DATASET_INFO,
+    FILENAME_ANNOTATIONS_JSONL,
+    FILENAME_ANNOTATIONS_PARQUET,
+]
+
+
+class DeploymentStage(Enum):
+    STAGING = "staging"
+    PRODUCTION = "production"
+
+
+TAG_IS_SAMPLE = "sample"
+
+OPS_REMOVE_CLASS = "__REMOVE__"
+
+
+class PrimitiveField:
+    CLASS_NAME: str = "class_name"  # Name of the class this primitive is associated with, e.g. "car" for Bbox
+    CLASS_IDX: str = "class_idx"  # Index of the class this primitive is associated with, e.g. 0 for "car" if it is the first class  # noqa: E501
+    OBJECT_ID: str = "object_id"  # Unique identifier for the object, e.g. "12345123"
+    CONFIDENCE: str = "confidence"  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+
+    META: str = "meta"  # Contains metadata about each primitive, e.g. attributes color, occluded, iscrowd, etc.
+    TASK_NAME: str = "task_name"  # Name of the task this primitive is associated with, e.g. "bboxes" for Bbox
+
+    @staticmethod
+    def fields() -> List[str]:
+        """
+        Returns a list of expected field names for primitives.
+        """
+        return [
+            PrimitiveField.CLASS_NAME,
+            PrimitiveField.CLASS_IDX,
+            PrimitiveField.OBJECT_ID,
+            PrimitiveField.CONFIDENCE,
+            PrimitiveField.META,
+            PrimitiveField.TASK_NAME,
+        ]
+
+
+class SampleField:
+    FILE_PATH: str = "file_path"
+    HEIGHT: str = "height"
+    WIDTH: str = "width"
+    SPLIT: str = "split"
+    TAGS: str = "tags"
+
+    CLASSIFICATIONS: str = "classifications"
+    BBOXES: str = "bboxes"
+    BITMASKS: str = "bitmasks"
+    POLYGONS: str = "polygons"
+
+    STORAGE_FORMAT: str = "storage_format"  # E.g. "image", "video", "zip"
+    COLLECTION_INDEX: str = "collection_index"
+    COLLECTION_ID: str = "collection_id"
+    REMOTE_PATH: str = "remote_path"  # Path to the file in remote storage, e.g. S3
+    SAMPLE_INDEX: str = "sample_index"
+
+    ATTRIBUTION: str = "attribution"  # Attribution for the sample (image/video), e.g. creator, license, source, etc.
+    META: str = "meta"
+    DATASET_NAME: str = "dataset_name"
+
+
+class StorageFormat:
+    IMAGE: str = "image"
+    VIDEO: str = "video"
+    ZIP: str = "zip"
+
+
+class SplitName:
+    TRAIN: str = "train"
+    VAL: str = "validation"
+    TEST: str = "test"
+    UNDEFINED: str = "UNDEFINED"
+
+    @staticmethod
+    def valid_splits() -> List[str]:
+        return [SplitName.TRAIN, SplitName.VAL, SplitName.TEST]
+
+    @staticmethod
+    def all_split_names() -> List[str]:
+        return [*SplitName.valid_splits(), SplitName.UNDEFINED]
+
+    @staticmethod
+    def map_split_name(potential_split_name: str, strict: bool = True) -> str:
+        normalized = potential_split_name.strip().lower()
+
+        if normalized in SPLIT_NAME_MAPPINGS:
+            return SPLIT_NAME_MAPPINGS[normalized]
+
+        if strict:
+            raise ValueError(f"Unrecognized split name: {potential_split_name}")
+        else:
+            return SplitName.UNDEFINED
+
+
+SPLIT_NAME_MAPPINGS = {
+    # Train variations
+    "train": SplitName.TRAIN,
+    "training": SplitName.TRAIN,
+    # Validation variations
+    "validation": SplitName.VAL,
+    "val": SplitName.VAL,
+    "valid": SplitName.VAL,
+    # Test variations
+    "test": SplitName.TEST,
+    "testing": SplitName.TEST,
+}
+
+
+class DatasetVariant(Enum):
+    DUMP = "dump"
+    SAMPLE = "sample"
+    HIDDEN = "hidden"
@@ -11,14 +11,19 @@ from pydantic import (
 )
 
 from hafnia import utils
+from hafnia.dataset.dataset_helpers import dataset_name_and_version_from_string
 from hafnia.dataset.dataset_recipe import recipe_transforms
 from hafnia.dataset.dataset_recipe.recipe_types import (
     RecipeCreation,
     RecipeTransform,
     Serializable,
 )
-from hafnia.dataset.hafnia_dataset import
+from hafnia.dataset.hafnia_dataset import (
+    HafniaDataset,
+    available_dataset_versions_from_name,
+)
 from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.log import user_logger
 
 
 class DatasetRecipe(Serializable):

@@ -41,8 +46,31 @@ class DatasetRecipe(Serializable):
 
     ### Creation Methods (using the 'from_X' )###
     @staticmethod
-    def from_name(
-
+    def from_name(
+        name: str,
+        version: Optional[str] = None,
+        force_redownload: bool = False,
+        download_files: bool = True,
+    ) -> DatasetRecipe:
+        if version == "latest":
+            user_logger.info(
+                f"The dataset '{name}' in a dataset recipe uses 'latest' as version. For dataset recipes the "
+                "version is pinned to a specific version. Consider specifying a specific version to ensure "
+                "reproducibility of your experiments. "
+            )
+            available_versions = available_dataset_versions_from_name(name)
+            version = str(max(available_versions))
+        if version is None:
+            available_versions = available_dataset_versions_from_name(name)
+            str_versions = ", ".join([str(v) for v in available_versions])
+            raise ValueError(
+                f"Version must be specified when creating a DatasetRecipe from name. "
+                f"Available versions are: {str_versions}"
+            )
+
+        creation = FromName(
+            name=name, version=version, force_redownload=force_redownload, download_files=download_files
+        )
         return DatasetRecipe(creation=creation)
 
     @staticmethod

@@ -125,6 +153,21 @@ class DatasetRecipe(Serializable):
         recipe_id = recipe["id"]
         return DatasetRecipe.from_recipe_id(recipe_id)
 
+    @staticmethod
+    def from_name_and_version_string(string: str, resolve_missing_version: bool = False) -> "DatasetRecipe":
+        """
+        Validates and converts a dataset name and version string (name:version) to a DatasetRecipe.from_name recipe.
+        If version is missing and 'resolve_missing_version' is True, it will default to 'latest'.
+        If resolve_missing_version is False, it will raise an error if version is missing.
+        """
+
+        dataset_name, version = dataset_name_and_version_from_string(
+            string=string,
+            resolve_missing_version=resolve_missing_version,
+        )
+
+        return DatasetRecipe.from_name(name=dataset_name, version=version)
+
     @staticmethod
     def from_implicit_form(recipe: Any) -> DatasetRecipe:
         """

@@ -180,7 +223,7 @@ class DatasetRecipe(Serializable):
             return recipe
 
         if isinstance(recipe, str):  # str-type is convert to DatasetFromName
-            return DatasetRecipe.
+            return DatasetRecipe.from_name_and_version_string(string=recipe, resolve_missing_version=True)
 
         if isinstance(recipe, Path):  # Path-type is convert to DatasetFromPath
             return DatasetRecipe.from_path(path_folder=recipe)

@@ -409,6 +452,7 @@ class FromPath(RecipeCreation):
 
 class FromName(RecipeCreation):
     name: str
+    version: Optional[str] = None
     force_redownload: bool = False
     download_files: bool = True
 
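
For reference, an illustrative sketch (not part of the package diff) of how dataset versions are handled by the updated DatasetRecipe.from_name shown above; resolving 'latest' or listing available versions assumes a configured Hafnia connection:

from hafnia.dataset.dataset_recipe.dataset_recipe import DatasetRecipe

# Pin an explicit version for reproducible recipes.
recipe = DatasetRecipe.from_name(name="mnist", version="1.0.0")

# "latest" is resolved to the newest available version and then pinned (with an info log).
recipe = DatasetRecipe.from_name(name="mnist", version="latest")

# Omitting the version raises a ValueError that lists the available versions.
# The implicit string form now also accepts "name:version".
recipe = DatasetRecipe.from_implicit_form("mnist:1.0.0")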