hafnia 0.2.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. cli/__main__.py +16 -3
  2. cli/config.py +45 -4
  3. cli/consts.py +1 -1
  4. cli/dataset_cmds.py +6 -14
  5. cli/dataset_recipe_cmds.py +78 -0
  6. cli/experiment_cmds.py +226 -43
  7. cli/keychain.py +88 -0
  8. cli/profile_cmds.py +10 -6
  9. cli/runc_cmds.py +5 -5
  10. cli/trainer_package_cmds.py +65 -0
  11. hafnia/__init__.py +2 -0
  12. hafnia/data/factory.py +1 -2
  13. hafnia/dataset/dataset_helpers.py +9 -14
  14. hafnia/dataset/dataset_names.py +10 -5
  15. hafnia/dataset/dataset_recipe/dataset_recipe.py +165 -67
  16. hafnia/dataset/dataset_recipe/recipe_transforms.py +48 -4
  17. hafnia/dataset/dataset_recipe/recipe_types.py +1 -1
  18. hafnia/dataset/dataset_upload_helper.py +265 -56
  19. hafnia/dataset/format_conversions/image_classification_from_directory.py +106 -0
  20. hafnia/dataset/format_conversions/torchvision_datasets.py +281 -0
  21. hafnia/dataset/hafnia_dataset.py +577 -213
  22. hafnia/dataset/license_types.py +63 -0
  23. hafnia/dataset/operations/dataset_stats.py +259 -3
  24. hafnia/dataset/operations/dataset_transformations.py +332 -7
  25. hafnia/dataset/operations/table_transformations.py +43 -5
  26. hafnia/dataset/primitives/__init__.py +8 -0
  27. hafnia/dataset/primitives/bbox.py +25 -12
  28. hafnia/dataset/primitives/bitmask.py +26 -14
  29. hafnia/dataset/primitives/classification.py +16 -8
  30. hafnia/dataset/primitives/point.py +7 -3
  31. hafnia/dataset/primitives/polygon.py +16 -9
  32. hafnia/dataset/primitives/segmentation.py +10 -7
  33. hafnia/experiment/hafnia_logger.py +111 -8
  34. hafnia/http.py +16 -2
  35. hafnia/platform/__init__.py +9 -3
  36. hafnia/platform/builder.py +12 -10
  37. hafnia/platform/dataset_recipe.py +104 -0
  38. hafnia/platform/datasets.py +47 -9
  39. hafnia/platform/download.py +25 -19
  40. hafnia/platform/experiment.py +51 -56
  41. hafnia/platform/trainer_package.py +57 -0
  42. hafnia/utils.py +81 -13
  43. hafnia/visualizations/image_visualizations.py +4 -4
  44. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/METADATA +40 -34
  45. hafnia-0.4.0.dist-info/RECORD +56 -0
  46. cli/recipe_cmds.py +0 -45
  47. hafnia-0.2.4.dist-info/RECORD +0 -49
  48. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/WHEEL +0 -0
  49. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/entry_points.txt +0 -0
  50. {hafnia-0.2.4.dist-info → hafnia-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -29,19 +29,27 @@ HafniaDataset class and a RecipeTransform class in the `data_recipe/recipe_trans
 that the signatures match.
 """
 
+import json
+import re
+import textwrap
 from pathlib import Path
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Type, Union
 
 import cv2
+import more_itertools
 import numpy as np
 import polars as pl
 from PIL import Image
-from tqdm import tqdm
+from rich.progress import track
 
 from hafnia.dataset import dataset_helpers
+from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS, ColumnName, FieldName
+from hafnia.dataset.primitives import get_primitive_type_from_string
+from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.utils import remove_duplicates_preserve_order
 
-if TYPE_CHECKING:
-    from hafnia.dataset.hafnia_dataset import HafniaDataset
+if TYPE_CHECKING:  # Using 'TYPE_CHECKING' to avoid circular imports during type checking
+    from hafnia.dataset.hafnia_dataset import HafniaDataset, TaskInfo
 
 
 ### Image transformations ###
@@ -65,7 +73,8 @@ def transform_images(
     path_image_folder = path_output / "data"
     path_image_folder.mkdir(parents=True, exist_ok=True)
 
-    for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="Transform images"):
+    org_paths = dataset.samples[ColumnName.FILE_PATH].to_list()
+    for org_path in track(org_paths, description="Transform images"):
         org_path = Path(org_path)
         if not org_path.exists():
             raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
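The release swaps tqdm for rich throughout the package; the change is close to drop-in, with tqdm's `desc=` becoming rich's `description=`. A minimal sketch of the new pattern (file names hypothetical):

    from rich.progress import track

    for file_name in track(["a.jpg", "b.jpg"], description="Transform images"):
        ...  # process each file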
@@ -78,5 +87,321 @@ def transform_images(
             raise FileNotFoundError(f"Transformed file {new_path} does not exist in the dataset.")
         new_paths.append(str(new_path))
 
-    table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
-    return dataset.update_table(table)
+    table = dataset.samples.with_columns(pl.Series(new_paths).alias(ColumnName.FILE_PATH))
+    return dataset.update_samples(table)
+
+
+def get_task_info_from_task_name_and_primitive(
+    tasks: List["TaskInfo"],
+    task_name: Optional[str] = None,
+    primitive: Union[None, str, Type[Primitive]] = None,
+) -> "TaskInfo":
+    if len(tasks) == 0:
+        raise ValueError("Dataset has no tasks defined.")
+
+    tasks_str = "\n".join([f"\t{task.__repr__()}" for task in tasks])
+    if task_name is None and primitive is None:
+        if len(tasks) == 1:
+            return tasks[0]
+        else:
+            raise ValueError(
+                "For multiple tasks, you will need to specify 'task_name' or 'primitive' "
+                "to return a unique task. The dataset contains the following tasks: \n" + tasks_str
+            )
+
+    if isinstance(primitive, str):
+        primitive = get_primitive_type_from_string(primitive)
+
+    tasks_filtered = tasks
+    if primitive is None:
+        tasks_filtered = [task for task in tasks if task.name == task_name]
+
+        if len(tasks_filtered) == 0:
+            raise ValueError(f"No task found with {task_name=}. Available tasks: \n {tasks_str}")
+
+        unique_primitives = set(task.primitive for task in tasks_filtered)
+        if len(unique_primitives) > 1:
+            raise ValueError(
+                f"Found multiple tasks with {task_name=} using different primitives {unique_primitives=}. "
+                "Please specify the primitive type to make it unique. "
+                f"The dataset contains the following tasks: \n {tasks_str}"
+            )
+        primitive = list(unique_primitives)[0]
+
+    if task_name is None:
+        tasks_filtered = [task for task in tasks if task.primitive == primitive]
+        if len(tasks_filtered) == 0:
+            raise ValueError(f"No task found with {primitive=}. Available tasks: \n {tasks_str}")
+
+        unique_task_names = set(task.name for task in tasks_filtered)
+        if len(unique_task_names) > 1:
+            raise ValueError(
+                f"Found multiple tasks with {primitive=} using different task names {unique_task_names=}. "
+                "Please specify the 'task_name' to make it unique. "
+                f"The dataset contains the following tasks: \n {tasks_str}"
+            )
+        task_name = list(unique_task_names)[0]
+
+    tasks_filtered = [task for task in tasks_filtered if task.primitive == primitive and task.name == task_name]
+    if len(tasks_filtered) == 0:
+        raise ValueError(f"No task found with {task_name=} and {primitive=}. Available tasks: \n {tasks_str}")
+
+    if len(tasks_filtered) > 1:
+        raise ValueError(
+            f"Multiple tasks found with {task_name=} and {primitive=}. "
+            f"This should never happen. The dataset contains the following tasks: \n {tasks_str}"
+        )
+    task = tasks_filtered[0]
+    return task
+
+
+def class_mapper(
+    dataset: "HafniaDataset",
+    class_mapping: Union[Dict[str, str], List[Tuple[str, str]]],
+    method: str = "strict",
+    primitive: Optional[Type[Primitive]] = None,
+    task_name: Optional[str] = None,
+) -> "HafniaDataset":
+    from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+    if isinstance(class_mapping, list):
+        class_mapping = dict(class_mapping)
+
+    allowed_methods = ("strict", "remove_undefined", "keep_undefined")
+    if method not in allowed_methods:
+        raise ValueError(f"Method '{method}' is not recognized. Allowed methods are: {allowed_methods}")
+
+    task = dataset.info.get_task_by_task_name_and_primitive(task_name=task_name, primitive=primitive)
+    current_names = task.class_names or []
+
+    # Expand wildcard mappings e.g. {"Vehicle.*": "Vehicle"} to {"Vehicle.Car": "Vehicle", "Vehicle.Bus": "Vehicle"}
+    class_mapping = expand_class_mapping(class_mapping, current_names)
+
+    non_existing_mapping_names = set(class_mapping) - set(current_names)
+    if len(non_existing_mapping_names) > 0:
+        raise ValueError(
+            f"The specified class mapping contains class names {list(non_existing_mapping_names)} "
+            f"that do not exist in the dataset task '{task.name}'. "
+            f"Available class names: {current_names}"
+        )
+
+    missing_class_names = [c for c in current_names if c not in class_mapping]  # List comprehension preserves order
+    class_mapping = class_mapping.copy()
+    if method == "strict":
+        pass  # Continue to the strict-mapping check below
+    elif method == "remove_undefined":
+        for missing_class_name in missing_class_names:
+            class_mapping[missing_class_name] = OPS_REMOVE_CLASS
+    elif method == "keep_undefined":
+        for missing_class_name in missing_class_names:
+            class_mapping[missing_class_name] = missing_class_name
+    else:
+        raise ValueError(f"Method '{method}' is not recognized. Allowed methods are: {allowed_methods}")
+
+    missing_class_names = [c for c in current_names if c not in class_mapping]
+    if len(missing_class_names) > 0:
+        error_msg = f"""\
+        The specified class mapping is not a strict mapping - meaning that not all class names
+        have been mapped to a new class name.
+        In the current mapping, the following classes {list(missing_class_names)} have not been mapped.
+        The currently specified mapping is:
+        {json.dumps(class_mapping, indent=2)}
+        A strict mapping maps all old class names (dictionary keys) to new class names (dictionary values).
+        Please update the mapping to include all class names from the dataset task '{task.name}'.
+        To keep a class, map it to the same name, e.g. 'person': 'person',
+        or remove a class by mapping it to '__REMOVE__', e.g. 'person': '__REMOVE__'."""
+        raise ValueError(textwrap.dedent(error_msg))
+
+    new_class_names = remove_duplicates_preserve_order(class_mapping.values())
+
+    if OPS_REMOVE_CLASS in new_class_names:
+        # Move __REMOVE__ to the end of the list if it exists
+        new_class_names.append(new_class_names.pop(new_class_names.index(OPS_REMOVE_CLASS)))
+
+    samples = dataset.samples
+    samples_updated = samples.with_columns(
+        pl.col(task.primitive.column_name())
+        .list.eval(
+            pl.element().struct.with_fields(
+                pl.when(pl.field(FieldName.TASK_NAME) == task.name)
+                .then(pl.field(FieldName.CLASS_NAME).replace_strict(class_mapping))
+                .otherwise(pl.field(FieldName.CLASS_NAME))
+                .alias(FieldName.CLASS_NAME)
+            )
+        )
+        .alias(task.primitive.column_name())
+    )
+
+    # Update class indices too
+    name_2_idx_mapping: Dict[str, int] = {name: idx for idx, name in enumerate(new_class_names)}
+    samples_updated = samples_updated.with_columns(
+        pl.col(task.primitive.column_name())
+        .list.eval(
+            pl.element().struct.with_fields(
+                pl.when(pl.field(FieldName.TASK_NAME) == task.name)
+                .then(pl.field(FieldName.CLASS_NAME).replace_strict(name_2_idx_mapping))
+                .otherwise(pl.field(FieldName.CLASS_IDX))
+                .alias(FieldName.CLASS_IDX)
+            )
+        )
+        .alias(task.primitive.column_name())
+    )
+
+    if OPS_REMOVE_CLASS in new_class_names:  # Remove class_names that are mapped to OPS_REMOVE_CLASS
+        samples_updated = samples_updated.with_columns(
+            pl.col(task.primitive.column_name())
+            .list.filter(pl.element().struct.field(FieldName.CLASS_NAME) != OPS_REMOVE_CLASS)
+            .alias(task.primitive.column_name())
+        )
+
+    new_class_names = [c for c in new_class_names if c != OPS_REMOVE_CLASS]
+
+    new_task = task.model_copy(deep=True)
+    new_task.class_names = new_class_names
+    dataset_info = dataset.info.replace_task(old_task=task, new_task=new_task)
+    return HafniaDataset(info=dataset_info, samples=samples_updated)
+
+
+def expand_class_mapping(wildcard_mapping: Dict[str, str], class_names: List[str]) -> Dict[str, str]:
+    """
+    Expand a wildcard class mapping to a full explicit mapping.
+
+    This function takes a mapping that may contain wildcard patterns (using '*')
+    and expands them to match actual class names from a dataset. Exact matches
+    take precedence over wildcard patterns.
+
+    Examples:
+        >>> from hafnia.dataset.dataset_names import OPS_REMOVE_CLASS
+        >>> wildcard_mapping = {
+        ...     "Person": "Person",
+        ...     "Vehicle.*": "Vehicle",
+        ...     "Vehicle.Trailer": OPS_REMOVE_CLASS
+        ... }
+        >>> class_names = [
+        ...     "Person", "Vehicle.Car", "Vehicle.Trailer", "Vehicle.Bus", "Animal.Dog"
+        ... ]
+        >>> result = expand_class_mapping(wildcard_mapping, class_names)
+        >>> print(result)
+        {
+            "Person": "Person",
+            "Vehicle.Car": "Vehicle",
+            "Vehicle.Trailer": OPS_REMOVE_CLASS,  # Exact match overrides wildcard
+            "Vehicle.Bus": "Vehicle",
+            # Note: "Animal.Dog" is not included as it doesn't match any pattern
+        }
+    """
+    expanded_mapping = {}
+    for match_pattern, mapping_value in wildcard_mapping.items():
+        if "*" in match_pattern:
+            # Convert wildcard pattern to regex: escape special regex characters except *, then replace * with .*
+            regex_pattern = re.escape(match_pattern).replace("\\*", ".*")
+            class_names_matched = [cn for cn in class_names if re.fullmatch(regex_pattern, cn)]
+            expanded_mapping.update({cn: mapping_value for cn in class_names_matched})
+        else:
+            expanded_mapping.pop(match_pattern, None)
+            expanded_mapping[match_pattern] = mapping_value
+    return expanded_mapping
+
+
+def rename_task(
+    dataset: "HafniaDataset",
+    old_task_name: str,
+    new_task_name: str,
+) -> "HafniaDataset":
+    from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+    old_task = dataset.info.get_task_by_name(task_name=old_task_name)
+    new_task = old_task.model_copy(deep=True)
+    new_task.name = new_task_name
+    samples = dataset.samples.with_columns(
+        pl.col(old_task.primitive.column_name())
+        .list.eval(
+            pl.element().struct.with_fields(
+                pl.field(FieldName.TASK_NAME).replace(old_task.name, new_task.name).alias(FieldName.TASK_NAME)
+            )
+        )
+        .alias(new_task.primitive.column_name())
+    )
+
+    dataset_info = dataset.info.replace_task(old_task=old_task, new_task=new_task)
+    return HafniaDataset(info=dataset_info, samples=samples)
+
+
+def select_samples_by_class_name(
+    dataset: "HafniaDataset",
+    name: Union[List[str], str],
+    task_name: Optional[str] = None,
+    primitive: Optional[Type[Primitive]] = None,
+) -> "HafniaDataset":
+    task, class_names = _validate_inputs_select_samples_by_class_name(
+        dataset=dataset,
+        name=name,
+        task_name=task_name,
+        primitive=primitive,
+    )
+
+    samples = dataset.samples.filter(
+        pl.col(task.primitive.column_name())
+        .list.eval(
+            pl.element().struct.field(FieldName.CLASS_NAME).is_in(class_names)
+            & (pl.element().struct.field(FieldName.TASK_NAME) == task.name)
+        )
+        .list.any()
+    )
+
+    dataset_updated = dataset.update_samples(samples)
+    return dataset_updated
+
+
+def _validate_inputs_select_samples_by_class_name(
+    dataset: "HafniaDataset",
+    name: Union[List[str], str],
+    task_name: Optional[str] = None,
+    primitive: Optional[Type[Primitive]] = None,
+) -> Tuple["TaskInfo", List[str]]:
+    if isinstance(name, str):
+        name = [name]
+    names = list(name)
+
+    # Check that specified names are available in at least one of the tasks
+    available_names_across_tasks = set(more_itertools.flatten([t.class_names for t in dataset.info.tasks]))
+    missing_class_names_across_tasks = set(names) - available_names_across_tasks
+    if len(missing_class_names_across_tasks) > 0:
+        raise ValueError(
+            f"The specified names {list(names)} have not been found in any of the tasks. "
+            f"Available class names: {available_names_across_tasks}"
+        )
+
+    # Auto-infer task if task_name and primitive are not provided
+    if task_name is None and primitive is None:
+        tasks_with_names = [t for t in dataset.info.tasks if set(names).issubset(t.class_names or [])]
+        if len(tasks_with_names) == 0:
+            raise ValueError(
+                f"The specified names {names} have not been found in any of the tasks. "
+                f"Available class names: {available_names_across_tasks}"
+            )
+        if len(tasks_with_names) > 1:
+            raise ValueError(
+                f"Found multiple tasks containing the specified names {names}. "
+                f"Specify either 'task_name' or 'primitive' to only select from one task. "
+                f"Tasks containing all provided names: {[t.name for t in tasks_with_names]}"
+            )
+
+        task = tasks_with_names[0]
+
+    else:
+        task = get_task_info_from_task_name_and_primitive(
+            tasks=dataset.info.tasks,
+            task_name=task_name,
+            primitive=primitive,
+        )
+
+    task_class_names = set(task.class_names or [])
+    missing_class_names = set(names) - task_class_names
+    if len(missing_class_names) > 0:
+        raise ValueError(
+            f"The specified names {list(missing_class_names)} have not been found for the '{task.name}' task. "
+            f"Available class names: {task_class_names}"
+        )
+
+    return task, names
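Together these additions form a class-remapping workflow: expand wildcards, remap names and indices, drop removed classes, then filter samples. A hedged usage sketch (dataset and class names hypothetical; assumes the functions are called directly from the module, with OPS_REMOVE_CLASS == "__REMOVE__" as the error message above suggests):

    from hafnia.dataset.operations import dataset_transformations

    # Collapse fine-grained vehicle classes and drop trailers. "strict" requires
    # every current class to be mapped; wildcards are expanded via expand_class_mapping.
    dataset = dataset_transformations.class_mapper(
        dataset,
        class_mapping={"Person": "Person", "Vehicle.*": "Vehicle", "Vehicle.Trailer": "__REMOVE__"},
        method="strict",
    )

    # Keep only samples that contain at least one "Person" annotation.
    dataset = dataset_transformations.select_samples_by_class_name(dataset, name="Person")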
@@ -2,11 +2,12 @@ from pathlib import Path
 from typing import List, Optional, Type
 
 import polars as pl
-from tqdm import tqdm
+from rich.progress import track
 
 from hafnia.dataset.dataset_names import (
     FILENAME_ANNOTATIONS_JSONL,
     FILENAME_ANNOTATIONS_PARQUET,
+    ColumnName,
     FieldName,
 )
 from hafnia.dataset.operations import table_transformations
@@ -34,8 +35,12 @@ def create_primitive_table(
 
     if keep_sample_data:
         # Drop other primitive columns to avoid conflicts
-        drop_columns = set(PRIMITIVE_TYPES) - {PrimitiveType, Classification}
-        remove_no_object_frames = remove_no_object_frames.drop(*[primitive.column_name() for primitive in drop_columns])
+
+        drop_columns_primitives = set(PRIMITIVE_TYPES) - {PrimitiveType, Classification}
+        drop_columns_names = [primitive.column_name() for primitive in drop_columns_primitives]
+        drop_columns_names = [c for c in drop_columns_names if c in remove_no_object_frames.columns]
+
+        remove_no_object_frames = remove_no_object_frames.drop(drop_columns_names)
         # Rename columns "height", "width" and "meta" for sample to avoid conflicts with object fields names
         remove_no_object_frames = remove_no_object_frames.rename(
             {"height": "image.height", "width": "image.width", "meta": "image.meta"}
@@ -46,6 +51,38 @@ def create_primitive_table(
     return objects_df
 
 
+def merge_samples(samples0: pl.DataFrame, samples1: pl.DataFrame) -> pl.DataFrame:
+    has_same_schema = samples0.schema == samples1.schema
+    if not has_same_schema:
+        shared_columns = []
+        for column_name, column_type in samples0.schema.items():
+            if column_name not in samples1.schema:
+                continue
+
+            if column_type != samples1.schema[column_name]:
+                continue
+            shared_columns.append(column_name)
+
+        dropped_columns0 = [
+            f"{n}[{ctype._string_repr()}]" for n, ctype in samples0.schema.items() if n not in shared_columns
+        ]
+        dropped_columns1 = [
+            f"{n}[{ctype._string_repr()}]" for n, ctype in samples1.schema.items() if n not in shared_columns
+        ]
+        user_logger.warning(
+            "Datasets with different schemas are being merged. "
+            "Only the columns with the same name and type will be kept in the merged dataset.\n"
+            f"Dropped columns in samples0: {dropped_columns0}\n"
+            f"Dropped columns in samples1: {dropped_columns1}\n"
+        )
+
+        samples0 = samples0.select(list(shared_columns))
+        samples1 = samples1.select(list(shared_columns))
+    merged_samples = pl.concat([samples0, samples1], how="vertical")
+    merged_samples = merged_samples.drop(ColumnName.SAMPLE_INDEX).with_row_index(name=ColumnName.SAMPLE_INDEX)
+    return merged_samples
+
+
 def filter_table_for_class_names(
     samples_table: pl.DataFrame, class_names: List[str], PrimitiveType: Type[Primitive]
 ) -> Optional[pl.DataFrame]:
@@ -107,7 +144,7 @@ def split_primitive_columns_by_task_name(
     return samples_table
 
 
-def read_table_from_path(path: Path) -> pl.DataFrame:
+def read_samples_from_path(path: Path) -> pl.DataFrame:
     path_annotations = path / FILENAME_ANNOTATIONS_PARQUET
     if path_annotations.exists():
        user_logger.info(f"Reading dataset annotations from Parquet file: {path_annotations}")
@@ -125,7 +162,8 @@ def read_table_from_path(path: Path) -> pl.DataFrame:
 
 def check_image_paths(table: pl.DataFrame) -> bool:
     missing_files = []
-    for org_path in tqdm(table["file_name"].to_list(), desc="Check image paths"):
+    org_paths = table[ColumnName.FILE_PATH].to_list()
+    for org_path in track(org_paths, description="Check image paths"):
        org_path = Path(org_path)
        if not org_path.exists():
            missing_files.append(org_path)
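The new `merge_samples` concatenates two sample tables, intersecting columns when schemas differ and rebuilding the sample index. A minimal sketch (column names hypothetical; assumes ColumnName.SAMPLE_INDEX == "sample_index"):

    import polars as pl

    from hafnia.dataset.operations.table_transformations import merge_samples

    samples0 = pl.DataFrame({"sample_index": [0, 1], "file_name": ["a.jpg", "b.jpg"]})
    samples1 = pl.DataFrame({"sample_index": [0], "file_name": ["c.jpg"]})
    merged = merge_samples(samples0, samples1)  # 3 rows, sample_index renumbered 0..2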
@@ -14,3 +14,11 @@ from .utils import class_color_by_name # noqa: F401
 PRIMITIVE_TYPES: List[Type[Primitive]] = [Bbox, Classification, Polygon, Bitmask]
 PRIMITIVE_NAME_TO_TYPE = {cls.__name__: cls for cls in PRIMITIVE_TYPES}
 PRIMITIVE_COLUMN_NAMES: List[str] = [PrimitiveType.column_name() for PrimitiveType in PRIMITIVE_TYPES]
+
+
+def get_primitive_type_from_string(name: str) -> Type[Primitive]:
+    if name not in PRIMITIVE_NAME_TO_TYPE:
+        raise ValueError(
+            f"Primitive '{name}' is not recognized. Available primitives: {list(PRIMITIVE_NAME_TO_TYPE.keys())}"
+        )
+    return PRIMITIVE_NAME_TO_TYPE[name]
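This lookup resolves a primitive class from its string name, which is what lets functions such as `class_mapper` accept `primitive` as either a string or a type. A short usage sketch:

    from hafnia.dataset.primitives import Bbox, get_primitive_type_from_string

    assert get_primitive_type_from_string("Bbox") is Bbox
    # get_primitive_type_from_string("Cuboid")  # would raise ValueError listing Bbox, Classification, Polygon, Bitmask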
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union
 
 import cv2
 import numpy as np
+from pydantic import Field
 
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.dataset.primitives.utils import (
@@ -17,18 +18,30 @@ from hafnia.dataset.primitives.utils import (
 
 class Bbox(Primitive):
     # Names should match names in FieldName
-    height: float  # Height of the bounding box as a fraction of the image height, e.g. 0.1 for 10% of the image height
-    width: float  # Width of the bounding box as a fraction of the image width, e.g. 0.1 for 10% of the image width
-    top_left_x: float  # X coordinate of top-left corner of Bbox as a fraction of the image width, e.g. 0.1 for 10% of the image width
-    top_left_y: float  # Y coordinate of top-left corner of Bbox as a fraction of the image height, e.g. 0.1 for 10% of the image height
-    class_name: Optional[str] = None  # Class name, e.g. "car"
-    class_idx: Optional[int] = None  # Class index, e.g. 0 for "car" if it is the first class
-    object_id: Optional[str] = None  # Unique identifier for the object, e.g. "12345123"
-    confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
-    ground_truth: bool = True  # Whether this is ground truth or a prediction
-
-    task_name: str = ""  # Task name to support multiple Bbox tasks in the same dataset. "" defaults to "bboxes"
-    meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bbox
+    height: float = Field(
+        description="Normalized height of the bounding box (0.0=no height, 1.0=full image height) as a fraction of image height"
+    )
+    width: float = Field(
+        description="Normalized width of the bounding box (0.0=no width, 1.0=full image width) as a fraction of image width"
+    )
+    top_left_x: float = Field(
+        description="Normalized x-coordinate of top-left corner (0.0=left edge, 1.0=right edge) as a fraction of image width"
+    )
+    top_left_y: float = Field(
+        description="Normalized y-coordinate of top-left corner (0.0=top edge, 1.0=bottom edge) as a fraction of image height"
+    )
+    class_name: Optional[str] = Field(default=None, description="Class name, e.g. 'car'")
+    class_idx: Optional[int] = Field(default=None, description="Class index, e.g. 0 for 'car' if it is the first class")
+    object_id: Optional[str] = Field(default=None, description="Unique identifier for the object, e.g. '12345123'")
+    confidence: Optional[float] = Field(
+        default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
+    )
+    ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
+
+    task_name: str = Field(
+        default="", description="Task name to support multiple Bbox tasks in the same dataset. '' defaults to 'bboxes'"
+    )
+    meta: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata for the annotation")
 
     @staticmethod
     def default_task_name() -> str:
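The rewritten field descriptions pin down the convention: Bbox stores fractions of image size, not pixels. A hedged sketch converting a pixel-space box (constructor usage as implied by the pydantic fields; values hypothetical):

    from hafnia.dataset.primitives.bbox import Bbox

    img_h, img_w = 720, 1280        # image size in pixels
    x, y, w, h = 640, 180, 128, 72  # pixel-space box: top-left x/y, width, height

    bbox = Bbox(
        top_left_x=x / img_w,  # 0.5
        top_left_y=y / img_h,  # 0.25
        width=w / img_w,       # 0.1
        height=h / img_h,      # 0.1
        class_name="car",
    )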
@@ -5,7 +5,9 @@ from typing import Any, Dict, Optional, Tuple
 import cv2
 import numpy as np
 import pycocotools.mask as coco_mask
+from pydantic import Field
 
+from hafnia.dataset.dataset_names import FieldName
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.dataset.primitives.utils import (
     anonymize_by_resizing,
@@ -14,23 +16,33 @@ from hafnia.dataset.primitives.utils import (
     text_org_from_left_bottom_to_centered,
 )
 
+FieldName
+
 
 class Bitmask(Primitive):
     # Names should match names in FieldName
-    top: int  # Bitmask top coordinate in pixels
-    left: int  # Bitmask left coordinate in pixels
-    height: int  # Bitmask height of the bounding box in pixels
-    width: int  # Bitmask width of the bounding box in pixels
-    rleString: str  # Run-length encoding (RLE) string for the bitmask region of size (height, width) at (top, left).
-    area: Optional[float] = None  # Area of the bitmask in pixels is calculated from the RLE string
-    class_name: Optional[str] = None  # This should match the string in 'FieldName.CLASS_NAME'
-    class_idx: Optional[int] = None  # This should match the string in 'FieldName.CLASS_IDX'
-    object_id: Optional[str] = None  # This should match the string in 'FieldName.OBJECT_ID'
-    confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
-    ground_truth: bool = True  # Whether this is ground truth or a prediction
-
-    task_name: str = ""  # Task name to support multiple Bitmask tasks in the same dataset. "" defaults to "bitmask"
-    meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bitmask
+    top: int = Field(description="Bitmask top coordinate in pixels")
+    left: int = Field(description="Bitmask left coordinate in pixels")
+    height: int = Field(description="Bitmask height of the bounding box in pixels")
+    width: int = Field(description="Bitmask width of the bounding box in pixels")
+    rleString: str = Field(
+        description="Run-length encoding (RLE) string for the bitmask region of size (height, width) at (top, left)."
+    )
+    area: Optional[float] = Field(
+        default=None, description="Area of the bitmask in pixels, calculated from the RLE string"
+    )
+    class_name: Optional[str] = Field(default=None, description="Class name of the object represented by the bitmask")
+    class_idx: Optional[int] = Field(default=None, description="Class index of the object represented by the bitmask")
+    object_id: Optional[str] = Field(default=None, description="Object ID of the instance represented by the bitmask")
+    confidence: Optional[float] = Field(
+        default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
+    )
+    ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
+
+    task_name: str = Field(
+        default="", description="Task name to support multiple Bitmask tasks in the same dataset. Defaults to 'bitmask'"
+    )
+    meta: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata for the annotation")
 
     @staticmethod
     def default_task_name() -> str:
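Bitmask stores a crop-local RLE string plus the crop's pixel offset and size. A hedged sketch of producing those fields with pycocotools (mask content hypothetical; assumes rleString holds the COCO 'counts' string):

    import numpy as np
    import pycocotools.mask as coco_mask

    from hafnia.dataset.primitives.bitmask import Bitmask

    crop = np.zeros((32, 48), dtype=np.uint8)        # binary mask for the (height, width) crop
    crop[8:24, 12:36] = 1
    rle = coco_mask.encode(np.asfortranarray(crop))  # pycocotools expects Fortran order

    bitmask = Bitmask(
        top=100, left=200,    # crop offset in the full image, in pixels
        height=32, width=48,  # crop size in pixels
        rleString=rle["counts"].decode("utf-8"),
        area=float(coco_mask.area(rle)),
        class_name="person",
    )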
@@ -1,6 +1,7 @@
 from typing import Any, Dict, Optional, Tuple
 
 import numpy as np
+from pydantic import Field
 
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.dataset.primitives.utils import anonymize_by_resizing, get_class_name
@@ -8,14 +9,21 @@ from hafnia.dataset.primitives.utils import anonymize_by_resizing, get_class_nam
 
 class Classification(Primitive):
     # Names should match names in FieldName
-    class_name: Optional[str] = None  # Class name, e.g. "car"
-    class_idx: Optional[int] = None  # Class index, e.g. 0 for "car" if it is the first class
-    object_id: Optional[str] = None  # Unique identifier for the object, e.g. "12345123"
-    confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification
-    ground_truth: bool = True  # Whether this is ground truth or a prediction
+    class_name: Optional[str] = Field(default=None, description="Class name, e.g. 'car'")
+    class_idx: Optional[int] = Field(default=None, description="Class index, e.g. 0 for 'car' if it is the first class")
+    object_id: Optional[str] = Field(default=None, description="Unique identifier for the object, e.g. '12345123'")
+    confidence: Optional[float] = Field(
+        default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification"
+    )
+    ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")
 
-    task_name: str = ""  # To support multiple Classification tasks in the same dataset. "" defaults to "classification"
-    meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the classification
+    task_name: str = Field(
+        default="",
+        description="To support multiple Classification tasks in the same dataset. '' defaults to 'classification'",
+    )
+    meta: Optional[Dict[str, Any]] = Field(
+        default=None, description="This can be used to store additional information about the classification"
+    )
 
     @staticmethod
     def default_task_name() -> str:
@@ -38,7 +46,7 @@ class Classification(Primitive):
             text = class_name
         else:
             text = f"{self.task_name}: {class_name}"
-        image = image_visualizations.append_text_below_frame(image, text=text)
+        image = image_visualizations.append_text_below_frame(image, text=text, text_size_ratio=0.05)
 
         return image
 
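Classification keeps the same metadata surface as the other primitives; a minimal construction sketch (values hypothetical):

    from hafnia.dataset.primitives.classification import Classification

    prediction = Classification(class_name="cat", class_idx=0, confidence=0.93, ground_truth=False)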
@@ -1,13 +1,17 @@
 from typing import Any, Tuple
 
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 from hafnia.dataset.primitives.utils import clip
 
 
 class Point(BaseModel):
-    x: float
-    y: float
+    x: float = Field(
+        description="Normalized x-coordinate (0.0=left edge, 1.0=right edge) relative to image width",
+    )
+    y: float = Field(
+        description="Normalized y-coordinate (0.0=top edge, 1.0=bottom edge) relative to image height",
+    )
 
     def to_pixel_coordinates(
         self, image_shape: Tuple[int, int], as_int: bool = True, clip_values: bool = True
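Point follows the same normalized convention, and `to_pixel_coordinates` maps back to pixel space. A hedged sketch (assuming image_shape is (height, width) and the result is an (x, y) pair):

    from hafnia.dataset.primitives.point import Point

    point = Point(x=0.5, y=0.25)
    # With image_shape=(720, 1280): x -> 0.5 * 1280 = 640, y -> 0.25 * 720 = 180
    pixel_point = point.to_pixel_coordinates(image_shape=(720, 1280))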