hafnia 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hafnia/__init__.py +1 -1
- hafnia/dataset/dataset_names.py +128 -15
- hafnia/dataset/dataset_upload_helper.py +30 -25
- hafnia/dataset/format_conversions/{image_classification_from_directory.py → format_image_classification_folder.py} +14 -10
- hafnia/dataset/format_conversions/format_yolo.py +164 -0
- hafnia/dataset/format_conversions/torchvision_datasets.py +10 -4
- hafnia/dataset/hafnia_dataset.py +246 -72
- hafnia/dataset/operations/dataset_stats.py +82 -70
- hafnia/dataset/operations/dataset_transformations.py +102 -37
- hafnia/dataset/operations/table_transformations.py +132 -15
- hafnia/dataset/primitives/bbox.py +3 -5
- hafnia/dataset/primitives/bitmask.py +2 -7
- hafnia/dataset/primitives/classification.py +3 -3
- hafnia/dataset/primitives/polygon.py +2 -4
- hafnia/dataset/primitives/primitive.py +1 -1
- hafnia/dataset/primitives/segmentation.py +2 -2
- hafnia/platform/datasets.py +3 -7
- hafnia/platform/download.py +1 -72
- hafnia/torch_helpers.py +12 -12
- hafnia/visualizations/image_visualizations.py +2 -0
- {hafnia-0.4.0.dist-info → hafnia-0.4.1.dist-info}/METADATA +4 -4
- {hafnia-0.4.0.dist-info → hafnia-0.4.1.dist-info}/RECORD +25 -24
- {hafnia-0.4.0.dist-info → hafnia-0.4.1.dist-info}/WHEEL +0 -0
- {hafnia-0.4.0.dist-info → hafnia-0.4.1.dist-info}/entry_points.txt +0 -0
- {hafnia-0.4.0.dist-info → hafnia-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List, Optional, Type
+from typing import TYPE_CHECKING, List, Optional, Tuple, Type

 import polars as pl
 from rich.progress import track
@@ -7,8 +7,8 @@ from rich.progress import track
 from hafnia.dataset.dataset_names import (
     FILENAME_ANNOTATIONS_JSONL,
     FILENAME_ANNOTATIONS_PARQUET,
-
-
+    PrimitiveField,
+    SampleField,
 )
 from hafnia.dataset.operations import table_transformations
 from hafnia.dataset.primitives import PRIMITIVE_TYPES
@@ -16,9 +16,15 @@ from hafnia.dataset.primitives.classification import Classification
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.log import user_logger

+if TYPE_CHECKING:
+    from hafnia.dataset.hafnia_dataset import TaskInfo
+

 def create_primitive_table(
-    samples_table: pl.DataFrame,
+    samples_table: pl.DataFrame,
+    PrimitiveType: Type[Primitive],
+    keep_sample_data: bool = False,
+    task_name: Optional[str] = None,
 ) -> Optional[pl.DataFrame]:
     """
     Returns a DataFrame with objects of the specified primitive type.
@@ -48,6 +54,9 @@ def create_primitive_table(
         objects_df = remove_no_object_frames.explode(column_name).unnest(column_name)
     else:
         objects_df = remove_no_object_frames.select(pl.col(column_name).explode().struct.unnest())
+
+    if task_name is not None:
+        objects_df = objects_df.filter(pl.col(PrimitiveField.TASK_NAME) == task_name)
     return objects_df


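For orientation, a minimal usage sketch of the new task_name argument on create_primitive_table. The helper's module path is inferred from the changed-files list above (table_transformations.py) rather than shown verbatim in this diff, and the dataset path is a placeholder.

from pathlib import Path

from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.dataset.operations.table_transformations import create_primitive_table  # inferred module path
from hafnia.dataset.primitives.bbox import Bbox

dataset = HafniaDataset.from_path(Path("data/my-dataset"), check_for_images=False)  # placeholder path
# Keep only Bbox objects that belong to the "object_detection" task.
bboxes = create_primitive_table(dataset.samples, PrimitiveType=Bbox, task_name="object_detection")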
@@ -55,11 +64,12 @@ def merge_samples(samples0: pl.DataFrame, samples1: pl.DataFrame) -> pl.DataFram
     has_same_schema = samples0.schema == samples1.schema
     if not has_same_schema:
         shared_columns = []
-        for column_name,
+        for column_name, s0_column_type in samples0.schema.items():
             if column_name not in samples1.schema:
                 continue
+            samples0, samples1 = correction_of_list_struct_primitives(samples0, samples1, column_name)

-            if
+            if samples0.schema[column_name] != samples1.schema[column_name]:
                 continue
             shared_columns.append(column_name)

@@ -79,16 +89,58 @@ def merge_samples(samples0: pl.DataFrame, samples1: pl.DataFrame) -> pl.DataFram
     samples0 = samples0.select(list(shared_columns))
     samples1 = samples1.select(list(shared_columns))
     merged_samples = pl.concat([samples0, samples1], how="vertical")
-    merged_samples = merged_samples
+    merged_samples = add_sample_index(merged_samples)
     return merged_samples


+def correction_of_list_struct_primitives(
+    samples0: pl.DataFrame,
+    samples1: pl.DataFrame,
+    column_name: str,
+) -> Tuple[pl.DataFrame, pl.DataFrame]:
+    """
+    Corrects primitive columns (bboxes, polygons etc of type 'list[struct]') by removing non-matching struct fields
+    between two datasets. This is useful when merging two datasets with the same primitive (e.g. Bbox), where
+    some (less important) field types in the struct differ between the two datasets.
+    This issue often occurs with the 'meta' field as different dataset formats may store different metadata information.
+    """
+    s0_column_type = samples0.schema[column_name]
+    s1_column_type = samples1.schema[column_name]
+    is_list_structs = s1_column_type == pl.List(pl.Struct) and s0_column_type == pl.List(pl.Struct)
+    is_non_matching_types = s1_column_type != s0_column_type
+    if is_list_structs and is_non_matching_types:  # Only perform correction for list[struct] types that do not match
+        s0_fields = set(s0_column_type.inner.fields)
+        s1_fields = set(s1_column_type.inner.fields)
+        similar_fields = s0_fields.intersection(s1_fields)
+        s0_dropped_fields = s0_fields - similar_fields
+        if len(s0_dropped_fields) > 0:
+            samples0 = samples0.with_columns(
+                pl.col(column_name)
+                .list.eval(pl.struct([pl.element().struct.field(k.name) for k in similar_fields]))
+                .alias(column_name)
+            )
+        s1_dropped_fields = s1_fields - similar_fields
+        if len(s1_dropped_fields) > 0:
+            samples1 = samples1.with_columns(
+                pl.col(column_name)
+                .list.eval(pl.struct([pl.element().struct.field(k.name) for k in similar_fields]))
+                .alias(column_name)
+            )
+        user_logger.warning(
+            f"Primitive column '{column_name}' has none-matching fields in the two datasets. "
+            f"Dropping fields in samples0: {[f.name for f in s0_dropped_fields]}. "
+            f"Dropping fields in samples1: {[f.name for f in s1_dropped_fields]}."
+        )
+
+    return samples0, samples1
+
+
 def filter_table_for_class_names(
     samples_table: pl.DataFrame, class_names: List[str], PrimitiveType: Type[Primitive]
 ) -> Optional[pl.DataFrame]:
     table_with_selected_class_names = samples_table.filter(
         pl.col(PrimitiveType.column_name())
-        .list.eval(pl.element().struct.field(
+        .list.eval(pl.element().struct.field(PrimitiveField.CLASS_NAME).is_in(class_names))
         .list.any()
     )

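To illustrate the correction step above outside the package, here is a small polars-only sketch of the same technique: keep only the struct fields shared by both frames so the two list[struct] columns concatenate cleanly. The toy data and column name are invented for the example.

import polars as pl

df0 = pl.DataFrame({"bboxes": [[{"class_name": "car", "meta": "from-coco"}]]})
df1 = pl.DataFrame({"bboxes": [[{"class_name": "bus", "score": 0.9}]]})

fields0 = {f.name for f in df0.schema["bboxes"].inner.fields}
fields1 = {f.name for f in df1.schema["bboxes"].inner.fields}
shared = fields0 & fields1  # here only {"class_name"} survives

# Rebuild each struct with only the shared fields, then the schemas match.
keep_shared = pl.col("bboxes").list.eval(pl.struct([pl.element().struct.field(n) for n in shared]))
df0 = df0.with_columns(keep_shared.alias("bboxes"))
df1 = df1.with_columns(keep_shared.alias("bboxes"))

print(pl.concat([df0, df1], how="vertical"))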
@@ -100,20 +152,20 @@ def split_primitive_columns_by_task_name(
     coordinate_types: Optional[List[Type[Primitive]]] = None,
 ) -> pl.DataFrame:
     """
-    Convert Primitive columns such as "
-    For example, if the "
+    Convert Primitive columns such as "bboxes" (Bbox) into a column for each task name.
+    For example, if the "bboxes" column (containing Bbox objects) has tasks "task1" and "task2".


     This:
     ─┬────────────┬─
-     ┆
+     ┆ bboxes     ┆
      ┆ ---        ┆
      ┆ list[struc ┆
      ┆ t[11]]     ┆
     ═╪════════════╪═
     becomes this:
     ─┬────────────┬────────────┬─
-     ┆
+     ┆ bboxes.    ┆ bboxes.    ┆
      ┆ task1      ┆ task2      ┆
      ┆ ---        ┆ ---        ┆
      ┆ list[struc ┆ list[struc ┆
@@ -131,11 +183,11 @@ def split_primitive_columns_by_task_name(
         if samples_table[col_name].dtype != pl.List(pl.Struct):
             continue

-        task_names = samples_table[col_name].explode().struct.field(
+        task_names = samples_table[col_name].explode().struct.field(PrimitiveField.TASK_NAME).unique().to_list()
         samples_table = samples_table.with_columns(
             [
                 pl.col(col_name)
-                .list.filter(pl.element().struct.field(
+                .list.filter(pl.element().struct.field(PrimitiveField.TASK_NAME).eq(task_name))
                 .alias(f"{col_name}.{task_name}")
                 for task_name in task_names
             ]
@@ -162,7 +214,7 @@ def read_samples_from_path(path: Path) -> pl.DataFrame:

 def check_image_paths(table: pl.DataFrame) -> bool:
     missing_files = []
-    org_paths = table[
+    org_paths = table[SampleField.FILE_PATH].to_list()
     for org_path in track(org_paths, description="Check image paths"):
         org_path = Path(org_path)
         if not org_path.exists():
@@ -219,3 +271,68 @@ def unnest_classification_tasks(table: pl.DataFrame, strict: bool = True) -> pl.

     table_out = table_out.with_columns([pl.col(c).list.first() for c in classification_columns])
     return table_out
+
+
+def update_class_indices(samples: pl.DataFrame, task: "TaskInfo") -> pl.DataFrame:
+    if task.class_names is None or len(task.class_names) == 0:
+        raise ValueError(f"Task '{task.name}' does not have defined class names to update class indices.")
+
+    objs = (
+        samples[task.primitive.column_name()]
+        .explode()
+        .struct.unnest()
+        .filter(pl.col(PrimitiveField.TASK_NAME) == task.name)
+    )
+    expected_class_names = set(objs[PrimitiveField.CLASS_NAME].unique())
+    missing_class_names = expected_class_names - set(task.class_names)
+    if len(missing_class_names) > 0:
+        raise ValueError(
+            f"Task '{task.name}' is missing class names: {missing_class_names}. Cannot update class indices."
+        )
+
+    name_2_idx_mapping = {name: idx for idx, name in enumerate(task.class_names)}
+
+    samples_updated = samples.with_columns(
+        pl.col(task.primitive.column_name())
+        .list.eval(
+            pl.element().struct.with_fields(
+                pl.when(pl.field(PrimitiveField.TASK_NAME) == task.name)
+                .then(pl.field(PrimitiveField.CLASS_NAME).replace_strict(name_2_idx_mapping, default=-1))
+                .otherwise(pl.field(PrimitiveField.CLASS_IDX))
+                .alias(PrimitiveField.CLASS_IDX)
+            )
+        )
+        .alias(task.primitive.column_name())
+    )
+
+    return samples_updated
+
+
+def add_sample_index(samples: pl.DataFrame) -> pl.DataFrame:
+    """
+    Adds a sample index column to the samples DataFrame.
+
+    Note: Unlike the built-in 'polars.DataFrame.with_row_count', this function
+    always guarantees 'pl.UInt64' type for the index column.
+    """
+    if SampleField.SAMPLE_INDEX in samples.columns:
+        samples = samples.drop(SampleField.SAMPLE_INDEX)
+    samples = samples.select(
+        pl.int_range(0, pl.count(), dtype=pl.UInt64).alias(SampleField.SAMPLE_INDEX),
+        pl.all(),
+    )
+    return samples
+
+
+def add_dataset_name_if_missing(table: pl.DataFrame, dataset_name: str) -> pl.DataFrame:
+    if SampleField.DATASET_NAME not in table.columns:
+        table = table.with_columns(pl.lit(dataset_name).alias(SampleField.DATASET_NAME))
+    else:
+        table = table.with_columns(
+            pl.when(pl.col(SampleField.DATASET_NAME).is_null())
+            .then(pl.lit(dataset_name))
+            .otherwise(pl.col(SampleField.DATASET_NAME))
+            .alias(SampleField.DATASET_NAME)
+        )
+
+    return table
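A quick polars-only illustration (toy data, literal column name) of the indexing pattern used by add_sample_index above; this snippet calls pl.len() where the package code uses the older pl.count() alias.

import polars as pl

table = pl.DataFrame({"file_name": ["a.jpg", "b.jpg", "c.jpg"]})
table = table.select(
    pl.int_range(0, pl.len(), dtype=pl.UInt64).alias("sample_index"),  # fresh UInt64 index column, prepended
    pl.all(),
)
print(table)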
hafnia/dataset/primitives/bbox.py
CHANGED
@@ -33,9 +33,7 @@ class Bbox(Primitive):
     class_name: Optional[str] = Field(default=None, description="Class name, e.g. 'car'")
     class_idx: Optional[int] = Field(default=None, description="Class index, e.g. 0 for 'car' if it is the first class")
     object_id: Optional[str] = Field(default=None, description="Unique identifier for the object, e.g. '12345123'")
-    confidence:
-        default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
-    )
+    confidence: float = Field(default=1.0, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox")
     ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")

     task_name: str = Field(
@@ -45,11 +43,11 @@ class Bbox(Primitive):

     @staticmethod
     def default_task_name() -> str:
-        return "
+        return "object_detection"

     @staticmethod
     def column_name() -> str:
-        return "
+        return "bboxes"

     def calculate_area(self) -> float:
         return self.height * self.width
hafnia/dataset/primitives/bitmask.py
CHANGED
@@ -7,7 +7,6 @@ import numpy as np
 import pycocotools.mask as coco_mask
 from pydantic import Field

-from hafnia.dataset.dataset_names import FieldName
 from hafnia.dataset.primitives.primitive import Primitive
 from hafnia.dataset.primitives.utils import (
     anonymize_by_resizing,
@@ -16,8 +15,6 @@ from hafnia.dataset.primitives.utils import (
     text_org_from_left_bottom_to_centered,
 )

-FieldName
-

 class Bitmask(Primitive):
     # Names should match names in FieldName
@@ -34,9 +31,7 @@ class Bitmask(Primitive):
     class_name: Optional[str] = Field(default=None, description="Class name of the object represented by the bitmask")
     class_idx: Optional[int] = Field(default=None, description="Class index of the object represented by the bitmask")
     object_id: Optional[str] = Field(default=None, description="Object ID of the instance represented by the bitmask")
-    confidence:
-        default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
-    )
+    confidence: float = Field(default=1.0, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox")
     ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")

     task_name: str = Field(
@@ -46,7 +41,7 @@ class Bitmask(Primitive):

     @staticmethod
     def default_task_name() -> str:
-        return "
+        return "mask_detection"

     @staticmethod
     def column_name() -> str:
hafnia/dataset/primitives/classification.py
CHANGED
@@ -12,8 +12,8 @@ class Classification(Primitive):
     class_name: Optional[str] = Field(default=None, description="Class name, e.g. 'car'")
     class_idx: Optional[int] = Field(default=None, description="Class index, e.g. 0 for 'car' if it is the first class")
     object_id: Optional[str] = Field(default=None, description="Unique identifier for the object, e.g. '12345123'")
-    confidence:
-        default=
+    confidence: float = Field(
+        default=1.0, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification"
     )
     ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")

@@ -27,7 +27,7 @@ class Classification(Primitive):

     @staticmethod
     def default_task_name() -> str:
-        return "
+        return "image_classification"

     @staticmethod
     def column_name() -> str:
hafnia/dataset/primitives/polygon.py
CHANGED
@@ -16,9 +16,7 @@ class Polygon(Primitive):
     class_name: Optional[str] = Field(default=None, description="Class name of the polygon")
     class_idx: Optional[int] = Field(default=None, description="Class index of the polygon")
     object_id: Optional[str] = Field(default=None, description="Object ID of the polygon")
-    confidence:
-        default=None, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox"
-    )
+    confidence: float = Field(default=1.0, description="Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox")
     ground_truth: bool = Field(default=True, description="Whether this is ground truth or a prediction")

     task_name: str = Field(
@@ -40,7 +38,7 @@ class Polygon(Primitive):

     @staticmethod
     def default_task_name() -> str:
-        return "
+        return "polygon_detection"

     @staticmethod
     def column_name() -> str:
hafnia/dataset/primitives/segmentation.py
CHANGED
@@ -24,11 +24,11 @@ class Segmentation(Primitive):

     @staticmethod
     def default_task_name() -> str:
-        return "
+        return "semantic_segmentation"

     @staticmethod
     def column_name() -> str:
-        return "
+        return "segmentations"

     def calculate_area(self) -> float:
         raise NotImplementedError()
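The 0.4.1 defaults for the primitive classes can be summarised with a small check; the import paths are inferred from the changed-files list above and have not been verified against the wheel.

from hafnia.dataset.primitives.bbox import Bbox
from hafnia.dataset.primitives.classification import Classification
from hafnia.dataset.primitives.segmentation import Segmentation

# Default task and column names as they appear in the 0.4.1 diff.
assert Bbox.default_task_name() == "object_detection"
assert Bbox.column_name() == "bboxes"
assert Classification.default_task_name() == "image_classification"
assert Segmentation.default_task_name() == "semantic_segmentation"
assert Segmentation.column_name() == "segmentations"
# Note: the 'confidence' field on all primitives now defaults to 1.0 instead of None.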
hafnia/platform/datasets.py
CHANGED
@@ -13,7 +13,7 @@ from rich.progress import track

 from cli.config import Config
 from hafnia import http, utils
-from hafnia.dataset.dataset_names import DATASET_FILENAMES_REQUIRED
+from hafnia.dataset.dataset_names import DATASET_FILENAMES_REQUIRED
 from hafnia.dataset.dataset_recipe.dataset_recipe import (
     DatasetRecipe,
     get_dataset_path_from_recipe,
@@ -120,15 +120,11 @@ def download_dataset_from_access_endpoint(
         return
     dataset = HafniaDataset.from_path(path_dataset, check_for_images=False)
     try:
-
-            src_paths=dataset.samples[ColumnName.REMOTE_PATH].to_list(),
-            dst_paths=dataset.samples[ColumnName.FILE_PATH].to_list(),
-            append_envs=envs,
-            description="Downloading images",
-        )
+        dataset = dataset.download_files_aws(path_dataset, aws_credentials=resource_credentials, force_redownload=True)
     except ValueError as e:
         user_logger.error(f"Failed to download images: {e}")
         return
+    dataset.write_annotations(path_folder=path_dataset)  # Overwrite annotations as files have been re-downloaded


 def fast_copy_files_s3(
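A sketch of the new download flow shown above, with placeholder endpoint, API key, and path. The calls themselves (get_resource_credentials, HafniaDataset.from_path, download_files_aws, write_annotations) all appear in this diff; the surrounding wiring is illustrative only.

from pathlib import Path

from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.platform.download import get_resource_credentials

path_dataset = Path("data/my-dataset")  # placeholder
credentials = get_resource_credentials("https://example.invalid/access", "my-api-key")  # placeholders

dataset = HafniaDataset.from_path(path_dataset, check_for_images=False)
dataset = dataset.download_files_aws(path_dataset, aws_credentials=credentials, force_redownload=True)
dataset.write_annotations(path_folder=path_dataset)  # re-write annotations after files are re-downloaded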
hafnia/platform/download.py
CHANGED
@@ -3,83 +3,12 @@ from typing import Dict, Optional

 import boto3
 from botocore.exceptions import ClientError
-from pydantic import BaseModel, field_validator
 from rich.progress import Progress

+from hafnia.dataset.dataset_names import ResourceCredentials
 from hafnia.http import fetch
 from hafnia.log import sys_logger, user_logger

-ARN_PREFIX = "arn:aws:s3:::"
-
-
-class ResourceCredentials(BaseModel):
-    access_key: str
-    secret_key: str
-    session_token: str
-    s3_arn: str
-    region: str
-
-    @staticmethod
-    def fix_naming(payload: Dict[str, str]) -> "ResourceCredentials":
-        """
-        The endpoint returns a payload with a key called 's3_path', but it
-        is actually an ARN path (starts with arn:aws:s3::). This method renames it to 's3_arn' for consistency.
-        """
-        if "s3_path" in payload and payload["s3_path"].startswith(ARN_PREFIX):
-            payload["s3_arn"] = payload.pop("s3_path")
-
-        if "region" not in payload:
-            payload["region"] = "eu-west-1"
-        return ResourceCredentials(**payload)
-
-    @field_validator("s3_arn")
-    @classmethod
-    def validate_s3_arn(cls, value: str) -> str:
-        """Validate s3_arn to ensure it starts with 'arn:aws:s3:::'"""
-        if not value.startswith("arn:aws:s3:::"):
-            raise ValueError(f"Invalid S3 ARN: {value}. It should start with 'arn:aws:s3:::'")
-        return value
-
-    def s3_path(self) -> str:
-        """
-        Extracts the S3 path from the ARN.
-        Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket/my-prefix
-        """
-        return self.s3_arn[len(ARN_PREFIX) :]
-
-    def s3_uri(self) -> str:
-        """
-        Converts the S3 ARN to a URI format.
-        Example: arn:aws:s3:::my-bucket/my-prefix -> s3://my-bucket/my-prefix
-        """
-        return f"s3://{self.s3_path()}"
-
-    def bucket_name(self) -> str:
-        """
-        Extracts the bucket name from the S3 ARN.
-        Example: arn:aws:s3:::my-bucket/my-prefix -> my-bucket
-        """
-        return self.s3_path().split("/")[0]
-
-    def object_key(self) -> str:
-        """
-        Extracts the object key from the S3 ARN.
-        Example: arn:aws:s3:::my-bucket/my-prefix -> my-prefix
-        """
-        return "/".join(self.s3_path().split("/")[1:])
-
-    def aws_credentials(self) -> Dict[str, str]:
-        """
-        Returns the AWS credentials as a dictionary.
-        """
-        environment_vars = {
-            "AWS_ACCESS_KEY_ID": self.access_key,
-            "AWS_SECRET_ACCESS_KEY": self.secret_key,
-            "AWS_SESSION_TOKEN": self.session_token,
-            "AWS_REGION": self.region,
-        }
-        return environment_vars
-

 def get_resource_credentials(endpoint: str, api_key: str) -> ResourceCredentials:
     """
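ResourceCredentials now lives in hafnia.dataset.dataset_names. A sketch of the new import, assuming the helper methods shown in the removed block above were moved over unchanged (credential values are placeholders):

from hafnia.dataset.dataset_names import ResourceCredentials

creds = ResourceCredentials(
    access_key="AKIA...",  # placeholder
    secret_key="secret",   # placeholder
    session_token="token", # placeholder
    s3_arn="arn:aws:s3:::my-bucket/my-prefix",
    region="eu-west-1",
)
print(creds.s3_uri())       # s3://my-bucket/my-prefix
print(creds.bucket_name())  # my-bucket
print(creds.object_key())   # my-prefix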
hafnia/torch_helpers.py
CHANGED
@@ -9,7 +9,7 @@ from torchvision import tv_tensors
 from torchvision import utils as tv_utils
 from torchvision.transforms import v2

-from hafnia.dataset.dataset_names import
+from hafnia.dataset.dataset_names import PrimitiveField
 from hafnia.dataset.hafnia_dataset import HafniaDataset, Sample
 from hafnia.dataset.primitives import (
     PRIMITIVE_COLUMN_NAMES,
@@ -68,8 +68,8 @@ class TorchvisionDataset(torch.utils.data.Dataset):
         for task_name, classifications in class_tasks.items():
             assert len(classifications) == 1, "Expected exactly one classification task per sample"
             target_flat[f"{Classification.column_name()}.{task_name}"] = {
-
-
+                PrimitiveField.CLASS_IDX: classifications[0].class_idx,
+                PrimitiveField.CLASS_NAME: classifications[0].class_name,
             }

         bbox_tasks: Dict[str, List[Bbox]] = get_primitives_per_task_name_for_primitive(sample, Bbox)
@@ -77,8 +77,8 @@ class TorchvisionDataset(torch.utils.data.Dataset):
             bboxes_list = [bbox.to_coco(image_height=h, image_width=w) for bbox in bboxes]
             bboxes_tensor = torch.as_tensor(bboxes_list).reshape(-1, 4)
             target_flat[f"{Bbox.column_name()}.{task_name}"] = {
-
-
+                PrimitiveField.CLASS_IDX: [bbox.class_idx for bbox in bboxes],
+                PrimitiveField.CLASS_NAME: [bbox.class_name for bbox in bboxes],
                 "bbox": tv_tensors.BoundingBoxes(bboxes_tensor, format="XYWH", canvas_size=(h, w)),
             }

@@ -86,8 +86,8 @@ class TorchvisionDataset(torch.utils.data.Dataset):
         for task_name, bitmasks in bitmask_tasks.items():
             bitmasks_np = np.array([bitmask.to_mask(img_height=h, img_width=w) for bitmask in bitmasks])
             target_flat[f"{Bitmask.column_name()}.{task_name}"] = {
-
-
+                PrimitiveField.CLASS_IDX: [bitmask.class_idx for bitmask in bitmasks],
+                PrimitiveField.CLASS_NAME: [bitmask.class_name for bitmask in bitmasks],
                 "mask": tv_tensors.Mask(bitmasks_np),
             }

@@ -161,7 +161,7 @@ def draw_image_and_targets(
     if Bitmask.column_name() in targets:
         primitive_annotations = targets[Bitmask.column_name()]
         for task_name, task_annotations in primitive_annotations.items():
-            colors = [class_color_by_name(class_name) for class_name in task_annotations[
+            colors = [class_color_by_name(class_name) for class_name in task_annotations[PrimitiveField.CLASS_NAME]]
             visualize_image = tv_utils.draw_segmentation_masks(
                 image=visualize_image,
                 masks=task_annotations["mask"],
@@ -172,11 +172,11 @@ def draw_image_and_targets(
         primitive_annotations = targets[Bbox.column_name()]
         for task_name, task_annotations in primitive_annotations.items():
             bboxes = torchvision.ops.box_convert(task_annotations["bbox"], in_fmt="xywh", out_fmt="xyxy")
-            colors = [class_color_by_name(class_name) for class_name in task_annotations[
+            colors = [class_color_by_name(class_name) for class_name in task_annotations[PrimitiveField.CLASS_NAME]]
             visualize_image = tv_utils.draw_bounding_boxes(
                 image=visualize_image,
                 boxes=bboxes,
-                labels=task_annotations[
+                labels=task_annotations[PrimitiveField.CLASS_NAME],
                 width=2,
                 colors=colors,
             )
@@ -187,9 +187,9 @@ def draw_image_and_targets(
         text_labels = []
         for task_name, task_annotations in primitive_annotations.items():
             if task_name == Classification.default_task_name():
-                text_label = task_annotations[
+                text_label = task_annotations[PrimitiveField.CLASS_NAME]
             else:
-                text_label = f"{task_name}: {task_annotations[
+                text_label = f"{task_name}: {task_annotations[PrimitiveField.CLASS_NAME]}"
             text_labels.append(text_label)
         visualize_image = draw_image_classification(visualize_image, text_labels)
     return visualize_image
hafnia/visualizations/image_visualizations.py
CHANGED
@@ -193,6 +193,8 @@ def save_dataset_sample_set_visualizations(
         image = draw_annotations(image, annotations, draw_settings=draw_settings)

         pil_image = Image.fromarray(image)
+        if sample.file_path is None:
+            raise ValueError("Sample has no file_path defined.")
         path_image = path_output_folder / Path(sample.file_path).name
         pil_image.save(path_image)
         paths.append(path_image)
{hafnia-0.4.0.dist-info → hafnia-0.4.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hafnia
-Version: 0.4.0
+Version: 0.4.1
 Summary: Python SDK for communication with Hafnia platform.
 Author-email: Milestone Systems <hafniaplatform@milestone.dk>
 License-File: LICENSE
@@ -158,7 +158,7 @@ and `dataset.samples` with annotations as a polars DataFrame
 print(dataset.samples.head(2))
 shape: (2, 14)
 ┌──────────────┬─────────────────────────────────┬────────┬───────┬───┬─────────────────────────────────┬──────────┬──────────┬─────────────────────────────────┐
-│ sample_index ┆ file_name ┆ height ┆ width ┆ … ┆
+│ sample_index ┆ file_name ┆ height ┆ width ┆ … ┆ bboxes ┆ bitmasks ┆ polygons ┆ meta │
 │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │
 │ u32 ┆ str ┆ i64 ┆ i64 ┆ ┆ list[struct[11]] ┆ null ┆ null ┆ struct[5] │
 ╞══════════════╪═════════════════════════════════╪════════╪═══════╪═══╪═════════════════════════════════╪══════════╪══════════╪═════════════════════════════════╡
@@ -218,7 +218,7 @@ sample_dict = dataset[0]

 for sample_dict in dataset:
     sample = Sample(**sample_dict)
-    print(sample.sample_id, sample.
+    print(sample.sample_id, sample.bboxes)
     break
 ```
 Not that it is possible to create a `Sample` object from the sample dictionary.
@@ -421,7 +421,7 @@ pil_image.save("visualized_labels.png")

 # Create DataLoaders - using TorchVisionCollateFn
 collate_fn = torch_helpers.TorchVisionCollateFn(
-    skip_stacking=["
+    skip_stacking=["bboxes.bbox", "bboxes.class_idx"]
 )
 train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
 ```