hafnia 0.1.27__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. cli/__main__.py +2 -2
  2. cli/config.py +17 -4
  3. cli/dataset_cmds.py +60 -0
  4. cli/runc_cmds.py +1 -1
  5. hafnia/data/__init__.py +2 -2
  6. hafnia/data/factory.py +12 -56
  7. hafnia/dataset/dataset_helpers.py +91 -0
  8. hafnia/dataset/dataset_names.py +72 -0
  9. hafnia/dataset/dataset_recipe/dataset_recipe.py +327 -0
  10. hafnia/dataset/dataset_recipe/recipe_transforms.py +53 -0
  11. hafnia/dataset/dataset_recipe/recipe_types.py +140 -0
  12. hafnia/dataset/dataset_upload_helper.py +468 -0
  13. hafnia/dataset/hafnia_dataset.py +624 -0
  14. hafnia/dataset/operations/dataset_stats.py +15 -0
  15. hafnia/dataset/operations/dataset_transformations.py +82 -0
  16. hafnia/dataset/operations/table_transformations.py +183 -0
  17. hafnia/dataset/primitives/__init__.py +16 -0
  18. hafnia/dataset/primitives/bbox.py +137 -0
  19. hafnia/dataset/primitives/bitmask.py +182 -0
  20. hafnia/dataset/primitives/classification.py +56 -0
  21. hafnia/dataset/primitives/point.py +25 -0
  22. hafnia/dataset/primitives/polygon.py +100 -0
  23. hafnia/dataset/primitives/primitive.py +44 -0
  24. hafnia/dataset/primitives/segmentation.py +51 -0
  25. hafnia/dataset/primitives/utils.py +51 -0
  26. hafnia/experiment/hafnia_logger.py +7 -7
  27. hafnia/helper_testing.py +108 -0
  28. hafnia/http.py +5 -3
  29. hafnia/platform/__init__.py +2 -2
  30. hafnia/platform/datasets.py +197 -0
  31. hafnia/platform/download.py +85 -23
  32. hafnia/torch_helpers.py +180 -95
  33. hafnia/utils.py +21 -2
  34. hafnia/visualizations/colors.py +267 -0
  35. hafnia/visualizations/image_visualizations.py +202 -0
  36. {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/METADATA +209 -99
  37. hafnia-0.2.1.dist-info/RECORD +50 -0
  38. cli/data_cmds.py +0 -53
  39. hafnia-0.1.27.dist-info/RECORD +0 -27
  40. {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/WHEEL +0 -0
  41. {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/entry_points.txt +0 -0
  42. {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/licenses/LICENSE +0 -0
hafnia/dataset/operations/dataset_transformations.py
@@ -0,0 +1,82 @@
+ """
+ Hafnia dataset transformations that take a HafniaDataset and return a new HafniaDataset.
+
+ Every function here has a corresponding method on the HafniaDataset class and a
+ corresponding RecipeTransform class in the `dataset_recipe/recipe_transforms.py` file.
+
+ This allows each function to be used in three ways:
+
+ ```python
+ from hafnia.dataset.operations import dataset_transformations
+ from hafnia.dataset.hafnia_dataset import HafniaDataset
+ from hafnia.dataset.dataset_recipe.recipe_transforms import SplitByRatios
+
+ splits_by_ratios = {"train": 0.8, "val": 0.1, "test": 0.1}
+
+ # Option 1: Using the function directly
+ dataset = dataset_transformations.splits_by_ratios(dataset, split_ratios=splits_by_ratios)
+
+ # Option 2: Using the method of the HafniaDataset class
+ dataset = dataset.splits_by_ratios(split_ratios=splits_by_ratios)
+
+ # Option 3: Using the RecipeTransform class
+ serializable_transform = SplitByRatios(split_ratios=splits_by_ratios)
+ dataset = serializable_transform(dataset)
+ ```
+
+ Tests ensure that every function in this file has a corresponding method on the
+ HafniaDataset class and a RecipeTransform class in the `dataset_recipe/recipe_transforms.py`
+ file, and that the signatures match.
+ """
+
+ from pathlib import Path
+ from typing import TYPE_CHECKING, Callable
+
+ import cv2
+ import numpy as np
+ import polars as pl
+ from PIL import Image
+ from tqdm import tqdm
+
+ from hafnia.dataset import dataset_helpers
+
+ if TYPE_CHECKING:
+     from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+
+ ### Image transformations ###
+ class AnonymizeByPixelation:
+     def __init__(self, resize_factor: float = 0.10):
+         self.resize_factor = resize_factor
+
+     def __call__(self, frame: np.ndarray) -> np.ndarray:
+         org_size = frame.shape[:2]
+         frame = cv2.resize(frame, (0, 0), fx=self.resize_factor, fy=self.resize_factor)
+         frame = cv2.resize(frame, org_size[::-1], interpolation=cv2.INTER_NEAREST)
+         return frame
+
+
+ def transform_images(
+     dataset: "HafniaDataset",
+     transform: Callable[[np.ndarray], np.ndarray],
+     path_output: Path,
+ ) -> "HafniaDataset":
+     new_paths = []
+     path_image_folder = path_output / "data"
+     path_image_folder.mkdir(parents=True, exist_ok=True)
+
+     for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="Transform images"):
+         org_path = Path(org_path)
+         if not org_path.exists():
+             raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
+
+         image = np.array(Image.open(org_path))
+         image_transformed = transform(image)
+         new_path = dataset_helpers.save_image_with_hash_name(image_transformed, path_image_folder)
+
+         if not new_path.exists():
+             raise FileNotFoundError(f"Transformed file {new_path} was not written to disk.")
+         new_paths.append(str(new_path))
+
+     table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
+     return dataset.update_table(table)
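Worth noting: any `Callable[[np.ndarray], np.ndarray]` works as the `transform` argument, with `AnonymizeByPixelation` as the built-in example. A minimal sketch of the callable on its own (the random frame is purely illustrative):

```python
import numpy as np

from hafnia.dataset.operations.dataset_transformations import AnonymizeByPixelation

# Pixelate one frame: downscale to 10% and upscale back with nearest-neighbor interpolation
frame = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
pixelated = AnonymizeByPixelation(resize_factor=0.10)(frame)
assert pixelated.shape == frame.shape

# The same callable plugs into transform_images, which writes the transformed copies
# to `path_output / "data"` and rewrites the table's "file_name" column to match.
```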
hafnia/dataset/operations/table_transformations.py
@@ -0,0 +1,183 @@
+ from pathlib import Path
+ from typing import List, Optional, Type
+
+ import polars as pl
+ from tqdm import tqdm
+
+ from hafnia.dataset.dataset_names import (
+     FILENAME_ANNOTATIONS_JSONL,
+     FILENAME_ANNOTATIONS_PARQUET,
+     FieldName,
+ )
+ from hafnia.dataset.operations import table_transformations
+ from hafnia.dataset.primitives import PRIMITIVE_TYPES
+ from hafnia.dataset.primitives.classification import Classification
+ from hafnia.dataset.primitives.primitive import Primitive
+ from hafnia.log import user_logger
+
+
+ def create_primitive_table(
+     samples_table: pl.DataFrame, PrimitiveType: Type[Primitive], keep_sample_data: bool = False
+ ) -> Optional[pl.DataFrame]:
+     """
+     Returns a DataFrame with objects of the specified primitive type.
+     """
+     column_name = PrimitiveType.column_name()
+     has_primitive_column = (column_name in samples_table.columns) and (
+         samples_table[column_name].dtype == pl.List(pl.Struct)
+     )
+     if not has_primitive_column:
+         return None
+
+     # Remove frames without objects
+     remove_no_object_frames = samples_table.filter(pl.col(column_name).list.len() > 0)
+
+     if keep_sample_data:
+         # Drop other primitive columns to avoid conflicts
+         drop_columns = set(PRIMITIVE_TYPES) - {PrimitiveType, Classification}
+         remove_no_object_frames = remove_no_object_frames.drop(*[primitive.column_name() for primitive in drop_columns])
+         # Rename the sample columns "height", "width" and "meta" to avoid conflicts with object field names
+         remove_no_object_frames = remove_no_object_frames.rename(
+             {"height": "image.height", "width": "image.width", "meta": "image.meta"}
+         )
+         objects_df = remove_no_object_frames.explode(column_name).unnest(column_name)
+     else:
+         objects_df = remove_no_object_frames.select(pl.col(column_name).explode().struct.unnest())
+     return objects_df
+
+
+ def filter_table_for_class_names(
+     samples_table: pl.DataFrame, class_names: List[str], PrimitiveType: Type[Primitive]
+ ) -> pl.DataFrame:
+     table_with_selected_class_names = samples_table.filter(
+         pl.col(PrimitiveType.column_name())
+         .list.eval(pl.element().struct.field(FieldName.CLASS_NAME).is_in(class_names))
+         .list.any()
+     )
+
+     return table_with_selected_class_names
+
+
+ def split_primitive_columns_by_task_name(
+     samples_table: pl.DataFrame,
+     coordinate_types: Optional[List[Type[Primitive]]] = None,
+ ) -> pl.DataFrame:
+     """
+     Convert Primitive columns such as "objects" (Bbox) into a column for each task name.
+     For example, if the "objects" column (containing Bbox objects) has tasks "task1" and "task2",
+
+     this:
+     ─┬────────────┬─
+      ┆ objects    ┆
+      ┆ ---        ┆
+      ┆ list[struc ┆
+      ┆ t[11]]     ┆
+     ═╪════════════╪═
+     becomes this:
+     ─┬────────────┬────────────┬─
+      ┆ objects.   ┆ objects.   ┆
+      ┆ task1      ┆ task2      ┆
+      ┆ ---        ┆ ---        ┆
+      ┆ list[struc ┆ list[struc ┆
+      ┆ t[11]]     ┆ t[13]]     ┆
+     ═╪════════════╪════════════╪═
+     """
+     coordinate_types = coordinate_types or PRIMITIVE_TYPES
+     for PrimitiveType in coordinate_types:
+         col_name = PrimitiveType.column_name()
+
+         if col_name not in samples_table.columns:
+             continue
+
+         if samples_table[col_name].dtype != pl.List(pl.Struct):
+             continue
+
+         task_names = samples_table[col_name].explode().struct.field(FieldName.TASK_NAME).unique().to_list()
+         samples_table = samples_table.with_columns(
+             [
+                 pl.col(col_name)
+                 .list.filter(pl.element().struct.field(FieldName.TASK_NAME).eq(task_name))
+                 .alias(f"{col_name}.{task_name}")
+                 for task_name in task_names
+             ]
+         )
+         samples_table = samples_table.drop(col_name)
+     return samples_table
+
+
+ def read_table_from_path(path: Path) -> pl.DataFrame:
+     path_annotations = path / FILENAME_ANNOTATIONS_PARQUET
+     if path_annotations.exists():
+         user_logger.info(f"Reading dataset annotations from Parquet file: {path_annotations}")
+         return pl.read_parquet(path_annotations)
+
+     path_annotations_jsonl = path / FILENAME_ANNOTATIONS_JSONL
+     if path_annotations_jsonl.exists():
+         user_logger.info(f"Reading dataset annotations from JSONL file: {path_annotations_jsonl}")
+         return pl.read_ndjson(path_annotations_jsonl)
+
+     raise FileNotFoundError(
+         f"Unable to read annotations. No Parquet file '{path_annotations.name}' or "
+         f"JSONL file '{path_annotations_jsonl.name}' in '{path}'."
+     )
+
+
+ def check_image_paths(table: pl.DataFrame) -> bool:
+     missing_files = []
+     for org_path in tqdm(table["file_name"].to_list(), desc="Check image paths"):
+         org_path = Path(org_path)
+         if not org_path.exists():
+             missing_files.append(org_path)
+
+     if len(missing_files) > 0:
+         user_logger.warning(f"Missing files: {len(missing_files)}. Showing the first 5:")
+         for missing_file in missing_files[:5]:
+             user_logger.warning(f"  - {missing_file}")
+         raise FileNotFoundError(f"Some files are missing in the dataset: {len(missing_files)} files not found.")
+
+     return True
+
+
+ def unnest_classification_tasks(table: pl.DataFrame, strict: bool = True) -> pl.DataFrame:
+     """
+     Unnest classification tasks in table.
+     Classification tasks are all stored in the same column in the HafniaDataset table.
+     This function splits them into separate columns for each task name.
+
+     Type is converted from a list of structs (pl.List[pl.Struct]) to a struct (pl.Struct) column.
+
+     For example, if the classification column has tasks "task1" and "task2",
+     it converts the classification column from this:
+     ─┬─────────────────┬─
+      ┆ classifications ┆
+      ┆ ---             ┆
+      ┆ list[struct[6]] ┆
+     ═╪═════════════════╪═
+
+     to this:
+     ─┬──────────────────┬──────────────────┬─
+      ┆ classifications. ┆ classifications. ┆
+      ┆ task1            ┆ task2            ┆
+      ┆ ---              ┆ ---              ┆
+      ┆ struct[6]        ┆ struct[6]        ┆
+     ═╪══════════════════╪══════════════════╪═
+     """
+     coordinate_types = [Classification]
+     table_out = table_transformations.split_primitive_columns_by_task_name(table, coordinate_types=coordinate_types)
+
+     classification_columns = [c for c in table_out.columns if c.startswith(Classification.column_name() + ".")]
+     columns_to_unnest = []
+     for classification_column in classification_columns:
+         has_multiple_items_per_sample = (table_out[classification_column].list.len() > 1).any()
+         if has_multiple_items_per_sample:
+             if strict:
+                 raise ValueError(
+                     f"Column {classification_column} has multiple items per sample, but expected only one item."
+                 )
+             user_logger.warning(
+                 f"Warning: Unnesting of column '{classification_column}' is skipped because it has multiple items per sample."
+             )
+             continue
+         columns_to_unnest.append(classification_column)
+
+     table_out = table_out.with_columns([pl.col(c).list.first() for c in columns_to_unnest])
+     return table_out
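The task-splitting logic in `split_primitive_columns_by_task_name` is plain polars: each list-of-structs column is filtered by its `task_name` field into one column per task. A self-contained sketch of the same pattern on a toy table (column and field names chosen for illustration; assumes a polars version with `Expr.list.filter`, which the code above relies on):

```python
import polars as pl

# Toy samples table: one list-of-structs column holding annotations from two tasks
table = pl.DataFrame(
    {
        "objects": [
            [{"task_name": "task1", "class_name": "car"}, {"task_name": "task2", "class_name": "red"}],
            [{"task_name": "task1", "class_name": "bus"}],
        ]
    }
)

task_names = table["objects"].explode().struct.field("task_name").unique().to_list()
table = table.with_columns(
    [
        pl.col("objects")
        .list.filter(pl.element().struct.field("task_name").eq(task_name))
        .alias(f"objects.{task_name}")
        for task_name in task_names
    ]
).drop("objects")
print(table.columns)  # ["objects.task1", "objects.task2"] (order follows unique())
```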
hafnia/dataset/primitives/__init__.py
@@ -0,0 +1,16 @@
+ from __future__ import annotations
+
+ from typing import List, Type
+
+ from .bbox import Bbox
+ from .bitmask import Bitmask
+ from .classification import Classification
+ from .point import Point  # noqa: F401
+ from .polygon import Polygon
+ from .primitive import Primitive
+ from .segmentation import Segmentation  # noqa: F401
+ from .utils import class_color_by_name  # noqa: F401
+
+ PRIMITIVE_TYPES: List[Type[Primitive]] = [Bbox, Classification, Polygon, Bitmask]
+ PRIMITIVE_NAME_TO_TYPE = {cls.__name__: cls for cls in PRIMITIVE_TYPES}
+ PRIMITIVE_COLUMN_NAMES: List[str] = [PrimitiveType.column_name() for PrimitiveType in PRIMITIVE_TYPES]
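These module-level registries let the rest of the package resolve primitive classes from serialized names and locate their table columns. A small sketch (the bbox values are illustrative):

```python
from hafnia.dataset.primitives import PRIMITIVE_COLUMN_NAMES, PRIMITIVE_NAME_TO_TYPE

# Resolve a primitive class from its serialized name and instantiate it
BboxType = PRIMITIVE_NAME_TO_TYPE["Bbox"]
bbox = BboxType(top_left_x=0.1, top_left_y=0.2, width=0.3, height=0.4, class_name="car")

print(BboxType.column_name())  # "objects"
print(PRIMITIVE_COLUMN_NAMES)  # "objects", "classifications", and the other primitive columns
```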
hafnia/dataset/primitives/bbox.py
@@ -0,0 +1,137 @@
+ from __future__ import annotations
+
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ import cv2
+ import numpy as np
+
+ from hafnia.dataset.primitives.primitive import Primitive
+ from hafnia.dataset.primitives.utils import (
+     anonymize_by_resizing,
+     class_color_by_name,
+     clip,
+     get_class_name,
+     round_int_clip_value,
+ )
+
+
+ class Bbox(Primitive):
+     # Names should match names in FieldName
+     height: float  # Height of the bounding box as a fraction of the image height, e.g. 0.1 for 10% of the image height
+     width: float  # Width of the bounding box as a fraction of the image width, e.g. 0.1 for 10% of the image width
+     top_left_x: float  # X coordinate of top-left corner of Bbox as a fraction of the image width, e.g. 0.1 for 10% of the image width
+     top_left_y: float  # Y coordinate of top-left corner of Bbox as a fraction of the image height, e.g. 0.1 for 10% of the image height
+     class_name: Optional[str] = None  # Class name, e.g. "car"
+     class_idx: Optional[int] = None  # Class index, e.g. 0 for "car" if it is the first class
+     object_id: Optional[str] = None  # Unique identifier for the object, e.g. "12345123"
+     confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+     ground_truth: bool = True  # Whether this is ground truth or a prediction
+
+     task_name: str = ""  # Task name to support multiple Bbox tasks in the same dataset. "" defaults to "bboxes"
+     meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bbox
+
+     @staticmethod
+     def default_task_name() -> str:
+         return "bboxes"
+
+     @staticmethod
+     def column_name() -> str:
+         return "objects"
+
+     def calculate_area(self) -> float:
+         return self.height * self.width
+
+     @staticmethod
+     def from_coco(bbox: List, height: int, width: int) -> Bbox:
+         """
+         Converts a COCO-style bounding box to a Bbox object.
+         The bbox is in the format [x_min, y_min, width, height].
+         """
+         x_min, y_min, bbox_width, bbox_height = bbox
+         return Bbox(
+             top_left_x=x_min / width,
+             top_left_y=y_min / height,
+             width=bbox_width / width,
+             height=bbox_height / height,
+         )
+
+     def to_bbox(self) -> Tuple[float, float, float, float]:
+         """
+         Converts Bbox to a tuple of (x_min, y_min, width, height) with normalized coordinates.
+         Values are floats in the range [0, 1].
+         """
+         return (self.top_left_x, self.top_left_y, self.width, self.height)
+
+     def to_coco(self, image_height: int, image_width: int) -> Tuple[int, int, int, int]:
+         xmin = round_int_clip_value(self.top_left_x * image_width, max_value=image_width)
+         bbox_width = round_int_clip_value(self.width * image_width, max_value=image_width)
+
+         ymin = round_int_clip_value(self.top_left_y * image_height, max_value=image_height)
+         bbox_height = round_int_clip_value(self.height * image_height, max_value=image_height)
+
+         return xmin, ymin, bbox_width, bbox_height
+
+     def to_pixel_coordinates(
+         self, image_shape: Tuple[int, int], as_int: bool = True, clip_values: bool = True
+     ) -> Union[Tuple[float, float, float, float], Tuple[int, int, int, int]]:
+         bb_height = self.height * image_shape[0]
+         bb_width = self.width * image_shape[1]
+         bb_top_left_x = self.top_left_x * image_shape[1]
+         bb_top_left_y = self.top_left_y * image_shape[0]
+         xmin, ymin, xmax, ymax = bb_top_left_x, bb_top_left_y, bb_top_left_x + bb_width, bb_top_left_y + bb_height
+
+         if as_int:
+             xmin, ymin, xmax, ymax = int(round(xmin)), int(round(ymin)), int(round(xmax)), int(round(ymax))  # noqa: RUF046
+
+         if clip_values:
+             xmin = clip(value=xmin, v_min=0, v_max=image_shape[1])
+             xmax = clip(value=xmax, v_min=0, v_max=image_shape[1])
+             ymin = clip(value=ymin, v_min=0, v_max=image_shape[0])
+             ymax = clip(value=ymax, v_min=0, v_max=image_shape[0])
+
+         return xmin, ymin, xmax, ymax
+
+     def draw(self, image: np.ndarray, inplace: bool = False, draw_label: bool = True) -> np.ndarray:
+         if not inplace:
+             image = image.copy()
+         xmin, ymin, xmax, ymax = self.to_pixel_coordinates(image_shape=image.shape[:2])
+
+         class_name = self.get_class_name()
+         color = class_color_by_name(class_name)
+         font = cv2.FONT_HERSHEY_SIMPLEX
+         margin = 5
+         bottom_left = (xmin + margin, ymax - margin)
+         if draw_label:
+             cv2.putText(
+                 img=image, text=class_name, org=bottom_left, fontFace=font, fontScale=0.75, color=color, thickness=2
+             )
+         cv2.rectangle(image, pt1=(xmin, ymin), pt2=(xmax, ymax), color=color, thickness=2)
+
+         return image
+
+     def mask(
+         self, image: np.ndarray, inplace: bool = False, color: Optional[Tuple[np.uint8, np.uint8, np.uint8]] = None
+     ) -> np.ndarray:
+         if not inplace:
+             image = image.copy()
+         xmin, ymin, xmax, ymax = self.to_pixel_coordinates(image_shape=image.shape[:2])
+         xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
+
+         if color is None:
+             color = np.mean(image[ymin:ymax, xmin:xmax], axis=(0, 1)).astype(np.uint8)
+
+         image[ymin:ymax, xmin:xmax] = color
+         return image
+
+     def anonymize_by_blurring(self, image: np.ndarray, inplace: bool = False, max_resolution: int = 20) -> np.ndarray:
+         if not inplace:
+             image = image.copy()
+         xmin, ymin, xmax, ymax = self.to_pixel_coordinates(image_shape=image.shape[:2])
+         xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
+         blur_region = image[ymin:ymax, xmin:xmax]
+         blur_region_upsized = anonymize_by_resizing(blur_region, max_resolution=max_resolution)
+         image[ymin:ymax, xmin:xmax] = blur_region_upsized
+         return image
+
+     def get_class_name(self) -> str:
+         return get_class_name(self.class_name, self.class_idx)
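To make the coordinate conventions concrete: `from_coco` normalizes pixel values by the image size, and `to_coco`/`to_pixel_coordinates` scale back (the latter returning corner coordinates rather than width/height). A small round-trip sketch with made-up numbers:

```python
from hafnia.dataset.primitives.bbox import Bbox

# COCO-style box [x_min, y_min, width, height] in pixels, inside a 480x640 image
bbox = Bbox.from_coco([64, 48, 128, 96], height=480, width=640)

print(bbox.to_bbox())  # (0.1, 0.1, 0.2, 0.2) -- normalized (x_min, y_min, width, height)
print(bbox.to_coco(image_height=480, image_width=640))  # (64, 48, 128, 96)
print(bbox.to_pixel_coordinates(image_shape=(480, 640)))  # (64, 48, 192, 144) as (xmin, ymin, xmax, ymax)
```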
hafnia/dataset/primitives/bitmask.py
@@ -0,0 +1,182 @@
+ from __future__ import annotations
+
+ from typing import Any, Dict, Optional, Tuple
+
+ import cv2
+ import numpy as np
+ import pycocotools.mask as coco_mask
+
+ from hafnia.dataset.primitives.primitive import Primitive
+ from hafnia.dataset.primitives.utils import (
+     anonymize_by_resizing,
+     class_color_by_name,
+     get_class_name,
+     text_org_from_left_bottom_to_centered,
+ )
+
+
+ class Bitmask(Primitive):
+     # Names should match names in FieldName
+     top: int  # Bitmask top coordinate in pixels
+     left: int  # Bitmask left coordinate in pixels
+     height: int  # Bitmask height of the bounding box in pixels
+     width: int  # Bitmask width of the bounding box in pixels
+     rleString: str  # Run-length encoding (RLE) string for the bitmask region of size (height, width) at (top, left).
+     area: Optional[float] = None  # Area of the bitmask as a fraction of the region size, calculated from the mask
+     class_name: Optional[str] = None  # This should match the string in 'FieldName.CLASS_NAME'
+     class_idx: Optional[int] = None  # This should match the string in 'FieldName.CLASS_IDX'
+     object_id: Optional[str] = None  # This should match the string in 'FieldName.OBJECT_ID'
+     confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+     ground_truth: bool = True  # Whether this is ground truth or a prediction
+
+     task_name: str = ""  # Task name to support multiple Bitmask tasks in the same dataset. "" defaults to "bitmask"
+     meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bitmask
+
+     @staticmethod
+     def default_task_name() -> str:
+         return "bitmask"
+
+     @staticmethod
+     def column_name() -> str:
+         return "bitmasks"
+
+     def calculate_area(self) -> float:
+         raise NotImplementedError()
+
+     @staticmethod
+     def from_mask(
+         mask: np.ndarray,
+         top: int,  # Bounding box top coordinate in pixels
+         left: int,  # Bounding box left coordinate in pixels
+         class_name: Optional[str] = None,  # This should match the string in 'FieldName.CLASS_NAME'
+         class_idx: Optional[int] = None,  # This should match the string in 'FieldName.CLASS_IDX'
+         object_id: Optional[str] = None,  # This should match the string in 'FieldName.OBJECT_ID'
+     ) -> "Bitmask":
+         if len(mask.shape) != 2:
+             raise ValueError("Bitmask should be a 2-dimensional array.")
+
+         if mask.dtype != "|b1":
+             raise TypeError("Bitmask should be an array of boolean values. For numpy array call .astype(bool).")
+
+         h, w = mask.shape[:2]
+         area_pixels = np.sum(mask != 0)
+         area = area_pixels / (h * w)
+
+         mask_fortran = np.asfortranarray(mask)  # Convert to Fortran order for COCO encoding
+         rle_coding = coco_mask.encode(mask_fortran.astype(bool))  # Encode the mask using COCO RLE
+         rle_string = rle_coding["counts"].decode("utf-8")  # Convert the counts to string
+
+         return Bitmask(
+             top=top,
+             left=left,
+             height=h,
+             width=w,
+             area=area,
+             rleString=rle_string,
+             class_name=class_name,
+             class_idx=class_idx,
+             object_id=object_id,
+         )
+
+     def squeeze_mask(self) -> "Bitmask":
+         """
+         A mask may have large redundant areas of zeros. This function squeezes the mask to remove those areas.
+         """
+         region_mask = self.to_region_mask()
+         shift_left, last_left = np.flatnonzero(region_mask.sum(axis=0))[[0, -1]]
+         shift_top, last_top = np.flatnonzero(region_mask.sum(axis=1))[[0, -1]]
+         new_top = self.top + shift_top
+         new_left = self.left + shift_left
+         new_region_mask = region_mask[shift_top : last_top + 1, shift_left : last_left + 1]
+
+         bitmask_squeezed = Bitmask.from_mask(
+             mask=new_region_mask,
+             top=new_top,
+             left=new_left,
+             class_name=self.class_name,
+             class_idx=self.class_idx,
+             object_id=self.object_id,
+         )
+         return bitmask_squeezed
+
+     def anonymize_by_blurring(self, image: np.ndarray, inplace: bool = False, max_resolution: int = 20) -> np.ndarray:
+         if not inplace:
+             image = image.copy()
+         mask_tight = self.squeeze_mask()
+
+         mask_region = mask_tight.to_region_mask()
+         region_image = image[
+             mask_tight.top : mask_tight.top + mask_tight.height, mask_tight.left : mask_tight.left + mask_tight.width
+         ]
+         region_image_blurred = anonymize_by_resizing(blur_region=region_image, max_resolution=max_resolution)
+         image_mixed = np.where(mask_region[:, :, None], region_image_blurred, region_image)
+         image[
+             mask_tight.top : mask_tight.top + mask_tight.height, mask_tight.left : mask_tight.left + mask_tight.width
+         ] = image_mixed
+         return image
+
+     def to_region_mask(self) -> np.ndarray:
+         """Returns a binary mask from the RLE string. The mask covers only the region of the object, not the full image."""
+         rle = {"counts": self.rleString.encode(), "size": [self.height, self.width]}
+         mask = coco_mask.decode(rle) > 0
+         return mask
+
+     def to_mask(self, img_height: int, img_width: int) -> np.ndarray:
+         """Creates a full image mask from the RLE string."""
+
+         region_mask = self.to_region_mask()
+         bitmask_np = np.zeros((img_height, img_width), dtype=bool)
+         bitmask_np[self.top : self.top + self.height, self.left : self.left + self.width] = region_mask
+         return bitmask_np
+
+     def draw(self, image: np.ndarray, inplace: bool = False, draw_label: bool = True) -> np.ndarray:
+         if not inplace:
+             image = image.copy()
+         if image.ndim == 2:  # for grayscale/monochromatic images
+             image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+         img_height, img_width = image.shape[:2]
+         bitmask_np = self.to_mask(img_height=img_height, img_width=img_width)
+
+         class_name = self.get_class_name()
+         color = class_color_by_name(class_name)
+
+         # Creates transparent masking with the specified color
+         image_masked = image.copy()
+         image_masked[bitmask_np] = color
+         cv2.addWeighted(src1=image, alpha=0.3, src2=image_masked, beta=0.7, gamma=0, dst=image)
+
+         if draw_label:
+             # Determines the center of the mask
+             xy = np.stack(np.nonzero(bitmask_np))
+             xy_org = tuple(np.median(xy, axis=1).astype(int)[::-1])
+             font = cv2.FONT_HERSHEY_SIMPLEX
+             font_scale = 0.75
+             thickness = 2
+             xy_centered = text_org_from_left_bottom_to_centered(xy_org, class_name, font, font_scale, thickness)
+
+             cv2.putText(
+                 img=image,
+                 text=class_name,
+                 org=xy_centered,
+                 fontFace=font,
+                 fontScale=font_scale,
+                 color=(255, 255, 255),
+                 thickness=thickness,
+             )
+         return image
+
+     def mask(
+         self, image: np.ndarray, inplace: bool = False, color: Optional[Tuple[np.uint8, np.uint8, np.uint8]] = None
+     ) -> np.ndarray:
+         if not inplace:
+             image = image.copy()
+
+         bitmask_np = self.to_mask(img_height=image.shape[0], img_width=image.shape[1])
+
+         if color is None:
+             color = tuple(int(value) for value in np.mean(image[bitmask_np], axis=0))  # type: ignore[assignment]
+         image[bitmask_np] = color
+         return image
+
+     def get_class_name(self) -> str:
+         return get_class_name(self.class_name, self.class_idx)
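A quick sketch of the `from_mask`/`to_mask` round trip (requires `pycocotools` for the RLE encoding; the numbers are illustrative). Note that `height`/`width` describe the encoded region, while `top`/`left` place it in the full image:

```python
import numpy as np

from hafnia.dataset.primitives.bitmask import Bitmask

# A small boolean region mask -- from_mask requires dtype bool
region = np.zeros((4, 6), dtype=bool)
region[1:3, 2:5] = True

bitmask = Bitmask.from_mask(region, top=10, left=20, class_name="person")
print(bitmask.height, bitmask.width, bitmask.area)  # 4 6 0.25 (6 of 24 region pixels set)

full = bitmask.to_mask(img_height=32, img_width=32)  # region placed at (top, left)
print(full.shape, full.sum())  # (32, 32) 6
```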
hafnia/dataset/primitives/classification.py
@@ -0,0 +1,56 @@
+ from typing import Any, Dict, Optional, Tuple
+
+ import numpy as np
+
+ from hafnia.dataset.primitives.primitive import Primitive
+ from hafnia.dataset.primitives.utils import anonymize_by_resizing, get_class_name
+
+
+ class Classification(Primitive):
+     # Names should match names in FieldName
+     class_name: Optional[str] = None  # Class name, e.g. "car"
+     class_idx: Optional[int] = None  # Class index, e.g. 0 for "car" if it is the first class
+     object_id: Optional[str] = None  # Unique identifier for the object, e.g. "12345123"
+     confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification
+     ground_truth: bool = True  # Whether this is ground truth or a prediction
+
+     task_name: str = ""  # To support multiple Classification tasks in the same dataset. "" defaults to "classification"
+     meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the classification
+
+     @staticmethod
+     def default_task_name() -> str:
+         return "classification"
+
+     @staticmethod
+     def column_name() -> str:
+         return "classifications"
+
+     def calculate_area(self) -> float:
+         return 1.0
+
+     def draw(self, image: np.ndarray, inplace: bool = False, draw_label: bool = True) -> np.ndarray:
+         if draw_label is False:
+             return image
+         from hafnia.visualizations import image_visualizations
+
+         class_name = self.get_class_name()
+         if self.task_name == self.default_task_name():
+             text = class_name
+         else:
+             text = f"{self.task_name}: {class_name}"
+         image = image_visualizations.append_text_below_frame(image, text=text)
+
+         return image
+
+     def mask(
+         self, image: np.ndarray, inplace: bool = False, color: Optional[Tuple[np.uint8, np.uint8, np.uint8]] = None
+     ) -> np.ndarray:
+         # Classification does not have a mask effect, so we return the image as is
+         return image
+
+     def anonymize_by_blurring(self, image: np.ndarray, inplace: bool = False, max_resolution: int = 20) -> np.ndarray:
+         # Classification has no region, so the entire image is anonymized by resizing
+         return anonymize_by_resizing(image, max_resolution=max_resolution)
+
+     def get_class_name(self) -> str:
+         return get_class_name(self.class_name, self.class_idx)
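Classification is a whole-image primitive, so `draw` appends a text banner below the frame (via `append_text_below_frame`) instead of overlaying geometry, meaning the returned image is taller than the input. A quick sketch (the image contents are arbitrary):

```python
import numpy as np

from hafnia.dataset.primitives.classification import Classification

label = Classification(class_name="day", task_name="time_of_day")
print(label.get_class_name())  # "day"

image = np.zeros((64, 64, 3), dtype=np.uint8)
annotated = label.draw(image)  # banner reads "time_of_day: day" since the task name is non-default
```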
hafnia/dataset/primitives/point.py
@@ -0,0 +1,25 @@
+ from typing import Any, Tuple
+
+ from pydantic import BaseModel
+
+ from hafnia.dataset.primitives.utils import clip
+
+
+ class Point(BaseModel):
+     x: float
+     y: float
+
+     def to_pixel_coordinates(
+         self, image_shape: Tuple[int, int], as_int: bool = True, clip_values: bool = True
+     ) -> Tuple[Any, Any]:
+         x = self.x * image_shape[1]
+         y = self.y * image_shape[0]
+
+         if as_int:
+             x, y = int(round(x)), int(round(y))  # noqa: RUF046
+
+         if clip_values:
+             x = clip(value=x, v_min=0, v_max=image_shape[1])
+             y = clip(value=y, v_min=0, v_max=image_shape[0])
+
+         return x, y
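Like Bbox, Point stores normalized coordinates and converts to pixels on demand; note that `image_shape` is (height, width), so `x` scales by the second element. A small example:

```python
from hafnia.dataset.primitives.point import Point

point = Point(x=0.5, y=0.25)
print(point.to_pixel_coordinates(image_shape=(480, 640)))  # (320, 120)
print(point.to_pixel_coordinates(image_shape=(480, 640), as_int=False))  # (320.0, 120.0)
```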