hafnia 0.1.27__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- cli/__main__.py +2 -2
- cli/config.py +17 -4
- cli/dataset_cmds.py +60 -0
- cli/runc_cmds.py +1 -1
- hafnia/data/__init__.py +2 -2
- hafnia/data/factory.py +12 -56
- hafnia/dataset/dataset_helpers.py +91 -0
- hafnia/dataset/dataset_names.py +72 -0
- hafnia/dataset/dataset_recipe/dataset_recipe.py +327 -0
- hafnia/dataset/dataset_recipe/recipe_transforms.py +53 -0
- hafnia/dataset/dataset_recipe/recipe_types.py +140 -0
- hafnia/dataset/dataset_upload_helper.py +468 -0
- hafnia/dataset/hafnia_dataset.py +624 -0
- hafnia/dataset/operations/dataset_stats.py +15 -0
- hafnia/dataset/operations/dataset_transformations.py +82 -0
- hafnia/dataset/operations/table_transformations.py +183 -0
- hafnia/dataset/primitives/__init__.py +16 -0
- hafnia/dataset/primitives/bbox.py +137 -0
- hafnia/dataset/primitives/bitmask.py +182 -0
- hafnia/dataset/primitives/classification.py +56 -0
- hafnia/dataset/primitives/point.py +25 -0
- hafnia/dataset/primitives/polygon.py +100 -0
- hafnia/dataset/primitives/primitive.py +44 -0
- hafnia/dataset/primitives/segmentation.py +51 -0
- hafnia/dataset/primitives/utils.py +51 -0
- hafnia/experiment/hafnia_logger.py +7 -7
- hafnia/helper_testing.py +108 -0
- hafnia/http.py +5 -3
- hafnia/platform/__init__.py +2 -2
- hafnia/platform/datasets.py +197 -0
- hafnia/platform/download.py +85 -23
- hafnia/torch_helpers.py +180 -95
- hafnia/utils.py +21 -2
- hafnia/visualizations/colors.py +267 -0
- hafnia/visualizations/image_visualizations.py +202 -0
- {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/METADATA +209 -99
- hafnia-0.2.1.dist-info/RECORD +50 -0
- cli/data_cmds.py +0 -53
- hafnia-0.1.27.dist-info/RECORD +0 -27
- {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/WHEEL +0 -0
- {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/entry_points.txt +0 -0
- {hafnia-0.1.27.dist-info → hafnia-0.2.1.dist-info}/licenses/LICENSE +0 -0

hafnia/dataset/operations/dataset_transformations.py
@@ -0,0 +1,82 @@
+"""
+Hafnia dataset transformations that takes and returns a HafniaDataset object.
+
+All functions here will have a corresponding function in both the HafniaDataset class
+and a corresponding RecipeTransform class in the `data_recipe/recipe_transformations.py` file.
+
+This allows each function to be used in three ways:
+
+```python
+from hafnia.dataset.operations import dataset_transformations
+from hafnia.dataset.hafnia_dataset import HafniaDataset
+from hafnia.dataset.data_recipe.recipe_transformations import SplitByRatios
+
+splits_by_ratios = {"train": 0.8, "val": 0.1, "test": 0.1}
+
+# Option 1: Using the function directly
+dataset = recipe_transformations.splits_by_ratios(dataset, split_ratios=splits_by_ratios)
+
+# Option 2: Using the method of the HafniaDataset class
+dataset = dataset.splits_by_ratios(split_ratios=splits_by_ratios)
+
+# Option 3: Using the RecipeTransform class
+serializable_transform = SplitByRatios(split_ratios=splits_by_ratios)
+dataset = serializable_transform(dataset)
+```
+
+Tests will ensure that all functions in this file will have a corresponding function in the
+HafniaDataset class and a RecipeTransform class in the `data_recipe/recipe_transformations.py` file and
+that the signatures match.
+"""
+
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable
+
+import cv2
+import numpy as np
+import polars as pl
+from PIL import Image
+from tqdm import tqdm
+
+from hafnia.dataset import dataset_helpers
+
+if TYPE_CHECKING:
+    from hafnia.dataset.hafnia_dataset import HafniaDataset
+
+
+### Image transformations ###
+class AnonymizeByPixelation:
+    def __init__(self, resize_factor: float = 0.10):
+        self.resize_factor = resize_factor
+
+    def __call__(self, frame: np.ndarray) -> np.ndarray:
+        org_size = frame.shape[:2]
+        frame = cv2.resize(frame, (0, 0), fx=self.resize_factor, fy=self.resize_factor)
+        frame = cv2.resize(frame, org_size[::-1], interpolation=cv2.INTER_NEAREST)
+        return frame
+
+
+def transform_images(
+    dataset: "HafniaDataset",
+    transform: Callable[[np.ndarray], np.ndarray],
+    path_output: Path,
+) -> "HafniaDataset":
+    new_paths = []
+    path_image_folder = path_output / "data"
+    path_image_folder.mkdir(parents=True, exist_ok=True)
+
+    for org_path in tqdm(dataset.samples["file_name"].to_list(), desc="Transform images"):
+        org_path = Path(org_path)
+        if not org_path.exists():
+            raise FileNotFoundError(f"File {org_path} does not exist in the dataset.")
+
+        image = np.array(Image.open(org_path))
+        image_transformed = transform(image)
+        new_path = dataset_helpers.save_image_with_hash_name(image_transformed, path_image_folder)
+
+        if not new_path.exists():
+            raise FileNotFoundError(f"Transformed file {new_path} does not exist in the dataset.")
+        new_paths.append(str(new_path))
+
+    table = dataset.samples.with_columns(pl.Series(new_paths).alias("file_name"))
+    return dataset.update_table(table)
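
To make the new API concrete, here is a minimal usage sketch of the helpers in this file. The wrapper function name and output path are illustrative (not part of the package), and it assumes a `HafniaDataset` instance obtained elsewhere in the library:

```python
from pathlib import Path

from hafnia.dataset.hafnia_dataset import HafniaDataset
from hafnia.dataset.operations.dataset_transformations import AnonymizeByPixelation, transform_images


def anonymize_dataset(dataset: HafniaDataset, path_output: Path) -> HafniaDataset:
    """Write pixelated copies of every image and return a dataset pointing at the new files."""
    pixelate = AnonymizeByPixelation(resize_factor=0.05)  # smaller factor -> coarser pixelation
    return transform_images(dataset, transform=pixelate, path_output=path_output)
```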

hafnia/dataset/operations/table_transformations.py
@@ -0,0 +1,183 @@
+from pathlib import Path
+from typing import List, Optional, Type
+
+import polars as pl
+from tqdm import tqdm
+
+from hafnia.dataset.dataset_names import (
+    FILENAME_ANNOTATIONS_JSONL,
+    FILENAME_ANNOTATIONS_PARQUET,
+    FieldName,
+)
+from hafnia.dataset.operations import table_transformations
+from hafnia.dataset.primitives import PRIMITIVE_TYPES
+from hafnia.dataset.primitives.classification import Classification
+from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.log import user_logger
+
+
+def create_primitive_table(
+    samples_table: pl.DataFrame, PrimitiveType: Type[Primitive], keep_sample_data: bool = False
+) -> Optional[pl.DataFrame]:
+    """
+    Returns a DataFrame with objects of the specified primitive type.
+    """
+    column_name = PrimitiveType.column_name()
+    has_primitive_column = (column_name in samples_table.columns) and (
+        samples_table[column_name].dtype == pl.List(pl.Struct)
+    )
+    if not has_primitive_column:
+        return None
+
+    # Remove frames without objects
+    remove_no_object_frames = samples_table.filter(pl.col(column_name).list.len() > 0)
+
+    if keep_sample_data:
+        # Drop other primitive columns to avoid conflicts
+        drop_columns = set(PRIMITIVE_TYPES) - {PrimitiveType, Classification}
+        remove_no_object_frames = remove_no_object_frames.drop(*[primitive.column_name() for primitive in drop_columns])
+        # Rename columns "height", "width" and "meta" for sample to avoid conflicts with object fields names
+        remove_no_object_frames = remove_no_object_frames.rename(
+            {"height": "image.height", "width": "image.width", "meta": "image.meta"}
+        )
+        objects_df = remove_no_object_frames.explode(column_name).unnest(column_name)
+    else:
+        objects_df = remove_no_object_frames.select(pl.col(column_name).explode().struct.unnest())
+    return objects_df
+
+
+def filter_table_for_class_names(
+    samples_table: pl.DataFrame, class_names: List[str], PrimitiveType: Type[Primitive]
+) -> Optional[pl.DataFrame]:
+    table_with_selected_class_names = samples_table.filter(
+        pl.col(PrimitiveType.column_name())
+        .list.eval(pl.element().struct.field(FieldName.CLASS_NAME).is_in(class_names))
+        .list.any()
+    )
+
+    return table_with_selected_class_names
+
+
+def split_primitive_columns_by_task_name(
+    samples_table: pl.DataFrame,
+    coordinate_types: Optional[List[Type[Primitive]]] = None,
+) -> pl.DataFrame:
+    """
+    Convert Primitive columns such as "objects" (Bbox) into a column for each task name.
+    For example, if the "objects" column (containing Bbox objects) has tasks "task1" and "task2".
+
+
+    This:
+    ─┬────────────┬─
+     ┆ objects    ┆
+     ┆ ---        ┆
+     ┆ list[struc ┆
+     ┆ t[11]]     ┆
+    ═╪════════════╪═
+    becomes this:
+    ─┬────────────┬────────────┬─
+     ┆ objects.   ┆ objects.   ┆
+     ┆ task1      ┆ task2      ┆
+     ┆ ---        ┆ ---        ┆
+     ┆ list[struc ┆ list[struc ┆
+     ┆ t[11]]     ┆ t[13]]     ┆
+    ═╪════════════╪════════════╪═
+
+    """
+    coordinate_types = coordinate_types or PRIMITIVE_TYPES
+    for PrimitiveType in coordinate_types:
+        col_name = PrimitiveType.column_name()
+
+        if col_name not in samples_table.columns:
+            continue
+
+        if samples_table[col_name].dtype != pl.List(pl.Struct):
+            continue
+
+        task_names = samples_table[col_name].explode().struct.field(FieldName.TASK_NAME).unique().to_list()
+        samples_table = samples_table.with_columns(
+            [
+                pl.col(col_name)
+                .list.filter(pl.element().struct.field(FieldName.TASK_NAME).eq(task_name))
+                .alias(f"{col_name}.{task_name}")
+                for task_name in task_names
+            ]
+        )
+        samples_table = samples_table.drop(col_name)
+    return samples_table
+
+
+def read_table_from_path(path: Path) -> pl.DataFrame:
+    path_annotations = path / FILENAME_ANNOTATIONS_PARQUET
+    if path_annotations.exists():
+        user_logger.info(f"Reading dataset annotations from Parquet file: {path_annotations}")
+        return pl.read_parquet(path_annotations)
+
+    path_annotations_jsonl = path / FILENAME_ANNOTATIONS_JSONL
+    if path_annotations_jsonl.exists():
+        user_logger.info(f"Reading dataset annotations from JSONL file: {path_annotations_jsonl}")
+        return pl.read_ndjson(path_annotations_jsonl)
+
+    raise FileNotFoundError(
+        f"Unable to read annotations. No json file '{path_annotations.name}' or Parquet file '{{path_annotations.name}} in in '{path}'."
+    )
+
+
+def check_image_paths(table: pl.DataFrame) -> bool:
+    missing_files = []
+    for org_path in tqdm(table["file_name"].to_list(), desc="Check image paths"):
+        org_path = Path(org_path)
+        if not org_path.exists():
+            missing_files.append(org_path)
+
+    if len(missing_files) > 0:
+        user_logger.warning(f"Missing files: {len(missing_files)}. Show first 5:")
+        for missing_file in missing_files[:5]:
+            user_logger.warning(f" - {missing_file}")
+        raise FileNotFoundError(f"Some files are missing in the dataset: {len(missing_files)} files not found.")
+
+    return True
+
+
+def unnest_classification_tasks(table: pl.DataFrame, strict: bool = True) -> pl.DataFrame:
+    """
+    Unnest classification tasks in table.
+    Classificiations tasks are all stored in the same column in the HafniaDataset table.
+    This function splits them into separate columns for each task name.
+
+    Type is converted from a list of structs (pl.List[pl.Struct]) to a struct (pl.Struct) column.
+
+    Converts classification column from this:
+    ─┬─────────────────┬─
+     ┆ classifications ┆
+     ┆ ---             ┆
+     ┆ list[struct[6]] ┆
+    ═╪═════════════════╪═
+
+    For example, if the classification column has tasks "task1" and "task2",
+    ─┬──────────────────┬──────────────────┬─
+     ┆ classifications. ┆ classifications. ┆
+     ┆ task1            ┆ task2            ┆
+     ┆ ---              ┆ ---              ┆
+     ┆ struct[6]        ┆ struct[6]        ┆
+    ═╪══════════════════╪══════════════════╪═
+
+    """
+    coordinate_types = [Classification]
+    table_out = table_transformations.split_primitive_columns_by_task_name(table, coordinate_types=coordinate_types)
+
+    classification_columns = [c for c in table_out.columns if c.startswith(Classification.column_name() + ".")]
+    for classification_column in classification_columns:
+        has_multiple_items_per_sample = all(table_out[classification_column].list.len() > 1)
+        if has_multiple_items_per_sample:
+            if strict:
+                raise ValueError(
+                    f"Column {classification_column} has multiple items per sample, but expected only one item."
+                )
+            else:
+                user_logger.warning(
+                    f"Warning: Unnesting of column '{classification_column}' is skipped because it has multiple items per sample."
+                )
+
+    table_out = table_out.with_columns([pl.col(c).list.first() for c in classification_columns])
+    return table_out
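
A short sketch of how the table helpers above chain together. The wrapper function and dataset path are hypothetical; `Bbox` comes from the primitives package added below:

```python
from pathlib import Path
from typing import Optional

import polars as pl

from hafnia.dataset.operations.table_transformations import (
    check_image_paths,
    create_primitive_table,
    read_table_from_path,
)
from hafnia.dataset.primitives import Bbox


def load_bbox_table(path_dataset: Path) -> Optional[pl.DataFrame]:
    """Load annotations and flatten them to one row per bounding box (None if the column is absent)."""
    samples = read_table_from_path(path_dataset)  # Parquet annotations, falling back to JSONL
    check_image_paths(samples)  # raises FileNotFoundError if referenced images are missing
    return create_primitive_table(samples, PrimitiveType=Bbox)
```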

hafnia/dataset/primitives/__init__.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+from typing import List, Type
+
+from .bbox import Bbox
+from .bitmask import Bitmask
+from .classification import Classification
+from .point import Point  # noqa: F401
+from .polygon import Polygon
+from .primitive import Primitive
+from .segmentation import Segmentation  # noqa: F401
+from .utils import class_color_by_name  # noqa: F401
+
+PRIMITIVE_TYPES: List[Type[Primitive]] = [Bbox, Classification, Polygon, Bitmask]
+PRIMITIVE_NAME_TO_TYPE = {cls.__name__: cls for cls in PRIMITIVE_TYPES}
+PRIMITIVE_COLUMN_NAMES: List[str] = [PrimitiveType.column_name() for PrimitiveType in PRIMITIVE_TYPES]
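
The registry above makes primitive classes discoverable by name; a small illustrative lookup:

```python
from hafnia.dataset.primitives import PRIMITIVE_COLUMN_NAMES, PRIMITIVE_NAME_TO_TYPE

BboxType = PRIMITIVE_NAME_TO_TYPE["Bbox"]  # look up a primitive class by its class name
print(BboxType.column_name())  # "objects" (see bbox.py below)
print(PRIMITIVE_COLUMN_NAMES)  # the samples-table column name for each registered primitive
```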

hafnia/dataset/primitives/bbox.py
@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import cv2
+import numpy as np
+
+from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.dataset.primitives.utils import (
+    anonymize_by_resizing,
+    class_color_by_name,
+    clip,
+    get_class_name,
+    round_int_clip_value,
+)
+
+
+class Bbox(Primitive):
+    # Names should match names in FieldName
+    height: float  # Height of the bounding box as a fraction of the image height, e.g. 0.1 for 10% of the image height
+    width: float  # Width of the bounding box as a fraction of the image width, e.g. 0.1 for 10% of the image width
+    top_left_x: float  # X coordinate of top-left corner of Bbox as a fraction of the image width, e.g. 0.1 for 10% of the image width
+    top_left_y: float  # Y coordinate of top-left corner of Bbox as a fraction of the image height, e.g. 0.1 for 10% of the image height
+    class_name: Optional[str] = None  # Class name, e.g. "car"
+    class_idx: Optional[int] = None  # Class index, e.g. 0 for "car" if it is the first class
+    object_id: Optional[str] = None  # Unique identifier for the object, e.g. "12345123"
+    confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+    ground_truth: bool = True  # Whether this is ground truth or a prediction
+
+    task_name: str = ""  # Task name to support multiple Bbox tasks in the same dataset. "" defaults to "bboxes"
+    meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bitmask
+
+    @staticmethod
+    def default_task_name() -> str:
+        return "bboxes"
+
+    @staticmethod
+    def column_name() -> str:
+        return "objects"
+
+    def calculate_area(self) -> float:
+        return self.height * self.width
+
+    @staticmethod
+    def from_coco(bbox: List, height: int, width: int) -> Bbox:
+        """
+        Converts a COCO-style bounding box to a Bbox object.
+        The bbox is in the format [x_min, y_min, width, height].
+        """
+        x_min, y_min, bbox_width, bbox_height = bbox
+        return Bbox(
+            top_left_x=x_min / width,
+            top_left_y=y_min / height,
+            width=bbox_width / width,
+            height=bbox_height / height,
+        )
+
+    def to_bbox(self) -> Tuple[float, float, float, float]:
+        """
+        Converts Bbox to a tuple of (x_min, y_min, width, height) with normalized coordinates.
+        Values are floats in the range [0, 1].
+        """
+        return (self.top_left_x, self.top_left_y, self.width, self.height)
+
+    def to_coco(self, image_height: int, image_width: int) -> Tuple[int, int, int, int]:
+        xmin = round_int_clip_value(self.top_left_x * image_width, max_value=image_width)
+        bbox_width = round_int_clip_value(self.width * image_width, max_value=image_width)
+
+        ymin = round_int_clip_value(self.top_left_y * image_height, max_value=image_height)
+        bbox_height = round_int_clip_value(self.height * image_height, max_value=image_height)
+
+        return xmin, ymin, bbox_width, bbox_height
+
+    def to_pixel_coordinates(
+        self, image_shape: Tuple[int, int], as_int: bool = True, clip_values: bool = True
+    ) -> Union[Tuple[float, float, float, float], Tuple[int, int, int, int]]:
+        bb_height = self.height * image_shape[0]
+        bb_width = self.width * image_shape[1]
+        bb_top_left_x = self.top_left_x * image_shape[1]
+        bb_top_left_y = self.top_left_y * image_shape[0]
+        xmin, ymin, xmax, ymax = bb_top_left_x, bb_top_left_y, bb_top_left_x + bb_width, bb_top_left_y + bb_height
+
+        if as_int:
+            xmin, ymin, xmax, ymax = int(round(xmin)), int(round(ymin)), int(round(xmax)), int(round(ymax))  # noqa: RUF046
+
+        if clip_values:
+            xmin = clip(value=xmin, v_min=0, v_max=image_shape[1])
+            xmax = clip(value=xmax, v_min=0, v_max=image_shape[1])
+            ymin = clip(value=ymin, v_min=0, v_max=image_shape[0])
+            ymax = clip(value=ymax, v_min=0, v_max=image_shape[0])
+
+        return xmin, ymin, xmax, ymax
+
+    def draw(self, image: np.ndarray, inplace: bool = False, draw_label: bool = True) -> np.ndarray:
+        if not inplace:
+            image = image.copy()
+        xmin, ymin, xmax, ymax = self.to_pixel_coordinates(image_shape=image.shape[:2])
+
+        class_name = self.get_class_name()
+        color = class_color_by_name(class_name)
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        margin = 5
+        bottom_left = (xmin + margin, ymax - margin)
+        if draw_label:
+            cv2.putText(
+                img=image, text=class_name, org=bottom_left, fontFace=font, fontScale=0.75, color=color, thickness=2
+            )
+        cv2.rectangle(image, pt1=(xmin, ymin), pt2=(xmax, ymax), color=color, thickness=2)
+
+        return image
+
+    def mask(
+        self, image: np.ndarray, inplace: bool = False, color: Optional[Tuple[np.uint8, np.uint8, np.uint8]] = None
+    ) -> np.ndarray:
+        if not inplace:
+            image = image.copy()
+        xmin, ymin, xmax, ymax = self.to_pixel_coordinates(image_shape=image.shape[:2])
+        xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
+
+        if color is None:
+            color = np.mean(image[ymin:ymax, xmin:xmax], axis=(0, 1)).astype(np.uint8)
+
+        image[ymin:ymax, xmin:xmax] = color
+        return image
+
+    def anonymize_by_blurring(self, image: np.ndarray, inplace: bool = False, max_resolution: int = 20) -> np.ndarray:
+        if not inplace:
+            image = image.copy()
+        xmin, ymin, xmax, ymax = self.to_pixel_coordinates(image_shape=image.shape[:2])
+        xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
+        blur_region = image[ymin:ymax, xmin:xmax]
+        blur_region_upsized = anonymize_by_resizing(blur_region, max_resolution=max_resolution)
+        image[ymin:ymax, xmin:xmax] = blur_region_upsized
+        return image
+
+    def get_class_name(self) -> str:
+        return get_class_name(self.class_name, self.class_idx)
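
A worked sketch of the normalized-vs-pixel conventions in `Bbox`, assuming `Primitive` is a pydantic-style model so keyword construction works; the numbers follow directly from the field definitions above:

```python
import numpy as np

from hafnia.dataset.primitives import Bbox

# Coordinates are fractions of the image size, as the field comments describe.
bbox = Bbox(top_left_x=0.1, top_left_y=0.1, width=0.2, height=0.2, class_name="car")
print(bbox.to_coco(image_height=480, image_width=640))  # pixel box -> (64, 48, 128, 96)

# Round trip from a COCO-style [x_min, y_min, width, height] pixel box:
same_box = Bbox.from_coco([64, 48, 128, 96], height=480, width=640)
print(same_box.to_bbox())  # normalized (x_min, y_min, width, height) -> (0.1, 0.1, 0.2, 0.2)

canvas = np.zeros((480, 640, 3), dtype=np.uint8)
canvas = bbox.draw(canvas)  # rectangle plus "car" label rendered with OpenCV
```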

hafnia/dataset/primitives/bitmask.py
@@ -0,0 +1,182 @@
+from __future__ import annotations
+
+from typing import Any, Dict, Optional, Tuple
+
+import cv2
+import numpy as np
+import pycocotools.mask as coco_mask
+
+from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.dataset.primitives.utils import (
+    anonymize_by_resizing,
+    class_color_by_name,
+    get_class_name,
+    text_org_from_left_bottom_to_centered,
+)
+
+
+class Bitmask(Primitive):
+    # Names should match names in FieldName
+    top: int  # Bitmask top coordinate in pixels
+    left: int  # Bitmask left coordinate in pixels
+    height: int  # Bitmask height of the bounding box in pixels
+    width: int  # Bitmask width of the bounding box in pixels
+    rleString: str  # Run-length encoding (RLE) string for the bitmask region of size (height, width) at (top, left).
+    area: Optional[float] = None  # Area of the bitmask in pixels is calculated from the RLE string
+    class_name: Optional[str] = None  # This should match the string in 'FieldName.CLASS_NAME'
+    class_idx: Optional[int] = None  # This should match the string in 'FieldName.CLASS_IDX'
+    object_id: Optional[str] = None  # This should match the string in 'FieldName.OBJECT_ID'
+    confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Bbox
+    ground_truth: bool = True  # Whether this is ground truth or a prediction
+
+    task_name: str = ""  # Task name to support multiple Bitmask tasks in the same dataset. "" defaults to "bitmask"
+    meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bitmask
+
+    @staticmethod
+    def default_task_name() -> str:
+        return "bitmask"
+
+    @staticmethod
+    def column_name() -> str:
+        return "bitmasks"
+
+    def calculate_area(self) -> float:
+        raise NotImplementedError()
+
+    @staticmethod
+    def from_mask(
+        mask: np.ndarray,
+        top: int,  # Bounding box top coordinate in pixels
+        left: int,  # Bounding box left coordinate in pixels
+        class_name: Optional[str] = None,  # This should match the string in 'FieldName.CLASS_NAME'
+        class_idx: Optional[int] = None,  # This should match the string in 'FieldName.CLASS_IDX'
+        object_id: Optional[str] = None,  # This should match the string in 'FieldName.OBJECT_ID') -> "Bitmask":
+    ):
+        if len(mask.shape) != 2:
+            raise ValueError("Bitmask should be a 2-dimensional array.")
+
+        if mask.dtype != "|b1":
+            raise TypeError("Bitmask should be an array of boolean values. For numpy array call .astype(bool).")
+
+        h, w = mask.shape[:2]
+        area_pixels = np.sum(mask != 0)
+        area = area_pixels / (h * w)
+
+        mask_fortran = np.asfortranarray(mask, np.prod(h * w))  # Convert to Fortran order for COCO encoding
+        rle_coding = coco_mask.encode(mask_fortran.astype(bool))  # Encode the mask using COCO RLE
+        rle_string = rle_coding["counts"].decode("utf-8")  # Convert the counts to string
+
+        return Bitmask(
+            top=top,
+            left=left,
+            height=h,
+            width=w,
+            area=area,
+            rleString=rle_string,
+            class_name=class_name,
+            class_idx=class_idx,
+            object_id=object_id,
+        )
+
+    def squeeze_mask(self):
+        """
+        A mask may have large redundant areas of zeros. This function squeezes the mask to remove those areas.
+        """
+        region_mask = self.to_region_mask()
+        shift_left, last_left = np.flatnonzero(region_mask.sum(axis=0))[[0, -1]]
+        shift_top, last_top = np.flatnonzero(region_mask.sum(axis=1))[[0, -1]]
+        new_top = self.top + shift_top
+        new_left = self.left + shift_left
+        new_region_mask = region_mask[shift_top : last_top + 1, shift_left : last_left + 1]
+
+        bitmask_squeezed = Bitmask.from_mask(
+            mask=new_region_mask,
+            top=new_top,
+            left=new_left,
+            class_name=self.class_name,
+            class_idx=self.class_idx,
+            object_id=self.object_id,
+        )
+        return bitmask_squeezed
+
+    def anonymize_by_blurring(self, image: np.ndarray, inplace: bool = False, max_resolution: int = 20) -> np.ndarray:
+        mask_tight = self.squeeze_mask()
+
+        mask_region = mask_tight.to_region_mask()
+        region_image = image[
+            mask_tight.top : mask_tight.top + mask_tight.height, mask_tight.left : mask_tight.left + mask_tight.width
+        ]
+        region_image_blurred = anonymize_by_resizing(blur_region=region_image, max_resolution=max_resolution)
+        image_mixed = np.where(mask_region[:, :, None], region_image_blurred, region_image)
+        image[
+            mask_tight.top : mask_tight.top + mask_tight.height, mask_tight.left : mask_tight.left + mask_tight.width
+        ] = image_mixed
+        return image
+
+    def to_region_mask(self) -> np.ndarray:
+        """Returns a binary mask from the RLE string. The masks is only the region of the object and not the full image."""
+        rle = {"counts": self.rleString.encode(), "size": [self.height, self.width]}
+        mask = coco_mask.decode(rle) > 0
+        return mask
+
+    def to_mask(self, img_height: int, img_width: int) -> np.ndarray:
+        """Creates a full image mask from the RLE string."""
+
+        region_mask = self.to_region_mask()
+        bitmask_np = np.zeros((img_height, img_width), dtype=bool)
+        bitmask_np[self.top : self.top + self.height, self.left : self.left + self.width] = region_mask
+        return bitmask_np
+
+    def draw(self, image: np.ndarray, inplace: bool = False, draw_label: bool = True) -> np.ndarray:
+        if not inplace:
+            image = image.copy()
+        if image.ndim == 2:  # for grayscale/monochromatic images
+            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+        img_height, img_width = image.shape[:2]
+        bitmask_np = self.to_mask(img_height=img_height, img_width=img_width)
+
+        class_name = self.get_class_name()
+        color = class_color_by_name(class_name)
+
+        # Creates transparent masking with the specified color
+        image_masked = image.copy()
+        image_masked[bitmask_np] = color
+        cv2.addWeighted(src1=image, alpha=0.3, src2=image_masked, beta=0.7, gamma=0, dst=image)
+
+        if draw_label:
+            # Determines the center of mask
+            xy = np.stack(np.nonzero(bitmask_np))
+            xy_org = tuple(np.median(xy, axis=1).astype(int))[::-1]
+
+            xy_org = np.median(xy, axis=1).astype(int)[::-1]
+            font = cv2.FONT_HERSHEY_SIMPLEX
+            font_scale = 0.75
+            thickness = 2
+            xy_centered = text_org_from_left_bottom_to_centered(xy_org, class_name, font, font_scale, thickness)
+
+            cv2.putText(
+                img=image,
+                text=class_name,
+                org=xy_centered,
+                fontFace=font,
+                fontScale=font_scale,
+                color=(255, 255, 255),
+                thickness=thickness,
+            )
+        return image
+
+    def mask(
+        self, image: np.ndarray, inplace: bool = False, color: Optional[Tuple[np.uint8, np.uint8, np.uint8]] = None
+    ) -> np.ndarray:
+        if not inplace:
+            image = image.copy()
+
+        bitmask_np = self.to_mask(img_height=image.shape[0], img_width=image.shape[1])
+
+        if color is None:
+            color = tuple(int(value) for value in np.mean(image[bitmask_np], axis=0))  # type: ignore[assignment]
+        image[bitmask_np] = color
+        return image
+
+    def get_class_name(self) -> str:
+        return get_class_name(self.class_name, self.class_idx)
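
The split between the region mask and the full-image mask is the key idea in `Bitmask`; a minimal sketch of how the two relate (the helper function is not part of the package, and `bitmask` is assumed to come from a dataset sample, e.g. one entry of the "bitmasks" column):

```python
import numpy as np

from hafnia.dataset.primitives import Bitmask


def overlay_bitmask(bitmask: Bitmask, image: np.ndarray) -> np.ndarray:
    region = bitmask.to_region_mask()  # boolean mask covering only the (height, width) crop
    full = bitmask.to_mask(img_height=image.shape[0], img_width=image.shape[1])  # full-image mask
    assert full.sum() == region.sum()  # same pixels, placed at the (top, left) offset
    return bitmask.draw(image)  # semi-transparent overlay plus a centered class label
```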

hafnia/dataset/primitives/classification.py
@@ -0,0 +1,56 @@
+from typing import Any, Dict, Optional, Tuple
+
+import numpy as np
+
+from hafnia.dataset.primitives.primitive import Primitive
+from hafnia.dataset.primitives.utils import anonymize_by_resizing, get_class_name
+
+
+class Classification(Primitive):
+    # Names should match names in FieldName
+    class_name: Optional[str] = None  # Class name, e.g. "car"
+    class_idx: Optional[int] = None  # Class index, e.g. 0 for "car" if it is the first class
+    object_id: Optional[str] = None  # Unique identifier for the object, e.g. "12345123"
+    confidence: Optional[float] = None  # Confidence score (0-1.0) for the primitive, e.g. 0.95 for Classification
+    ground_truth: bool = True  # Whether this is ground truth or a prediction
+
+    task_name: str = ""  # To support multiple Classification tasks in the same dataset. "" defaults to "classification"
+    meta: Optional[Dict[str, Any]] = None  # This can be used to store additional information about the bitmask
+
+    @staticmethod
+    def default_task_name() -> str:
+        return "classification"
+
+    @staticmethod
+    def column_name() -> str:
+        return "classifications"
+
+    def calculate_area(self) -> float:
+        return 1.0
+
+    def draw(self, image: np.ndarray, inplace: bool = False, draw_label: bool = True) -> np.ndarray:
+        if draw_label is False:
+            return image
+        from hafnia.visualizations import image_visualizations
+
+        class_name = self.get_class_name()
+        if self.task_name == self.default_task_name():
+            text = class_name
+        else:
+            text = f"{self.task_name}: {class_name}"
+        image = image_visualizations.append_text_below_frame(image, text=text)
+
+        return image
+
+    def mask(
+        self, image: np.ndarray, inplace: bool = False, color: Optional[Tuple[np.uint8, np.uint8, np.uint8]] = None
+    ) -> np.ndarray:
+        # Classification does not have a mask effect, so we return the image as is
+        return image
+
+    def anonymize_by_blurring(self, image: np.ndarray, inplace: bool = False, max_resolution: int = 20) -> np.ndarray:
+        # Classification does not have a blur effect, so we return the image as is
+        return anonymize_by_resizing(image, max_resolution=max_resolution)
+
+    def get_class_name(self) -> str:
+        return get_class_name(self.class_name, self.class_idx)
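
`Classification` is image-level and carries no geometry; a small sketch of a task-scoped label (the "weather" task name is illustrative, and keyword construction again assumes a pydantic-style model):

```python
from hafnia.dataset.primitives import Classification

weather = Classification(class_name="rainy", class_idx=2, task_name="weather")
print(Classification.column_name())  # "classifications": all classification tasks share one column
print(weather.get_class_name())  # resolved from class_name/class_idx, here "rainy"
```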

hafnia/dataset/primitives/point.py
@@ -0,0 +1,25 @@
+from typing import Any, Tuple
+
+from pydantic import BaseModel
+
+from hafnia.dataset.primitives.utils import clip
+
+
+class Point(BaseModel):
+    x: float
+    y: float
+
+    def to_pixel_coordinates(
+        self, image_shape: Tuple[int, int], as_int: bool = True, clip_values: bool = True
+    ) -> Tuple[Any, Any]:
+        x = self.x * image_shape[1]
+        y = self.y * image_shape[0]
+
+        if as_int:
+            x, y = int(round(x)), int(round(y))  # noqa: RUF046
+
+        if clip_values:
+            x = clip(value=x, v_min=0, v_max=image_shape[1])
+            y = clip(value=y, v_min=0, v_max=image_shape[0])
+
+        return x, y
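
`Point` follows the same normalized-coordinate convention as `Bbox`; a short sketch:

```python
from hafnia.dataset.primitives import Point

point = Point(x=0.25, y=0.5)  # fractions of image width and height
print(point.to_pixel_coordinates(image_shape=(480, 640)))  # image_shape is (height, width) -> (160, 240)
```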